Image Generation with Diffusion Models
This guide demonstrates how to build powerful image generation services using state-of-the-art diffusion models like FLUX.1. You'll learn to create a complete image generation API with custom parameters, validation, and optimization.
Overview
The Chutes platform makes it easy to deploy advanced image generation models:
- FLUX.1 [dev]: 12 billion parameter rectified flow transformer
- Stable Diffusion: Various versions and fine-tuned models
- Custom Models: Support for any diffusion architecture
- GPU Optimization: Automatic scaling and memory management
Complete FLUX.1 Implementation
Input Schema Design
First, define comprehensive input validation using Pydantic:
from pydantic import BaseModel, Field
from typing import Optional
class GenerationInput(BaseModel):
prompt: str
height: int = Field(default=1024, ge=128, le=2048)
width: int = Field(default=1024, ge=128, le=2048)
num_inference_steps: int = Field(default=10, ge=1, le=30)
guidance_scale: float = Field(default=7.5, ge=1.0, le=20.0)
seed: Optional[int] = Field(default=None, ge=0, le=2**32 - 1)
# Simplified input for basic usage
class MinifiedGenerationInput(BaseModel):
prompt: str = "a beautiful mountain landscape"Custom Image Configuration
Custom Image Configuration
Create a pre-built image with the FLUX.1 model:
from chutes.image import Image
# Create a markdown readme from model documentation
readme = """`FLUX.1 [dev]` is a 12 billion parameter rectified flow transformer capable of generating images from text descriptions.
# Key Features
1. Cutting-edge output quality, second only to our state-of-the-art model `FLUX.1 [pro]`.
2. Competitive prompt following, matching the performance of closed source alternatives.
3. Trained using guidance distillation, making `FLUX.1 [dev]` more efficient.
4. Open weights to drive new scientific research, and empower artists to develop innovative workflows.
5. Generated outputs can be used for personal, scientific, and commercial purposes.
"""
# Use pre-built image with FLUX.1 model
image = (
Image(
username="myuser",
name="flux.1-dev",
tag="0.0.2",
readme=readme)
.from_base("parachutes/flux.1-dev:latest")
)
Chute Configuration
Set up the service with appropriate hardware requirements:
from chutes.chute import Chute, NodeSelector
chute = Chute(
username="myuser",
name="FLUX.1-dev-generator",
readme=readme,
image=image,
    # This model is quite large, so we'll require GPUs with at least 80GB of VRAM to run it.
    node_selector=NodeSelector(
        gpu_count=1,
        min_vram_gb_per_gpu=80,
    ),
# Limit one request at a time.
concurrency=1,
)
Model Initialization
Initialize the diffusion pipeline on startup:
@chute.on_startup()
async def initialize_pipeline(self):
"""
Initialize the pipeline, download model if necessary.
    This code never runs on your machine directly; it runs on the GPU nodes
    powering Chutes.
"""
import torch
from diffusers import FluxPipeline
self.torch = torch
torch.cuda.empty_cache()
torch.cuda.init()
torch.cuda.set_device(0)
self.pipeline = FluxPipeline.from_pretrained(
"black-forest-labs/FLUX.1-dev",
torch_dtype=torch.bfloat16,
local_files_only=True,
cache_dir="/home/chutes/.cache/huggingface/hub",
).to("cuda")Generation Endpoint
Generation Endpoint
Create the main image generation endpoint:
import uuid
from io import BytesIO
from fastapi import Response
@chute.cord(
# Expose this function via the subdomain-based chutes.ai HTTP invocation, e.g.
# this becomes https://{username}-{chute slug}.chutes.ai/generate
public_api_path="/generate",
# The function is invoked in the subdomain-based system via POSTs.
method="POST",
# Input/minimal input schemas.
input_schema=GenerationInput,
minimal_input_schema=MinifiedGenerationInput,
# Set output content type header to image/jpeg so we can return the raw image.
output_content_type="image/jpeg",
)
async def generate(self, params: GenerationInput) -> Response:
"""
Generate an image.
"""
generator = None
if params.seed is not None:
generator = self.torch.Generator(device="cuda").manual_seed(params.seed)
with self.torch.inference_mode():
result = self.pipeline(
prompt=params.prompt,
height=params.height,
width=params.width,
num_inference_steps=params.num_inference_steps,
guidance_scale=params.guidance_scale,
max_sequence_length=256,
generator=generator,
)
image = result.images[0]
buffer = BytesIO()
image.save(buffer, format="JPEG", quality=85)
buffer.seek(0)
return Response(
content=buffer.getvalue(),
media_type="image/jpeg",
headers={"Content-Disposition": f'attachment; filename="{uuid.uuid4()}.jpg"'},
    )
Alternative: Stable Diffusion Setup
For a more customizable approach using Stable Diffusion:
from chutes.image import Image
from chutes.chute import Chute, NodeSelector
# Build custom Stable Diffusion image
image = (
Image(username="myuser", name="stable-diffusion", tag="2.1")
.from_base("nvidia/cuda:12.4.1-runtime-ubuntu22.04")
.with_python("3.11")
.run_command("apt update && apt install -y python3 python3-pip git")
.run_command("pip3 install torch>=2.4.0 torchvision --index-url https://download.pytorch.org/whl/cu124")
.run_command("pip3 install diffusers>=0.29.0 transformers>=4.44.0 accelerate>=0.33.0")
.run_command("pip3 install fastapi uvicorn pydantic pillow")
.set_workdir("/app")
)
chute = Chute(
username="myuser",
name="stable-diffusion-xl",
image=image,
node_selector=NodeSelector(
gpu_count=1,
min_vram_gb_per_gpu=24),
concurrency=2)
@chute.on_startup()
async def load_sd_pipeline(self):
"""Load Stable Diffusion XL pipeline."""
from diffusers import StableDiffusionXLPipeline
import torch
self.pipe = StableDiffusionXLPipeline.from_pretrained(
"stabilityai/stable-diffusion-xl-base-1.0",
torch_dtype=torch.float16,
use_safetensors=True).to("cuda")
    # Reduce peak memory use with attention slicing (xformers is not
    # installed in this image, so we slice attention instead)
    self.pipe.enable_attention_slicing()
@chute.cord(public_api_path="/sdxl", method="POST")
async def generate_sdxl(self, prompt: str, width: int = 1024, height: int = 1024):
"""Generate images with Stable Diffusion XL."""
images = self.pipe(
prompt,
width=width,
height=height,
num_inference_steps=20).images
    # Return the first image as base64
    import base64
    from io import BytesIO
    buffer = BytesIO()
    images[0].save(buffer, format="PNG")
    return {"image": base64.b64encode(buffer.getvalue()).decode()}
Advanced Features
Batch Generation
Generate multiple images in a single request:
import base64
from io import BytesIO
from typing import List
class BatchGenerationInput(BaseModel):
prompts: List[str] = Field(max_items=4) # Limit batch size
width: int = Field(default=1024, ge=512, le=2048)
height: int = Field(default=1024, ge=512, le=2048)
num_inference_steps: int = Field(default=20, ge=10, le=50)
@chute.cord(public_api_path="/batch", method="POST")
async def generate_batch(self, params: BatchGenerationInput) -> List[str]:
"""Generate multiple images from prompts."""
results = []
for prompt in params.prompts:
with self.torch.inference_mode():
result = self.pipeline(
prompt=prompt,
width=params.width,
height=params.height,
num_inference_steps=params.num_inference_steps)
# Convert to base64
buffer = BytesIO()
result.images[0].save(buffer, format="JPEG", quality=90)
b64_image = base64.b64encode(buffer.getvalue()).decode()
results.append(b64_image)
    return results
Image-to-Image Generation
Transform existing images with text prompts:
import base64
from PIL import Image as PILImage
class Img2ImgInput(BaseModel):
prompt: str
image_b64: str # Base64 encoded input image
strength: float = Field(default=0.75, ge=0.1, le=1.0)
guidance_scale: float = Field(default=7.5, ge=1.0, le=20.0)
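# NOTE: the endpoint below needs an image-to-image pipeline; a text-to-image
# FluxPipeline does not accept `image` or `strength`. A minimal startup hook
# loading one might look like this (the model choice here is an assumption):
@chute.on_startup()
async def load_img2img_pipeline(self):
    import torch
    from diffusers import AutoPipelineForImage2Image
    self.img2img_pipe = AutoPipelineForImage2Image.from_pretrained(
        "stabilityai/stable-diffusion-xl-base-1.0",
        torch_dtype=torch.float16,
    ).to("cuda")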
@chute.cord(public_api_path="/img2img", method="POST")
async def image_to_image(self, params: Img2ImgInput) -> Response:
"""Transform images with text prompts."""
# Decode input image
image_data = base64.b64decode(params.image_b64)
init_image = PILImage.open(BytesIO(image_data)).convert("RGB")
    # Generate the transformed image with the image-to-image pipeline
    with self.torch.inference_mode():
        result = self.img2img_pipe(
            prompt=params.prompt,
            image=init_image,
            strength=params.strength,
            guidance_scale=params.guidance_scale)
# Return as JPEG
buffer = BytesIO()
result.images[0].save(buffer, format="JPEG", quality=85)
buffer.seek(0)
return Response(
content=buffer.getvalue(),
media_type="image/jpeg")Inpainting Support
Fill or edit specific regions of images:
class InpaintInput(BaseModel):
prompt: str
image_b64: str # Original image
mask_b64: str # Mask (white = inpaint, black = keep)
strength: float = Field(default=0.75, ge=0.1, le=1.0)
@chute.on_startup()
async def load_inpaint_pipeline(self):
"""Load inpainting-specific pipeline."""
    import torch
    from diffusers import StableDiffusionInpaintPipeline
    self.inpaint_pipe = StableDiffusionInpaintPipeline.from_pretrained(
        "runwayml/stable-diffusion-inpainting",
        torch_dtype=torch.float16).to("cuda")
@chute.cord(public_api_path="/inpaint", method="POST")
async def inpaint(self, params: InpaintInput) -> Response:
"""Inpaint regions of images."""
# Decode images
image_data = base64.b64decode(params.image_b64)
mask_data = base64.b64decode(params.mask_b64)
image = PILImage.open(BytesIO(image_data)).convert("RGB")
mask = PILImage.open(BytesIO(mask_data)).convert("L")
# Generate inpainted result
result = self.inpaint_pipe(
prompt=params.prompt,
image=image,
mask_image=mask,
strength=params.strength)
# Return result
buffer = BytesIO()
result.images[0].save(buffer, format="PNG")
buffer.seek(0)
    return Response(content=buffer.getvalue(), media_type="image/png")
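Clients supply the mask as base64, with white marking the region to repaint; a purely illustrative sketch that builds a rectangular mask with Pillow:
import base64
from io import BytesIO
from PIL import Image, ImageDraw
# White pixels are inpainted, black pixels are kept.
mask = Image.new("L", (1024, 1024), 0)
ImageDraw.Draw(mask).rectangle([256, 256, 768, 768], fill=255)
buf = BytesIO()
mask.save(buf, format="PNG")
mask_b64 = base64.b64encode(buf.getvalue()).decode()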
Deployment and Usage
Deploy Your Service
# Build and deploy the image generation service
chutes deploy my_image_gen:chute
# Monitor deployment status
chutes chutes get my-image-gen
Using the API
Basic Generation
curl -X POST "https://myuser-my-image-gen.chutes.ai/generate" \
-H "Content-Type: application/json" \
-d '{
"prompt": "a majestic dragon flying over a crystal lake at sunset",
"width": 1024,
"height": 1024,
"num_inference_steps": 20,
"guidance_scale": 7.5,
"seed": 42
}' \
  --output generated_image.jpg
Python Client
import requests
import base64
from PIL import Image
from io import BytesIO
def generate_image(prompt, **kwargs):
"""Generate image using your Chutes service."""
url = "https://myuser-my-image-gen.chutes.ai/generate"
payload = {
"prompt": prompt,
**kwargs
}
response = requests.post(url, json=payload)
if response.status_code == 200:
# Save image
with open("generated.jpg", "wb") as f:
f.write(response.content)
# Or display in Jupyter
image = Image.open(BytesIO(response.content))
return image
else:
print(f"Error: {response.status_code}")
return None
# Generate an image
image = generate_image(
"a cyberpunk cityscape with neon lights and flying cars",
width=1920,
height=1080,
num_inference_steps=25,
seed=123
)
Performance Optimization
Memory Management
# Enable memory-efficient attention (requires the xformers package)
self.pipeline.enable_xformers_memory_efficient_attention()
# Use attention slicing for large images
self.pipeline.enable_attention_slicing()
# Enable CPU offloading for very large models
self.pipeline.enable_model_cpu_offload()
Speed Optimizations
# Compile the denoiser for faster inference. For UNet-based pipelines:
self.pipeline.unet = torch.compile(self.pipeline.unet, mode="reduce-overhead")
# FLUX pipelines expose the denoiser as .transformer instead:
# self.pipeline.transformer = torch.compile(self.pipeline.transformer, mode="reduce-overhead")
# Use faster schedulers
from diffusers import DPMSolverMultistepScheduler
self.pipeline.scheduler = DPMSolverMultistepScheduler.from_config(
self.pipeline.scheduler.config
)
Hardware Scaling
# Scale up for higher throughput
node_selector = NodeSelector(
gpu_count=2, # Multi-GPU setup
min_vram_gb_per_gpu=40)
# Or scale out with multiple instances
chute = Chute(
# ... configuration
concurrency=4, # Handle more concurrent requests
)
Best Practices
1. Prompt Engineering
# Good prompts are specific and detailed
good_prompt = """
a photorealistic portrait of a wise old wizard with a long white beard,
wearing a starry blue robe, holding a glowing crystal staff,
in a mystical forest clearing with soft golden sunlight filtering through trees,
highly detailed, 8k resolution, fantasy art style
"""
# Add negative prompts to avoid unwanted elements
negative_prompt = """
blurry, low quality, deformed, ugly, bad anatomy,
watermark, signature, text, cropped
"""2. Parameter Tuning
2. Parameter Tuning
# High quality settings
high_quality_params = {
"num_inference_steps": 50,
"guidance_scale": 7.5,
"width": 1024,
"height": 1024,
}
# Fast generation settings
fast_params = {
"num_inference_steps": 15,
"guidance_scale": 5.0,
"width": 512,
"height": 512,
}
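Either preset can be unpacked directly into a pipeline call; a small usage sketch:
# Unpack the chosen preset into the generation call.
result = self.pipeline(prompt="a misty harbor at dawn", **high_quality_params)
image = result.images[0]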
@chute.cord(public_api_path="/generate", method="POST")
async def generate_with_fallback(self, params: GenerationInput) -> Response:
"""Generate with proper error handling."""
try:
# Try high-quality generation first
result = self.pipeline(
prompt=params.prompt,
width=params.width,
height=params.height,
num_inference_steps=params.num_inference_steps)
except torch.cuda.OutOfMemoryError:
# Fallback to lower resolution
logger.warning("OOM error, reducing resolution")
result = self.pipeline(
prompt=params.prompt,
width=params.width // 2,
height=params.height // 2,
num_inference_steps=params.num_inference_steps // 2)
except Exception as e:
logger.error(f"Generation failed: {e}")
raise HTTPException(status_code=500, detail="Generation failed")
    # Return image...
Monitoring and Scaling
Resource Monitoring
# Check GPU utilization
chutes chutes metrics my-image-gen
# View generation logs
chutes chutes logs my-image-gen --tail 100
# Monitor request patterns
chutes chutes status my-image-gen
Auto-scaling Configuration
# Configure auto-scaling based on queue length
chute = Chute(
# ... other config
concurrency=2, # Base concurrency
max_replicas=5, # Scale up to 5 instances
scale_up_threshold=10, # Scale when queue > 10
scale_down_delay=300, # Wait 5 min before scaling down
)
Next Steps
- Advanced Models: Experiment with ControlNet, LoRA fine-tuning
- Custom Training: Train models on your own datasets
- Integration: Build web interfaces and mobile apps
- Optimization: Implement caching and CDN distribution
For more advanced examples, see: