The @chute.cord() decorator creates HTTP API endpoints in Chutes applications. Cords are the primary way to expose functionality from your chute. This reference covers all parameters, patterns, and best practices.
from fastapi import HTTPException
@chute.cord(public_api_path="/generate")
async def generate_with_errors(self, prompt: str):
    """Generate text for *prompt*, translating failures into HTTP errors.

    Raises:
        HTTPException: 400 for an empty/whitespace-only or oversized prompt;
            500 when the underlying model call fails.
    """
    # Validate input before touching the model.
    if not prompt.strip():
        raise HTTPException(
            status_code=400,
            detail="Prompt cannot be empty"
        )

    if len(prompt) > 10000:
        raise HTTPException(
            status_code=400,
            detail="Prompt too long (max 10,000 characters)"
        )

    try:
        result = await self.model.generate(prompt)
        return {"generated_text": result}
    except Exception as e:
        # Surface any model failure as a clean 500 instead of an unhandled crash.
        raise HTTPException(
            status_code=500,
            detail=f"Generation failed: {str(e)}"
        )
Complete Example
from chutes.chute import Chute, NodeSelector
from chutes.image import Image
from pydantic import BaseModel, Field
from fastapi import HTTPException
from fastapi.responses import StreamingResponse
import json
# Build the container image step by step: base image first, then dependencies.
image = Image(username="myuser", name="text-gen", tag="1.0")
image = image.from_base("parachutes/python:3.12")
image = image.run_command("pip install transformers torch")
# Hardware requirements: a single GPU with at least 16 GB of VRAM.
gpu_profile = NodeSelector(gpu_count=1, min_vram_gb_per_gpu=16)

# The chute serves up to 4 concurrent requests on the image built above.
chute = Chute(
    username="myuser",
    name="text-generator",
    image=image,
    node_selector=gpu_profile,
    concurrency=4,
)
class GenerationInput(BaseModel):
    """Fully validated request body for the /generate endpoint."""

    # Required; length bounds enforced by pydantic before the handler runs.
    prompt: str = Field(..., min_length=1, max_length=10000)
    max_tokens: int = Field(100, ge=1, le=2000)
    temperature: float = Field(0.7, ge=0.0, le=2.0)
class SimpleInput(BaseModel):
    """Minimal request body: just the prompt, relying on server-side defaults."""

    prompt: str
@chute.on_startup()
async def load_model(self):
    """Load the GPT-2 text-generation pipeline onto the first GPU at startup."""
    from transformers import pipeline

    # device=0 pins the pipeline to the first (and only) requested GPU.
    self.generator = pipeline("text-generation", model="gpt2", device=0)
@chute.cord(
    public_api_path="/generate",
    public_api_method="POST",
    input_schema=GenerationInput,
    minimal_input_schema=SimpleInput,
)
async def generate(self, params: GenerationInput) -> dict:
    """Generate text from a prompt."""
    result = self.generator(
        params.prompt,
        max_length=params.max_tokens,
        temperature=params.temperature,
    )[0]["generated_text"]

    # NOTE(review): tokens_used is a whitespace word count, not true model
    # token usage — acceptable for an example, but confirm before relying on it.
    return {
        "generated_text": result,
        "tokens_used": len(result.split()),
    }
@chute.cord(
    public_api_path="/stream",
    public_api_method="POST",
    stream=True,
)
async def stream_generate(self, prompt: str):
    """Stream text generation token by token as server-sent events."""

    async def generate():
        # Simulated streaming: emit each word of the prompt as one SSE chunk.
        words = prompt.split()
        for word in words:
            yield f"data: {json.dumps({'token': word + ' '})}\n\n"
        # Final sentinel event so clients know the stream is complete.
        yield f"data: {json.dumps({'finished': True})}\n\n"

    return StreamingResponse(generate(), media_type="text/event-stream")
@chute.cord(public_api_path="/health", public_api_method="GET")
async def health(self) -> dict:
    """Health check endpoint.

    Reports whether the startup hook has attached the generator pipeline.
    """
    return {
        "status": "healthy",
        "model_loaded": hasattr(self, "generator"),
    }
Best Practices
1. Use Descriptive Paths
# Good
@chute.cord(public_api_path="/generate_text")
@chute.cord(public_api_path="/analyze_sentiment")
# Avoid
@chute.cord(public_api_path="/api")
@chute.cord(public_api_path="/do")
2. Choose Appropriate Methods
# GET for read-only operations
@chute.cord(public_api_path="/models", public_api_method="GET")
# POST for AI generation/processing
@chute.cord(public_api_path="/generate", public_api_method="POST")
3. Use Input Schemas for Validation
from pydantic import BaseModel, Field
class ValidatedInput(BaseModel):
    """Request body with field-level constraints enforced by pydantic."""

    prompt: str = Field(..., min_length=1, max_length=10000)
    temperature: float = Field(0.7, ge=0.0, le=2.0)
@chute.cord(public_api_path="/generate", input_schema=ValidatedInput)
async def generate(self, params: ValidatedInput):
    # Input is automatically validated against ValidatedInput before this runs.
    pass
4. Handle Errors Gracefully
@chute.cord(public_api_path="/generate")
async def generate(self, prompt: str):
    """Reject bad input with a 400 and map model failures to a 500."""
    if not prompt.strip():
        raise HTTPException(400, "Prompt cannot be empty")

    try:
        return await self.model.generate(prompt)
    except Exception as e:
        raise HTTPException(500, f"Generation failed: {e}")