This guide walks you through building your first fully custom chute from scratch. Instead of starting from a template, you'll build every component yourself, gaining full control over your deployment and a deeper understanding of the platform.
What We'll Build
We'll create a sentiment analysis API that:
🧠 Loads a custom model (a RoBERTa model fine-tuned for sentiment analysis)
🔍 Validates inputs with Pydantic schemas
🌐 Provides REST endpoints for single and batch processing
📊 Returns structured results with confidence scores
🏗️ Uses a custom Docker image with optimized dependencies
Step 3: Define Input/Output Schemas
Start the file with Pydantic schemas that validate inputs and structure the results:
# sentiment_chute.py
from pydantic import BaseModel, Field, validator
from typing import List, Optional
from enum import Enum


class SentimentLabel(str, Enum):
    POSITIVE = "POSITIVE"
    NEGATIVE = "NEGATIVE"
    NEUTRAL = "NEUTRAL"


class TextInput(BaseModel):
    text: str = Field(..., min_length=1, max_length=5000, description="Text to analyze")

    @validator('text')
    def text_must_not_be_empty(cls, v):
        if not v.strip():
            raise ValueError('Text cannot be empty or only whitespace')
        return v.strip()


class BatchTextInput(BaseModel):
    texts: List[str] = Field(..., min_items=1, max_items=50, description="List of texts to analyze")

    @validator('texts')
    def validate_texts(cls, v):
        cleaned_texts = []
        for i, text in enumerate(v):
            if not text or not text.strip():
                raise ValueError(f'Text at index {i} cannot be empty')
            if len(text) > 5000:
                raise ValueError(f'Text at index {i} is too long (max 5000 characters)')
            cleaned_texts.append(text.strip())
        return cleaned_texts


class SentimentResult(BaseModel):
    text: str
    sentiment: SentimentLabel
    confidence: float = Field(..., ge=0.0, le=1.0)
    processing_time: float


class BatchSentimentResult(BaseModel):
    results: List[SentimentResult]
    total_texts: int
    total_processing_time: float
    average_confidence: float
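To sanity-check the validators before wiring them into endpoints, you can exercise the schemas from a REPL. This quick check is illustrative only and is not part of sentiment_chute.py:
# Exercise the validators interactively
from pydantic import ValidationError

ok = TextInput(text="  Great product!  ")
print(ok.text)  # "Great product!" (leading/trailing whitespace stripped)

try:
    TextInput(text="   ")
except ValidationError as err:
    print(err)  # reports "Text cannot be empty or only whitespace"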
Step 4: Build Custom Docker Image
Define a custom Docker image with all necessary dependencies:
# Add to sentiment_chute.py
from chutes.image import Image

# Create an optimized image for sentiment analysis
image = (
    Image(username="myuser", name="sentiment-chute", tag="1.0")
    # Start with CUDA-enabled Ubuntu
    .from_base("nvidia/cuda:12.2-runtime-ubuntu22.04")
    # Install Python 3.11
    .with_python("3.11")
    # Install system dependencies
    .run_command("""
        apt-get update && apt-get install -y \\
            git curl wget \\
            && rm -rf /var/lib/apt/lists/*
    """)
    # Install PyTorch with CUDA support
    .run_command("""
        pip install torch torchvision torchaudio \\
            --index-url https://download.pytorch.org/whl/cu121
    """)
    # Install transformers and other ML dependencies
    # (version specifiers are quoted so the shell doesn't treat '>' as a redirect)
    .run_command("""
        pip install \\
            'transformers>=4.30.0' \\
            'accelerate>=0.20.0' \\
            'tokenizers>=0.13.0' \\
            'numpy>=1.24.0' \\
            'scikit-learn>=1.3.0'
    """)
    # Set up the model cache directory
    .with_env("TRANSFORMERS_CACHE", "/app/models")
    .with_env("HF_HOME", "/app/models")
    .run_command("mkdir -p /app/models")
    # Set the working directory
    .set_workdir("/app")
)
Step 5: Create the Chute
Now create the main chute with proper initialization:
# Add to sentiment_chute.py
from chutes.chute import Chute, NodeSelector
from fastapi import HTTPException
import time
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import numpy as np

# Define the chute
chute = Chute(
    username="myuser",  # Replace with your username
    name="sentiment-chute",
    image=image,
    tagline="Advanced sentiment analysis with confidence scoring",
    readme="""
# Sentiment Analysis Chute

A production-ready sentiment analysis service using RoBERTa.

## Features

- High-accuracy sentiment classification
- Confidence scoring for each prediction
- Batch processing support
- GPU acceleration
- Input validation and error handling

## Usage

### Single Text Analysis

```bash
curl -X POST https://myuser-sentiment-chute.chutes.ai/analyze \\
  -H "Content-Type: application/json" \\
  -d '{"text": "I love this new AI service!"}'
```

### Batch Analysis

```bash
curl -X POST https://myuser-sentiment-chute.chutes.ai/batch \\
  -H "Content-Type: application/json" \\
  -d '{
    "texts": [
      "This is amazing!",
      "Not very good...",
      "It works okay I guess"
    ]
  }'
```

## Response Format

```json
{
  "text": "I love this new AI service!",
  "sentiment": "POSITIVE",
  "confidence": 0.9847,
  "processing_time": 0.045
}
```
""",
    node_selector=NodeSelector(
        gpu_count=1,
        min_vram_gb_per_gpu=8,
        include=["rtx4090", "rtx3090", "a100"]  # Prefer these GPUs
    ),
    concurrency=4  # Handle up to 4 concurrent requests
)
Step 6: Add Model Loading
Implement the startup function to load your model:
# Add to sentiment_chute.py
@chute.on_startup()
async def load_model(self):
    """Load the sentiment analysis model and tokenizer."""
    print("🚀 Starting sentiment analysis chute...")

    # Model configuration
    model_name = "cardiffnlp/twitter-roberta-base-sentiment-latest"
    print(f"📥 Loading model: {model_name}")

    try:
        # Load tokenizer
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
        print("✅ Tokenizer loaded successfully")

        # Load model
        self.model = AutoModelForSequenceClassification.from_pretrained(model_name)
        print("✅ Model loaded successfully")

        # Set up device
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        print(f"🖥️ Using device: {self.device}")

        # Move model to device and set evaluation mode
        self.model.to(self.device)
        self.model.eval()

        # Label mapping (specific to this model)
        self.label_mapping = {
            "LABEL_0": "NEGATIVE",
            "LABEL_1": "NEUTRAL",
            "LABEL_2": "POSITIVE"
        }

        # Warm up the model with a dummy input
        # (_predict_sentiment is the module-level helper defined below)
        print("🔥 Warming up model...")
        dummy_text = "This is a test."
        await _predict_sentiment(self, dummy_text)
        print("✅ Model loaded and ready!")
    except Exception as e:
        print(f"❌ Error loading model: {str(e)}")
        raise
async def _predict_sentiment(self, text: str) -> tuple[str, float, float]:
    """
    Internal helper to predict sentiment.
    Returns: (sentiment_label, confidence, processing_time)
    """
    start_time = time.time()
    try:
        # Tokenize input
        inputs = self.tokenizer(
            text,
            return_tensors="pt",
            truncation=True,
            padding=True,
            max_length=512
        ).to(self.device)

        # Run inference
        with torch.no_grad():
            outputs = self.model(**inputs)
            predictions = torch.nn.functional.softmax(outputs.logits, dim=-1)

        # Get predicted class and confidence
        predicted_class_id = predictions.argmax().item()
        confidence = predictions[0][predicted_class_id].item()

        # Map to a human-readable label; upper-case the fallback so it
        # always matches the SentimentLabel enum values
        model_label = self.model.config.id2label[predicted_class_id]
        sentiment_label = self.label_mapping.get(model_label, model_label.upper())

        processing_time = time.time() - start_time
        return sentiment_label, confidence, processing_time
    except Exception as e:
        raise HTTPException(
            status_code=500,
            detail=f"Sentiment prediction failed: {str(e)}"
        )
Step 7: Implement API Endpoints
Add your API endpoints using the @chute.cord decorator:
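Below is a minimal sketch of the three endpoints the local test in Step 8 calls (analyze_sentiment, analyze_batch, and health_check). The public_api_path and method keyword arguments follow common chutes examples; treat them as assumptions and check the SDK reference for the exact decorator signature:
# Add to sentiment_chute.py
# NOTE: the decorator keyword arguments (public_api_path, method) are
# assumptions based on common chutes examples; adjust to your SDK version.

@chute.cord(public_api_path="/analyze", method="POST")
async def analyze_sentiment(self, data: TextInput) -> SentimentResult:
    """Analyze the sentiment of a single text."""
    sentiment, confidence, elapsed = await _predict_sentiment(self, data.text)
    return SentimentResult(
        text=data.text,
        sentiment=sentiment,
        confidence=confidence,
        processing_time=elapsed,
    )


@chute.cord(public_api_path="/batch", method="POST")
async def analyze_batch(self, data: BatchTextInput) -> BatchSentimentResult:
    """Analyze a batch of texts and aggregate the results."""
    start = time.time()
    results = []
    for text in data.texts:
        sentiment, confidence, elapsed = await _predict_sentiment(self, text)
        results.append(SentimentResult(
            text=text,
            sentiment=sentiment,
            confidence=confidence,
            processing_time=elapsed,
        ))
    return BatchSentimentResult(
        results=results,
        total_texts=len(results),
        total_processing_time=time.time() - start,
        average_confidence=sum(r.confidence for r in results) / len(results),
    )


@chute.cord(public_api_path="/health", method="GET")
async def health_check(self) -> dict:
    """Report basic readiness information for monitoring."""
    return {
        "status": "healthy" if getattr(self, "model", None) is not None else "loading",
        "device": getattr(self, "device", "unknown"),
    }
Routing both cords through the shared _predict_sentiment helper keeps tokenization and error handling identical for single and batch requests.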
Step 8: Test Locally
Add a local testing function to verify everything works before deploying:
# Add to sentiment_chute.py
if __name__ == "__main__":
    import asyncio

    async def test_locally():
        """Test the chute locally before deploying."""
        print("🧪 Testing chute locally...")

        # Simulate the startup process
        await load_model(chute)

        # Test single analysis
        print("\n📝 Testing single text analysis...")
        test_input = TextInput(text="I absolutely love this new technology!")
        result = await analyze_sentiment(chute, test_input)
        print(f"Input: {result.text}")
        print(f"Sentiment: {result.sentiment}")
        print(f"Confidence: {result.confidence:.4f}")
        print(f"Processing time: {result.processing_time:.4f}s")

        # Test batch analysis
        print("\n📝 Testing batch analysis...")
        batch_input = BatchTextInput(texts=[
            "This is amazing!",
            "I hate this so much.",
            "It's okay, nothing special.",
            "Absolutely fantastic experience!"
        ])
        batch_result = await analyze_batch(chute, batch_input)
        print(f"Processed {batch_result.total_texts} texts")
        print(f"Average confidence: {batch_result.average_confidence:.4f}")
        print(f"Total time: {batch_result.total_processing_time:.4f}s")
        for i, res in enumerate(batch_result.results):
            print(f"  {i+1}. '{res.text}' -> {res.sentiment} ({res.confidence:.3f})")

        # Test health check
        print("\n🏥 Testing health check...")
        health = await health_check(chute)
        print(f"Status: {health['status']}")
        print(f"Device: {health['device']}")

        print("\n✅ All tests passed! Ready to deploy.")

    # Run local tests
    asyncio.run(test_locally())
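Run the file directly to execute the checks. This assumes the model dependencies are installed in your local environment; on a machine without a GPU, the code falls back to CPU automatically:
python sentiment_chute.py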
Step 9: Complete File
Here's your complete sentiment_chute.py file structure:
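The finished file simply stacks the pieces from Steps 3 through 8 in order. As an abridged outline (not the full listing):
# sentiment_chute.py: overall layout
# 1. Schemas: SentimentLabel, TextInput, BatchTextInput,
#    SentimentResult, BatchSentimentResult
# 2. Custom image: image = Image(...) build chain
# 3. Chute definition: chute = Chute(..., image=image, node_selector=...)
# 4. Startup hook: @chute.on_startup() load_model, plus the
#    _predict_sentiment helper
# 5. Endpoints: @chute.cord analyze_sentiment, analyze_batch, health_check
# 6. Local test harness: if __name__ == "__main__": ...
Once the chute is deployed, exercise the live endpoints.
Single Text Analysis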
curl -X POST https://myuser-sentiment-chute.chutes.ai/analyze \
-H "Content-Type: application/json" \
-d '{"text": "I absolutely love this new AI service!"}'
Expected response:
{
  "text": "I absolutely love this new AI service!",
  "sentiment": "POSITIVE",
  "confidence": 0.9847,
  "processing_time": 0.045
}
Batch Analysis
curl -X POST https://myuser-sentiment-chute.chutes.ai/batch \
-H "Content-Type: application/json" \
-d '{
"texts": [
"This is amazing technology!",
"I hate waiting in long lines.",
"The weather is okay today."
]
}'
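The response follows the BatchSentimentResult schema defined in Step 3; the numbers below are illustrative:
{
  "results": [
    {"text": "This is amazing technology!", "sentiment": "POSITIVE", "confidence": 0.991, "processing_time": 0.041},
    {"text": "I hate waiting in long lines.", "sentiment": "NEGATIVE", "confidence": 0.968, "processing_time": 0.035},
    {"text": "The weather is okay today.", "sentiment": "NEUTRAL", "confidence": 0.842, "processing_time": 0.034}
  ],
  "total_texts": 3,
  "total_processing_time": 0.110,
  "average_confidence": 0.934
}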
Python Client
import requests
# Test your API
response = requests.post(
"https://myuser-sentiment-chute.chutes.ai/analyze",
json={"text": "I love learning about AI!"}
)
result = response.json()
print(f"Sentiment: {result['sentiment']}")
print(f"Confidence: {result['confidence']:.3f}")
What You've Learned
Congratulations! You've successfully built and deployed your first custom chute. You now understand:
Core Concepts
✅ Custom Docker images with optimized dependencies
✅ Pydantic schemas for input/output validation
✅ Model loading and management with startup hooks
✅ API endpoint creation with @chute.cord
✅ Error handling and validation
✅ Local testing before deployment
Advanced Features
✅ Batch processing for efficiency
✅ Performance monitoring with timing
✅ Health checks for monitoring
✅ GPU optimization with proper device management
✅ Resource specification with NodeSelector
Next Steps
Now that you understand the fundamentals, you're ready to explore the more advanced topics covered in the rest of the documentation.
🎉 Congratulations! You've built your first custom chute from scratch. You now have the foundation to create any AI application you can imagine with Chutes!