This guide walks you through building your first completely custom chute from scratch. Unlike templates, you'll learn to build every component yourself, giving you full control and understanding of the platform.
What We'll Build
We'll create a sentiment analysis API that:
🧠 Loads a custom model (RoBERTa for sentiment analysis)
🔍 Validates inputs with Pydantic schemas
🚀 Provides REST endpoints for single and batch processing
📊 Returns structured results with confidence scores
🏗️ Uses custom Docker image with optimized dependencies
# sentiment_chute.py
from enum import Enum
from typing import List

# NOTE: `validator`, `min_items` and `max_items` are the Pydantic v1-style API.
from pydantic import BaseModel, Field, validator


class SentimentLabel(str, Enum):
    """Closed set of sentiment classes returned by the API."""

    POSITIVE = "POSITIVE"
    NEGATIVE = "NEGATIVE"
    NEUTRAL = "NEUTRAL"


class TextInput(BaseModel):
    """Request body for analyzing a single text."""

    text: str = Field(..., min_length=1, max_length=5000, description="Text to analyze")

    @validator('text')
    def text_must_not_be_empty(cls, v):
        """Reject whitespace-only text and return the stripped value."""
        if not v.strip():
            raise ValueError('Text cannot be empty or only whitespace')
        return v.strip()


class BatchTextInput(BaseModel):
    """Request body for analyzing between 1 and 50 texts in one call."""

    texts: List[str] = Field(..., min_items=1, max_items=50, description="List of texts to analyze")

    @validator('texts')
    def validate_texts(cls, v):
        """Validate every entry and return the list with each text stripped.

        Raises:
            ValueError: If an entry is empty/whitespace-only or longer than
                5000 characters; the message names the offending index.
        """
        cleaned_texts = []
        for i, text in enumerate(v):
            if not text or not text.strip():
                raise ValueError(f'Text at index {i} cannot be empty')
            # The length limit is applied to the text as submitted (before stripping).
            if len(text) > 5000:
                raise ValueError(f'Text at index {i} is too long (max 5000 characters)')
            cleaned_texts.append(text.strip())
        return cleaned_texts


class SentimentResult(BaseModel):
    """Prediction for a single text."""

    text: str
    sentiment: SentimentLabel
    # Constrained to the closed interval [0.0, 1.0].
    confidence: float = Field(..., ge=0.0, le=1.0)
    processing_time: float


class BatchSentimentResult(BaseModel):
    """Per-text predictions plus aggregate figures for a batch request."""

    results: List[SentimentResult]
    total_texts: int
    total_processing_time: float
    average_confidence: float
Step 4: Build Custom Docker Image
Define a custom Docker image with all necessary dependencies:
# Add to sentiment_chute.py
from chutes.image import Image
# Create optimized image for sentiment analysis
image = (
    Image(username="myuser", name="sentiment-chute", tag="1.0")
    # Start with CUDA-enabled Ubuntu
    # NOTE(review): confirm this exact tag is published on Docker Hub before
    # building; nvidia/cuda tags usually carry a full x.y.z version.
    .from_base("nvidia/cuda:12.2-runtime-ubuntu22.04")
    # Install Python 3.11
    .with_python("3.11")
    # Install system dependencies
    .run_command("""
        apt-get update && apt-get install -y \\
        git curl wget \\
        && rm -rf /var/lib/apt/lists/*
    """)
    # Install PyTorch with CUDA support
    .run_command("""
        pip install torch torchvision torchaudio \\
        --index-url https://download.pytorch.org/whl/cu121
    """)
    # Install transformers and other ML dependencies.
    # Each requirement is quoted: an unquoted `>=` is parsed by the shell as
    # output redirection, which silently drops the version constraint and
    # writes pip's output to a file named "=4.30.0" (and so on).
    .run_command("""
        pip install \\
        "transformers>=4.30.0" \\
        "accelerate>=0.20.0" \\
        "tokenizers>=0.13.0" \\
        "numpy>=1.24.0" \\
        "scikit-learn>=1.3.0"
    """)
    # Set up model cache directory
    .with_env("TRANSFORMERS_CACHE", "/app/models")
    .with_env("HF_HOME", "/app/models")
    .run_command("mkdir -p /app/models")
    # Set working directory
    .set_workdir("/app")
)
Step 5: Create the Chute
Now create the main chute with proper initialization:
# Add to sentiment_chute.py
import time

import numpy as np
import torch
from chutes.chute import Chute, NodeSelector
from fastapi import HTTPException
from transformers import AutoTokenizer, AutoModelForSequenceClassification
# Define the chute
chute = Chute(
    username="myuser",  # Replace with your username
    name="sentiment-chute",
    image=image,
    tagline="Advanced sentiment analysis with confidence scoring",
    # Markdown README. Double backslashes inside this (non-raw) string produce
    # a single shell line-continuation backslash in the rendered text.
    readme="""
# Sentiment Analysis Chute

A production-ready sentiment analysis service using RoBERTa.

## Features

- High-accuracy sentiment classification
- Confidence scoring for each prediction
- Batch processing support
- GPU acceleration
- Input validation and error handling

## Usage

### Single Text Analysis

```bash
curl -X POST https://myuser-sentiment-chute.chutes.ai/analyze \\
  -H "Content-Type: application/json" \\
  -d '{"text": "I love this new AI service!"}'
```

### Batch Analysis

```bash
curl -X POST https://myuser-sentiment-chute.chutes.ai/batch \\
  -H "Content-Type: application/json" \\
  -d '{
    "texts": [
      "This is amazing!",
      "Not very good...",
      "It works okay I guess"
    ]
  }'
```

## Response Format

```json
{"text":"I love this new AI service!","sentiment":"POSITIVE","confidence":0.9847,"processing_time":0.045}
```
""",
    node_selector=NodeSelector(
        gpu_count=1,
        min_vram_gb_per_gpu=8,
        include=["rtx4090", "rtx3090", "a100"],  # Prefer these GPUs
    ),
    concurrency=4,  # Handle up to 4 concurrent requests
)
Step 6: Add Model Loading
Implement the startup function to load your model:
# Add to sentiment_chute.py
@chute.on_startup()
async def load_model(self):
    """Load the sentiment analysis model and tokenizer.

    Stores ``tokenizer``, ``model``, ``device`` and ``label_mapping`` on
    ``self``, moves the model to the GPU when CUDA is available, switches it
    to evaluation mode and runs one warm-up prediction.

    Raises:
        Exception: Any failure during loading or warm-up is logged and
            re-raised unchanged.
    """
    print("🚀 Starting sentiment analysis chute...")

    # Model configuration
    model_name = "cardiffnlp/twitter-roberta-base-sentiment-latest"
    print(f"📥 Loading model: {model_name}")

    try:
        # Load tokenizer
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
        print("✅ Tokenizer loaded successfully")

        # Load model
        self.model = AutoModelForSequenceClassification.from_pretrained(model_name)
        print("✅ Model loaded successfully")

        # Set up device
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        print(f"🖥️ Using device: {self.device}")

        # Move model to device
        self.model.to(self.device)
        self.model.eval()  # Set to evaluation mode

        # Label mapping for checkpoints that expose generic "LABEL_n" names.
        self.label_mapping = {
            "LABEL_0": "NEGATIVE",
            "LABEL_1": "NEUTRAL",
            "LABEL_2": "POSITIVE",
        }
        # Checkpoints that ship readable names in their config (for example
        # "negative") would otherwise miss the mapping and be returned in the
        # model's own casing; normalise them to the upper-case API labels.
        for model_label in self.model.config.id2label.values():
            self.label_mapping.setdefault(model_label, str(model_label).upper())

        # Warm up the model with a dummy input. `_predict_sentiment` is a
        # module-level helper that takes the chute as its first argument, so
        # it is called as a function rather than looked up on `self`.
        print("🔥 Warming up model...")
        dummy_text = "This is a test."
        await _predict_sentiment(self, dummy_text)
        print("✅ Model loaded and ready!")
    except Exception as e:
        print(f"❌ Error loading model: {e}")
        raise  # bare raise keeps the original traceback
async def _predict_sentiment(self, text: str) -> tuple[str, float, float]:
    """
    Internal method to predict sentiment.

    Args:
        self: Object carrying ``tokenizer``, ``model``, ``device`` and
            ``label_mapping`` (set by the startup hook).
        text: The text to classify; input is truncated to 512 tokens.

    Returns: (sentiment_label, confidence, processing_time)
        ``confidence`` is the softmax probability of the predicted class and
        ``processing_time`` is wall-clock seconds spent in this call.

    Raises:
        HTTPException: Status 500 wrapping any error raised during
            tokenization or inference.
    """
    start_time = time.time()
    try:
        # Tokenize input
        inputs = self.tokenizer(
            text,
            return_tensors="pt",
            truncation=True,
            padding=True,
            max_length=512,
        ).to(self.device)

        # Run inference
        with torch.no_grad():
            outputs = self.model(**inputs)
            predictions = torch.nn.functional.softmax(outputs.logits, dim=-1)

        # A single text is passed in, so row 0 holds its class probabilities.
        probabilities = predictions[0]
        predicted_class_id = int(probabilities.argmax().item())
        confidence = probabilities[predicted_class_id].item()

        # Map to human-readable label. Labels missing from the mapping (for
        # example a checkpoint reporting "negative") are upper-cased so the
        # result always uses the API's upper-case label style.
        model_label = self.model.config.id2label[predicted_class_id]
        sentiment_label = self.label_mapping.get(model_label, str(model_label).upper())

        processing_time = time.time() - start_time
        return sentiment_label, confidence, processing_time
    except Exception as e:
        raise HTTPException(
            status_code=500,
            detail=f"Sentiment prediction failed: {str(e)}"
        ) from e
Step 7: Implement API Endpoints
Add your API endpoints using the @chute.cord decorator:
Add a local testing function to verify everything works:
# Add to sentiment_chute.py
if __name__ == "__main__":
    import asyncio

    async def test_locally():
        """Test the chute locally before deploying.

        Runs the startup hook, then exercises the single-text, batch and
        health-check handlers directly, passing the chute as ``self``.
        """
        print("🧪 Testing chute locally...")

        # Simulate the startup process
        await load_model(chute)

        # Test single analysis
        print("\n🔍 Testing single text analysis...")
        test_input = TextInput(text="I absolutely love this new technology!")
        result = await analyze_sentiment(chute, test_input)
        print(f"Input: {result.text}")
        print(f"Sentiment: {result.sentiment}")
        print(f"Confidence: {result.confidence:.4f}")
        print(f"Processing time: {result.processing_time:.4f}s")

        # Test batch analysis
        print("\n📊 Testing batch analysis...")
        batch_input = BatchTextInput(texts=[
            "This is amazing!",
            "I hate this so much.",
            "It's okay, nothing special.",
            "Absolutely fantastic experience!"
        ])
        batch_result = await analyze_batch(chute, batch_input)
        print(f"Processed {batch_result.total_texts} texts")
        print(f"Average confidence: {batch_result.average_confidence:.4f}")
        print(f"Total time: {batch_result.total_processing_time:.4f}s")
        for i, res in enumerate(batch_result.results):
            print(f" {i+1}. '{res.text}' -> {res.sentiment} ({res.confidence:.3f})")

        # Test health check
        print("\n🏥 Testing health check...")
        health = await health_check(chute)
        print(f"Status: {health['status']}")
        print(f"Device: {health['device']}")
        print("\n✅ All tests passed! Ready to deploy.")

    # Run local tests
    asyncio.run(test_locally())
# POST one JSON document with a "text" field to the chute's /analyze endpoint.
curl -X POST https://myuser-sentiment-chute.chutes.ai/analyze \
-H "Content-Type: application/json" \
-d '{"text": "I absolutely love this new AI service!"}'
Batch Analysis
# POST a JSON document with a "texts" array to the chute's /batch endpoint.
curl -X POST https://myuser-sentiment-chute.chutes.ai/batch \
-H "Content-Type: application/json" \
-d '{
"texts": [
"This is amazing technology!",
"I hate waiting in long lines.",
"The weather is okay today."
]
}'
🎉 Congratulations! You've built your first custom chute from scratch. You now have the foundation to create any AI application you can imagine with Chutes!