Custom Templates Guide

This guide shows how to create reusable templates for common AI workflows, making it easy to deploy similar applications with different configurations.

Overview

Custom templates in Chutes allow you to:

  • Standardize Deployments: Create consistent deployment patterns
  • Reduce Code Duplication: Reuse common configurations
  • Simplify Complex Setups: Abstract away complexity for end users
  • Enable Team Collaboration: Share best practices across teams

Template Structure

Basic Template Function

A template is a Python function that returns a configured Chute:

import json

from chutes.image import Image
from chutes.chute import Chute, NodeSelector
from typing import Optional, Dict, Any, List

def build_text_classification_template(
    username: str,
    model_name: str,
    num_labels: int,
    node_selector: Optional[NodeSelector] = None,
    **kwargs
) -> Chute:
    """
    Template for text classification models

    Args:
        username: Chutes username
        model_name: HuggingFace model name
        num_labels: Number of classification labels
        node_selector: Hardware requirements
        **kwargs: Additional chute configuration

    Returns:
        Configured Chute instance
    """

    # Default node selector
    if node_selector is None:
        node_selector = NodeSelector(
            gpu_count=1,
            min_vram_gb_per_gpu=8)

    # Build custom image
    image = (
        Image(
            username=username,
            name="text-classification",
            tag="latest",
            python_version="3.11"
        )
        .pip_install([
            "torch==2.1.0",
            "transformers==4.35.0",
            "datasets==2.14.0",
            "scikit-learn==1.3.0"
        ])
        .copy_files("./templates/text_classification", "/app")
    )

    # Create chute
    chute = Chute(
        username=username,
        name=f"text-classifier-{model_name.split('/')[-1]}",
        image=image,
        entry_file="classifier.py",
        entry_point="run",
        node_selector=node_selector,
        environment={
            "MODEL_NAME": model_name,
            "NUM_LABELS": str(num_labels)
        },
        timeout_seconds=300,
        concurrency=8,
        **kwargs
    )

    return chute

# Usage
classifier_chute = build_text_classification_template(
    username="myuser",
    model_name="bert-base-uncased",
    num_labels=3
)

Advanced Template Examples

Computer Vision Template

def build_image_classification_template(
    username: str,
    model_name: str,
    image_size: int = 224,
    batch_size: int = 16,
    use_gpu: bool = True,
    **kwargs
) -> Chute:
    """Template for image classification models"""

    # Configure hardware based on requirements
    if use_gpu:
        node_selector = NodeSelector(
            gpu_count=1,
            min_vram_gb_per_gpu=12)
    else:
        node_selector = NodeSelector(
            gpu_count=0)

    # Build image with computer vision dependencies
    image = (
        Image(
            username=username,
            name="image-classification",
            tag=f"v{model_name.replace('/', '-')}",
            python_version="3.11"
        )
        .pip_install([
            "torch==2.1.0",
            "torchvision==0.16.0",
            "timm==0.9.7",
            "pillow==10.0.1",
            "opencv-python==4.8.1.78"
        ])
        .copy_files("./templates/image_classification", "/app")
    )

    chute = Chute(
        username=username,
        name=f"image-classifier-{model_name.split('/')[-1]}",
        image=image,
        entry_file="image_classifier.py",
        entry_point="run",
        node_selector=node_selector,
        environment={
            "MODEL_NAME": model_name,
            "IMAGE_SIZE": str(image_size),
            "BATCH_SIZE": str(batch_size)
        },
        timeout_seconds=600,
        concurrency=4,
        **kwargs
    )

    return chute

# Example implementation file: templates/image_classification/image_classifier.py
import os
import torch
import timm
from PIL import Image
import torchvision.transforms as transforms
from typing import List, Dict, Any
import base64
import io

class ImageClassifier:
    def __init__(self):
        self.model_name = os.environ.get("MODEL_NAME", "resnet50")
        self.image_size = int(os.environ.get("IMAGE_SIZE", "224"))
        self.batch_size = int(os.environ.get("BATCH_SIZE", "16"))

        # Load model
        self.model = timm.create_model(self.model_name, pretrained=True)
        self.model.eval()

        # Define transforms
        self.transform = transforms.Compose([
            transforms.Resize((self.image_size, self.image_size)),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406],
                               std=[0.229, 0.224, 0.225])
        ])

    def preprocess_image(self, image_b64: str) -> torch.Tensor:
        # Decode base64 image
        image_bytes = base64.b64decode(image_b64)
        image = Image.open(io.BytesIO(image_bytes)).convert('RGB')

        # Apply transforms
        tensor = self.transform(image)
        return tensor.unsqueeze(0)  # Add batch dimension

    def predict(self, images: List[str]) -> List[Dict[str, Any]]:
        results = []

        for i in range(0, len(images), self.batch_size):
            batch = images[i:i + self.batch_size]

            # Preprocess batch
            tensors = [self.preprocess_image(img) for img in batch]
            batch_tensor = torch.cat(tensors, dim=0)

            # Inference
            with torch.no_grad():
                outputs = self.model(batch_tensor)
                probabilities = torch.nn.functional.softmax(outputs, dim=1)

            # Process results
            for probs in probabilities:
                top5_probs, top5_indices = torch.topk(probs, 5)

                results.append({
                    "predictions": [
                        {
                            "class_id": int(idx),
                            "probability": float(prob)
                        }
                        for idx, prob in zip(top5_indices, top5_probs)
                    ]
                })

        return results

# Global classifier instance
classifier = ImageClassifier()

async def run(inputs: Dict[str, Any]) -> Dict[str, Any]:
    images = inputs.get("images", [])
    if not images:
        return {"error": "No images provided"}

    results = classifier.predict(images)
    return {"results": results}
"""

LLM Chat Template

def build_llm_chat_template(
    username: str,
    model_name: str,
    max_length: int = 2048,
    temperature: float = 0.7,
    use_quantization: bool = False,
    **kwargs
) -> Chute:
    """Template for LLM chat applications"""

    # Determine hardware requirements based on model
    if "7b" in model_name.lower():
        vram_gb = 16 if not use_quantization else 8
    elif "13b" in model_name.lower():
        vram_gb = 24 if not use_quantization else 12
    elif "70b" in model_name.lower():
        vram_gb = 80 if not use_quantization else 40
    else:
        vram_gb = 16  # Default

    node_selector = NodeSelector(
        gpu_count=1,
        min_vram_gb_per_gpu=vram_gb)

    # Build image with LLM dependencies
    pip_packages = [
        "torch==2.1.0",
        "transformers==4.35.0",
        "accelerate==0.24.0"
    ]

    if use_quantization:
        pip_packages.append("bitsandbytes==0.41.0")

    image = (
        Image(
            username=username,
            name="llm-chat",
            tag=f"v{model_name.replace('/', '-')}",
            python_version="3.11"
        )
        .pip_install(pip_packages)
        .copy_files("./templates/llm_chat", "/app")
    )

    environment = {
        "MODEL_NAME": model_name,
        "MAX_LENGTH": str(max_length),
        "TEMPERATURE": str(temperature),
        "USE_QUANTIZATION": str(use_quantization).lower()
    }

    chute = Chute(
        username=username,
        name=f"llm-chat-{model_name.split('/')[-1]}",
        image=image,
        entry_file="chat_model.py",
        entry_point="run",
        node_selector=node_selector,
        environment=environment,
        timeout_seconds=300,
        concurrency=4,
        **kwargs
    )

    return chute
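
The template's entry file, templates/llm_chat/chat_model.py, is not shown in this guide. A minimal sketch, assuming the standard transformers API and the environment variables set by the template (the prompt handling and response shape are assumptions):

# templates/llm_chat/chat_model.py (illustrative sketch)
import os
import torch
from typing import Dict, Any
from transformers import AutoModelForCausalLM, AutoTokenizer

MODEL_NAME = os.environ["MODEL_NAME"]  # always set by the template
MAX_LENGTH = int(os.environ.get("MAX_LENGTH", "2048"))
TEMPERATURE = float(os.environ.get("TEMPERATURE", "0.7"))
USE_QUANTIZATION = os.environ.get("USE_QUANTIZATION", "false") == "true"

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

load_kwargs = {"device_map": "auto"}  # device placement via accelerate
if USE_QUANTIZATION:
    load_kwargs["load_in_8bit"] = True  # requires bitsandbytes
else:
    load_kwargs["torch_dtype"] = torch.float16

model = AutoModelForCausalLM.from_pretrained(MODEL_NAME, **load_kwargs)

async def run(inputs: Dict[str, Any]) -> Dict[str, Any]:
    prompt = inputs.get("prompt", "")
    if not prompt:
        return {"error": "No prompt provided"}

    input_ids = tokenizer(prompt, return_tensors="pt").input_ids.to(model.device)
    with torch.no_grad():
        output_ids = model.generate(
            input_ids,
            max_length=MAX_LENGTH,
            temperature=TEMPERATURE,
            do_sample=True,
        )
    text = tokenizer.decode(output_ids[0], skip_special_tokens=True)
    return {"response": text}

# Usage
chat_chute = build_llm_chat_template(
    username="myuser",
    model_name="meta-llama/Llama-2-7b-chat-hf",
    use_quantization=True
)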

Multi-Model Analysis Template

def build_multi_model_analysis_template(
    username: str,
    models_config: Dict[str, Dict[str, Any]],
    enable_caching: bool = True,
    **kwargs
) -> Chute:
    """
    Template for multi-model analysis pipelines

    Args:
        username: Chutes username
        models_config: Dictionary of model configurations
            Example: {
                "sentiment": {"model": "cardiffnlp/twitter-roberta-base-sentiment"},
                "ner": {"model": "dbmdz/bert-large-cased-finetuned-conll03-english"},
                "classification": {"model": "facebook/bart-large-mnli"}
            }
        enable_caching: Whether to enable Redis caching
    """

    # Calculate resource requirements based on models
    total_models = len(models_config)
    estimated_vram = total_models * 4  # 4GB per model estimate

    node_selector = NodeSelector(
        gpu_count=1,
        min_vram_gb_per_gpu=max(16, estimated_vram)
    )

    # Build comprehensive image
    pip_packages = [
        "torch==2.1.0",
        "transformers==4.35.0",
        "datasets==2.14.0",
        "scikit-learn==1.3.0",
        "numpy==1.24.3",
        "asyncio-pool==0.6.0"
    ]

    if enable_caching:
        # pickle protocol 5 is part of the standard library on Python 3.8+,
        # so only the Redis client is needed
        pip_packages.append("redis==5.0.0")

    image = (
        Image(
            username=username,
            name="multi-model-analysis",
            tag="latest",
            python_version="3.11"
        )
        .pip_install(pip_packages)
        .copy_files("./templates/multi_model", "/app")
    )

    # Environment configuration
    environment = {
        "MODELS_CONFIG": json.dumps(models_config),
        "ENABLE_CACHING": str(enable_caching).lower()
    }

    if enable_caching:
        environment["REDIS_URL"] = "redis://localhost:6379"

    chute = Chute(
        username=username,
        name="multi-model-analyzer",
        image=image,
        entry_file="multi_analyzer.py",
        entry_point="run",
        node_selector=node_selector,
        environment=environment,
        timeout_seconds=600,
        concurrency=6,
        **kwargs
    )

    return chute

# Usage example
multi_model_chute = build_multi_model_analysis_template(
    username="myuser",
    models_config={
        "sentiment": {
            "model": "cardiffnlp/twitter-roberta-base-sentiment-latest",
            "task": "sentiment-analysis"
        },
        "ner": {
            "model": "dbmdz/bert-large-cased-finetuned-conll03-english",
            "task": "ner"
        },
        "classification": {
            "model": "facebook/bart-large-mnli",
            "task": "zero-shot-classification"
        }
    },
    enable_caching=True
)
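
Inside the entry file, the MODELS_CONFIG environment variable arrives as a JSON string. multi_analyzer.py is not shown in this guide, but a minimal sketch of how it might load the configuration back into transformers pipelines:

# templates/multi_model/multi_analyzer.py (illustrative sketch)
import json
import os
from transformers import pipeline

# Parse the JSON-encoded configuration set by the template
models_config = json.loads(os.environ["MODELS_CONFIG"])

# One transformers pipeline per configured model, keyed by analysis name
pipelines = {
    name: pipeline(cfg["task"], model=cfg["model"])
    for name, cfg in models_config.items()
}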

Template Best Practices

1. Parameterization

Make templates flexible with good defaults:

def build_flexible_template(
    username: str,
    model_name: str,
    # Required parameters
    task_type: str,

    # Optional parameters with sensible defaults
    python_version: str = "3.11",
    timeout_seconds: int = 300,
    concurrency: int = 8,
    enable_monitoring: bool = True,
    enable_caching: bool = True,
    auto_scale: bool = False,

    # Hardware configuration
    gpu_count: int = 1,
    min_vram_gb: int = 8,

    # Advanced configuration
    environment_vars: Optional[Dict[str, str]] = None,
    custom_pip_packages: Optional[List[str]] = None,

    **kwargs
) -> Chute:
    """Highly flexible template with many configuration options"""

    # Merge environment variables
    base_env = {
        "MODEL_NAME": model_name,
        "TASK_TYPE": task_type,
        "ENABLE_MONITORING": str(enable_monitoring).lower(),
        "ENABLE_CACHING": str(enable_caching).lower()
    }

    if environment_vars:
        base_env.update(environment_vars)

    # Build pip packages list
    base_packages = [
        "torch==2.1.0",
        "transformers==4.35.0"
    ]

    if enable_monitoring:
        base_packages.append("prometheus-client==0.18.0")

    if enable_caching:
        base_packages.append("redis==5.0.0")

    if custom_pip_packages:
        base_packages.extend(custom_pip_packages)

    # Configure node selector
    node_selector = NodeSelector(
        gpu_count=gpu_count,
        min_vram_gb_per_gpu=min_vram_gb)

    # Build image
    image = (
        Image(
            username=username,
            name=f"{task_type}-model",
            tag=model_name.replace("/", "-"),
            python_version=python_version
        )
        .pip_install(base_packages)
        .copy_files(f"./templates/{task_type}", "/app")
    )

    # Create chute
    chute = Chute(
        username=username,
        name=f"{task_type}-{model_name.split('/')[-1]}",
        image=image,
        entry_file="app.py",
        entry_point="run",
        node_selector=node_selector,
        environment=base_env,
        timeout_seconds=timeout_seconds,
        concurrency=concurrency,
        auto_scale=auto_scale,
        **kwargs
    )

    return chute
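
A usage sketch (the extra seqeval package is just an illustration of custom_pip_packages):

# Usage
ner_chute = build_flexible_template(
    username="myuser",
    model_name="dbmdz/bert-large-cased-finetuned-conll03-english",
    task_type="ner",
    min_vram_gb=12,
    custom_pip_packages=["seqeval==1.2.2"]
)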

2. Template Validation

Add validation to prevent common errors:

def validate_template_inputs(
    model_name: str,
    task_type: str,
    gpu_count: int,
    min_vram_gb: int
) -> None:
    """Validate template inputs"""

    # Validate model name format: HuggingFace identifiers are either bare
    # (e.g. 'bert-base-uncased') or namespaced (e.g. 'organization/model')
    if not model_name or " " in model_name:
        raise ValueError("model_name must be a non-empty identifier without spaces")

    # Validate task type
    valid_tasks = ["classification", "ner", "generation", "embedding"]
    if task_type not in valid_tasks:
        raise ValueError(f"task_type must be one of {valid_tasks}")

    # Validate hardware requirements
    if gpu_count < 0 or gpu_count > 8:
        raise ValueError("gpu_count must be between 0 and 8")

    if min_vram_gb < 4 or min_vram_gb > 80:
        raise ValueError("min_vram_gb must be between 4 and 80")

    # Model-specific validation
    if "70b" in model_name.lower() and min_vram_gb < 40:
        raise ValueError("70B models require at least 40GB VRAM")

def build_validated_template(username: str, model_name: str, **kwargs) -> Chute:
    """Template with input validation"""

    # Extract and validate key parameters. task_type is popped so it is not
    # passed twice below; gpu_count and min_vram_gb remain in kwargs and are
    # forwarded as keyword arguments.
    task_type = kwargs.pop("task_type", "classification")
    gpu_count = kwargs.get("gpu_count", 1)
    min_vram_gb = kwargs.get("min_vram_gb", 8)

    validate_template_inputs(model_name, task_type, gpu_count, min_vram_gb)

    # Continue with template creation...
    return build_flexible_template(username, model_name, task_type, **kwargs)
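
Validation failures then surface at build time rather than at deployment. A quick illustration:

# Invalid hardware configuration fails fast
try:
    build_validated_template(
        username="myuser",
        model_name="meta-llama/Llama-2-70b-hf",
        min_vram_gb=16  # too small for a 70B model
    )
except ValueError as e:
    print(f"Invalid template configuration: {e}")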

3. Template Documentation

Document templates thoroughly:

def build_documented_template(
    username: str,
    model_name: str,
    **kwargs
) -> Chute:
    """
    Production-ready template for ML model deployment

    This template provides a robust foundation for deploying machine learning
    models with monitoring, caching, and auto-scaling capabilities.

    Args:
        username (str): Your Chutes username
        model_name (str): HuggingFace model identifier (e.g., 'bert-base-uncased')

    Keyword Args:
        task_type (str): Type of ML task ('classification', 'ner', 'generation')
            Default: 'classification'
        gpu_count (int): Number of GPUs required (0-8)
            Default: 1
        min_vram_gb (int): Minimum VRAM per GPU in GB (4-80)
            Default: 8
        enable_monitoring (bool): Enable Prometheus metrics
            Default: True
        enable_caching (bool): Enable Redis caching
            Default: True
        auto_scale (bool): Enable auto-scaling
            Default: False

    Returns:
        Chute: Configured chute instance ready for deployment

    Example:
        >>> chute = build_documented_template(
        ...     username="myuser",
        ...     model_name="bert-base-uncased",
        ...     task_type="classification",
        ...     enable_monitoring=True,
        ...     auto_scale=True
        ... )
        >>> result = chute.deploy()

    Raises:
        ValueError: If invalid parameters are provided

    Note:
        This template automatically configures hardware requirements based on
        the model size. For 70B+ models, consider using multiple GPUs.
    """

    # Template implementation...
    pass

Creating Template Packages

Organizing Templates

Structure templates as reusable packages:

my_chutes_templates/
├── __init__.py
├── text/
│   ├── __init__.py
│   ├── classification.py
│   ├── generation.py
│   └── embedding.py
├── vision/
│   ├── __init__.py
│   ├── classification.py
│   ├── detection.py
│   └── segmentation.py
├── audio/
│   ├── __init__.py
│   ├── transcription.py
│   └── generation.py
└── templates/
    ├── text_classification/
    │   ├── app.py
    │   └── requirements.txt
    ├── image_classification/
    │   ├── app.py
    │   └── requirements.txt
    └── audio_transcription/
        ├── app.py
        └── requirements.txt

Package Implementation

# my_chutes_templates/__init__.py
from .text.classification import build_text_classification_template
from .text.generation import build_text_generation_template
from .vision.classification import build_image_classification_template

__all__ = [
    "build_text_classification_template",
    "build_text_generation_template",
    "build_image_classification_template"
]

__version__ = "1.0.0"

# my_chutes_templates/text/classification.py
from chutes.chute import Chute

from ..base import BaseTemplate

class TextClassificationTemplate(BaseTemplate):
    """Template for text classification models"""

    def __init__(self):
        super().__init__(
            template_name="text_classification",
            required_params=["model_name", "num_labels"],
            default_packages=[
                "torch==2.1.0",
                "transformers==4.35.0",
                "scikit-learn==1.3.0"
            ]
        )

    def build(self, username: str, **kwargs) -> Chute:
        return self._build_template(username, **kwargs)

def build_text_classification_template(username: str, **kwargs) -> Chute:
    """Convenience function for building text classification template"""
    template = TextClassificationTemplate()
    return template.build(username, **kwargs)
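
The BaseTemplate class imported above is not defined in this guide. A minimal sketch of what my_chutes_templates/base.py might contain, assuming it centralizes the image and chute construction shared by all templates:

# my_chutes_templates/base.py (illustrative sketch)
from typing import List
from chutes.image import Image
from chutes.chute import Chute, NodeSelector

class BaseTemplate:
    """Shared scaffolding for template classes."""

    def __init__(self, template_name: str, required_params: List[str],
                 default_packages: List[str]):
        self.template_name = template_name
        self.required_params = required_params
        self.default_packages = default_packages

    def _build_template(self, username: str, **kwargs) -> Chute:
        # Fail early if a required parameter is missing
        missing = [p for p in self.required_params if p not in kwargs]
        if missing:
            raise ValueError(f"Missing required parameters: {missing}")

        image = (
            Image(
                username=username,
                name=self.template_name.replace("_", "-"),
                tag="latest",
                python_version="3.11"
            )
            .pip_install(self.default_packages)
            .copy_files(f"./templates/{self.template_name}", "/app")
        )

        suffix = kwargs["model_name"].split("/")[-1]
        return Chute(
            username=username,
            name=f"{self.template_name.replace('_', '-')}-{suffix}",
            image=image,
            entry_file="app.py",
            entry_point="run",
            node_selector=kwargs.get(
                "node_selector",
                NodeSelector(gpu_count=1, min_vram_gb_per_gpu=8)
            ),
            # Expose remaining parameters as environment variables
            environment={
                k.upper(): str(v) for k, v in kwargs.items()
                if k != "node_selector"
            },
        )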

Template Testing

Unit Tests for Templates

import unittest
from unittest.mock import patch, MagicMock
from my_chutes_templates import build_text_classification_template

class TestTextClassificationTemplate(unittest.TestCase):

    def test_template_creation(self):
        """Test basic template creation"""
        chute = build_text_classification_template(
            username="testuser",
            model_name="bert-base-uncased",
            num_labels=3
        )

        self.assertEqual(chute.username, "testuser")
        self.assertIn("bert-base-uncased", chute.name)
        self.assertEqual(chute.environment["NUM_LABELS"], "3")

    def test_invalid_parameters(self):
        """Test validation of invalid parameters"""
        with self.assertRaises(ValueError):
            build_text_classification_template(
                username="testuser",
                model_name="invalid-model",  # Invalid format
                num_labels=3
            )

    @patch('chutes.chute.Chute.deploy')
    def test_template_deployment(self, mock_deploy):
        """Test template deployment"""
        mock_deploy.return_value = {"status": "success"}

        chute = build_text_classification_template(
            username="testuser",
            model_name="bert-base-uncased",
            num_labels=3
        )

        result = chute.deploy()
        self.assertEqual(result["status"], "success")
        mock_deploy.assert_called_once()

if __name__ == "__main__":
    unittest.main()

Next Steps

For advanced template development, see the Template Development Guide.