This guide demonstrates how to call and run chutes in your applications using various programming languages. We'll cover examples for Python, TypeScript, Go, and Rust.
Overview
Chutes can be invoked via simple HTTP POST requests to the endpoint:
POST https://{username}-{chute-name}.chutes.ai/{path}
Or using the API endpoint:
POST https://api.chutes.ai/chutes/{chute-id}/{path}
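As a quick sanity check, either URL form can be hit with a plain synchronous client before wiring up async code. The sketch below uses Python's requests library; the username, chute name, chute ID, and path are placeholder values, and the authentication header is covered in the next section:

import requests

# Placeholder identifiers; substitute your own chute's details.
username = "myuser"
chute_name = "my-llm"
chute_id = "your-chute-id"  # hypothetical ID for illustration
path = "v1/chat/completions"

# Both URL forms reach the same chute:
subdomain_url = f"https://{username}-{chute_name}.chutes.ai/{path}"
api_url = f"https://api.chutes.ai/chutes/{chute_id}/{path}"

headers = {"Content-Type": "application/json", "X-API-Key": "your-api-key-here"}
payload = {
    "model": "meta-llama/Llama-3.1-8B-Instruct",
    "messages": [{"role": "user", "content": "Hello!"}],
}

response = requests.post(subdomain_url, headers=headers, json=payload)
response.raise_for_status()
print(response.json())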
Authentication
All requests require authentication using either of the following (both shown in the sketch after this list):

- API key in the X-API-Key header
- Bearer token in the Authorization header
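A minimal sketch of the two header options; the token values are placeholders:

# Option 1: API key in the X-API-Key header
headers = {"X-API-Key": "your-api-key-here"}

# Option 2: Bearer token in the Authorization header
headers = {"Authorization": "Bearer your-token-here"}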
Python Example (using aiohttp)
Basic LLM Invocation
import aiohttp
import asyncio

async def call_llm_chute():
    url = "https://myuser-my-llm.chutes.ai/v1/chat/completions"
    headers = {
        "Content-Type": "application/json",
        "X-API-Key": "your-api-key-here"
    }
    payload = {
        "model": "meta-llama/Llama-3.1-8B-Instruct",
        "messages": [
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": "Hello! How are you?"}
        ],
        "max_tokens": 100,
        "temperature": 0.7
    }

    async with aiohttp.ClientSession() as session:
        async with session.post(url, headers=headers, json=payload) as response:
            result = await response.json()
            print(result["choices"][0]["message"]["content"])

# Run the async function
asyncio.run(call_llm_chute())
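The example above assumes the request succeeds. In practice it is worth checking the HTTP status before parsing the body; a minimal sketch using aiohttp's built-in raise_for_status:

async with session.post(url, headers=headers, json=payload) as response:
    # Raises aiohttp.ClientResponseError on 4xx/5xx responses instead of
    # failing later on an unexpected JSON body.
    response.raise_for_status()
    result = await response.json()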
Streaming Response
import aiohttp
import asyncio
import json

async def stream_llm_response():
    url = "https://myuser-my-llm.chutes.ai/v1/chat/completions"
    headers = {
        "Content-Type": "application/json",
        "X-API-Key": "your-api-key-here"
    }
    payload = {
        "model": "meta-llama/Llama-3.1-8B-Instruct",
        "messages": [
            {"role": "user", "content": "Write a short story about AI"}
        ],
        "stream": True,
        "max_tokens": 500
    }

    async with aiohttp.ClientSession() as session:
        async with session.post(url, headers=headers, json=payload) as response:
            # The body is a server-sent event (SSE) stream; each event
            # arrives as a line of the form "data: {json}".
            async for line in response.content:
                if line:
                    line_str = line.decode('utf-8').strip()
                    if line_str.startswith("data: "):
                        data = line_str[6:]  # strip the "data: " prefix
                        if data != "[DONE]":  # "[DONE]" marks the end of the stream
                            try:
                                chunk = json.loads(data)
                                content = chunk["choices"][0]["delta"].get("content", "")
                                print(content, end="", flush=True)
                            except json.JSONDecodeError:
                                pass  # ignore keep-alive or partial lines

asyncio.run(stream_llm_response())
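If you consume streams in more than one place, the parsing loop above can be factored into a reusable async generator. This is a sketch built on the same assumptions as the example above (SSE lines prefixed with "data: ", a "[DONE]" sentinel):

import aiohttp
import json

async def stream_tokens(url, headers, payload):
    """Yield content deltas from a chat-completions SSE stream."""
    async with aiohttp.ClientSession() as session:
        async with session.post(url, headers=headers, json=payload) as response:
            response.raise_for_status()
            async for line in response.content:
                line_str = line.decode("utf-8").strip()
                if not line_str.startswith("data: "):
                    continue
                data = line_str[6:]
                if data == "[DONE]":
                    return
                try:
                    chunk = json.loads(data)
                except json.JSONDecodeError:
                    continue  # skip keep-alives or partial lines
                content = chunk["choices"][0]["delta"].get("content")
                if content:
                    yield content

# Usage:
#   async for token in stream_tokens(url, headers, payload):
#       print(token, end="", flush=True)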
Image Generation
import aiohttp
import asyncio
import base64

async def generate_image():
    url = "https://myuser-my-diffusion.chutes.ai/v1/images/generations"
    headers = {
        "Content-Type": "application/json",
        "X-API-Key": "your-api-key-here"
    }
    payload = {
        "prompt": "A beautiful sunset over mountains, oil painting style",
        "n": 1,
        "size": "1024x1024",
        "response_format": "b64_json"
    }

    async with aiohttp.ClientSession() as session:
        async with session.post(url, headers=headers, json=payload) as response:
            result = await response.json()

            # Save the image
            image_data = base64.b64decode(result["data"][0]["b64_json"])
            with open("generated_image.png", "wb") as f:
                f.write(image_data)
            print("Image saved as generated_image.png")

asyncio.run(generate_image())
TypeScript Example
Tip: For TypeScript projects, consider using the Vercel AI SDK Integration for a more streamlined developer experience with built-in streaming, tool calling, and type safety.