Error Handling Guide
This guide covers best practices for handling errors in Atomic Agents applications, including validation errors, API failures, and custom error handling patterns.
Overview
Atomic Agents provides multiple layers of error handling:
1. Schema Validation - Pydantic validates input/output at runtime
2. API Error Handling - Handle LLM provider errors gracefully
3. Hook System - Monitor and respond to errors via hooks
4. Custom Exception Handling - Build robust error recovery patterns
Schema Validation Errors
Pydantic schemas catch invalid data before it reaches the LLM.
Basic Validation
import os
from typing import List
from pydantic import Field, field_validator
import instructor
import openai
from atomic_agents import AtomicAgent, AgentConfig, BaseIOSchema
from atomic_agents.context import ChatHistory
class ValidatedInputSchema(BaseIOSchema):
    """Input schema with validation rules."""

    # Free-text query, bounded to 1-1000 characters.
    query: str = Field(
        ...,
        description="User query",
        min_length=1,
        max_length=1000,
    )
    # Result cap, constrained to the inclusive range 1-100 (defaults to 10).
    max_results: int = Field(
        default=10,
        ge=1,
        le=100,
        description="Maximum results to return",
    )

    @field_validator('query')
    @classmethod
    def query_not_empty(cls, v: str) -> str:
        """Return the query without surrounding whitespace.

        Raises:
            ValueError: If nothing is left once whitespace is stripped.
        """
        stripped = v.strip()
        if stripped:
            return stripped
        raise ValueError("Query cannot be empty or whitespace only")
class ValidatedOutputSchema(BaseIOSchema):
    """Output schema with validation."""

    # Required answer text.
    answer: str = Field(..., description="The response")
    # Required score, constrained to the closed interval [0.0, 1.0].
    confidence: float = Field(
        ...,
        ge=0.0,
        le=1.0,
        description="Confidence score 0-1",
    )
    # Optional references; defaults to a new empty list.
    sources: List[str] = Field(
        default_factory=list,
        description="Source references",
    )
# Wrap the OpenAI client with Instructor and build the agent around it.
client = instructor.from_openai(openai.OpenAI())
agent = AtomicAgent[ValidatedInputSchema, ValidatedOutputSchema](
    config=AgentConfig(
        client=client,
        model="gpt-5-mini",
        history=ChatHistory(),
    ),
)

# Handle validation errors: the empty query breaks the input schema's rules,
# and the failure is caught as a ValueError and reported.
try:
    response = agent.run(ValidatedInputSchema(query="", max_results=5))
except ValueError as validation_error:
    print(f"Validation error: {validation_error}")
Custom Validators
from pydantic import Field, field_validator, model_validator
from typing import Optional
from atomic_agents import BaseIOSchema
class SearchInputSchema(BaseIOSchema):
    """Search input with complex validation."""

    query: str = Field(..., description="Search query")
    category: Optional[str] = Field(None, description="Category filter")
    date_from: Optional[str] = Field(None, description="Start date YYYY-MM-DD")
    date_to: Optional[str] = Field(None, description="End date YYYY-MM-DD")

    @field_validator('category')
    @classmethod
    def validate_category(cls, v: Optional[str]) -> Optional[str]:
        """Lower-case the category and reject values outside the allowed set.

        Raises:
            ValueError: If a category is given that is not in the allowed list.
        """
        valid_categories = ['technology', 'science', 'business', 'health']
        if v is None:
            return None
        normalized = v.lower()
        if normalized not in valid_categories:
            raise ValueError(f"Category must be one of: {valid_categories}")
        return normalized

    @field_validator('date_from', 'date_to')
    @classmethod
    def validate_date_format(cls, v: Optional[str]) -> Optional[str]:
        """Require supplied dates to be real calendar dates in YYYY-MM-DD form.

        The range check in validate_dates compares the raw strings, which is
        only a correct date ordering when both are zero-padded ISO dates.
        Missing or empty values are passed through untouched.

        Raises:
            ValueError: If a non-empty value is not a canonical YYYY-MM-DD date.
        """
        from datetime import date

        if not v:
            return v
        try:
            parsed = date.fromisoformat(v)
        except ValueError:
            raise ValueError("Date must be a valid date in YYYY-MM-DD format") from None
        # Round-trip check rejects alternative ISO spellings (e.g. "20240105")
        # that newer Python versions parse but that would sort incorrectly.
        if parsed.isoformat() != v:
            raise ValueError("Date must be a valid date in YYYY-MM-DD format")
        return v

    @model_validator(mode='after')
    def validate_dates(self):
        """Reject ranges whose start date is later than the end date.

        Raises:
            ValueError: If both dates are set and date_from is after date_to.
        """
        # Canonical YYYY-MM-DD strings order the same way as the dates they
        # represent, so a plain string comparison is sufficient here.
        if self.date_from and self.date_to and self.date_from > self.date_to:
            raise ValueError("date_from must be before date_to")
        return self
API Error Handling
Handle LLM provider errors gracefully with retry logic.
Basic Retry Pattern
import os
import time
from typing import Optional
import instructor
import openai
from openai import APIError, RateLimitError, APIConnectionError
from atomic_agents import AtomicAgent, AgentConfig, BasicChatInputSchema, BasicChatOutputSchema
from atomic_agents.context import ChatHistory
def create_agent_with_retry(
    max_retries: int = 3,
    retry_delay: float = 1.0
) -> AtomicAgent:
    """Build a basic chat agent backed by an Instructor-wrapped OpenAI client.

    Args:
        max_retries: Accepted for API symmetry; not used by this function.
        retry_delay: Accepted for API symmetry; not used by this function.

    Returns:
        An agent using BasicChatInputSchema / BasicChatOutputSchema with a
        fresh ChatHistory.
    """
    llm_client = instructor.from_openai(openai.OpenAI())
    agent_config = AgentConfig(
        client=llm_client,
        model="gpt-5-mini",
        history=ChatHistory(),
        model_api_parameters={
            "max_tokens": 1000,
            "temperature": 0.7,
        },
    )
    return AtomicAgent[BasicChatInputSchema, BasicChatOutputSchema](config=agent_config)
def run_with_retry(
    agent: AtomicAgent,
    input_data: BasicChatInputSchema,
    max_retries: int = 3,
    retry_delay: float = 1.0
) -> Optional[BasicChatOutputSchema]:
    """Run agent with automatic retry on transient failures.

    Rate-limit errors back off exponentially; connection errors and server
    errors (status >= 500) wait a fixed retry_delay. No wait is performed
    after the final attempt, since nothing follows it.

    Args:
        agent: Agent whose run() method is invoked.
        input_data: Input passed to agent.run().
        max_retries: Total number of attempts.
        retry_delay: Base delay in seconds between attempts.

    Returns:
        The agent's response, or None when every attempt failed.

    Raises:
        APIError: Re-raised immediately when the error is not a server error
            (including API errors that carry no status code at all).
    """
    last_error = None
    for attempt in range(max_retries):
        try:
            return agent.run(input_data)
        except RateLimitError as e:
            last_error = e
            wait_time = retry_delay * (2 ** attempt)  # Exponential backoff
            notice = f"Rate limited. Waiting {wait_time}s before retry {attempt + 1}/{max_retries}"
        except APIConnectionError as e:
            last_error = e
            wait_time = retry_delay
            notice = f"Connection error. Retry {attempt + 1}/{max_retries}"
        except APIError as e:
            # Only status-bearing subclasses define status_code; reading it
            # directly on a plain APIError would raise AttributeError.
            status_code = getattr(e, "status_code", None)
            if not (status_code and status_code >= 500):
                raise  # Don't retry client errors (4xx)
            last_error = e
            wait_time = retry_delay
            notice = f"Server error. Retry {attempt + 1}/{max_retries}"

        # Out of attempts: skip the announcement and the pointless sleep.
        if attempt + 1 >= max_retries:
            break
        print(notice)
        time.sleep(wait_time)

    print(f"All retries failed. Last error: {last_error}")
    return None
# Usage: build the agent, send one message, and report the outcome.
agent = create_agent_with_retry()
user_input = BasicChatInputSchema(chat_message="Explain quantum computing")
response = run_with_retry(agent, user_input)

# run_with_retry returns None once every attempt has failed.
if not response:
    print("Failed to get response after retries")
else:
    print(f"Response: {response.chat_message}")
Using the Hook System for Error Handling
The Atomic Agents hook system provides powerful error monitoring capabilities.
Error Logging Hook
import os
import logging
from datetime import datetime
from typing import Any, Optional
import instructor
import openai
from atomic_agents import AtomicAgent, AgentConfig, BasicChatInputSchema, BasicChatOutputSchema
from atomic_agents.context import ChatHistory
# Configure logging: INFO and above, each record prefixed with its timestamp
# and level name.
logging.basicConfig(
    format="%(asctime)s - %(levelname)s - %(message)s",
    level=logging.INFO,
)
# Module-level logger used by the hook functions.
logger = logging.getLogger(__name__)
def on_error_hook(error: Exception, context: dict) -> None:
    """Log a failed agent run at ERROR level.

    Two records are written: the exception's class name and message, then
    the supplied context dictionary.
    """
    error_kind = type(error).__name__
    logger.error(f"Agent error: {error_kind}: {error}")
    logger.error(f"Context: {context}")
def on_completion_hook(response: Any, duration_ms: float) -> None:
    """Log at INFO level how long a successful run took.

    The response argument is accepted but not inspected.
    """
    elapsed = f"{duration_ms:.2f}"
    logger.info(f"Agent completed in {elapsed}ms")
# Create agent with hooks using Instructor's hook system
client = instructor.from_openai(openai.OpenAI())

# Instructor hook events are namespaced ("completion:kwargs",
# "completion:response", "completion:error", "parse:error", ...); a bare
# "completion" is not one of them. The adapters below translate what
# Instructor passes to each event into the signatures of on_completion_hook
# and on_error_hook.
_request_started_at: dict = {}


def _mark_request_start(*args, **kwargs) -> None:
    """Remember when the current completion request was issued."""
    _request_started_at["t"] = datetime.now()


def _report_completion(response, *args, **kwargs) -> None:
    """Forward a successful response together with its elapsed time."""
    started = _request_started_at.pop("t", None)
    # Falls back to 0.0 if no start time was recorded for this response.
    duration_ms = (
        (datetime.now() - started).total_seconds() * 1000 if started is not None else 0.0
    )
    on_completion_hook(response, duration_ms)


def _report_error(event: str):
    """Build a handler that forwards errors from the given hook event."""

    def handler(error, *args, **kwargs) -> None:
        on_error_hook(error, {"event": event})

    return handler


# Register hooks with the instructor client
# NOTE: timing uses a single shared slot, so it assumes one request in flight.
client.on("completion:kwargs", _mark_request_start)
client.on("completion:response", _report_completion)
client.on("completion:error", _report_error("completion:error"))
client.on("parse:error", _report_error("parse:error"))

agent = AtomicAgent[BasicChatInputSchema, BasicChatOutputSchema](
    config=AgentConfig(
        client=client,
        model="gpt-5-mini",
        history=ChatHistory()
    )
)
Comprehensive Error Handler
import os
from typing import Callable, Optional, TypeVar
from functools import wraps
import instructor
import openai
from pydantic import ValidationError
from atomic_agents import AtomicAgent, AgentConfig, BaseIOSchema
T = TypeVar('T', bound=BaseIOSchema)


class AgentErrorHandler:
    """Centralized error handler for Atomic Agents.

    Errors are sorted into three categories (validation, API, anything else),
    each with its own callback. A callback that is not supplied falls back to
    a default that prints a short description of the error.
    """

    def __init__(
        self,
        on_validation_error: Optional[Callable[[ValidationError], None]] = None,
        on_api_error: Optional[Callable[[Exception], None]] = None,
        on_unknown_error: Optional[Callable[[Exception], None]] = None
    ):
        """Store the supplied callbacks, substituting defaults for missing ones."""
        self.on_validation_error = (
            on_validation_error if on_validation_error else self._default_validation_handler
        )
        self.on_api_error = on_api_error if on_api_error else self._default_api_handler
        self.on_unknown_error = (
            on_unknown_error if on_unknown_error else self._default_unknown_handler
        )

    def _default_validation_handler(self, error: ValidationError) -> None:
        """Print the error count followed by one line per validation problem."""
        print(f"Validation failed: {error.error_count()} errors")
        for detail in error.errors():
            location = detail['loc']
            message = detail['msg']
            print(f" - {location}: {message}")

    def _default_api_handler(self, error: Exception) -> None:
        """Print the API error's class name and message."""
        kind = type(error).__name__
        print(f"API error: {kind}: {error}")

    def _default_unknown_handler(self, error: Exception) -> None:
        """Print the class name and message of an uncategorized error."""
        kind = type(error).__name__
        print(f"Unknown error: {kind}: {error}")

    def wrap(self, func: Callable) -> Callable:
        """Decorator to wrap agent calls with error handling.

        The wrapped function returns func's result, or None when any
        exception was raised and routed to the matching callback.
        """

        @wraps(func)
        def guarded(*args, **kwargs):
            try:
                return func(*args, **kwargs)
            except ValidationError as exc:
                self.on_validation_error(exc)
            except (openai.APIError, openai.APIConnectionError) as exc:
                self.on_api_error(exc)
            except Exception as exc:
                self.on_unknown_error(exc)
            # Reached only after one of the handlers above has run.
            return None

        return guarded
# Usage
error_handler = AgentErrorHandler()


@error_handler.wrap
def ask_agent(agent: AtomicAgent, question: str):
    """Send question to the agent as a chat message and return its reply."""
    # Imported inside the function rather than at module level.
    from atomic_agents import BasicChatInputSchema

    chat_input = BasicChatInputSchema(chat_message=question)
    return agent.run(chat_input)


# Create and use agent
client = instructor.from_openai(openai.OpenAI())
agent = AtomicAgent(
    config=AgentConfig(
        client=client,
        model="gpt-5-mini",
    ),
)
response = ask_agent(agent, "What is machine learning?")
Graceful Degradation
Implement fallback behavior when the primary agent fails.
Fallback Agent Pattern
import os
from typing import Optional, List
import instructor
import openai
from atomic_agents import AtomicAgent, AgentConfig, BasicChatInputSchema, BasicChatOutputSchema
from atomic_agents.context import ChatHistory
class FallbackAgentChain:
    """Chain of agents with automatic fallback on failure."""

    def __init__(self, agents: List[AtomicAgent]):
        """Store the agents; run() tries them in list order."""
        self.agents = agents

    def run(self, input_data: BasicChatInputSchema) -> Optional[BasicChatOutputSchema]:
        """Try each agent in order until one succeeds.

        Returns:
            The first successful response, or None when every agent raised.
        """
        last_error = None
        for position, candidate in enumerate(self.agents, start=1):
            try:
                print(f"Trying agent {position}/{len(self.agents)}")
                return candidate.run(input_data)
            except Exception as exc:
                # Any failure moves on to the next agent in the chain.
                last_error = exc
                print(f"Agent {position} failed: {exc}")
        print(f"All agents failed. Last error: {last_error}")
        return None
# Create primary and fallback agents with different models
def create_fallback_chain() -> FallbackAgentChain:
    """Build a two-agent chain: "gpt-4o" first, "gpt-5-mini" as the fallback.

    Each agent gets its own Instructor-wrapped OpenAI client and its own
    ChatHistory.
    """
    # Listed in the order the chain should try them.
    model_order = ("gpt-4o", "gpt-5-mini")
    agents = [
        AtomicAgent[BasicChatInputSchema, BasicChatOutputSchema](
            config=AgentConfig(
                client=instructor.from_openai(openai.OpenAI()),
                model=model_name,
                history=ChatHistory(),
            )
        )
        for model_name in model_order
    ]
    return FallbackAgentChain(agents)
# Usage
chain = create_fallback_chain()
response = chain.run(
    BasicChatInputSchema(chat_message="Explain quantum computing")
)
# chain.run returns None when every agent failed, so only print a real answer.
if response:
    print(response.chat_message)
Best Practices
1. Always Validate Input
from pydantic import Field, field_validator
from atomic_agents import BaseIOSchema
class SafeInputSchema(BaseIOSchema):
    """Input schema with comprehensive validation."""

    message: str = Field(..., min_length=1, max_length=10000)

    @field_validator('message')
    @classmethod
    def sanitize_message(cls, v: str) -> str:
        """Reject messages containing known injection phrases, then strip.

        Matching is case-insensitive. The message is rejected, not rewritten.

        Raises:
            ValueError: If any listed phrase occurs in the message.
        """
        dangerous_patterns = ['ignore previous', 'disregard instructions']
        lowered_message = v.lower()
        if any(pattern.lower() in lowered_message for pattern in dangerous_patterns):
            raise ValueError("Invalid input detected")
        return v.strip()
2. Log All Errors
import logging
from functools import wraps
logger = logging.getLogger(__name__)


def log_errors(func):
    """Decorator to log all errors from agent operations.

    The wrapped function behaves like func, except that any Exception is
    logged with its traceback before being re-raised unchanged.
    """

    @wraps(func)
    def guarded(*args, **kwargs):
        try:
            result = func(*args, **kwargs)
        except Exception as exc:
            logger.exception(f"Error in {func.__name__}: {exc}")
            raise
        return result

    return guarded
3. Set Timeouts
import os
import instructor
import openai
from atomic_agents import AtomicAgent, AgentConfig
from atomic_agents.context import ChatHistory
# Configure timeout at client level: the 30-second timeout is set on the
# OpenAI client that Instructor wraps.
client = instructor.from_openai(openai.OpenAI(timeout=30.0))

agent = AtomicAgent(
    config=AgentConfig(
        client=client,
        model="gpt-5-mini",
        history=ChatHistory(),
        # Limit response length
        model_api_parameters={"max_tokens": 500},
    ),
)
4. Implement Circuit Breaker
import time
from typing import Optional, Callable
from dataclasses import dataclass
@dataclass
class CircuitBreaker:
    """Simple circuit breaker for agent calls.

    The breaker starts "closed". After failure_threshold consecutive failures
    it becomes "open" and rejects calls until reset_timeout seconds have
    passed since the last failure. The next call then runs as a "half-open"
    trial: success closes the breaker and clears the failure count; failure
    reopens it immediately, because the count is only cleared on success.
    """

    failure_threshold: int = 5   # consecutive failures that open the breaker
    reset_timeout: float = 60.0  # seconds to stay open before a trial call
    _failure_count: int = 0
    _last_failure_time: float = 0  # time.monotonic() reading of the last failure
    _state: str = "closed"  # closed, open, half-open

    def call(self, func: Callable, *args, **kwargs):
        """Execute function with circuit breaker protection.

        Args:
            func: Callable to invoke.
            *args: Positional arguments forwarded to func.
            **kwargs: Keyword arguments forwarded to func.

        Returns:
            Whatever func returns.

        Raises:
            Exception: "Circuit breaker is open" while the breaker is open and
                the reset timeout has not elapsed; otherwise any exception
                raised by func is recorded as a failure and re-raised.
        """
        if self._state == "open":
            # Monotonic clock: elapsed time must not be affected by wall-clock
            # adjustments (NTP corrections, manual changes, DST).
            elapsed = time.monotonic() - self._last_failure_time
            if elapsed <= self.reset_timeout:
                raise Exception("Circuit breaker is open")
            self._state = "half-open"

        try:
            result = func(*args, **kwargs)
        except Exception:
            self._on_failure()
            raise
        # Kept outside the try so only func's own errors count as failures.
        self._on_success()
        return result

    def _on_success(self):
        """Clear the failure count and close the breaker."""
        self._failure_count = 0
        self._state = "closed"

    def _on_failure(self):
        """Record a failure and open the breaker once the threshold is reached."""
        self._failure_count += 1
        self._last_failure_time = time.monotonic()
        if self._failure_count >= self.failure_threshold:
            self._state = "open"
# Usage: one breaker instance is shared by every call made through the helper.
circuit_breaker = CircuitBreaker(failure_threshold=3, reset_timeout=30.0)


def safe_agent_call(agent, input_data):
    """Run agent.run(input_data) under the module-level circuit breaker."""
    guarded_call = circuit_breaker.call
    return guarded_call(agent.run, input_data)
Summary
Key error handling strategies in Atomic Agents:
| Strategy | Use Case | Implementation |
|---|---|---|
| Schema Validation | Prevent invalid inputs | Pydantic validators |
| Retry Logic | Transient failures | Exponential backoff |
| Hook System | Monitoring & logging | Instructor hooks |
| Fallback Chain | High availability | Multiple agents |
| Circuit Breaker | Prevent cascade failures | State machine |
Always combine multiple strategies for robust production applications.