import os
from typing import List

from pydantic import Field, ValidationError, field_validator
import instructor
import openai

from atomic_agents import AtomicAgent, AgentConfig, BaseIOSchema
from atomic_agents.context import ChatHistory


class ValidatedInputSchema(BaseIOSchema):
    """Input schema with validation rules."""

    query: str = Field(..., description="User query", min_length=1, max_length=1000)
    max_results: int = Field(default=10, ge=1, le=100, description="Maximum results to return")

    @field_validator('query')
    @classmethod
    def query_not_empty(cls, v: str) -> str:
        # Strip once, reject whitespace-only input, and hand back the
        # stripped text so the stored value carries no surrounding whitespace.
        stripped = v.strip()
        if not stripped:
            raise ValueError("Query cannot be empty or whitespace only")
        return stripped


class ValidatedOutputSchema(BaseIOSchema):
    """Output schema with validation."""

    answer: str = Field(..., description="The response")
    confidence: float = Field(..., ge=0.0, le=1.0, description="Confidence score 0-1")
    sources: List[str] = Field(default_factory=list, description="Source references")


# Initialize client and agent
client = instructor.from_openai(openai.OpenAI())
agent = AtomicAgent[ValidatedInputSchema, ValidatedOutputSchema](
    config=AgentConfig(
        client=client,
        model="gpt-5-mini",
        history=ChatHistory()
    )
)

# Handle validation errors.
# Only the schema construction sits inside the ``try``: that is the statement
# that raises for bad input, and keeping the agent call out of it means an
# unrelated ValueError raised while running the agent is not misreported as a
# validation problem.
try:
    user_input = ValidatedInputSchema(query="", max_results=5)
except ValidationError as e:
    print(f"Validation error: {e}")
else:
    response = agent.run(user_input)
from datetime import datetime
from typing import Optional

from pydantic import Field, field_validator, model_validator

from atomic_agents import BaseIOSchema

# Accepted category values; input is lower-cased before the membership test.
VALID_CATEGORIES = ['technology', 'science', 'business', 'health']


class SearchInputSchema(BaseIOSchema):
    """Search input with complex validation."""

    query: str = Field(..., description="Search query")
    category: Optional[str] = Field(None, description="Category filter")
    date_from: Optional[str] = Field(None, description="Start date YYYY-MM-DD")
    date_to: Optional[str] = Field(None, description="End date YYYY-MM-DD")

    @field_validator('category')
    @classmethod
    def validate_category(cls, v: Optional[str]) -> Optional[str]:
        # None means "no filter"; anything else must be a known category
        # (case-insensitive) and is stored lower-cased.
        if v is None:
            return None
        normalized = v.lower()
        if normalized not in VALID_CATEGORIES:
            raise ValueError(f"Category must be one of: {VALID_CATEGORIES}")
        return normalized

    @field_validator('date_from', 'date_to')
    @classmethod
    def validate_date_format(cls, v: Optional[str]) -> Optional[str]:
        # Parse the date so malformed values are rejected, then store the
        # zero-padded ISO form. The range check below compares the stored
        # strings, which is only meaningful once both are normalized.
        if v is None:
            return None
        try:
            parsed = datetime.strptime(v, "%Y-%m-%d")
        except ValueError as exc:
            raise ValueError("Dates must use the YYYY-MM-DD format") from exc
        return parsed.date().isoformat()

    @model_validator(mode='after')
    def validate_dates(self):
        # Runs after the field validators, so both dates (when present) are
        # normalized ISO strings and lexicographic order equals date order.
        # Equal dates are allowed (single-day range).
        if self.date_from and self.date_to and self.date_from > self.date_to:
            raise ValueError("date_from must be on or before date_to")
        return self
# --- Basic Retry Pattern ---------------------------------------------------
import os
import time
from typing import Optional

import instructor
import openai
from openai import APIError, RateLimitError, APIConnectionError

from atomic_agents import AtomicAgent, AgentConfig, BasicChatInputSchema, BasicChatOutputSchema
from atomic_agents.context import ChatHistory


def create_agent_with_retry(
    max_retries: int = 3,
    retry_delay: float = 1.0
) -> AtomicAgent:
    """Create an agent with retry configuration.

    Args:
        max_retries: Forwarded to the OpenAI client as its ``max_retries``
            option, which controls the SDK's own automatic retries.
        retry_delay: Accepted for signature compatibility. The client
            constructor has no matching option; pass the delay to
            ``run_with_retry`` instead.

    Returns:
        An agent using the basic chat input/output schemas.
    """
    client = instructor.from_openai(openai.OpenAI(max_retries=max_retries))

    return AtomicAgent[BasicChatInputSchema, BasicChatOutputSchema](
        config=AgentConfig(
            client=client,
            model="gpt-5-mini",
            history=ChatHistory(),
            # NOTE(review): some model families reject `max_tokens` and/or a
            # custom `temperature`; confirm these against the provider docs
            # for the configured model.
            model_api_parameters={
                "max_tokens": 1000,
                "temperature": 0.7
            }
        )
    )


def run_with_retry(
    agent: AtomicAgent,
    input_data: BasicChatInputSchema,
    max_retries: int = 3,
    retry_delay: float = 1.0
) -> Optional[BasicChatOutputSchema]:
    """Run agent with automatic retry on transient failures.

    Rate-limit errors back off exponentially (``retry_delay * 2**attempt``);
    connection errors and 5xx responses wait a flat ``retry_delay``.

    Args:
        agent: Agent whose ``run`` method is invoked.
        input_data: Input passed to ``agent.run``.
        max_retries: Total number of attempts.
        retry_delay: Base delay in seconds between attempts.

    Returns:
        The agent response, or ``None`` once every attempt has failed.

    Raises:
        APIError: For API errors that are not server errors (no status code,
            or a status below 500); these are not retried.
    """
    last_error = None

    for attempt in range(max_retries):
        try:
            return agent.run(input_data)

        except RateLimitError as e:
            last_error = e
            wait_time = retry_delay * (2 ** attempt)  # Exponential backoff
            notice = f"Rate limited. Waiting {wait_time}s before retry {attempt + 1}/{max_retries}"

        except APIConnectionError as e:
            last_error = e
            wait_time = retry_delay
            notice = f"Connection error. Retry {attempt + 1}/{max_retries}"

        except APIError as e:
            # Only status errors carry `status_code`; read it defensively so
            # a plain APIError is re-raised as itself rather than being
            # masked by an AttributeError.
            status_code = getattr(e, "status_code", None)
            if not status_code or status_code < 500:
                raise  # Don't retry client errors (4xx)
            last_error = e
            wait_time = retry_delay
            notice = f"Server error. Retry {attempt + 1}/{max_retries}"

        # Nothing left to retry: skip the pointless final sleep.
        if attempt + 1 >= max_retries:
            break
        print(notice)
        time.sleep(wait_time)

    print(f"All retries failed. Last error: {last_error}")
    return None


# Usage
agent = create_agent_with_retry()
user_input = BasicChatInputSchema(chat_message="Explain quantum computing")
response = run_with_retry(agent, user_input)

if response:
    print(f"Response: {response.chat_message}")
else:
    print("Failed to get response after retries")


# --- Error Logging Hook ----------------------------------------------------
import os
import logging
import time
from datetime import datetime
from typing import Any, Optional

import instructor
import openai

from atomic_agents import AtomicAgent, AgentConfig, BasicChatInputSchema, BasicChatOutputSchema
from atomic_agents.context import ChatHistory

# Configure logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)


def on_error_hook(error: Exception, context: dict) -> None:
    """Hook called when an error occurs during agent execution.

    Args:
        error: The exception that was reported.
        context: Extra details to log alongside the error.
    """
    logger.error("Agent error: %s: %s", type(error).__name__, error)
    logger.error("Context: %s", context)


def on_completion_hook(response: Any, duration_ms: float) -> None:
    """Hook called on successful completion.

    Args:
        response: The completion response (not inspected here).
        duration_ms: Elapsed time of the request in milliseconds.
    """
    logger.info("Agent completed in %.2fms", duration_ms)


# Create agent with hooks using Instructor's hook system
client = instructor.from_openai(openai.OpenAI())

# Start time of the request currently in flight, used to derive the duration
# handed to on_completion_hook. A single shared slot: adequate for this
# sequential example only.
_request_timing: dict = {}


def _mark_request_start(*args: Any, **kwargs: Any) -> None:
    """Record when the completion request was issued."""
    _request_timing["started_at"] = time.perf_counter()


def _report_completion(response: Any) -> None:
    """Adapt the single-argument response hook to on_completion_hook."""
    started_at = _request_timing.pop("started_at", None)
    elapsed_ms = 0.0 if started_at is None else (time.perf_counter() - started_at) * 1000
    on_completion_hook(response, elapsed_ms)


# Register hooks with the instructor client. Hook names are namespaced
# ("completion:kwargs", "completion:response", "completion:error",
# "parse:error"); error handlers receive just the exception, so the context
# dict is supplied here.
client.on("completion:kwargs", _mark_request_start)
client.on("completion:response", _report_completion)
client.on("completion:error", lambda error: on_error_hook(error, {"stage": "completion"}))
client.on("parse:error", lambda error: on_error_hook(error, {"stage": "parse"}))

agent = AtomicAgent[BasicChatInputSchema, BasicChatOutputSchema](
    config=AgentConfig(
        client=client,
        model="gpt-5-mini",
        history=ChatHistory()
    )
)


# --- Comprehensive Error Handler -------------------------------------------
import os
from typing import Callable, Optional, TypeVar
from functools import wraps

import instructor
import openai
from pydantic import ValidationError

from atomic_agents import (
    AtomicAgent,
    AgentConfig,
    BaseIOSchema,
    BasicChatInputSchema,
    BasicChatOutputSchema,
)

T = TypeVar('T', bound=BaseIOSchema)


class AgentErrorHandler:
    """Centralized error handler for Atomic Agents.

    Each error category is routed to a callback; categories without a
    caller-supplied callback use a default that prints a summary.
    """

    def __init__(
        self,
        on_validation_error: Optional[Callable[[ValidationError], None]] = None,
        on_api_error: Optional[Callable[[Exception], None]] = None,
        on_unknown_error: Optional[Callable[[Exception], None]] = None
    ):
        """Store the callbacks, substituting the printing defaults for None."""
        self.on_validation_error = on_validation_error or self._default_validation_handler
        self.on_api_error = on_api_error or self._default_api_handler
        self.on_unknown_error = on_unknown_error or self._default_unknown_handler

    def _default_validation_handler(self, error: ValidationError) -> None:
        """Print the error count followed by one line per validation error."""
        print(f"Validation failed: {error.error_count()} errors")
        for err in error.errors():
            print(f"  - {err['loc']}: {err['msg']}")

    def _default_api_handler(self, error: Exception) -> None:
        """Print the API error's type and message."""
        print(f"API error: {type(error).__name__}: {error}")

    def _default_unknown_handler(self, error: Exception) -> None:
        """Print the unexpected error's type and message."""
        print(f"Unknown error: {type(error).__name__}: {error}")

    def wrap(self, func: Callable) -> Callable:
        """Decorator to wrap agent calls with error handling.

        The wrapped function returns ``func``'s result, or ``None`` after an
        exception has been passed to the matching callback.
        """
        @wraps(func)
        def wrapper(*args, **kwargs):
            try:
                return func(*args, **kwargs)
            except ValidationError as e:
                self.on_validation_error(e)
                return None
            # APIConnectionError is a subclass of APIError, so one clause
            # covers both.
            except openai.APIError as e:
                self.on_api_error(e)
                return None
            except Exception as e:
                # Deliberate catch-all: this is the outermost boundary and
                # the error is reported through the callback.
                self.on_unknown_error(e)
                return None
        return wrapper


# Usage
error_handler = AgentErrorHandler()


@error_handler.wrap
def ask_agent(agent: AtomicAgent, question: str):
    """Send ``question`` to ``agent`` as a basic chat message."""
    return agent.run(BasicChatInputSchema(chat_message=question))


# Create and use agent
client = instructor.from_openai(openai.OpenAI())
agent = AtomicAgent[BasicChatInputSchema, BasicChatOutputSchema](
    config=AgentConfig(
        client=client,
        model="gpt-5-mini"
    )
)

response = ask_agent(agent, "What is machine learning?")
import os
from typing import Optional, List

import instructor
import openai

from atomic_agents import AtomicAgent, AgentConfig, BasicChatInputSchema, BasicChatOutputSchema
from atomic_agents.context import ChatHistory


class FallbackAgentChain:
    """Ordered group of agents; each is tried in turn until one succeeds."""

    def __init__(self, agents: List[AtomicAgent]):
        """Keep the agents in the order they should be attempted."""
        self.agents = agents

    def run(self, input_data: BasicChatInputSchema) -> Optional[BasicChatOutputSchema]:
        """Return the first successful agent response.

        Any exception raised by an agent is printed and the next agent is
        tried. Returns ``None`` when no agent produced a response.
        """
        most_recent_failure = None

        for position, candidate in enumerate(self.agents, start=1):
            try:
                print(f"Trying agent {position}/{len(self.agents)}")
                return candidate.run(input_data)
            except Exception as exc:
                # Best-effort chain: remember the failure and move on.
                most_recent_failure = exc
                print(f"Agent {position} failed: {exc}")

        print(f"All agents failed. Last error: {most_recent_failure}")
        return None


# Create primary and fallback agents with different models/providers
def create_fallback_chain() -> FallbackAgentChain:
    """Build a two-agent chain: "gpt-4o" first, then "gpt-5-mini"."""

    def _chat_agent(model_name: str) -> AtomicAgent:
        # Each agent gets its own client and its own empty history.
        dedicated_client = instructor.from_openai(openai.OpenAI())
        return AtomicAgent[BasicChatInputSchema, BasicChatOutputSchema](
            config=AgentConfig(
                client=dedicated_client,
                model=model_name,
                history=ChatHistory()
            )
        )

    # Primary: gpt-4o
    primary_agent = _chat_agent("gpt-4o")
    # Fallback: gpt-5-mini, used only if the primary raises
    fallback_agent = _chat_agent("gpt-5-mini")

    return FallbackAgentChain([primary_agent, fallback_agent])


# Usage
chain = create_fallback_chain()
response = chain.run(BasicChatInputSchema(chat_message="Explain quantum computing"))

if response:
    print(response.chat_message)
# --- 1. Always Validate Input ----------------------------------------------
from pydantic import Field, field_validator

from atomic_agents import BaseIOSchema

# Lower-case phrases treated as prompt-injection markers. A substring
# blocklist is only a basic heuristic, not a complete defence.
DANGEROUS_PATTERNS = ('ignore previous', 'disregard instructions')


class SafeInputSchema(BaseIOSchema):
    """Input schema with comprehensive validation."""

    message: str = Field(..., min_length=1, max_length=10000)

    @field_validator('message')
    @classmethod
    def sanitize_message(cls, v: str) -> str:
        # Remove potential prompt injection attempts.
        # Lower-case once and collapse whitespace runs so variants such as
        # "Ignore   previous" cannot slip past the blocklist.
        normalized = " ".join(v.lower().split())
        if any(pattern in normalized for pattern in DANGEROUS_PATTERNS):
            raise ValueError("Invalid input detected")
        return v.strip()


# --- 2. Log All Errors -----------------------------------------------------
import logging
from functools import wraps

logger = logging.getLogger(__name__)


def log_errors(func):
    """Decorator to log all errors from agent operations.

    The exception is logged with its traceback and then re-raised unchanged,
    so callers still see the original error.
    """
    # Not every callable has __name__ (e.g. functools.partial); fall back to
    # repr so the logging call itself cannot fail inside the handler.
    func_name = getattr(func, "__name__", repr(func))

    @wraps(func)
    def wrapper(*args, **kwargs):
        try:
            return func(*args, **kwargs)
        except Exception as e:
            # Lazy %-style arguments: the message is only formatted if the
            # record is actually emitted.
            logger.exception("Error in %s: %s", func_name, e)
            raise
    return wrapper


# --- 3. Set Timeouts -------------------------------------------------------
import os

import instructor
import openai

from atomic_agents import AtomicAgent, AgentConfig
from atomic_agents.context import ChatHistory

# Configure timeout at client level
client = instructor.from_openai(
    openai.OpenAI(timeout=30.0)  # 30 second timeout
)

agent = AtomicAgent(
    config=AgentConfig(
        client=client,
        model="gpt-5-mini",
        history=ChatHistory(),
        # NOTE(review): some model families reject `max_tokens`; confirm the
        # parameter name against the provider docs for the configured model.
        model_api_parameters={
            "max_tokens": 500  # Limit response length
        }
    )
)
import time
from typing import Optional, Callable
from dataclasses import dataclass


class CircuitBreakerOpenError(Exception):
    """Raised when a call is rejected because the circuit breaker is open."""


@dataclass
class CircuitBreaker:
    """Simple circuit breaker for agent calls.

    States:
        closed:    calls pass through; failures are counted.
        open:      calls are rejected until ``reset_timeout`` seconds have
                   passed since the last failure.
        half-open: one trial call is let through; success closes the
                   breaker, failure re-opens it.
    """

    failure_threshold: int = 5   # failures (without a success in between) that open the breaker
    reset_timeout: float = 60.0  # seconds to stay open before allowing a trial call
    _failure_count: int = 0
    _last_failure_time: float = 0  # time.monotonic() reading at the last failure
    _state: str = "closed"  # closed, open, half-open

    def call(self, func: Callable, *args, **kwargs):
        """Execute function with circuit breaker protection.

        Args:
            func: Callable to invoke.
            *args: Positional arguments forwarded to ``func``.
            **kwargs: Keyword arguments forwarded to ``func``.

        Returns:
            Whatever ``func`` returns.

        Raises:
            CircuitBreakerOpenError: If the breaker is open and the reset
                timeout has not elapsed; ``func`` is not called.
            Exception: Any exception raised by ``func`` is recorded as a
                failure and re-raised unchanged.
        """
        if self._state == "open":
            # Monotonic clock: the interval is immune to wall-clock changes.
            elapsed = time.monotonic() - self._last_failure_time
            if elapsed <= self.reset_timeout:
                raise CircuitBreakerOpenError("Circuit breaker is open")
            self._state = "half-open"

        try:
            result = func(*args, **kwargs)
        except Exception:
            self._on_failure()
            raise

        self._on_success()
        return result

    def _on_success(self) -> None:
        """Reset the failure counter and close the breaker."""
        self._failure_count = 0
        self._state = "closed"

    def _on_failure(self) -> None:
        """Record a failure and open the breaker when warranted."""
        self._failure_count += 1
        self._last_failure_time = time.monotonic()
        # A failed trial call re-opens immediately, whatever the counter says.
        if self._state == "half-open" or self._failure_count >= self.failure_threshold:
            self._state = "open"


# Usage
circuit_breaker = CircuitBreaker(failure_threshold=3, reset_timeout=30.0)


def safe_agent_call(agent, input_data):
    """Run ``agent.run(input_data)`` through the shared circuit breaker."""
    return circuit_breaker.call(agent.run, input_data)