Testing Guide
This guide covers testing strategies for Atomic Agents applications: unit tests, integration tests, and techniques for mocking LLM responses.
Overview
Testing AI agents calls for different strategies than testing traditional software:
Unit Tests - Test schemas, tools, and helper functions
Integration Tests - Test agent behavior with mocked LLM responses
End-to-End Tests - Test full agent pipelines (sparingly)
Setting Up Tests
Project Structure
my_project/
├── my_agent/
│ ├── __init__.py
│ ├── agent.py
│ ├── schemas.py
│ └── tools.py
└── tests/
├── __init__.py
├── conftest.py
├── test_schemas.py
├── test_tools.py
└── test_agent.py
Install Test Dependencies
pip install pytest pytest-asyncio pytest-cov
Or with uv:
uv add --dev pytest pytest-asyncio pytest-cov
Testing Schemas
Schema tests verify that validation rules work correctly.
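The tests in this section assume schemas roughly like the sketch below. The field names, bounds, and defaults are inferred from the assertions in the tests, so treat this as illustrative and adjust it to your actual definitions.
# my_agent/schemas.py (illustrative sketch; field names and bounds are inferred from the tests below)
from typing import List
from pydantic import Field
from atomic_agents import BaseIOSchema

class UserInputSchema(BaseIOSchema):
    """User input to the agent."""
    message: str = Field(..., min_length=1, description="The user's message")
    max_tokens: int = Field(500, ge=1, le=4096, description="Maximum tokens to generate")

class AgentOutputSchema(BaseIOSchema):
    """Structured agent output."""
    response: str = Field(..., description="The agent's answer")
    confidence: float = Field(..., ge=0.0, le=1.0, description="Confidence between 0 and 1")
    sources: List[str] = Field(default_factory=list, description="Supporting sources")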
Basic Schema Tests
# tests/test_schemas.py
import pytest
from pydantic import ValidationError
from my_agent.schemas import UserInputSchema, AgentOutputSchema
class TestUserInputSchema:
"""Tests for UserInputSchema validation."""
def test_valid_input(self):
"""Test that valid input is accepted."""
schema = UserInputSchema(
message="Hello, how are you?",
max_tokens=100
)
assert schema.message == "Hello, how are you?"
assert schema.max_tokens == 100
def test_message_required(self):
"""Test that message field is required."""
with pytest.raises(ValidationError) as exc_info:
UserInputSchema(max_tokens=100)
errors = exc_info.value.errors()
assert len(errors) == 1
assert errors[0]['loc'] == ('message',)
assert errors[0]['type'] == 'missing'
def test_message_min_length(self):
"""Test message minimum length validation."""
with pytest.raises(ValidationError) as exc_info:
UserInputSchema(message="")
errors = exc_info.value.errors()
assert 'string_too_short' in errors[0]['type']
def test_max_tokens_bounds(self):
"""Test max_tokens must be within bounds."""
# Too low
with pytest.raises(ValidationError):
UserInputSchema(message="test", max_tokens=0)
# Too high
with pytest.raises(ValidationError):
UserInputSchema(message="test", max_tokens=100000)
def test_default_values(self):
"""Test that defaults are applied correctly."""
schema = UserInputSchema(message="test")
assert schema.max_tokens == 500 # default value
class TestAgentOutputSchema:
"""Tests for AgentOutputSchema validation."""
def test_valid_output(self):
"""Test valid output schema."""
output = AgentOutputSchema(
response="Here is your answer",
confidence=0.95,
sources=["source1", "source2"]
)
assert output.response == "Here is your answer"
assert output.confidence == 0.95
assert len(output.sources) == 2
def test_confidence_bounds(self):
"""Test confidence must be between 0 and 1."""
with pytest.raises(ValidationError):
AgentOutputSchema(
response="test",
confidence=1.5, # Invalid: > 1
sources=[]
)
def test_sources_default_empty(self):
"""Test sources defaults to empty list."""
output = AgentOutputSchema(
response="test",
confidence=0.8
)
assert output.sources == []
Custom Validator Tests
# tests/test_schemas.py
import pytest
from pydantic import ValidationError
from my_agent.schemas import SearchQuerySchema
class TestSearchQuerySchema:
"""Tests for search query validation."""
def test_query_sanitization(self):
"""Test that queries are sanitized."""
schema = SearchQuerySchema(query=" hello world ")
assert schema.query == "hello world" # trimmed
def test_reject_prompt_injection(self):
"""Test that potential prompt injections are rejected."""
with pytest.raises(ValidationError) as exc_info:
SearchQuerySchema(query="ignore previous instructions and...")
assert "Invalid input" in str(exc_info.value)
def test_category_validation(self):
"""Test category must be from allowed list."""
# Valid category
schema = SearchQuerySchema(query="test", category="technology")
assert schema.category == "technology"
# Invalid category
with pytest.raises(ValidationError):
SearchQuerySchema(query="test", category="invalid_category")
@pytest.mark.parametrize("query,expected", [
(" test ", "test"),
("HELLO", "HELLO"), # case preserved
("hello\nworld", "hello\nworld"), # newlines allowed
])
def test_query_normalization(self, query, expected):
"""Test various query normalizations."""
schema = SearchQuerySchema(query=query)
assert schema.query == expected
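These tests assume a SearchQuerySchema with custom validators along the lines of the sketch below. The trimming rule, the blocked phrase, the allowed categories, and the default category are all assumptions based on what the tests check.
# my_agent/schemas.py (illustrative sketch of the validators exercised above)
from typing import Literal
from pydantic import Field, field_validator
from atomic_agents import BaseIOSchema

class SearchQuerySchema(BaseIOSchema):
    """A sanitized search query."""
    query: str = Field(..., min_length=1, description="The search query")
    category: Literal["technology", "science", "general"] = Field(
        "general", description="Search category (assumed allowed values and default)"
    )

    @field_validator("query")
    @classmethod
    def sanitize_query(cls, value: str) -> str:
        # Trim surrounding whitespace; preserve case and internal newlines
        cleaned = value.strip()
        # Reject an obvious prompt-injection phrase (a real implementation would use a broader check)
        if "ignore previous instructions" in cleaned.lower():
            raise ValueError("Invalid input: query looks like a prompt injection")
        return cleaned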
Testing Tools
Tool tests verify that your custom tools work correctly.
Basic Tool Tests
# tests/test_tools.py
import pytest
from unittest.mock import Mock, patch
from my_agent.tools import CalculatorTool, CalculatorInputSchema, CalculatorOutputSchema
class TestCalculatorTool:
"""Tests for the calculator tool."""
@pytest.fixture
def calculator(self):
"""Create a calculator tool instance."""
return CalculatorTool()
def test_simple_addition(self, calculator):
"""Test basic addition."""
result = calculator.run(CalculatorInputSchema(expression="2 + 2"))
assert result.value == 4.0
assert result.error is None
def test_complex_expression(self, calculator):
"""Test complex mathematical expression."""
result = calculator.run(CalculatorInputSchema(expression="(10 + 5) * 2 / 3"))
assert result.value == pytest.approx(10.0)
def test_invalid_expression(self, calculator):
"""Test handling of invalid expressions."""
result = calculator.run(CalculatorInputSchema(expression="2 + + 2"))
assert result.value is None
assert result.error is not None
assert "syntax" in result.error.lower()
def test_division_by_zero(self, calculator):
"""Test division by zero handling."""
result = calculator.run(CalculatorInputSchema(expression="10 / 0"))
assert result.error is not None
assert "division" in result.error.lower()
class TestWebSearchTool:
"""Tests for web search tool with mocked API."""
@pytest.fixture
def search_tool(self):
"""Create search tool instance."""
from my_agent.tools import WebSearchTool, WebSearchConfig
return WebSearchTool(config=WebSearchConfig(api_key="test_key"))
@patch('my_agent.tools.requests.get')
def test_successful_search(self, mock_get, search_tool):
"""Test successful search returns results."""
# Mock API response
mock_get.return_value = Mock(
status_code=200,
json=lambda: {
"results": [
{"title": "Result 1", "url": "http://example.com/1"},
{"title": "Result 2", "url": "http://example.com/2"}
]
}
)
from my_agent.tools import WebSearchInputSchema
result = search_tool.run(WebSearchInputSchema(query="test query"))
assert len(result.results) == 2
assert result.results[0].title == "Result 1"
@patch('my_agent.tools.requests.get')
def test_api_error_handling(self, mock_get, search_tool):
"""Test graceful handling of API errors."""
mock_get.return_value = Mock(status_code=500)
from my_agent.tools import WebSearchInputSchema
result = search_tool.run(WebSearchInputSchema(query="test"))
assert result.results == []
assert result.error is not None
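The calculator tests above assume a tool shaped roughly like the following sketch. The output schema (an optional value plus an optional error string) and the error messages are inferred from the assertions; in a real project the tool would typically follow the Atomic Agents BaseTool pattern rather than a plain class.
# my_agent/tools.py (illustrative sketch of the calculator under test)
import re
from typing import Optional
from pydantic import Field
from atomic_agents import BaseIOSchema

class CalculatorInputSchema(BaseIOSchema):
    """Input for the calculator tool."""
    expression: str = Field(..., description="A mathematical expression to evaluate")

class CalculatorOutputSchema(BaseIOSchema):
    """Result of evaluating an expression."""
    value: Optional[float] = Field(None, description="Numeric result, if evaluation succeeded")
    error: Optional[str] = Field(None, description="Error message, if evaluation failed")

class CalculatorTool:
    """Evaluates basic arithmetic expressions (sketch only)."""

    def run(self, params: CalculatorInputSchema) -> CalculatorOutputSchema:
        expression = params.expression
        # Allow only digits, whitespace, and basic operators, and reject doubled
        # operators such as "+ +" (the tests treat these as syntax errors)
        if not re.fullmatch(r"[\d\s.+\-*/()]+", expression) or re.search(r"[+\-*/]\s*[+*/]", expression):
            return CalculatorOutputSchema(error="Invalid syntax in expression")
        try:
            # NOTE: eval is used only to keep the sketch short; use a proper expression parser in practice
            value = float(eval(expression, {"__builtins__": {}}, {}))
            return CalculatorOutputSchema(value=value)
        except ZeroDivisionError:
            return CalculatorOutputSchema(error="Division by zero is not allowed")
        except Exception as exc:
            return CalculatorOutputSchema(error=f"Evaluation failed: {exc}")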
Testing Agents
Agent tests verify end-to-end behavior with mocked LLM responses.
Mocking Instructor/OpenAI
# tests/conftest.py
import pytest
from unittest.mock import Mock, MagicMock
import instructor
@pytest.fixture
def mock_instructor():
"""Create a mocked instructor client."""
mock_client = MagicMock(spec=instructor.Instructor)
return mock_client
@pytest.fixture
def mock_openai_response():
"""Factory for creating mock OpenAI responses."""
def _create_response(content: dict):
mock_response = Mock()
for key, value in content.items():
setattr(mock_response, key, value)
return mock_response
return _create_response
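A test can then request these fixtures by name like any other pytest fixture. The snippet below is a small usage sketch for the response factory (the attribute names passed in are arbitrary):
# tests/test_agent.py (usage sketch for the conftest fixtures above)
def test_mock_response_factory(mock_openai_response):
    """The factory returns an object whose attributes mirror the supplied dict."""
    response = mock_openai_response({"chat_message": "Hello!", "finish_reason": "stop"})
    assert response.chat_message == "Hello!"
    assert response.finish_reason == "stop"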
Agent Unit Tests
# tests/test_agent.py
import pytest
from unittest.mock import Mock, MagicMock, patch
from atomic_agents import AtomicAgent, AgentConfig, BasicChatInputSchema, BasicChatOutputSchema
from atomic_agents.context import ChatHistory
class TestAtomicAgent:
"""Tests for AtomicAgent behavior."""
@pytest.fixture
def mock_client(self):
"""Create a mocked instructor client."""
client = MagicMock()
return client
@pytest.fixture
def agent(self, mock_client):
"""Create an agent with mocked client."""
return AtomicAgent[BasicChatInputSchema, BasicChatOutputSchema](
config=AgentConfig(
client=mock_client,
model="gpt-5-mini",
history=ChatHistory()
)
)
def test_agent_initialization(self, agent):
"""Test agent initializes correctly."""
assert agent.model == "gpt-5-mini"
assert agent.history is not None
def test_run_adds_to_history(self, agent, mock_client):
"""Test that running the agent adds messages to history."""
# Setup mock response
mock_response = BasicChatOutputSchema(chat_message="Hello!")
mock_client.chat.completions.create.return_value = mock_response
# Run agent
input_data = BasicChatInputSchema(chat_message="Hi there")
with patch.object(agent, 'get_response', return_value=mock_response):
response = agent.run(input_data)
# Verify the response and that the exchange was recorded in history
assert response.chat_message == "Hello!"
assert len(agent.history.get_history()) > 0
def test_history_management(self, agent):
"""Test history reset functionality."""
# Add some history
agent.history.add_message("user", BasicChatInputSchema(chat_message="test"))
# Verify history exists
assert len(agent.history.get_history()) > 0
# Reset and verify
agent.reset_history()
# History should be back to its initial state; assert on its length or contents as appropriate for your setup
class TestAgentWithCustomSchema:
"""Tests for agents with custom schemas."""
@pytest.fixture
def custom_agent(self):
"""Create agent with custom output schema."""
from pydantic import Field
from typing import List
from atomic_agents import BaseIOSchema
class CustomOutput(BaseIOSchema):
answer: str = Field(..., description="The answer")
confidence: float = Field(..., description="Confidence 0-1")
sources: List[str] = Field(default_factory=list)
mock_client = MagicMock()
return AtomicAgent[BasicChatInputSchema, CustomOutput](
config=AgentConfig(
client=mock_client,
model="gpt-5-mini"
)
)
def test_custom_output_schema(self, custom_agent):
"""Test agent returns custom schema type."""
# The output_schema property should return our custom class
assert custom_agent.output_schema is not None
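If you would rather not patch internal methods like get_response, you can often drive the agent purely through the mocked client, as the mock setup in test_run_adds_to_history hints at. This is a sketch and assumes the agent ultimately calls client.chat.completions.create and returns its parsed result; verify that against the version of atomic-agents you have installed.
class TestAgentWithMockedClientOnly:
    """Sketch: exercise run() via the client mock instead of patching get_response."""

    def test_run_with_mocked_client_only(self):
        mock_client = MagicMock()
        # Instructor-style clients return the parsed output schema directly (assumption)
        mock_client.chat.completions.create.return_value = BasicChatOutputSchema(
            chat_message="Mocked structured response"
        )
        agent = AtomicAgent[BasicChatInputSchema, BasicChatOutputSchema](
            config=AgentConfig(client=mock_client, model="gpt-5-mini", history=ChatHistory())
        )
        response = agent.run(BasicChatInputSchema(chat_message="Hi"))
        assert response.chat_message == "Mocked structured response"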
Integration Tests with a Fully Configured Agent
# tests/test_integration.py
import pytest
from unittest.mock import MagicMock, patch
from atomic_agents import AtomicAgent, AgentConfig, BasicChatInputSchema, BasicChatOutputSchema
from atomic_agents.context import ChatHistory, SystemPromptGenerator
class TestAgentIntegration:
"""Integration tests for complete agent workflows."""
@pytest.fixture
def configured_agent(self):
"""Create a fully configured agent."""
mock_client = MagicMock()
system_prompt = SystemPromptGenerator(
background=["You are a helpful assistant."],
steps=["Think step by step.", "Provide clear answers."],
output_instructions=["Be concise.", "Use examples when helpful."]
)
agent = AtomicAgent[BasicChatInputSchema, BasicChatOutputSchema](
config=AgentConfig(
client=mock_client,
model="gpt-5-mini",
history=ChatHistory(),
system_prompt_generator=system_prompt
)
)
return agent
def test_system_prompt_generation(self, configured_agent):
"""Test that system prompt is generated correctly."""
# The agent should have a system prompt generator
assert configured_agent.system_prompt_generator is not None
def test_context_provider_integration(self, configured_agent):
"""Test context provider registration and usage."""
from atomic_agents.context import BaseDynamicContextProvider
class TestContextProvider(BaseDynamicContextProvider):
def get_info(self) -> str:
return "Test context information"
# Register provider
provider = TestContextProvider(title="Test Context")
configured_agent.register_context_provider("test", provider)
# Verify registration
retrieved = configured_agent.get_context_provider("test")
assert retrieved is not None
assert retrieved.get_info() == "Test context information"
def test_conversation_flow(self, configured_agent):
"""Test multi-turn conversation."""
mock_responses = [
BasicChatOutputSchema(chat_message="Hello! How can I help?"),
BasicChatOutputSchema(chat_message="Python is a programming language."),
]
with patch.object(configured_agent, 'get_response', side_effect=mock_responses):
# First turn
response1 = configured_agent.run(BasicChatInputSchema(chat_message="Hi"))
assert "Hello" in response1.chat_message
# Second turn
response2 = configured_agent.run(BasicChatInputSchema(chat_message="What is Python?"))
assert "Python" in response2.chat_message
Async Testing
Test async agent methods with pytest-asyncio.
# tests/test_async.py
import pytest
from unittest.mock import MagicMock, AsyncMock, patch
from atomic_agents import AtomicAgent, AgentConfig, BasicChatInputSchema, BasicChatOutputSchema
from atomic_agents.context import ChatHistory
@pytest.mark.asyncio
class TestAsyncAgent:
"""Async tests for agent operations."""
@pytest.fixture
def async_agent(self):
"""Create agent with async client."""
mock_client = MagicMock()
mock_client.chat.completions.create = AsyncMock()
return AtomicAgent[BasicChatInputSchema, BasicChatOutputSchema](
config=AgentConfig(
client=mock_client,
model="gpt-5-mini",
history=ChatHistory()
)
)
async def test_run_async(self, async_agent):
"""Test async run method."""
expected_response = BasicChatOutputSchema(chat_message="Async response")
with patch.object(async_agent, 'run_async', return_value=expected_response):
response = await async_agent.run_async(
BasicChatInputSchema(chat_message="Test async")
)
assert response.chat_message == "Async response"
async def test_streaming_response(self, async_agent):
"""Test async streaming responses."""
chunks = [
BasicChatOutputSchema(chat_message="Hello"),
BasicChatOutputSchema(chat_message="Hello world"),
BasicChatOutputSchema(chat_message="Hello world!"),
]
async def mock_stream(*args, **kwargs):
for chunk in chunks:
yield chunk
with patch.object(async_agent, 'run_async_stream', side_effect=mock_stream):
collected = []
async for chunk in async_agent.run_async_stream(
BasicChatInputSchema(chat_message="Stream test")
):
collected.append(chunk)
assert len(collected) == 3
assert collected[-1].chat_message == "Hello world!"
Running Tests
Basic Test Execution
# Run all tests
pytest
# Run with coverage
pytest --cov=my_agent --cov-report=html
# Run specific test file
pytest tests/test_schemas.py
# Run specific test class
pytest tests/test_agent.py::TestAtomicAgent
# Run specific test
pytest tests/test_agent.py::TestAtomicAgent::test_agent_initialization
# Run with verbose output
pytest -v
# Run and show print statements
pytest -s
pytest Configuration
# pyproject.toml
[tool.pytest.ini_options]
testpaths = ["tests"]
python_files = ["test_*.py"]
python_classes = ["Test*"]
python_functions = ["test_*"]
asyncio_mode = "auto"
addopts = "-v --tb=short"
[tool.coverage.run]
source = ["my_agent"]
omit = ["tests/*", "*/__init__.py"]
[tool.coverage.report]
exclude_lines = [
"pragma: no cover",
"if TYPE_CHECKING:",
"raise NotImplementedError",
]
Best Practices
1. Test Behavior, Not Implementation
# Good: Tests behavior
def test_agent_responds_to_greeting(agent):
response = agent.run(BasicChatInputSchema(chat_message="Hello"))
assert response.chat_message # Has a response
# Avoid: Tests implementation details
def test_agent_calls_openai_api(agent, mock_client):
agent.run(BasicChatInputSchema(chat_message="Hello"))
mock_client.chat.completions.create.assert_called_once() # Too coupled
2. Use Fixtures for Common Setup
@pytest.fixture
def agent_with_history():
"""Agent pre-loaded with conversation history."""
agent = create_test_agent()
agent.history.add_message("user", BasicChatInputSchema(chat_message="Previous message"))
return agent
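The create_test_agent() helper above is not part of the library; it is assumed to live in your own test suite (for example in conftest.py). A minimal sketch, wired to a MagicMock client so no API calls are made, might look like this:
# tests/conftest.py (sketch of the assumed helper)
from unittest.mock import MagicMock
from atomic_agents import AtomicAgent, AgentConfig, BasicChatInputSchema, BasicChatOutputSchema
from atomic_agents.context import ChatHistory

def create_test_agent() -> AtomicAgent[BasicChatInputSchema, BasicChatOutputSchema]:
    """Build an agent backed by a mocked client for use in tests."""
    return AtomicAgent[BasicChatInputSchema, BasicChatOutputSchema](
        config=AgentConfig(
            client=MagicMock(),
            model="gpt-5-mini",
            history=ChatHistory(),
        )
    )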
3. Parameterize Similar Tests
@pytest.mark.parametrize("expression,expected", [
("2 + 2", 4),
("10 - 5", 5),
("3 * 4", 12),
("15 / 3", 5),
])
def test_calculator_operations(calculator, expression, expected):
result = calculator.run(CalculatorInputSchema(expression=expression))
assert result.value == expected
4. Test Error Cases
def test_handles_api_timeout(agent):
"""Verify graceful handling of API timeouts."""
with patch.object(agent, 'get_response', side_effect=TimeoutError):
with pytest.raises(TimeoutError):
agent.run(BasicChatInputSchema(chat_message="test"))
Summary
| Test Type | Purpose | Tools |
|---|---|---|
| Schema Tests | Validate input/output | pytest, Pydantic |
| Tool Tests | Verify tool behavior | pytest, Mock |
| Agent Tests | Test agent workflows | pytest, MagicMock |
| Async Tests | Test async methods | pytest-asyncio |
Always aim for high coverage of schemas and tools, with focused integration tests for agent behavior.