pytest Integration¶
The most natural way to use russo -- declarative test scenarios using markers and fixtures.
Source files
How it works¶
The russo pytest plugin (auto-discovered via the pytest11 entry point) provides:
- @pytest.mark.russo -- marker to declare test scenarios
- russo_result -- fixture that runs the full pipeline and returns an EvalResult
- Overridable fixtures -- russo_synthesizer, russo_agent, russo_evaluator
- CLI options -- caching, reporting, and more
Step 1: Configure fixtures in conftest.py¶
Define your synthesizer and agent as pytest fixtures:
# conftest.py
import os
import pytest
import russo
from russo.evaluators import ExactEvaluator
@pytest.fixture(scope="session")
def russo_synthesizer():
    """TTS synthesizer for all russo tests.

    Returns a ``GoogleSynthesizer`` when the ``GOOGLE_API_KEY`` environment
    variable is set to a non-empty value. Otherwise returns a stand-in whose
    ``synthesize`` coroutine ignores the text and yields a fixed buffer of
    zero bytes, so the tests can run in CI or offline.
    """
    api_key = os.environ.get("GOOGLE_API_KEY")
    if api_key:
        # Imported inside the branch so the module is only loaded when a key
        # is actually configured.
        from russo.synthesizers import GoogleSynthesizer

        return GoogleSynthesizer(api_key=api_key)

    # Fallback for CI / offline use
    class FakeSynthesizer:
        async def synthesize(self, text: str) -> russo.Audio:
            # Same placeholder payload for every prompt; `text` is unused.
            return russo.Audio(data=b"\x00" * 4800, format="wav", sample_rate=24000)

    return FakeSynthesizer()
@pytest.fixture(scope="session")
def russo_agent():
    """The agent under test.

    This example returns a stub agent that ignores the incoming audio and
    always responds with a single ``book_flight`` tool call from NYC to LA.
    """
    # Replace with your real agent:
    # from russo.adapters import GeminiLiveAgent
    # return GeminiLiveAgent(client=..., model="...", tools=[...])

    @russo.agent
    async def fake_agent(audio: russo.Audio) -> russo.AgentResponse:
        # Canned response: the audio argument is not inspected.
        return russo.AgentResponse(
            tool_calls=[
                russo.ToolCall(
                    name="book_flight",
                    arguments={"from_city": "NYC", "to_city": "LA"},
                ),
            ]
        )

    return fake_agent
@pytest.fixture
def russo_evaluator():
    """Override to use a custom evaluator (defaults to ExactEvaluator).

    This example simply returns a fresh ``ExactEvaluator`` instance.
    """
    return ExactEvaluator()
Step 2: Write tests using the marker¶
The @pytest.mark.russo marker declares the prompt and expected tool calls. The russo_result fixture runs the full pipeline automatically.
Basic assertion¶
# test_flights.py
import pytest
import russo
@pytest.mark.russo(
    prompt="Book a flight from NYC to LA",
    expect=[russo.tool_call("book_flight", from_city="NYC", to_city="LA")],
)
async def test_book_flight(russo_result):
    """Verify the agent calls book_flight with the right arguments."""
    # The marker above declares the prompt and the expected tool call;
    # russo_result carries the outcome of running that scenario.
    russo.assert_tool_calls(russo_result)
Match rate check¶
@pytest.mark.russo(
    prompt="Book a flight from NYC to LA",
    expect=[russo.tool_call("book_flight", from_city="NYC", to_city="LA")],
)
async def test_match_rate(russo_result):
    """Check that every expected tool call was matched."""
    assert russo_result.passed
    # A match rate of 1.0 means all expected calls were matched.
    assert russo_result.match_rate == 1.0
Custom failure message¶
@pytest.mark.russo(
    prompt="Book a flight from NYC to LA",
    expect=[russo.tool_call("book_flight", from_city="NYC", to_city="LA")],
)
async def test_with_custom_message(russo_result):
    """Assert the expected tool calls, supplying a custom failure message."""
    russo.assert_tool_calls(russo_result, message="Flight booking agent failed")
Detailed inspection¶
@pytest.mark.russo(
    prompt="Book a flight from NYC to LA",
    expect=[russo.tool_call("book_flight", from_city="NYC", to_city="LA")],
)
async def test_detailed_inspection(russo_result):
    """Access individual match details for richer assertions."""
    assert russo_result.passed

    # Per-expectation checks: each entry pairs an expected call with the
    # actual call it was matched against.
    for match in russo_result.matches:
        assert match.matched, f"Expected {match.expected.name} was not matched"
        assert match.actual is not None
        assert match.actual.name == match.expected.name

    # Checks on the list of actual tool calls as a whole.
    assert len(russo_result.actual) >= 1
    assert russo_result.actual[0].name == "book_flight"
Step 3: Run the tests¶
pytest test_flights.py -v
Expected output:
test_flights.py::test_book_flight PASSED
test_flights.py::test_match_rate PASSED
test_flights.py::test_with_custom_message PASSED
test_flights.py::test_detailed_inspection PASSED
Concurrent runs¶
Run the same test multiple times or test multiple prompt variants — see Concurrent Runs for the full guide.
Multiple runs per test¶
@pytest.mark.russo(
    prompt="Book a flight from NYC to LA",
    expect=[russo.tool_call("book_flight", from_city="NYC", to_city="LA")],
    runs=5,
)
async def test_reliability(russo_result):
    """Run the prompt 5 times concurrently; russo_result is a BatchResult."""
    # Tolerate occasional misses: require at least 80% of the runs to pass.
    assert russo_result.pass_rate >= 0.8
Multiple prompts¶
@pytest.mark.russo(
    prompts=[
        "Book a flight from NYC to LA",
        "I need to fly from NYC to LA",
    ],
    expect=[russo.tool_call("book_flight", from_city="NYC", to_city="LA")],
)
async def test_prompt_variants(russo_result):
    """Check that both phrasings of the request produce the expected call."""
    # One result per prompt listed in the marker.
    assert russo_result.total == 2
    assert russo_result.passed
Combined: prompts × runs¶
@pytest.mark.russo(
    prompts=["Book from NYC to LA", "Fly NYC to LA"],
    expect=[russo.tool_call("book_flight", from_city="NYC", to_city="LA")],
    runs=3,
    max_concurrency=4,
)
async def test_full_matrix(russo_result):
    """Combine several prompts with repeated runs in one scenario."""
    assert russo_result.total == 6  # 2 prompts × 3 runs
Source file
CLI options¶
The russo plugin adds several command-line options:
# Caching
pytest --russo-cache # enable audio cache (default)
pytest --russo-no-cache # disable caching
pytest --russo-clear-cache # clear cache before running
pytest --russo-cache-dir .my_cache # custom cache directory
# Concurrent runs
pytest --russo-runs 5 # run each test 5 times (marker overrides this)
pytest --russo-max-concurrency 3 # limit parallel pipeline executions
# Reporting
pytest --russo-report report.html # generate HTML report