Skip to content

russo

Top-level module exports.

russo

russo — testing framework for LLM tool-call accuracy.

Audio

Bases: BaseModel

Audio data with format metadata.

save

save(path: str | Path) -> Path

Save audio to a file. Wraps raw PCM in a WAV container if needed.

Usage

audio.save("output.wav")

Source code in src/russo/_types.py
def save(self, path: str | Path) -> Path:
    """Save audio to a file, wrapping raw PCM in a WAV container if needed.

    When ``path`` has a ``.wav`` suffix (case-insensitive) and the payload is
    not already a RIFF/WAVE stream, the bytes are written as PCM frames with a
    WAV header built from ``self.channels``, ``self.sample_width`` and
    ``self.sample_rate``. In every other case the bytes are written verbatim.
    Missing parent directories are created.

    Usage:
        audio.save("output.wav")

    Args:
        path: Destination file path.

    Returns:
        The destination as a ``Path``.
    """
    import wave

    p = Path(path)
    p.parent.mkdir(parents=True, exist_ok=True)

    payload = self.data
    # A RIFF/WAVE payload already carries its own header; wrapping it again
    # would embed the old header in the sample data and corrupt the file.
    already_wav = payload[:4] == b"RIFF" and payload[8:12] == b"WAVE"

    if p.suffix.lower() == ".wav" and not already_wav:
        with wave.open(str(p), "wb") as wf:
            wf.setnchannels(self.channels)
            wf.setsampwidth(self.sample_width)
            wf.setframerate(self.sample_rate)
            wf.writeframes(payload)
    else:
        # Non-WAV targets and ready-made WAV payloads are written as-is.
        p.write_bytes(payload)
    return p

ToolCall

Bases: BaseModel

A normalized tool/function call representation.

Provider-agnostic — parsers convert provider-specific formats into this.

AgentResponse

Bases: BaseModel

Normalized response from an agent, containing extracted tool calls.

raw class-attribute instance-attribute

raw: Any | None = None

The raw, unparsed response from the provider (for debugging).

EvalResult

Bases: BaseModel

Full evaluation result for a test scenario.

match_rate property

match_rate: float

Fraction of expected tool calls that matched.

summary

summary() -> str

Human-readable summary of the evaluation.

Source code in src/russo/_types.py
def summary(self) -> str:
    """Render the evaluation as a multi-line, human-readable report.

    The first line carries the overall verdict and the match rate. Each
    entry in ``self.matches`` then contributes one line showing the expected
    call and what it was paired with, plus an indented details line when the
    match has details.
    """
    verdict = "PASSED" if self.passed else "FAILED"
    report = [f"{verdict} ({self.match_rate:.0%} match rate)"]
    for match in self.matches:
        marker = "+" if match.matched else "-"
        if match.actual:
            paired_with = f" -> {match.actual.name}({match.actual.arguments})"
        else:
            paired_with = " -> (no match)"
        report.append(f"  [{marker}] {match.expected.name}({match.expected.arguments}){paired_with}")
        if match.details:
            report.append(f"      {match.details}")
    return "\n".join(report)

ToolCallMatch

Bases: BaseModel

Result of comparing a single expected tool call against actuals.

AudioCache

AudioCache(cache_dir: Path = _DEFAULT_CACHE_DIR)

File-system cache for synthesized audio.

Each entry is a pair of files:

  * .audio — raw audio bytes
  * .meta — JSON with format, sample_rate, prompt

Usage

cache = AudioCache()                  # .russo_cache/
cache = AudioCache(Path("my_cache"))  # custom dir
cache.get("abc123")                   # Audio | None
cache.put("abc123", audio)
cache.clear()

Source code in src/russo/_cache.py
def __init__(self, cache_dir: Path = _DEFAULT_CACHE_DIR) -> None:
    """Remember the directory that holds the cache files.

    Args:
        cache_dir: Directory for the cache files; defaults to
            ``_DEFAULT_CACHE_DIR``.
    """
    # Only the path is stored here; nothing is created on disk at this point.
    self.cache_dir = cache_dir

cache_key

cache_key(prompt: str, **extra: Any) -> str

Deterministic key from prompt text + optional extra metadata.

Extra kwargs (e.g. voice, model) are included so a change in synthesizer config invalidates the cache automatically.

Source code in src/russo/_cache.py
def cache_key(self, prompt: str, **extra: Any) -> str:
    """Derive a stable cache key from the prompt and any extra metadata.

    The prompt and the extra keyword arguments (e.g. voice, model) are
    serialized as JSON with sorted keys and hashed with SHA-256, so the same
    inputs always give the same key and any change in the extras gives a
    different one.

    Returns:
        The first 24 hex characters of the digest.
    """
    payload = {"prompt": prompt, **extra}
    serialized = json.dumps(payload, sort_keys=True)
    digest = hashlib.sha256(serialized.encode()).hexdigest()
    return digest[:24]

get

get(key: str) -> Audio | None

Load cached audio, or None if not cached.

Source code in src/russo/_cache.py
def get(self, key: str) -> Audio | None:
    """Load cached audio, or None if not cached.

    An entry is usable only when both ``<key>.audio`` and ``<key>.meta``
    exist and the metadata is a JSON object containing ``format`` and
    ``sample_rate``. Any entry that cannot be read, decoded or validated is
    treated as corrupt: a warning is logged, the entry is dropped through
    ``self._remove_entry`` and None is returned.

    Args:
        key: Cache key identifying the entry.

    Returns:
        The cached ``Audio``, or None when the entry is missing or corrupt.
    """
    audio_path = self.cache_dir / f"{key}.audio"
    meta_path = self.cache_dir / f"{key}.meta"
    if not audio_path.exists() or not meta_path.exists():
        return None
    try:
        meta = json.loads(meta_path.read_text(encoding="utf-8"))
        if not isinstance(meta, dict):
            # Valid JSON of the wrong shape (list, string, null, ...) is
            # just as unusable as a syntax error.
            raise ValueError(f"metadata is {type(meta).__name__}, expected a JSON object")
        audio_format = meta["format"]
        sample_rate = meta["sample_rate"]
        data = audio_path.read_bytes()
        audio = Audio(data=data, format=audio_format, sample_rate=sample_rate)
    except (ValueError, KeyError, OSError) as exc:
        # ValueError covers json.JSONDecodeError, UnicodeDecodeError from
        # undecodable bytes, and ValueError-based validation failures.
        logger.warning("Corrupt cache entry %s, removing: %s", key, exc)
        self._remove_entry(key)
        return None
    # Report a hit only once the entry has actually been rebuilt.
    logger.debug("Cache hit: %s", key)
    return audio

put

put(key: str, audio: Audio, *, prompt: str = '') -> None

Write audio + metadata to cache.

Source code in src/russo/_cache.py
def put(self, key: str, audio: Audio, *, prompt: str = "") -> None:
    """Store an audio payload and its metadata under ``key``.

    Writes two files into the cache directory: ``<key>.audio`` with the raw
    bytes and ``<key>.meta`` with a JSON description.

    Args:
        key: Cache key to store the entry under.
        audio: Audio whose ``data``, ``format`` and ``sample_rate`` are saved.
        prompt: Text recorded in the metadata alongside the audio.
    """
    self._ensure_dir()

    payload = audio.data
    (self.cache_dir / f"{key}.audio").write_bytes(payload)

    # NOTE(review): only format and sample_rate are persisted; any other
    # attributes of the audio object are not written — confirm intended.
    description = json.dumps(
        {
            "format": audio.format,
            "sample_rate": audio.sample_rate,
            "prompt": prompt,
        },
        indent=2,
    )
    (self.cache_dir / f"{key}.meta").write_text(description)

    logger.debug("Cached: %s (%d bytes)", key, len(payload))

clear

clear() -> None

Remove all cached entries.

Source code in src/russo/_cache.py
def clear(self) -> None:
    """Delete every ``.audio`` and ``.meta`` file in the cache directory.

    Does nothing when the directory does not exist. Files with other
    suffixes are left untouched. The number of deleted files is logged.
    """
    directory = self.cache_dir
    if not directory.exists():
        return

    cache_suffixes = (".audio", ".meta")
    removed = 0
    for entry in directory.iterdir():
        if entry.suffix not in cache_suffixes:
            continue
        entry.unlink()
        removed += 1

    logger.info("Cleared %d cache files from %s", removed, directory)

size

size() -> int

Number of cached audio entries.

Source code in src/russo/_cache.py
def size(self) -> int:
    """Count the cached audio entries.

    Returns:
        The number of ``.audio`` files in the cache directory, or 0 when the
        directory does not exist.
    """
    directory = self.cache_dir
    if directory.exists():
        audio_files = [entry for entry in directory.iterdir() if entry.suffix == ".audio"]
        return len(audio_files)
    return 0

CachedSynthesizer

CachedSynthesizer(synthesizer: Synthesizer, *, cache: AudioCache | None = None, enabled: bool = True, cache_key_extra: dict[str, Any] | None = None)

Wraps any Synthesizer with local audio caching.

Satisfies the Synthesizer protocol — drop-in replacement.

Usage

synth = CachedSynthesizer(GoogleSynthesizer(...))

Disable caching at runtime

synth = CachedSynthesizer(GoogleSynthesizer(...), enabled=False)

Custom cache directory

synth = CachedSynthesizer(
    GoogleSynthesizer(...),
    cache=AudioCache(Path("/tmp/my_cache")),
)

Include synthesizer config in cache key (invalidates on config change)

synth = CachedSynthesizer(
    GoogleSynthesizer(voice="Kore", model="gemini-2.5-flash-preview-tts"),
    cache_key_extra={"voice": "Kore", "model": "gemini-2.5-flash-preview-tts"},
)

Clear cache

synth.cache.clear()

Source code in src/russo/_cache.py
def __init__(
    self,
    synthesizer: Synthesizer,
    *,
    cache: AudioCache | None = None,
    enabled: bool = True,
    cache_key_extra: dict[str, Any] | None = None,
) -> None:
    """Wrap ``synthesizer`` with an audio cache.

    Args:
        synthesizer: The wrapped synthesizer, stored as ``self.inner``.
        cache: Cache to use; a default ``AudioCache()`` is created when
            omitted.
        enabled: Stored as ``self.enabled``; consulted by ``synthesize``.
        cache_key_extra: Extra metadata stored for cache-key derivation; an
            empty dict is used when omitted.
    """
    self.inner = synthesizer
    self.enabled = enabled
    # Falsy arguments fall back to fresh defaults, exactly like `x or default`.
    self.cache = cache if cache else AudioCache()
    self.cache_key_extra = cache_key_extra if cache_key_extra else {}

synthesize async

synthesize(text: str) -> Audio

Synthesize with cache lookup/store.

Source code in src/russo/_cache.py
async def synthesize(self, text: str) -> Audio:
    """Return audio for ``text``, consulting the cache first when enabled.

    With caching disabled the call goes straight to the wrapped synthesizer.
    Otherwise a key is derived from the text and ``self.cache_key_extra``;
    a cached entry is returned as-is, and a miss is synthesized, stored with
    the text as its prompt, and returned.
    """
    if self.enabled:
        key = self.cache.cache_key(text, **self.cache_key_extra)
        hit = self.cache.get(key)
        if hit is not None:
            return hit

        fresh = await self.inner.synthesize(text)
        self.cache.put(key, fresh, prompt=text)
        return fresh

    return await self.inner.synthesize(text)

ToolCallAssertionError

ToolCallAssertionError(result: EvalResult, message: str = '')

Bases: AssertionError

Rich assertion error with detailed tool call diff.

Source code in src/russo/_assertions.py
def __init__(self, result: EvalResult, message: str = "") -> None:
    """Build the error text from the evaluation summary.

    Args:
        result: The evaluation result; kept on the exception as ``result``.
        message: Optional text placed on its own line above the summary.
    """
    self.result = result
    report = result.summary()
    if message:
        report = f"{message}\n{report}"
    super().__init__(report)

tool_call

tool_call(name: str, **arguments: Any) -> ToolCall

Shorthand for creating a ToolCall.

Usage

russo.tool_call("book_flight", from_city="NYC", to_city="LA")

Source code in src/russo/_helpers.py
def tool_call(name: str, **arguments: Any) -> ToolCall:
    """Build a ToolCall from a name and keyword arguments.

    Usage:
        russo.tool_call("book_flight", from_city="NYC", to_city="LA")

    Args:
        name: Name of the tool/function.
        **arguments: Collected into the call's ``arguments`` mapping.
    """
    call = ToolCall(name=name, arguments=arguments)
    return call

agent

agent(fn: Callable[[Audio], Coroutine[Any, Any, AgentResponse]]) -> _CallableAgent

Decorator to turn an async function into an Agent.

Usage

@russo.agent
async def my_agent(audio: russo.Audio) -> russo.AgentResponse:
    result = await call_my_api(audio.data)
    return russo.AgentResponse(tool_calls=[...])

Source code in src/russo/_helpers.py
def agent(fn: Callable[[Audio], Coroutine[Any, Any, AgentResponse]]) -> _CallableAgent:
    """Wrap an async function in a ``_CallableAgent``; usable as a decorator.

    Usage:
        @russo.agent
        async def my_agent(audio: russo.Audio) -> russo.AgentResponse:
            result = await call_my_api(audio.data)
            return russo.AgentResponse(tool_calls=[...])

    Args:
        fn: Async function taking an Audio and returning an AgentResponse.
    """
    wrapped = _CallableAgent(fn)
    return wrapped

run async

run(*, prompt: str, synthesizer: Synthesizer, agent: Agent, evaluator: Evaluator, expect: list[ToolCall]) -> EvalResult

Run the full russo pipeline.

  1. Synthesize audio from the text prompt.
  2. Pass audio to the agent under test.
  3. Evaluate the agent's tool calls against expectations.
PARAMETER DESCRIPTION
prompt

The text prompt to synthesize into audio.

TYPE: str

synthesizer

Converts text to audio.

TYPE: Synthesizer

agent

The agent under test.

TYPE: Agent

evaluator

Compares expected vs actual tool calls.

TYPE: Evaluator

expect

The expected tool calls.

TYPE: list[ToolCall]

RETURNS DESCRIPTION
EvalResult

EvalResult with pass/fail and per-call match details.

Source code in src/russo/_pipeline.py
async def run(
    *,
    prompt: str,
    synthesizer: Synthesizer,
    agent: Agent,
    evaluator: Evaluator,
    expect: list[ToolCall],
) -> EvalResult:
    """Execute the synthesize -> agent -> evaluate pipeline once.

    The prompt is turned into audio, the audio is handed to the agent, and
    the tool calls on the agent's response are compared with ``expect``.

    Args:
        prompt: The text prompt to synthesize into audio.
        synthesizer: Converts text to audio.
        agent: The agent under test.
        evaluator: Compares expected vs actual tool calls.
        expect: The expected tool calls.

    Returns:
        Whatever ``evaluator.evaluate`` returns for the expected and actual
        tool calls.
    """
    spoken_prompt = await synthesizer.synthesize(prompt)
    agent_response = await agent.run(spoken_prompt)
    actual_calls = agent_response.tool_calls
    return evaluator.evaluate(expected=expect, actual=actual_calls)

assert_tool_calls

assert_tool_calls(result: EvalResult, *, message: str = '') -> None

Assert that an EvalResult passed.

Raises a ToolCallAssertionError with a rich diff if it didn't.

Usage

result = await russo.run(...)
russo.assert_tool_calls(result)

Or with a custom message

russo.assert_tool_calls(result, message="Flight booking should work")

Source code in src/russo/_assertions.py
def assert_tool_calls(
    result: EvalResult,
    *,
    message: str = "",
) -> None:
    """Fail with a ToolCallAssertionError unless the evaluation passed.

    Usage:
        result = await russo.run(...)
        russo.assert_tool_calls(result)

        # Or with a custom message
        russo.assert_tool_calls(result, message="Flight booking should work")

    Args:
        result: The evaluation result to check.
        message: Optional text forwarded to the raised error.

    Raises:
        ToolCallAssertionError: If ``result.passed`` is falsy.
    """
    if result.passed:
        return
    raise ToolCallAssertionError(result, message)