russo¶

Top-level module exports.

russo ¶

russo — testing framework for LLM tool-call accuracy.

Audio ¶

Bases: BaseModel

Audio data with format metadata.

save ¶

save(path: str | Path) -> Path

Save audio to a file. Wraps raw PCM in a WAV container if needed.

Usage

audio.save("output.wav")

Source code in src/russo/_types.py

def save(self, path: str | Path) -> Path:
    """Save audio to a file. Wraps raw PCM in a WAV container if needed.

    Usage:
        audio.save("output.wav")
    """
    import wave

    p = Path(path)
    p.parent.mkdir(parents=True, exist_ok=True)

    if p.suffix.lower() == ".wav":
        with wave.open(str(p), "wb") as wf:
            wf.setnchannels(self.channels)
            wf.setsampwidth(self.sample_width)
            wf.setframerate(self.sample_rate)
            wf.writeframes(self.data)
    else:
        # For non-WAV formats, write raw bytes
        p.write_bytes(self.data)
    return p

ToolCall ¶

Bases: BaseModel

A normalized tool/function call representation.

Provider-agnostic — parsers convert provider-specific formats into this.

AgentResponse ¶

Bases: BaseModel

Normalized response from an agent, containing extracted tool calls.

raw `class-attribute` `instance-attribute` ¶

raw: Any | None = None

The raw, unparsed response from the provider (for debugging).

EvalResult ¶

Bases: BaseModel

Full evaluation result for a test scenario.

match_rate `property` ¶

match_rate: float

Fraction of expected tool calls that matched.

summary ¶

summary() -> str

Human-readable summary of the evaluation.

Source code in src/russo/_types.py

def summary(self) -> str:
    """Render the evaluation as a multi-line, human-readable report.

    The first line carries the overall verdict and the match rate. Each
    match then contributes one line showing the expected call and what it
    was paired with, followed by an indented line of detail text when the
    match has any.
    """
    verdict = "FAILED"
    if self.passed:
        verdict = "PASSED"
    report = [f"{verdict} ({self.match_rate:.0%} match rate)"]

    for match in self.matches:
        marker = "+" if match.matched else "-"
        if match.actual:
            outcome = f" -> {match.actual.name}({match.actual.arguments})"
        else:
            outcome = " -> (no match)"
        wanted = match.expected
        report.append(f"  [{marker}] {wanted.name}({wanted.arguments}){outcome}")
        if match.details:
            report.append(f"      {match.details}")

    return "\n".join(report)

ToolCallMatch ¶

Bases: BaseModel

Result of comparing a single expected tool call against actuals.

AudioCache ¶

AudioCache(cache_dir: Path = _DEFAULT_CACHE_DIR)

File-system cache for synthesized audio.

Each entry is a pair of files:

- .audio — raw audio bytes
- .meta — JSON with format, sample_rate, prompt

Usage

cache = AudioCache()                  # .russo_cache/
cache = AudioCache(Path("my_cache"))  # custom dir
cache.get("abc123")                   # Audio | None
cache.put("abc123", audio)
cache.clear()

Source code in src/russo/_cache.py

def __init__(self, cache_dir: Path = _DEFAULT_CACHE_DIR) -> None:
    """Create a cache rooted at ``cache_dir``.

    Only the path is recorded; this constructor does not touch the file
    system.

    Args:
        cache_dir: Directory holding the cache files. Defaults to the
            module-level ``_DEFAULT_CACHE_DIR``.
    """
    self.cache_dir = cache_dir

cache_key ¶

cache_key(prompt: str, **extra: Any) -> str

Deterministic key from prompt text + optional extra metadata.

Extra kwargs (e.g. voice, model) are included so a change in synthesizer config invalidates the cache automatically.

Source code in src/russo/_cache.py

def cache_key(self, prompt: str, **extra: Any) -> str:
    """Derive a stable cache key from the prompt and any extra settings.

    The prompt and the keyword arguments are serialized together as JSON
    with sorted keys, hashed with SHA-256, and the first 24 hex characters
    of the digest are returned. Because the extras (e.g. voice, model) are
    part of the hashed payload, changing any of them yields a different key.

    Args:
        prompt: The text that will be synthesized.
        **extra: Additional JSON-serializable values to mix into the key.

    Returns:
        A 24-character hexadecimal string.
    """
    payload = {"prompt": prompt}
    payload.update(extra)
    serialized = json.dumps(payload, sort_keys=True)
    digest = hashlib.sha256(serialized.encode()).hexdigest()
    return digest[:24]

get ¶

get(key: str) -> Audio | None

Load cached audio, or None if not cached.

Source code in src/russo/_cache.py

def get(self, key: str) -> Audio | None:
    """Load cached audio, or None if not cached.

    An entry is served only when both ``<key>.audio`` and ``<key>.meta``
    exist in the cache directory. If the entry exists but cannot be turned
    into an ``Audio`` (unreadable file, undecodable or malformed metadata,
    missing fields, rejected field values), a warning is logged, the entry
    is removed via ``self._remove_entry`` and None is returned.

    Args:
        key: Cache key identifying the entry.

    Returns:
        The cached ``Audio``, or None when the entry is absent or corrupt.
    """
    audio_path = self.cache_dir / f"{key}.audio"
    meta_path = self.cache_dir / f"{key}.meta"
    if not audio_path.exists() or not meta_path.exists():
        return None
    try:
        meta = json.loads(meta_path.read_text())
        # Pull the fields out first so a non-object meta document fails here.
        fmt = meta["format"]
        sample_rate = meta["sample_rate"]
        data = audio_path.read_bytes()
        audio = Audio(data=data, format=fmt, sample_rate=sample_rate)
    except (ValueError, KeyError, TypeError, OSError) as exc:
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError;
        # TypeError covers meta JSON that is not an object.
        # NOTE(review): Audio is presumably a pydantic model, whose
        # ValidationError derives from ValueError — confirm.
        logger.warning("Corrupt cache entry %s, removing: %s", key, exc)
        self._remove_entry(key)
        return None
    # Report the hit only once the entry is known to be usable.
    logger.debug("Cache hit: %s", key)
    return audio

put ¶

put(key: str, audio: Audio, *, prompt: str = '') -> None

Write audio + metadata to cache.

Source code in src/russo/_cache.py

def put(self, key: str, audio: Audio, *, prompt: str = "") -> None:
    """Store an audio payload and its metadata under ``key``.

    Two files are written into the cache directory: ``<key>.audio`` with the
    raw bytes, then ``<key>.meta`` with a JSON description of the audio.

    Args:
        key: Cache key identifying the entry.
        audio: The audio to persist.
        prompt: The text the audio was synthesized from; stored in the
            metadata file.
    """
    self._ensure_dir()

    payload = audio.data
    (self.cache_dir / f"{key}.audio").write_bytes(payload)

    # NOTE(review): only format and sample_rate are persisted here; any
    # other Audio fields are not written to the metadata — confirm intended.
    description = dict(
        format=audio.format,
        sample_rate=audio.sample_rate,
        prompt=prompt,
    )
    (self.cache_dir / f"{key}.meta").write_text(json.dumps(description, indent=2))

    logger.debug("Cached: %s (%d bytes)", key, len(payload))

clear ¶

clear() -> None

Remove all cached entries.

Source code in src/russo/_cache.py

def clear(self) -> None:
    """Delete every ``.audio`` and ``.meta`` file in the cache directory.

    Does nothing when the directory does not exist. Files with any other
    suffix are left in place. The number of deleted files is logged.
    """
    if self.cache_dir.exists():
        doomed = [
            entry
            for entry in self.cache_dir.iterdir()
            if entry.suffix in (".audio", ".meta")
        ]
        for entry in doomed:
            entry.unlink()
        logger.info("Cleared %d cache files from %s", len(doomed), self.cache_dir)

size ¶

size() -> int

Number of cached audio entries.

Source code in src/russo/_cache.py

def size(self) -> int:
    """Count the cached audio entries.

    Returns:
        The number of files with an ``.audio`` suffix in the cache
        directory, or 0 when the directory does not exist.
    """
    if self.cache_dir.exists():
        audio_files = [entry for entry in self.cache_dir.iterdir() if entry.suffix == ".audio"]
        return len(audio_files)
    return 0

CachedSynthesizer ¶

CachedSynthesizer(synthesizer: Synthesizer, *, cache: AudioCache | None = None, enabled: bool = True, cache_key_extra: dict[str, Any] | None = None)

Wraps any Synthesizer with local audio caching.

Satisfies the Synthesizer protocol — drop-in replacement.

Usage

synth = CachedSynthesizer(GoogleSynthesizer(...))

# Disable caching at runtime
synth = CachedSynthesizer(GoogleSynthesizer(...), enabled=False)

# Custom cache directory
synth = CachedSynthesizer(
    GoogleSynthesizer(...),
    cache=AudioCache(Path("/tmp/my_cache")),
)

# Include synthesizer config in cache key (invalidates on config change)
synth = CachedSynthesizer(
    GoogleSynthesizer(voice="Kore", model="gemini-2.5-flash-preview-tts"),
    cache_key_extra={"voice": "Kore", "model": "gemini-2.5-flash-preview-tts"},
)

# Clear cache
synth.cache.clear()

Source code in src/russo/_cache.py

def __init__(
    self,
    synthesizer: Synthesizer,
    *,
    cache: AudioCache | None = None,
    enabled: bool = True,
    cache_key_extra: dict[str, Any] | None = None,
) -> None:
    """Wrap ``synthesizer`` so its output can be served from a cache.

    Args:
        synthesizer: The synthesizer being wrapped; stored as ``inner``.
        cache: Cache to use. A default ``AudioCache()`` is created when
            this is omitted (or otherwise falsy).
        enabled: Whether caching is active.
        cache_key_extra: Extra values stored for use in cache keys. An
            empty dict is used when omitted (or otherwise falsy).
    """
    self.inner = synthesizer
    self.cache = cache if cache else AudioCache()
    self.enabled = enabled
    self.cache_key_extra = cache_key_extra if cache_key_extra else {}

synthesize `async` ¶

synthesize(text: str) -> Audio

Synthesize with cache lookup/store.

Source code in src/russo/_cache.py

async def synthesize(self, text: str) -> Audio:
    """Return audio for ``text``, consulting the cache first when enabled.

    With caching enabled, a key is built from ``text`` and
    ``cache_key_extra``. A cached entry is returned directly; otherwise the
    wrapped synthesizer is awaited and its result is stored before being
    returned. With caching disabled, the call goes straight to the wrapped
    synthesizer and the cache is not touched.

    Args:
        text: The text to synthesize.

    Returns:
        The cached or freshly synthesized audio.
    """
    if self.enabled:
        key = self.cache.cache_key(text, **self.cache_key_extra)
        hit = self.cache.get(key)
        if hit is not None:
            return hit

        fresh = await self.inner.synthesize(text)
        self.cache.put(key, fresh, prompt=text)
        return fresh

    return await self.inner.synthesize(text)

ToolCallAssertionError ¶

ToolCallAssertionError(result: EvalResult, message: str = '')

Bases: AssertionError

Rich assertion error with detailed tool call diff.

Source code in src/russo/_assertions.py

def __init__(self, result: EvalResult, message: str = "") -> None:
    """Build the error from an evaluation result and an optional message.

    The exception text is the result's summary, preceded by ``message`` on
    its own line when one is given. The result itself is kept on the
    ``result`` attribute.

    Args:
        result: The evaluation result being reported.
        message: Optional text placed before the summary.
    """
    self.result = result
    report = result.summary()
    if message:
        report = f"{message}\n{report}"
    super().__init__(report)

tool_call ¶

tool_call(name: str, **arguments: Any) -> ToolCall

Shorthand for creating a ToolCall.

Usage

russo.tool_call("book_flight", from_city="NYC", to_city="LA")

Source code in src/russo/_helpers.py

def tool_call(name: str, **arguments: Any) -> ToolCall:
    """Build a ToolCall from a name and keyword arguments.

    Usage:
        russo.tool_call("book_flight", from_city="NYC", to_city="LA")

    Args:
        name: Name of the tool being called.
        **arguments: Collected into a dict and used as the call's arguments.

    Returns:
        The constructed ToolCall.
    """
    call = ToolCall(name=name, arguments=arguments)
    return call

agent ¶

agent(fn: Callable[[Audio], Coroutine[Any, Any, AgentResponse]]) -> _CallableAgent

Decorator to turn an async function into an Agent.

Usage

@russo.agent
async def my_agent(audio: russo.Audio) -> russo.AgentResponse:
    result = await call_my_api(audio.data)
    return russo.AgentResponse(tool_calls=[...])

Source code in src/russo/_helpers.py

def agent(fn: Callable[[Audio], Coroutine[Any, Any, AgentResponse]]) -> _CallableAgent:
    """Wrap an async function in a ``_CallableAgent``; usable as a decorator.

    Usage:
        @russo.agent
        async def my_agent(audio: russo.Audio) -> russo.AgentResponse:
            result = await call_my_api(audio.data)
            return russo.AgentResponse(tool_calls=[...])

    Args:
        fn: Async function taking an Audio and returning an AgentResponse.

    Returns:
        A ``_CallableAgent`` constructed from ``fn``.
    """
    wrapped = _CallableAgent(fn)
    return wrapped

run `async` ¶

run(*, prompt: str, synthesizer: Synthesizer, agent: Agent, evaluator: Evaluator, expect: list[ToolCall]) -> EvalResult

Run the full russo pipeline.

1. Synthesize audio from the text prompt.
2. Pass audio to the agent under test.
3. Evaluate the agent's tool calls against expectations.

PARAMETER	DESCRIPTION
`prompt`	The text prompt to synthesize into audio. TYPE: `str`
`synthesizer`	Converts text to audio. TYPE: `Synthesizer`
`agent`	The agent under test. TYPE: `Agent`
`evaluator`	Compares expected vs actual tool calls. TYPE: `Evaluator`
`expect`	The expected tool calls. TYPE: `list[ToolCall]`

RETURNS	DESCRIPTION
`EvalResult`	EvalResult with pass/fail and per-call match details.

Source code in src/russo/_pipeline.py

async def run(
    *,
    prompt: str,
    synthesizer: Synthesizer,
    agent: Agent,
    evaluator: Evaluator,
    expect: list[ToolCall],
) -> EvalResult:
    """Execute one end-to-end russo scenario.

    The stages run strictly in order:

    1. The prompt text is turned into audio by the synthesizer.
    2. That audio is handed to the agent under test.
    3. The tool calls in the agent's response are scored against ``expect``.

    Args:
        prompt: The text prompt to synthesize into audio.
        synthesizer: Converts text to audio.
        agent: The agent under test.
        evaluator: Compares expected vs actual tool calls.
        expect: The expected tool calls.

    Returns:
        Whatever the evaluator produces for the expected and actual calls.
    """
    synthesized = await synthesizer.synthesize(prompt)
    agent_response = await agent.run(synthesized)
    actual_calls = agent_response.tool_calls
    outcome = evaluator.evaluate(expected=expect, actual=actual_calls)
    return outcome

assert_tool_calls ¶

assert_tool_calls(result: EvalResult, *, message: str = '') -> None

Assert that an EvalResult passed.

Raises a ToolCallAssertionError with a rich diff if it didn't.

Usage

result = await russo.run(...)
russo.assert_tool_calls(result)

# Or with a custom message
russo.assert_tool_calls(result, message="Flight booking should work")

Source code in src/russo/_assertions.py

def assert_tool_calls(result: EvalResult, *, message: str = "") -> None:
    """Fail with a ToolCallAssertionError unless the evaluation passed.

    Usage:
        result = await russo.run(...)
        russo.assert_tool_calls(result)

        # Or with a custom message
        russo.assert_tool_calls(result, message="Flight booking should work")

    Args:
        result: The evaluation result to check.
        message: Optional text forwarded to the raised error.

    Raises:
        ToolCallAssertionError: When ``result.passed`` is falsy.
    """
    if result.passed:
        return
    raise ToolCallAssertionError(result, message)

russo¶

russo ¶

Audio ¶

save ¶

ToolCall ¶

AgentResponse ¶

raw class-attribute instance-attribute ¶

EvalResult ¶

match_rate property ¶

summary ¶

ToolCallMatch ¶

AudioCache ¶

cache_key ¶

get ¶

put ¶

clear ¶

size ¶

CachedSynthesizer ¶

Disable caching at runtime¶

Custom cache directory¶

Include synthesizer config in cache key (invalidates on config change)¶

Clear cache¶

synthesize async ¶

ToolCallAssertionError ¶

tool_call ¶

agent ¶

run async ¶

assert_tool_calls ¶

Or with a custom message¶

raw `class-attribute` `instance-attribute` ¶

match_rate `property` ¶

synthesize `async` ¶

run `async` ¶