Skip to content

pytest Plugin

pytest plugin for russo — auto-discovered via the pytest11 entry point.

pytest_plugin

pytest plugin for russo — auto-discovered via the pytest11 entry point.

Provides:

- russo marker for declarative test scenarios
- russo_result fixture that runs the full pipeline
- Terminal summary via the pytest_terminal_summary hook
- --russo-report CLI option for HTML report output
- --russo-runs CLI option for concurrent multi-run testing

pytest_sessionstart

pytest_sessionstart(session: Session) -> None

Clear audio cache if --russo-clear-cache was passed.

Source code in src/russo/pytest_plugin.py
def pytest_sessionstart(session: pytest.Session) -> None:
    """Clear the audio cache when ``--russo-clear-cache`` was passed.

    Reads the ``russo_clear_cache`` option (default ``False``). When it is set,
    the cache rooted at the ``russo_cache_dir`` option (default
    ``".russo_cache"``) is emptied, and the value ``cache.size()`` reported
    before clearing is printed on the terminal.

    The message is only printed when a terminal reporter plugin is registered;
    the cache is cleared either way.

    Args:
        session: The pytest session being started; only ``session.config`` is used.
    """
    config = session.config
    if not config.getoption("russo_clear_cache", default=False):
        return

    from pathlib import Path

    cache_dir = config.getoption("russo_cache_dir", default=".russo_cache")
    cache = AudioCache(Path(cache_dir))
    # Capture the size before clearing so the message reports what was removed.
    n = cache.size()
    cache.clear()

    # get_terminal_writer() relies on the "terminalreporter" plugin. When that
    # plugin is not registered (e.g. ``-p no:terminal``) there is nowhere to
    # write, so skip the message instead of failing after the cache is cleared.
    if config.pluginmanager.get_plugin("terminalreporter") is None:
        return
    tw = config.get_terminal_writer()
    tw.line(f"russo: cleared {n} cached audio entries from {cache_dir}")

russo_audio_cache

russo_audio_cache(request: FixtureRequest) -> AudioCache

Session-scoped audio cache. Override in conftest.py to customize.

Source code in src/russo/pytest_plugin.py
@pytest.fixture(scope="session")
def russo_audio_cache(request: pytest.FixtureRequest) -> AudioCache:
    """Provide one ``AudioCache`` for the whole test session.

    The cache directory comes from the ``russo_cache_dir`` option and falls
    back to ``".russo_cache"``. Define a fixture with the same name in
    ``conftest.py`` to replace it.
    """
    from pathlib import Path

    configured_dir = request.config.getoption("russo_cache_dir", default=".russo_cache")
    cache_root = Path(configured_dir)
    return AudioCache(cache_root)

russo_evaluator

russo_evaluator() -> ExactEvaluator

Default evaluator — exact match. Override in conftest.py to customize.

Source code in src/russo/pytest_plugin.py
@pytest.fixture
def russo_evaluator() -> ExactEvaluator:
    """Supply the default evaluator, a fresh ``ExactEvaluator``.

    Define a fixture with the same name in ``conftest.py`` to use a different
    evaluator.
    """
    default_evaluator = ExactEvaluator()
    return default_evaluator

russo_result async

russo_result(request: FixtureRequest) -> EvalResult | BatchResult | None

Run the russo pipeline based on the @pytest.mark.russo marker.

Reads marker kwargs, resolves synthesizer/agent/evaluator fixtures, runs the pipeline, and returns the result.

Marker kwargs

- prompt (str): Single text prompt.
- prompts (list[str]): Multiple text prompts (runs all concurrently).
- expect (list): Expected tool calls.
- runs (int): Number of times to run each prompt (default 1). Falls back to the --russo-runs CLI option.
- max_concurrency (int | None): Cap on concurrent runs. Falls back to the --russo-max-concurrency CLI option.

RETURNS: EvalResult | BatchResult | None
  • EvalResult for single prompt + single run (backward compatible).
  • BatchResult when using multiple prompts or runs > 1.
  • None if the test has no russo marker.
Source code in src/russo/pytest_plugin.py
@pytest.fixture
async def russo_result(
    request: pytest.FixtureRequest,
) -> EvalResult | BatchResult | None:
    """Run the russo pipeline based on the ``@pytest.mark.russo`` marker.

    Reads marker kwargs, resolves synthesizer/agent/evaluator fixtures,
    runs the pipeline, records the result with the module-level reporter,
    and returns the result.

    Marker kwargs:
        prompt (str): Single text prompt. May also be given as the first
            positional marker argument.
        prompts (list[str]): Multiple text prompts (runs all concurrently).
            A single string is treated as one prompt; ``None`` is treated
            as not given.
        expect (list): Expected tool calls, as ``ToolCall`` objects or dicts
            of ``ToolCall`` keyword arguments. ``None`` is treated as empty.
        runs (int): Number of times to run each prompt (default 1).
            Falls back to ``--russo-runs`` CLI option.
        max_concurrency (int | None): Cap on concurrent runs.
            Falls back to ``--russo-max-concurrency`` CLI option.

    Returns:
        - ``EvalResult`` for single prompt + single run (backward compatible).
        - ``BatchResult`` when using multiple prompts or ``runs > 1``.
        - ``None`` if the test has no russo marker.
    """
    marker = request.node.get_closest_marker("russo")
    if marker is None:
        return None

    # --- extract marker arguments ---
    prompt: str = marker.kwargs.get("prompt", "")
    if not prompt and marker.args:
        prompt = marker.args[0]

    # ``list("hello")`` would yield one prompt per character, so a bare string
    # is wrapped instead of iterated. An explicit ``None`` is handled like a
    # missing key, mirroring the ``or`` fallbacks used for ``runs`` below.
    raw_prompts = marker.kwargs.get("prompts") or []
    prompts: list[str] = [raw_prompts] if isinstance(raw_prompts, str) else list(raw_prompts)

    # runs: marker overrides CLI option, CLI option overrides default 1
    runs: int = marker.kwargs.get("runs", 0) or request.config.getoption("russo_runs", default=None) or 1
    max_concurrency: int | None = marker.kwargs.get("max_concurrency") or request.config.getoption(
        "russo_max_concurrency", default=None
    )

    # Dict entries are expanded into ToolCall keyword arguments; ToolCall
    # instances pass through unchanged.
    expect_raw: list[Any] = marker.kwargs.get("expect") or []
    expect: list[ToolCall] = [tc if isinstance(tc, ToolCall) else ToolCall(**tc) for tc in expect_raw]

    # --- resolve fixtures ---
    synthesizer = request.getfixturevalue("russo_synthesizer")
    agent = request.getfixturevalue("russo_agent")

    # Reconcile the ``russo_cache`` option with the synthesizer the user supplied:
    # wrap a plain synthesizer when caching is on, or switch off an
    # already-wrapped one when caching is off.
    cache_enabled = request.config.getoption("russo_cache", default=True)
    if cache_enabled and not isinstance(synthesizer, CachedSynthesizer):
        cache = request.getfixturevalue("russo_audio_cache")
        synthesizer = CachedSynthesizer(synthesizer, cache=cache)
    elif not cache_enabled and isinstance(synthesizer, CachedSynthesizer):
        synthesizer.enabled = False

    try:
        evaluator = request.getfixturevalue("russo_evaluator")
    except pytest.FixtureLookupError:
        evaluator = ExactEvaluator()

    # --- decide execution mode ---
    is_batch = bool(prompts) or runs > 1

    if is_batch:
        # With ``runs > 1`` but no ``prompts`` list, the single prompt is repeated.
        effective_prompts = prompts if prompts else [prompt]
        result: EvalResult | BatchResult = await run_concurrent(
            prompts=effective_prompts,
            synthesizer=synthesizer,
            agent=agent,
            evaluator=evaluator,
            expect=expect,
            runs=runs,
            max_concurrency=max_concurrency,
        )
    else:
        result = await run(
            prompt=prompt,
            synthesizer=synthesizer,
            agent=agent,
            evaluator=evaluator,
            expect=expect,
        )

    _reporter.add(request.node.nodeid, result)
    return result

pytest_terminal_summary

pytest_terminal_summary(terminalreporter: Any, exitstatus: int, config: Config) -> None

Print russo results summary at the end of the test run.

Source code in src/russo/pytest_plugin.py
def pytest_terminal_summary(
    terminalreporter: Any,
    exitstatus: int,  # noqa: ARG001
    config: pytest.Config,
) -> None:
    """Emit the russo summary and, if requested, the HTML report.

    Nothing is written when the module-level reporter has a ``total`` of 0.
    Otherwise its summary text is written to the terminal, and when the
    ``--russo-report`` option holds a path, the HTML report is written there
    and the path is echoed.

    Args:
        terminalreporter: Object whose ``write_line`` is used for output.
        exitstatus: Unused.
        config: pytest config, consulted for the ``--russo-report`` option.
    """
    if _reporter.total == 0:
        return

    emit = terminalreporter.write_line
    emit(_reporter.summary())

    # HTML output is opt-in: stop here unless a report path was supplied.
    report_path = config.getoption("--russo-report", default=None)
    if not report_path:
        return

    _write_html_report(report_path)
    emit(f"\nRusso HTML report written to: {report_path}")

pytest_sessionfinish

pytest_sessionfinish(session: Session, exitstatus: int) -> None

Reset global reporter state between sessions (relevant for xdist, etc.).

Source code in src/russo/pytest_plugin.py
def pytest_sessionfinish(session: pytest.Session, exitstatus: int) -> None:  # noqa: ARG001
    """Schedule a reset of the global reporter for when this session's config is torn down.

    The reset is deferred rather than done here. pytest's terminal reporter
    calls ``pytest_terminal_summary`` from its own ``pytest_sessionfinish``
    wrapper, which runs after plain implementations such as this one have
    returned. Replacing ``_reporter`` immediately would therefore discard the
    collected results before the summary hook in this module reads them.
    ``Config.add_cleanup`` runs the reset when the config goes out of use,
    which is after the summary has been produced.

    Args:
        session: The finishing pytest session; its ``config`` owns the cleanup.
        exitstatus: Unused.
    """

    def _reset_reporter() -> None:
        # Swap in a fresh reporter so a later session in the same process
        # does not see results recorded by this one.
        global _reporter  # noqa: PLW0603
        _reporter = TerminalReporter()

    session.config.add_cleanup(_reset_reporter)