From e1f2b4b818fed51edbea2d85be4e80c3dd796173 Mon Sep 17 00:00:00 2001 From: mateo-berri <277851410+mateo-berri@users.noreply.github.com> Date: Thu, 30 Apr 2026 21:48:48 +0000 Subject: [PATCH] tests(vcr): trim non-load-bearing comments and docstrings Removes commentary that restated the code, including: - module-level banners explaining what the conftest does (covered by Readme.md and the function bodies) - docstrings on _scrub_response, _before_record_response, vcr_config, _vcr_disabled, pytest_recording_configure (function names + bodies are self-evident) - inline notes about header filtering, match_on, etc. - per-test docstrings restating the test name Keeps the two non-obvious notes that aren't recoverable from the code: the vcrpy/respx httpx-transport collision rationale on _RESPX_CONFLICTING_FILES, the vcrpy "return None to skip persisting" contract on filter_non_2xx_response, and the fixture-ordering dependency on _vcr_record_retries. --- Makefile | 6 -- tests/_flush_vcr_cache.py | 9 --- tests/_vcr_redis_persister.py | 59 ++------------ tests/llm_responses_api_testing/conftest.py | 16 +--- tests/llm_translation/conftest.py | 77 ++----------------- .../test_anthropic_completion.py | 13 ---- .../test_vcr_redis_persister.py | 52 ++----------- 7 files changed, 19 insertions(+), 213 deletions(-) diff --git a/Makefile b/Makefile index 125b0e749d..5dbd308a3e 100644 --- a/Makefile +++ b/Makefile @@ -186,11 +186,5 @@ test-llm-translation-single: install-test-deps --junitxml=test-results/junit.xml \ -v --tb=short --maxfail=100 --timeout=300 -# VCR cache helpers ----------------------------------------------------------- -# Drop every Redis key under the ``litellm:vcr:cassette:*`` prefix. Use this -# when you want the next CI run (or local run) to re-record against live -# providers immediately instead of waiting for the 24h TTL to roll over. -# Reads REDIS_HOST / REDIS_PORT / REDIS_PASSWORD from the environment, the -# same vars CircleCI uses for its other Redis-backed jobs. test-llm-translation-flush-vcr-cache: $(UV_RUN) python tests/_flush_vcr_cache.py diff --git a/tests/_flush_vcr_cache.py b/tests/_flush_vcr_cache.py index 4bb3fd7ba8..dfaba2367c 100644 --- a/tests/_flush_vcr_cache.py +++ b/tests/_flush_vcr_cache.py @@ -1,12 +1,3 @@ -"""Flush every VCR cassette stored in Redis. - -Run via ``make test-llm-translation-flush-vcr-cache``. Use when you want the -next test run to re-record against live providers right now instead of -waiting for the 24h TTL to expire. - -Reads ``REDIS_HOST``, ``REDIS_PORT``, ``REDIS_PASSWORD`` from the environment. -""" - from __future__ import annotations import os diff --git a/tests/_vcr_redis_persister.py b/tests/_vcr_redis_persister.py index 88423393c5..3b1e456c0a 100644 --- a/tests/_vcr_redis_persister.py +++ b/tests/_vcr_redis_persister.py @@ -1,18 +1,3 @@ -"""Redis-backed cassette persister for vcrpy. - -Stores the same serialized cassette payload that ``FilesystemPersister`` -would write to disk, but under a Redis key with a 24h TTL. Cassettes -auto-expire so the next CI run after the rollover re-records against the -live provider, surfacing API drift within a day instead of waiting for a -human to refresh ``cassettes/*.yaml`` by hand. - -On a cache miss we raise ``CassetteNotFoundError``; vcrpy's record-mode -machinery catches that and falls through to a live HTTP call, which then -gets persisted via ``save_cassette``. Non-2xx responses are filtered out -upstream by ``conftest.before_record_response`` so a transient provider -failure can't poison the cache for 24h. -""" - from __future__ import annotations import os @@ -26,12 +11,7 @@ REDIS_KEY_PREFIX = "litellm:vcr:cassette:" def redis_key_for(cassette_path: str) -> str: - """Map a cassette file path to a stable Redis key. - - Uses the path relative to CWD so keys are stable across machines. - """ - rel = os.path.relpath(str(cassette_path)) - return f"{REDIS_KEY_PREFIX}{rel}" + return f"{REDIS_KEY_PREFIX}{os.path.relpath(str(cassette_path))}" def _build_default_client(): @@ -39,9 +19,7 @@ def _build_default_client(): host = os.environ.get("REDIS_HOST") if not host: - raise RuntimeError( - "REDIS_HOST is not set; cannot build Redis cassette persister" - ) + raise RuntimeError("REDIS_HOST is not set") return redis.Redis( host=host, port=int(os.environ.get("REDIS_PORT", 6379)), @@ -56,14 +34,6 @@ def make_redis_persister( client: Optional[Any] = None, ttl_seconds: int = CASSETTE_TTL_SECONDS, ): - """Build a vcrpy-compatible persister bound to a Redis client. - - The returned object exposes ``load_cassette`` / ``save_cassette`` and is - a drop-in replacement for ``vcr.persisters.filesystem.FilesystemPersister``. - Pass an explicit ``client`` in tests; production callers omit it and let - the persister build a client from ``REDIS_HOST`` / ``REDIS_PORT`` / - ``REDIS_PASSWORD``. - """ redis_client = client if client is not None else _build_default_client() class _RedisPersister: @@ -80,32 +50,17 @@ def make_redis_persister( def save_cassette(cassette_path, cassette_dict, serializer): data = serialize(cassette_dict, serializer) payload = data.encode("utf-8") if isinstance(data, str) else data - redis_client.set( - redis_key_for(cassette_path), - payload, - ex=ttl_seconds, - ) + redis_client.set(redis_key_for(cassette_path), payload, ex=ttl_seconds) return _RedisPersister def filter_non_2xx_response(response): - """vcrpy ``before_record_response`` hook that drops non-2xx responses. - - Returning ``None`` tells vcrpy to skip persisting the response (see - ``vcr.cassette.Cassette.append``). This prevents transient 5xx/429 - failures from being baked into the cache for the rest of the TTL window. - """ + # Returning None tells vcrpy to skip persisting; see Cassette.append. if not isinstance(response, dict): return response status = response.get("status") - code = None - if isinstance(status, dict): - code = status.get("code") - elif isinstance(status, int): - code = status - if code is None: + code = status.get("code") if isinstance(status, dict) else status + if not isinstance(code, int): return response - if 200 <= int(code) < 300: - return response - return None + return response if 200 <= code < 300 else None diff --git a/tests/llm_responses_api_testing/conftest.py b/tests/llm_responses_api_testing/conftest.py index adda3f5188..7209d2f957 100644 --- a/tests/llm_responses_api_testing/conftest.py +++ b/tests/llm_responses_api_testing/conftest.py @@ -1,10 +1,4 @@ # conftest.py -# -# Auto-applies ``@pytest.mark.vcr`` to every collected test (see -# ``pytest_collection_modifyitems``) so live provider calls land in the -# Redis-backed VCR cache. The persister, header scrubbing and 2xx-only -# filtering live in ``tests/_vcr_redis_persister.py``; the cache key and -# 24h TTL match the llm_translation conftest. import asyncio import importlib @@ -25,7 +19,6 @@ from tests._vcr_redis_persister import ( # noqa: E402 ) -# Headers that must never be persisted to a cassette. _FILTERED_REQUEST_HEADERS = ( "authorization", "x-api-key", @@ -44,7 +37,6 @@ _FILTERED_REQUEST_HEADERS = ( "x-goog-user-project", ) -# Per-request response headers we strip so cassettes diff cleanly. _FILTERED_RESPONSE_HEADERS = ( "set-cookie", "x-request-id", @@ -70,8 +62,7 @@ def _scrub_response(response): def _before_record_response(response): - response = _scrub_response(response) - return filter_non_2xx_response(response) + return filter_non_2xx_response(_scrub_response(response)) @pytest.fixture(scope="module") @@ -152,17 +143,12 @@ def setup_and_teardown(): def pytest_collection_modifyitems(config, items): - # Auto-apply ``@pytest.mark.vcr`` so any provider call lands in the - # Redis cache. No respx files exist in this directory today; if any are - # added later, exclude them by filename here. Skip entirely when VCR - # is disabled (no REDIS_HOST or LITELLM_VCR_DISABLE=1). if not _vcr_disabled(): for item in items: if item.get_closest_marker("vcr") is not None: continue item.add_marker(pytest.mark.vcr) - # Preserve historical custom_logger ordering. custom_logger_tests = [ item for item in items if "custom_logger" in item.parent.name ] diff --git a/tests/llm_translation/conftest.py b/tests/llm_translation/conftest.py index ac87d00980..98b7fb16bc 100644 --- a/tests/llm_translation/conftest.py +++ b/tests/llm_translation/conftest.py @@ -4,12 +4,6 @@ # Mirrors the pattern in tests/local_testing/conftest.py: # - Function-scoped fixture resets litellm globals to true defaults # - Module-scoped reload only in single-process mode -# -# Also wires up the Redis-backed VCR cache. Every test in this directory is -# auto-marked with ``@pytest.mark.vcr`` (see ``pytest_collection_modifyitems``) -# unless its file appears in ``_RESPX_CONFLICTING_FILES`` — those use respx, -# which patches the same httpx transport vcrpy does. Cache key naming, TTL, -# and 2xx-only filtering live in ``tests/_vcr_redis_persister.py``. import asyncio import importlib @@ -30,19 +24,9 @@ from tests._vcr_redis_persister import ( # noqa: E402 ) -# --------------------------------------------------------------------------- -# VCR cassette infrastructure (pytest-recording + Redis) -# --------------------------------------------------------------------------- -# All tests in tests/llm_translation/ are auto-marked with ``@pytest.mark.vcr`` -# (excluding the respx-using files listed below). On cache miss vcrpy records -# the live response into Redis under ``litellm:vcr:cassette:`` with -# a 24h TTL; subsequent runs within that window replay without touching the -# network. Set ``LITELLM_VCR_DISABLE=1`` to skip VCR entirely (e.g. when -# debugging an upstream API change locally). - -# Test files that use ``respx`` to patch httpx. vcrpy patches the same -# transport, so applying both to the same test will make one of them silently -# win and the other look like a no-op. Skip auto-marking these. +# vcrpy and respx both patch the httpx transport — applying both makes one +# silently win. Files in this set use respx and are skipped by the +# auto-marker below. _RESPX_CONFLICTING_FILES = frozenset( { "test_azure_o_series.py", @@ -55,23 +39,14 @@ _RESPX_CONFLICTING_FILES = frozenset( "test_xai.py", } ) - -# The persister's own unit tests must not run inside a VCR cassette context — -# they call ``save_cassette`` / ``load_cassette`` directly against fakeredis -# and don't make HTTP calls, but auto-marking them would still wrap each -# test in a Redis lookup we don't want. _VCR_AUTO_MARKER_SKIP_FILES = _RESPX_CONFLICTING_FILES | frozenset( {"test_vcr_redis_persister.py"} ) -# Headers that must never be persisted to a cassette. Matched -# case-insensitively by vcrpy. _FILTERED_REQUEST_HEADERS = ( "authorization", "x-api-key", "anthropic-api-key", - # Strip ``anthropic-version`` so cassettes have a stable shape across - # SDK versions that bump the header. "anthropic-version", "openai-api-key", "azure-api-key", @@ -86,8 +61,6 @@ _FILTERED_REQUEST_HEADERS = ( "x-goog-user-project", ) -# Per-request response headers we strip so cassettes diff cleanly across -# re-records. _FILTERED_RESPONSE_HEADERS = ( "set-cookie", "x-request-id", @@ -102,7 +75,6 @@ _FILTERED_RESPONSE_HEADERS = ( def _scrub_response(response): - """Strip per-request response headers we don't want in the cassette.""" if not isinstance(response, dict): return response headers = response.get("headers") or {} @@ -114,32 +86,15 @@ def _scrub_response(response): def _before_record_response(response): - """Compose per-request scrubbing with the 2xx-only cache policy. - - Order matters: we scrub headers first so we don't leak request IDs even - on responses we end up dropping from the cassette mid-development. - """ - response = _scrub_response(response) - return filter_non_2xx_response(response) + return filter_non_2xx_response(_scrub_response(response)) @pytest.fixture(scope="module") def vcr_config(): - """Shared VCR config consumed by ``pytest-recording``. - - ``record_mode="once"`` is what makes this a useful daily cache: - - cassette absent (cache miss) → record the live call into Redis, - - cassette present (cache hit) → replay only. - 24h TTL on the Redis key means each new day's first run records against - live providers, surfacing API drift within a day instead of silently - serving stale responses forever. - """ return { "filter_headers": list(_FILTERED_REQUEST_HEADERS), "decode_compressed_response": True, "record_mode": "once", - # Match on full request shape so streaming vs non-streaming and - # different prompts produce distinct cassettes. "match_on": ( "method", "scheme", @@ -154,18 +109,12 @@ def vcr_config(): def _vcr_disabled() -> bool: - """VCR is disabled when explicitly opted out or when Redis isn't wired. - - No Redis means no cache to read from or write to — fall back to live - calls instead of silently writing YAML to disk (which we don't ship). - """ if os.environ.get("LITELLM_VCR_DISABLE") == "1": return True return not os.environ.get("REDIS_HOST") def pytest_recording_configure(config, vcr): - """Register the Redis-backed cassette persister.""" if _vcr_disabled(): return vcr.register_persister(make_redis_persister()) @@ -248,33 +197,18 @@ def setup_and_teardown(event_loop): # Add event_loop as a dependency event_loop.run_until_complete(asyncio.gather(*pending, return_exceptions=True)) -# Number of attempts a vcr-marked test gets when recording against a live -# provider. Replay-only runs never reach the network so this only matters on -# cache miss / record mode. Tenacity-style exponential backoff is provided by -# the underlying provider SDKs (openai, anthropic) when they see 429/5xx, so -# bumping num_retries propagates retry-with-backoff for free. _VCR_RECORD_RETRIES = 3 @pytest.fixture(autouse=True) def _vcr_record_retries(setup_and_teardown, request): - """Configure record-time retries for ``@pytest.mark.vcr`` tests. - - Depends on ``setup_and_teardown`` so this runs *after* the per-test - ``importlib.reload(litellm)`` resets ``num_retries`` back to None. - """ + # Depends on setup_and_teardown so this runs after litellm is reloaded. if request.node.get_closest_marker("vcr") is None: return litellm.num_retries = _VCR_RECORD_RETRIES def pytest_collection_modifyitems(config, items): - # 1. Auto-apply ``@pytest.mark.vcr`` to every collected test in this - # directory so any provider call lands in the Redis cache. Skip files - # that use respx (it patches the same transport vcrpy does) and the - # persister's own unit tests. Skip entirely if VCR is disabled (no - # REDIS_HOST or LITELLM_VCR_DISABLE=1) so dev runs without Redis - # don't go through cassette logic at all. if not _vcr_disabled(): for item in items: filename = os.path.basename(str(item.fspath)) @@ -284,7 +218,6 @@ def pytest_collection_modifyitems(config, items): continue item.add_marker(pytest.mark.vcr) - # 2. Preserve the historical ordering of custom_logger tests vs the rest. custom_logger_tests = [ item for item in items if "custom_logger" in item.parent.name ] diff --git a/tests/llm_translation/test_anthropic_completion.py b/tests/llm_translation/test_anthropic_completion.py index ae42155642..8c5ac83001 100644 --- a/tests/llm_translation/test_anthropic_completion.py +++ b/tests/llm_translation/test_anthropic_completion.py @@ -1888,12 +1888,6 @@ def test_metadata_filter_applies_to_azure_anthropic(): def test_anthropic_basic_completion_replay(): - """Smoke-test that a vanilla Anthropic completion replays from a cassette. - - Exercises the full LiteLLM transformation pipeline (request shaping + - response parsing) against a real-shape Anthropic payload. The cassette - is loaded from the Redis-backed VCR cache configured in conftest.py. - """ response = litellm.completion( model="anthropic/claude-sonnet-4-5-20250929", messages=[{"role": "user", "content": "Hello!"}], @@ -1903,17 +1897,10 @@ def test_anthropic_basic_completion_replay(): assert response.choices[0].message.content == ("Hello! How can I help you today?") assert response.usage.prompt_tokens == 12 assert response.usage.completion_tokens == 11 - # Anthropic sets stop_reason="end_turn" → litellm normalises to "stop" assert response.choices[0].finish_reason == "stop" def test_anthropic_streaming_completion_replay(): - """Replay a streaming Anthropic completion from the VCR cache. - - Exercises the SSE chunk parser and the public streaming surface — any - regression in the streaming transformation surfaces here because the - cassette captures every ``content_block_delta`` event Anthropic emits. - """ stream = litellm.completion( model="anthropic/claude-sonnet-4-5-20250929", messages=[{"role": "user", "content": "Hello!"}], diff --git a/tests/llm_translation/test_vcr_redis_persister.py b/tests/llm_translation/test_vcr_redis_persister.py index 9ca23410bd..6de6283e6b 100644 --- a/tests/llm_translation/test_vcr_redis_persister.py +++ b/tests/llm_translation/test_vcr_redis_persister.py @@ -1,19 +1,3 @@ -"""Tests for the Redis-backed vcrpy cassette persister. - -These cover the three behaviours we actually rely on in CI: - -1. ``save_cassette`` followed by ``load_cassette`` returns the same - request/response pairs (roundtrip via the real vcrpy serializer). -2. Saved keys expire after ~24h so the cache auto-refreshes against live - providers without manual ``make`` runs. -3. ``load_cassette`` raises ``CassetteNotFoundError`` on a miss, so vcrpy's - record-mode machinery falls through to a live HTTP call instead of - silently matching against an empty cassette. - -We also pin the 2xx-only filter so a transient 5xx/429 from the provider -can't be baked into the cache for the rest of the TTL window. -""" - from __future__ import annotations import os @@ -25,8 +9,6 @@ from vcr.persisters.filesystem import CassetteNotFoundError from vcr.request import Request from vcr.serializers import yamlserializer -# Make tests/ importable as a package so we can pull the shared persister -# without depending on pytest's CWD. sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "..", ".."))) from tests._vcr_redis_persister import ( # noqa: E402 @@ -38,7 +20,6 @@ from tests._vcr_redis_persister import ( # noqa: E402 def _sample_cassette_dict(): - """Build a minimal cassette payload that exercises serialize/deserialize.""" request = Request( method="POST", uri="https://api.anthropic.com/v1/messages", @@ -48,8 +29,6 @@ def _sample_cassette_dict(): response = { "status": {"code": 200, "message": "OK"}, "headers": {"content-type": ["application/json"]}, - # vcrpy stores response bodies as bytes; mirror that so the - # roundtrip assertion exercises real-world serialization shapes. "body": {"string": b'{"id":"msg_1","type":"message"}'}, } return {"requests": [request], "responses": [response]} @@ -61,10 +40,7 @@ def _persister_with_fake_redis(): def test_save_then_load_roundtrips_cassette_content(): - """A saved cassette must come back from ``load_cassette`` identical to - what was put in. If serialize/deserialize ever drift (e.g. encoding bug) - every replay-mode test in the suite breaks; this catches it cheaply.""" - fake, persister = _persister_with_fake_redis() + _, persister = _persister_with_fake_redis() cassette_path = "tests/llm_translation/cassettes/test_x/test_y.yaml" persister.save_cassette(cassette_path, _sample_cassette_dict(), yamlserializer) @@ -79,27 +55,16 @@ def test_save_then_load_roundtrips_cassette_content(): def test_saved_key_has_24h_ttl(): - """The whole point of the Redis backend is that entries auto-expire after - 24h so each daily CI run re-records against live providers. If the TTL - isn't being applied, the cache never refreshes and we silently mask - upstream API drift.""" fake, persister = _persister_with_fake_redis() cassette_path = "tests/llm_translation/cassettes/test_x/test_ttl.yaml" persister.save_cassette(cassette_path, _sample_cassette_dict(), yamlserializer) ttl = fake.ttl(redis_key_for(cassette_path)) - assert ttl > 0, "key was saved without an expiry — would never refresh" - assert ttl <= CASSETTE_TTL_SECONDS - assert ttl >= CASSETTE_TTL_SECONDS - 5 # allow tiny clock slack + assert CASSETTE_TTL_SECONDS - 5 <= ttl <= CASSETTE_TTL_SECONDS def test_load_missing_key_raises_cassette_not_found(): - """Cache miss must surface as ``CassetteNotFoundError``. vcrpy's record - machinery catches that exception and falls through to the live HTTP - call; if we returned empty/None instead, vcrpy would treat it as a - cassette with zero matching requests and the test would fail with a - confusing ``CannotOverwriteExistingCassetteException``.""" _, persister = _persister_with_fake_redis() with pytest.raises(CassetteNotFoundError): persister.load_cassette("never/recorded.yaml", yamlserializer) @@ -116,24 +81,19 @@ def test_load_missing_key_raises_cassette_not_found(): (400, True), (401, True), (404, True), - (429, True), # rate limit — must never be cached - (500, True), # transient 5xx — must never be cached + (429, True), + (500, True), (502, True), (503, True), ], ) def test_only_2xx_responses_are_cached(status_code, expect_dropped): - """Pin the cache-poisoning protection: a non-2xx must be dropped from - the cassette (returned as ``None`` from the hook) so a transient 429 - or 503 doesn't get pinned for the rest of the TTL window. 2xx - responses must pass through untouched.""" response = { "status": {"code": status_code, "message": "X"}, "headers": {}, "body": {"string": ""}, } result = filter_non_2xx_response(response) - if expect_dropped: - assert result is None - else: + assert (result is None) == expect_dropped + if not expect_dropped: assert result is response