From e1f2b4b818fed51edbea2d85be4e80c3dd796173 Mon Sep 17 00:00:00 2001
From: mateo-berri <277851410+mateo-berri@users.noreply.github.com>
Date: Thu, 30 Apr 2026 21:48:48 +0000
Subject: [PATCH] tests(vcr): trim non-load-bearing comments and docstrings

Removes commentary that restated the code, including:

- module-level banners explaining what the conftest does (covered by
  Readme.md and the function bodies)
- docstrings on _scrub_response, _before_record_response, vcr_config,
  _vcr_disabled, pytest_recording_configure (function names + bodies
  are self-evident)
- inline notes about header filtering, match_on, etc.
- per-test docstrings restating the test name

Keeps the two non-obvious notes that aren't recoverable from the code:
the vcrpy/respx httpx-transport collision rationale on
_RESPX_CONFLICTING_FILES, the vcrpy "return None to skip persisting"
contract on filter_non_2xx_response, and the fixture-ordering
dependency on _vcr_record_retries.
---
 Makefile                                      |  6 --
 tests/_flush_vcr_cache.py                     |  9 ---
 tests/_vcr_redis_persister.py                 | 59 ++------------
 tests/llm_responses_api_testing/conftest.py   | 16 +---
 tests/llm_translation/conftest.py             | 77 ++-----------------
 .../test_anthropic_completion.py              | 13 ----
 .../test_vcr_redis_persister.py               | 52 ++-----------
 7 files changed, 19 insertions(+), 213 deletions(-)

diff --git a/Makefile b/Makefile
index 125b0e749d..5dbd308a3e 100644
--- a/Makefile
+++ b/Makefile
@@ -186,11 +186,5 @@ test-llm-translation-single: install-test-deps
 		--junitxml=test-results/junit.xml \
 		-v --tb=short --maxfail=100 --timeout=300
 
-# VCR cache helpers -----------------------------------------------------------
-# Drop every Redis key under the ``litellm:vcr:cassette:*`` prefix. Use this
-# when you want the next CI run (or local run) to re-record against live
-# providers immediately instead of waiting for the 24h TTL to roll over.
-# Reads REDIS_HOST / REDIS_PORT / REDIS_PASSWORD from the environment, the
-# same vars CircleCI uses for its other Redis-backed jobs.
 test-llm-translation-flush-vcr-cache:
 	$(UV_RUN) python tests/_flush_vcr_cache.py
diff --git a/tests/_flush_vcr_cache.py b/tests/_flush_vcr_cache.py
index 4bb3fd7ba8..dfaba2367c 100644
--- a/tests/_flush_vcr_cache.py
+++ b/tests/_flush_vcr_cache.py
@@ -1,12 +1,3 @@
-"""Flush every VCR cassette stored in Redis.
-
-Run via ``make test-llm-translation-flush-vcr-cache``. Use when you want the
-next test run to re-record against live providers right now instead of
-waiting for the 24h TTL to expire.
-
-Reads ``REDIS_HOST``, ``REDIS_PORT``, ``REDIS_PASSWORD`` from the environment.
-"""
-
 from __future__ import annotations
 
 import os
diff --git a/tests/_vcr_redis_persister.py b/tests/_vcr_redis_persister.py
index 88423393c5..3b1e456c0a 100644
--- a/tests/_vcr_redis_persister.py
+++ b/tests/_vcr_redis_persister.py
@@ -1,18 +1,3 @@
-"""Redis-backed cassette persister for vcrpy.
-
-Stores the same serialized cassette payload that ``FilesystemPersister``
-would write to disk, but under a Redis key with a 24h TTL. Cassettes
-auto-expire so the next CI run after the rollover re-records against the
-live provider, surfacing API drift within a day instead of waiting for a
-human to refresh ``cassettes/*.yaml`` by hand.
-
-On a cache miss we raise ``CassetteNotFoundError``; vcrpy's record-mode
-machinery catches that and falls through to a live HTTP call, which then
-gets persisted via ``save_cassette``. Non-2xx responses are filtered out
-upstream by ``conftest.before_record_response`` so a transient provider
-failure can't poison the cache for 24h.
-"""
-
 from __future__ import annotations
 
 import os
@@ -26,12 +11,7 @@ REDIS_KEY_PREFIX = "litellm:vcr:cassette:"
 
 
 def redis_key_for(cassette_path: str) -> str:
-    """Map a cassette file path to a stable Redis key.
-
-    Uses the path relative to CWD so keys are stable across machines.
-    """
-    rel = os.path.relpath(str(cassette_path))
-    return f"{REDIS_KEY_PREFIX}{rel}"
+    return f"{REDIS_KEY_PREFIX}{os.path.relpath(str(cassette_path))}"
 
 
 def _build_default_client():
@@ -39,9 +19,7 @@ def _build_default_client():
 
     host = os.environ.get("REDIS_HOST")
     if not host:
-        raise RuntimeError(
-            "REDIS_HOST is not set; cannot build Redis cassette persister"
-        )
+        raise RuntimeError("REDIS_HOST is not set")
     return redis.Redis(
         host=host,
         port=int(os.environ.get("REDIS_PORT", 6379)),
@@ -56,14 +34,6 @@ def make_redis_persister(
     client: Optional[Any] = None,
     ttl_seconds: int = CASSETTE_TTL_SECONDS,
 ):
-    """Build a vcrpy-compatible persister bound to a Redis client.
-
-    The returned object exposes ``load_cassette`` / ``save_cassette`` and is
-    a drop-in replacement for ``vcr.persisters.filesystem.FilesystemPersister``.
-    Pass an explicit ``client`` in tests; production callers omit it and let
-    the persister build a client from ``REDIS_HOST`` / ``REDIS_PORT`` /
-    ``REDIS_PASSWORD``.
-    """
     redis_client = client if client is not None else _build_default_client()
 
     class _RedisPersister:
@@ -80,32 +50,17 @@ def make_redis_persister(
         def save_cassette(cassette_path, cassette_dict, serializer):
             data = serialize(cassette_dict, serializer)
             payload = data.encode("utf-8") if isinstance(data, str) else data
-            redis_client.set(
-                redis_key_for(cassette_path),
-                payload,
-                ex=ttl_seconds,
-            )
+            redis_client.set(redis_key_for(cassette_path), payload, ex=ttl_seconds)
 
     return _RedisPersister
 
 
 def filter_non_2xx_response(response):
-    """vcrpy ``before_record_response`` hook that drops non-2xx responses.
-
-    Returning ``None`` tells vcrpy to skip persisting the response (see
-    ``vcr.cassette.Cassette.append``). This prevents transient 5xx/429
-    failures from being baked into the cache for the rest of the TTL window.
-    """
+    # Returning None tells vcrpy to skip persisting; see Cassette.append.
     if not isinstance(response, dict):
         return response
     status = response.get("status")
-    code = None
-    if isinstance(status, dict):
-        code = status.get("code")
-    elif isinstance(status, int):
-        code = status
-    if code is None:
+    code = status.get("code") if isinstance(status, dict) else status
+    if not isinstance(code, int):
         return response
-    if 200 <= int(code) < 300:
-        return response
-    return None
+    return response if 200 <= code < 300 else None
diff --git a/tests/llm_responses_api_testing/conftest.py b/tests/llm_responses_api_testing/conftest.py
index adda3f5188..7209d2f957 100644
--- a/tests/llm_responses_api_testing/conftest.py
+++ b/tests/llm_responses_api_testing/conftest.py
@@ -1,10 +1,4 @@
 # conftest.py
-#
-# Auto-applies ``@pytest.mark.vcr`` to every collected test (see
-# ``pytest_collection_modifyitems``) so live provider calls land in the
-# Redis-backed VCR cache. The persister, header scrubbing and 2xx-only
-# filtering live in ``tests/_vcr_redis_persister.py``; the cache key and
-# 24h TTL match the llm_translation conftest.
 
 import asyncio
 import importlib
@@ -25,7 +19,6 @@ from tests._vcr_redis_persister import (  # noqa: E402
 )
 
 
-# Headers that must never be persisted to a cassette.
 _FILTERED_REQUEST_HEADERS = (
     "authorization",
     "x-api-key",
@@ -44,7 +37,6 @@ _FILTERED_REQUEST_HEADERS = (
     "x-goog-user-project",
 )
 
-# Per-request response headers we strip so cassettes diff cleanly.
 _FILTERED_RESPONSE_HEADERS = (
     "set-cookie",
     "x-request-id",
@@ -70,8 +62,7 @@ def _scrub_response(response):
 
 
 def _before_record_response(response):
-    response = _scrub_response(response)
-    return filter_non_2xx_response(response)
+    return filter_non_2xx_response(_scrub_response(response))
 
 
 @pytest.fixture(scope="module")
@@ -152,17 +143,12 @@ def setup_and_teardown():
 
 
 def pytest_collection_modifyitems(config, items):
-    # Auto-apply ``@pytest.mark.vcr`` so any provider call lands in the
-    # Redis cache. No respx files exist in this directory today; if any are
-    # added later, exclude them by filename here. Skip entirely when VCR
-    # is disabled (no REDIS_HOST or LITELLM_VCR_DISABLE=1).
     if not _vcr_disabled():
         for item in items:
             if item.get_closest_marker("vcr") is not None:
                 continue
             item.add_marker(pytest.mark.vcr)
 
-    # Preserve historical custom_logger ordering.
     custom_logger_tests = [
         item for item in items if "custom_logger" in item.parent.name
     ]
diff --git a/tests/llm_translation/conftest.py b/tests/llm_translation/conftest.py
index ac87d00980..98b7fb16bc 100644
--- a/tests/llm_translation/conftest.py
+++ b/tests/llm_translation/conftest.py
@@ -4,12 +4,6 @@
 # Mirrors the pattern in tests/local_testing/conftest.py:
 #   - Function-scoped fixture resets litellm globals to true defaults
 #   - Module-scoped reload only in single-process mode
-#
-# Also wires up the Redis-backed VCR cache. Every test in this directory is
-# auto-marked with ``@pytest.mark.vcr`` (see ``pytest_collection_modifyitems``)
-# unless its file appears in ``_RESPX_CONFLICTING_FILES`` — those use respx,
-# which patches the same httpx transport vcrpy does. Cache key naming, TTL,
-# and 2xx-only filtering live in ``tests/_vcr_redis_persister.py``.
 
 import asyncio
 import importlib
@@ -30,19 +24,9 @@ from tests._vcr_redis_persister import (  # noqa: E402
 )
 
 
-# ---------------------------------------------------------------------------
-# VCR cassette infrastructure (pytest-recording + Redis)
-# ---------------------------------------------------------------------------
-# All tests in tests/llm_translation/ are auto-marked with ``@pytest.mark.vcr``
-# (excluding the respx-using files listed below). On cache miss vcrpy records
-# the live response into Redis under ``litellm:vcr:cassette:<rel_path>`` with
-# a 24h TTL; subsequent runs within that window replay without touching the
-# network. Set ``LITELLM_VCR_DISABLE=1`` to skip VCR entirely (e.g. when
-# debugging an upstream API change locally).
-
-# Test files that use ``respx`` to patch httpx. vcrpy patches the same
-# transport, so applying both to the same test will make one of them silently
-# win and the other look like a no-op. Skip auto-marking these.
+# vcrpy and respx both patch the httpx transport — applying both makes one
+# silently win. Files in this set use respx and are skipped by the
+# auto-marker below.
 _RESPX_CONFLICTING_FILES = frozenset(
     {
         "test_azure_o_series.py",
@@ -55,23 +39,14 @@ _RESPX_CONFLICTING_FILES = frozenset(
         "test_xai.py",
     }
 )
-
-# The persister's own unit tests must not run inside a VCR cassette context —
-# they call ``save_cassette`` / ``load_cassette`` directly against fakeredis
-# and don't make HTTP calls, but auto-marking them would still wrap each
-# test in a Redis lookup we don't want.
 _VCR_AUTO_MARKER_SKIP_FILES = _RESPX_CONFLICTING_FILES | frozenset(
     {"test_vcr_redis_persister.py"}
 )
 
-# Headers that must never be persisted to a cassette. Matched
-# case-insensitively by vcrpy.
 _FILTERED_REQUEST_HEADERS = (
     "authorization",
     "x-api-key",
     "anthropic-api-key",
-    # Strip ``anthropic-version`` so cassettes have a stable shape across
-    # SDK versions that bump the header.
     "anthropic-version",
     "openai-api-key",
     "azure-api-key",
@@ -86,8 +61,6 @@ _FILTERED_REQUEST_HEADERS = (
     "x-goog-user-project",
 )
 
-# Per-request response headers we strip so cassettes diff cleanly across
-# re-records.
 _FILTERED_RESPONSE_HEADERS = (
     "set-cookie",
     "x-request-id",
@@ -102,7 +75,6 @@ _FILTERED_RESPONSE_HEADERS = (
 
 
 def _scrub_response(response):
-    """Strip per-request response headers we don't want in the cassette."""
     if not isinstance(response, dict):
         return response
     headers = response.get("headers") or {}
@@ -114,32 +86,15 @@ def _scrub_response(response):
 
 
 def _before_record_response(response):
-    """Compose per-request scrubbing with the 2xx-only cache policy.
-
-    Order matters: we scrub headers first so we don't leak request IDs even
-    on responses we end up dropping from the cassette mid-development.
-    """
-    response = _scrub_response(response)
-    return filter_non_2xx_response(response)
+    return filter_non_2xx_response(_scrub_response(response))
 
 
 @pytest.fixture(scope="module")
 def vcr_config():
-    """Shared VCR config consumed by ``pytest-recording``.
-
-    ``record_mode="once"`` is what makes this a useful daily cache:
-    - cassette absent (cache miss) → record the live call into Redis,
-    - cassette present (cache hit) → replay only.
-    24h TTL on the Redis key means each new day's first run records against
-    live providers, surfacing API drift within a day instead of silently
-    serving stale responses forever.
-    """
     return {
         "filter_headers": list(_FILTERED_REQUEST_HEADERS),
         "decode_compressed_response": True,
         "record_mode": "once",
-        # Match on full request shape so streaming vs non-streaming and
-        # different prompts produce distinct cassettes.
         "match_on": (
             "method",
             "scheme",
@@ -154,18 +109,12 @@ def vcr_config():
 
 
 def _vcr_disabled() -> bool:
-    """VCR is disabled when explicitly opted out or when Redis isn't wired.
-
-    No Redis means no cache to read from or write to — fall back to live
-    calls instead of silently writing YAML to disk (which we don't ship).
-    """
     if os.environ.get("LITELLM_VCR_DISABLE") == "1":
         return True
     return not os.environ.get("REDIS_HOST")
 
 
 def pytest_recording_configure(config, vcr):
-    """Register the Redis-backed cassette persister."""
     if _vcr_disabled():
         return
     vcr.register_persister(make_redis_persister())
@@ -248,33 +197,18 @@ def setup_and_teardown(event_loop):  # Add event_loop as a dependency
         event_loop.run_until_complete(asyncio.gather(*pending, return_exceptions=True))
 
 
-# Number of attempts a vcr-marked test gets when recording against a live
-# provider. Replay-only runs never reach the network so this only matters on
-# cache miss / record mode. Tenacity-style exponential backoff is provided by
-# the underlying provider SDKs (openai, anthropic) when they see 429/5xx, so
-# bumping num_retries propagates retry-with-backoff for free.
 _VCR_RECORD_RETRIES = 3
 
 
 @pytest.fixture(autouse=True)
 def _vcr_record_retries(setup_and_teardown, request):
-    """Configure record-time retries for ``@pytest.mark.vcr`` tests.
-
-    Depends on ``setup_and_teardown`` so this runs *after* the per-test
-    ``importlib.reload(litellm)`` resets ``num_retries`` back to None.
-    """
+    # Depends on setup_and_teardown so this runs after litellm is reloaded.
     if request.node.get_closest_marker("vcr") is None:
         return
     litellm.num_retries = _VCR_RECORD_RETRIES
 
 
 def pytest_collection_modifyitems(config, items):
-    # 1. Auto-apply ``@pytest.mark.vcr`` to every collected test in this
-    #    directory so any provider call lands in the Redis cache. Skip files
-    #    that use respx (it patches the same transport vcrpy does) and the
-    #    persister's own unit tests. Skip entirely if VCR is disabled (no
-    #    REDIS_HOST or LITELLM_VCR_DISABLE=1) so dev runs without Redis
-    #    don't go through cassette logic at all.
     if not _vcr_disabled():
         for item in items:
             filename = os.path.basename(str(item.fspath))
@@ -284,7 +218,6 @@ def pytest_collection_modifyitems(config, items):
                 continue
             item.add_marker(pytest.mark.vcr)
 
-    # 2. Preserve the historical ordering of custom_logger tests vs the rest.
     custom_logger_tests = [
         item for item in items if "custom_logger" in item.parent.name
     ]
diff --git a/tests/llm_translation/test_anthropic_completion.py b/tests/llm_translation/test_anthropic_completion.py
index ae42155642..8c5ac83001 100644
--- a/tests/llm_translation/test_anthropic_completion.py
+++ b/tests/llm_translation/test_anthropic_completion.py
@@ -1888,12 +1888,6 @@ def test_metadata_filter_applies_to_azure_anthropic():
 
 
 def test_anthropic_basic_completion_replay():
-    """Smoke-test that a vanilla Anthropic completion replays from a cassette.
-
-    Exercises the full LiteLLM transformation pipeline (request shaping +
-    response parsing) against a real-shape Anthropic payload. The cassette
-    is loaded from the Redis-backed VCR cache configured in conftest.py.
-    """
     response = litellm.completion(
         model="anthropic/claude-sonnet-4-5-20250929",
         messages=[{"role": "user", "content": "Hello!"}],
@@ -1903,17 +1897,10 @@ def test_anthropic_basic_completion_replay():
     assert response.choices[0].message.content == ("Hello! How can I help you today?")
     assert response.usage.prompt_tokens == 12
     assert response.usage.completion_tokens == 11
-    # Anthropic sets stop_reason="end_turn" → litellm normalises to "stop"
     assert response.choices[0].finish_reason == "stop"
 
 
 def test_anthropic_streaming_completion_replay():
-    """Replay a streaming Anthropic completion from the VCR cache.
-
-    Exercises the SSE chunk parser and the public streaming surface — any
-    regression in the streaming transformation surfaces here because the
-    cassette captures every ``content_block_delta`` event Anthropic emits.
-    """
     stream = litellm.completion(
         model="anthropic/claude-sonnet-4-5-20250929",
         messages=[{"role": "user", "content": "Hello!"}],
diff --git a/tests/llm_translation/test_vcr_redis_persister.py b/tests/llm_translation/test_vcr_redis_persister.py
index 9ca23410bd..6de6283e6b 100644
--- a/tests/llm_translation/test_vcr_redis_persister.py
+++ b/tests/llm_translation/test_vcr_redis_persister.py
@@ -1,19 +1,3 @@
-"""Tests for the Redis-backed vcrpy cassette persister.
-
-These cover the three behaviours we actually rely on in CI:
-
-1. ``save_cassette`` followed by ``load_cassette`` returns the same
-   request/response pairs (roundtrip via the real vcrpy serializer).
-2. Saved keys expire after ~24h so the cache auto-refreshes against live
-   providers without manual ``make`` runs.
-3. ``load_cassette`` raises ``CassetteNotFoundError`` on a miss, so vcrpy's
-   record-mode machinery falls through to a live HTTP call instead of
-   silently matching against an empty cassette.
-
-We also pin the 2xx-only filter so a transient 5xx/429 from the provider
-can't be baked into the cache for the rest of the TTL window.
-"""
-
 from __future__ import annotations
 
 import os
@@ -25,8 +9,6 @@ from vcr.persisters.filesystem import CassetteNotFoundError
 from vcr.request import Request
 from vcr.serializers import yamlserializer
 
-# Make tests/ importable as a package so we can pull the shared persister
-# without depending on pytest's CWD.
 sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..")))
 
 from tests._vcr_redis_persister import (  # noqa: E402
@@ -38,7 +20,6 @@ from tests._vcr_redis_persister import (  # noqa: E402
 
 
 def _sample_cassette_dict():
-    """Build a minimal cassette payload that exercises serialize/deserialize."""
     request = Request(
         method="POST",
         uri="https://api.anthropic.com/v1/messages",
@@ -48,8 +29,6 @@ def _sample_cassette_dict():
     response = {
         "status": {"code": 200, "message": "OK"},
         "headers": {"content-type": ["application/json"]},
-        # vcrpy stores response bodies as bytes; mirror that so the
-        # roundtrip assertion exercises real-world serialization shapes.
         "body": {"string": b'{"id":"msg_1","type":"message"}'},
     }
     return {"requests": [request], "responses": [response]}
@@ -61,10 +40,7 @@ def _persister_with_fake_redis():
 
 
 def test_save_then_load_roundtrips_cassette_content():
-    """A saved cassette must come back from ``load_cassette`` identical to
-    what was put in. If serialize/deserialize ever drift (e.g. encoding bug)
-    every replay-mode test in the suite breaks; this catches it cheaply."""
-    fake, persister = _persister_with_fake_redis()
+    _, persister = _persister_with_fake_redis()
     cassette_path = "tests/llm_translation/cassettes/test_x/test_y.yaml"
 
     persister.save_cassette(cassette_path, _sample_cassette_dict(), yamlserializer)
@@ -79,27 +55,16 @@ def test_save_then_load_roundtrips_cassette_content():
 
 
 def test_saved_key_has_24h_ttl():
-    """The whole point of the Redis backend is that entries auto-expire after
-    24h so each daily CI run re-records against live providers. If the TTL
-    isn't being applied, the cache never refreshes and we silently mask
-    upstream API drift."""
     fake, persister = _persister_with_fake_redis()
     cassette_path = "tests/llm_translation/cassettes/test_x/test_ttl.yaml"
 
     persister.save_cassette(cassette_path, _sample_cassette_dict(), yamlserializer)
 
     ttl = fake.ttl(redis_key_for(cassette_path))
-    assert ttl > 0, "key was saved without an expiry — would never refresh"
-    assert ttl <= CASSETTE_TTL_SECONDS
-    assert ttl >= CASSETTE_TTL_SECONDS - 5  # allow tiny clock slack
+    assert CASSETTE_TTL_SECONDS - 5 <= ttl <= CASSETTE_TTL_SECONDS
 
 
 def test_load_missing_key_raises_cassette_not_found():
-    """Cache miss must surface as ``CassetteNotFoundError``. vcrpy's record
-    machinery catches that exception and falls through to the live HTTP
-    call; if we returned empty/None instead, vcrpy would treat it as a
-    cassette with zero matching requests and the test would fail with a
-    confusing ``CannotOverwriteExistingCassetteException``."""
     _, persister = _persister_with_fake_redis()
     with pytest.raises(CassetteNotFoundError):
         persister.load_cassette("never/recorded.yaml", yamlserializer)
@@ -116,24 +81,19 @@ def test_load_missing_key_raises_cassette_not_found():
         (400, True),
         (401, True),
         (404, True),
-        (429, True),  # rate limit — must never be cached
-        (500, True),  # transient 5xx — must never be cached
+        (429, True),
+        (500, True),
         (502, True),
         (503, True),
     ],
 )
 def test_only_2xx_responses_are_cached(status_code, expect_dropped):
-    """Pin the cache-poisoning protection: a non-2xx must be dropped from
-    the cassette (returned as ``None`` from the hook) so a transient 429
-    or 503 doesn't get pinned for the rest of the TTL window. 2xx
-    responses must pass through untouched."""
     response = {
         "status": {"code": status_code, "message": "X"},
         "headers": {},
         "body": {"string": ""},
     }
     result = filter_non_2xx_response(response)
-    if expect_dropped:
-        assert result is None
-    else:
+    assert (result is None) == expect_dropped
+    if not expect_dropped:
         assert result is response