fix(tests/vcr): mint Google OAuth tokens live to prevent stale-token replay (#29229)

The Redis-backed VCR layer was recording and replaying the Google OAuth2/STS token-mint call. The replayed ya29.* access token is long expired, but its recorded expires_in keeps credentials.expired False, so litellm never refreshes it and sends the stale token to a live Vertex/Gemini endpoint, which returns 401 ACCESS_TOKEN_EXPIRED. This broke live partner-model tests whose completion call is not itself cassette-backed (e.g. test_vertex_ai_llama_tool_calling). The fix forces credential-exchange hosts to pass through live (never recorded, never replayed) by returning None from before_record_request, mirroring the existing telemetry passthrough, so a fresh token is minted on each run. This is a regression from #28826, which added OAuth-token matcher tolerance plus TTL-refresh-on-read, with the result that a stale token episode matched and never expired.
2026-05-28 17:12:02 -07:00 · 2026-05-28 17:12:02 -07:00 · 76f56c3283
commit 76f56c3283
parent 5e2d75d75d
2 changed files with 74 additions and 1 deletion
--- a/tests/_vcr_conftest_common.py
+++ b/tests/_vcr_conftest_common.py
@ -644,6 +644,23 @@ def _should_drop_telemetry_record(request) -> bool:
    return not _current_test_records_telemetry()


+def _should_passthrough_credential_exchange(request) -> bool:
+    """Force the Google OAuth2/STS token mint to run live, never from cassette.
+
+    The mint returns a short-lived ``ya29.*`` access token. Recording it lets a
+    *stale* token replay on a later run; litellm caches it (the recorded
+    ``expires_in`` keeps ``credentials.expired`` False, so it is never
+    refreshed) and sends it to a live Vertex/Gemini endpoint, which rejects it
+    with ``ACCESS_TOKEN_EXPIRED``. The token body carries nothing a test asserts
+    on, so always mint it live: returning ``None`` from ``before_record_request``
+    makes vcrpy neither store nor replay the call. Inert during
+    ``Cassette._load`` for the same reason as ``_should_drop_telemetry_record``.
+    """
+    if _vcr_load_in_progress():
+        return False
+    return _is_credential_exchange_request(request)
+
+
 # Google APIs (Vertex AI, Gemini, OAuth2/STS). Auth is a ``ya29.*`` OAuth2
 # access token minted fresh on every run, so the per-request key fingerprint
 # rotates and never matches a recording. The logical credential — the GCP
@ -931,6 +948,8 @@ def _before_record_request(request):
    # store the interaction; the request passes through live (fire-and-forget).
    if _should_drop_telemetry_record(request):
        return None
+    if _should_passthrough_credential_exchange(request):
+        return None
    headers = getattr(request, "headers", None)
    if headers is None:
        return request
--- a/tests/llm_translation/test_vcr_filters.py
+++ b/tests/llm_translation/test_vcr_filters.py
@ -21,11 +21,13 @@ sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "..",
 from tests._vcr_conftest_common import (  # noqa: E402
    VCR_FIXED_MULTIPART_BOUNDARY,
    VCR_IMAGE_B64_PLACEHOLDER,
+    _before_record_request,
    _normalize_multipart_boundary,
+    _should_passthrough_credential_exchange,
    _strip_image_b64_payloads,
+    _vcr_load_guard,
 )

-
 # ---------------------------------------------------------------------------
 # Image b64 stripper
 # ---------------------------------------------------------------------------
@ -218,3 +220,55 @@ def test_normalize_multipart_handles_quoted_boundary():
    _normalize_multipart_boundary(req)
    assert b"quoted-boundary" not in req.body
    assert VCR_FIXED_MULTIPART_BOUNDARY.encode("utf-8") in req.body
+
+
+# ---------------------------------------------------------------------------
+# Credential-exchange passthrough (Google OAuth2/STS token mint must run live)
+# ---------------------------------------------------------------------------
+
+
+def _oauth_token_request() -> Request:
+    return Request(
+        method="POST",
+        uri="https://oauth2.googleapis.com/token",
+        body=b"assertion=eyJhbGciOiJSUzI1NiJ9.signed-jwt&grant_type=urn",
+        headers={"content-type": "application/x-www-form-urlencoded"},
+    )
+
+
+def test_before_record_request_drops_oauth_token_mint():
+    # The token mint must never be stored or replayed, else a stale ya29.* token
+    # gets sent to a live Vertex/Gemini endpoint -> ACCESS_TOKEN_EXPIRED.
+    assert _before_record_request(_oauth_token_request()) is None
+
+
+def test_before_record_request_keeps_normal_request():
+    req = Request(
+        method="POST",
+        uri="https://api.openai.com/v1/chat/completions",
+        body=b'{"model":"gpt-4o"}',
+        headers={"content-type": "application/json"},
+    )
+    assert _before_record_request(req) is req
+
+
+def test_credential_exchange_passthrough_inert_during_cassette_load():
+    # During Cassette._load stored episodes are replayed through this hook;
+    # dropping there would mutate the cassette on read. The guard makes it inert.
+    _vcr_load_guard.active = True
+    try:
+        assert _should_passthrough_credential_exchange(_oauth_token_request()) is False
+        assert _before_record_request(_oauth_token_request()) is not None
+    finally:
+        _vcr_load_guard.active = False
+
+
+def test_credential_exchange_passthrough_covers_sts_and_metadata_hosts():
+    for host in ("sts.googleapis.com", "metadata.google.internal", "169.254.169.254"):
+        req = Request(
+            method="POST",
+            uri=f"https://{host}/token",
+            body=b"grant_type=urn",
+            headers={},
+        )
+        assert _should_passthrough_credential_exchange(req) is True