From 76f56c3283bbeb3ac9448a71331c4e407ca0beed Mon Sep 17 00:00:00 2001
From: yuneng-jiang
Date: Thu, 28 May 2026 17:12:02 -0700
Subject: [PATCH] fix(tests/vcr): mint Google OAuth tokens live to prevent stale-token replay (#29229)

The Redis-backed VCR layer was recording and replaying the Google
OAuth2/STS token-mint call. The replayed ya29.* access token is
long-expired, but its recorded expires_in keeps credentials.expired
False, so litellm never refreshes it and sends the stale token to a live
Vertex/Gemini endpoint, which returns 401 ACCESS_TOKEN_EXPIRED. This
broke live partner-model tests whose completion call is not itself
cassette-backed (e.g. test_vertex_ai_llama_tool_calling).

Force credential-exchange hosts to pass through live (never recorded,
never replayed) by returning None from before_record_request, mirroring
the existing telemetry passthrough, so a fresh token is minted each run.

Regression from #28826, which added OAuth-token matcher tolerance plus
TTL-refresh-on-read so a stale token episode matched and never expired.
---
 tests/_vcr_conftest_common.py             | 19 ++++++++
 tests/llm_translation/test_vcr_filters.py | 56 ++++++++++++++++++++++-
 2 files changed, 74 insertions(+), 1 deletion(-)

diff --git a/tests/_vcr_conftest_common.py b/tests/_vcr_conftest_common.py
index 6d3d34d4ac..d08b87bd58 100644
--- a/tests/_vcr_conftest_common.py
+++ b/tests/_vcr_conftest_common.py
@@ -644,6 +644,23 @@ def _should_drop_telemetry_record(request) -> bool:
     return not _current_test_records_telemetry()
 
 
+def _should_passthrough_credential_exchange(request) -> bool:
+    """Force the Google OAuth2/STS token mint to run live, never from cassette.
+
+    The mint returns a short-lived ``ya29.*`` access token. Recording it lets a
+    *stale* token replay on a later run; litellm caches it (the recorded
+    ``expires_in`` keeps ``credentials.expired`` False, so it is never
+    refreshed) and sends it to a live Vertex/Gemini endpoint, which rejects it
+    with ``ACCESS_TOKEN_EXPIRED``. The token body carries nothing a test asserts
+    on, so always mint it live: returning ``None`` from ``before_record_request``
+    makes vcrpy neither store nor replay the call. Inert during
+    ``Cassette._load`` for the same reason as ``_should_drop_telemetry_record``.
+    """
+    if _vcr_load_in_progress():
+        return False
+    return _is_credential_exchange_request(request)
+
+
 # Google APIs (Vertex AI, Gemini, OAuth2/STS). Auth is a ``ya29.*`` OAuth2
 # access token minted fresh on every run, so the per-request key fingerprint
 # rotates and never matches a recording. The logical credential — the GCP
@@ -931,6 +948,8 @@ def _before_record_request(request):
     # store the interaction; the request passes through live (fire-and-forget).
     if _should_drop_telemetry_record(request):
         return None
+    if _should_passthrough_credential_exchange(request):
+        return None
     headers = getattr(request, "headers", None)
     if headers is None:
         return request
diff --git a/tests/llm_translation/test_vcr_filters.py b/tests/llm_translation/test_vcr_filters.py
index 0389168278..2b5a6b32a7 100644
--- a/tests/llm_translation/test_vcr_filters.py
+++ b/tests/llm_translation/test_vcr_filters.py
@@ -21,11 +21,13 @@ sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "..",
 from tests._vcr_conftest_common import (  # noqa: E402
     VCR_FIXED_MULTIPART_BOUNDARY,
     VCR_IMAGE_B64_PLACEHOLDER,
+    _before_record_request,
     _normalize_multipart_boundary,
+    _should_passthrough_credential_exchange,
     _strip_image_b64_payloads,
+    _vcr_load_guard,
 )
 
-
 # ---------------------------------------------------------------------------
 # Image b64 stripper
 # ---------------------------------------------------------------------------
@@ -218,3 +220,55 @@ def test_normalize_multipart_handles_quoted_boundary():
     _normalize_multipart_boundary(req)
     assert b"quoted-boundary" not in req.body
     assert VCR_FIXED_MULTIPART_BOUNDARY.encode("utf-8") in req.body
+
+
+# ---------------------------------------------------------------------------
+# Credential-exchange passthrough (Google OAuth2/STS token mint must run live)
+# ---------------------------------------------------------------------------
+
+
+def _oauth_token_request() -> Request:
+    return Request(
+        method="POST",
+        uri="https://oauth2.googleapis.com/token",
+        body=b"assertion=eyJhbGciOiJSUzI1NiJ9.signed-jwt&grant_type=urn",
+        headers={"content-type": "application/x-www-form-urlencoded"},
+    )
+
+
+def test_before_record_request_drops_oauth_token_mint():
+    # The token mint must never be stored or replayed, else a stale ya29.* token
+    # gets sent to a live Vertex/Gemini endpoint -> ACCESS_TOKEN_EXPIRED.
+    assert _before_record_request(_oauth_token_request()) is None
+
+
+def test_before_record_request_keeps_normal_request():
+    req = Request(
+        method="POST",
+        uri="https://api.openai.com/v1/chat/completions",
+        body=b'{"model":"gpt-4o"}',
+        headers={"content-type": "application/json"},
+    )
+    assert _before_record_request(req) is req
+
+
+def test_credential_exchange_passthrough_inert_during_cassette_load():
+    # During Cassette._load stored episodes are replayed through this hook;
+    # dropping there would mutate the cassette on read. The guard makes it inert.
+    _vcr_load_guard.active = True
+    try:
+        assert _should_passthrough_credential_exchange(_oauth_token_request()) is False
+        assert _before_record_request(_oauth_token_request()) is not None
+    finally:
+        _vcr_load_guard.active = False
+
+
+def test_credential_exchange_passthrough_covers_sts_and_metadata_hosts():
+    for host in ("sts.googleapis.com", "metadata.google.internal", "169.254.169.254"):
+        req = Request(
+            method="POST",
+            uri=f"https://{host}/token",
+            body=b"grant_type=urn",
+            headers={},
+        )
+        assert _should_passthrough_credential_exchange(req) is True