fix(tests/vcr): mint Google OAuth tokens live to prevent stale-token replay (#29229)

The Redis-backed VCR layer was recording and replaying the Google
OAuth2/STS token-mint call. The replayed ya29.* access token is
long-expired, but its recorded expires_in keeps credentials.expired
False, so litellm never refreshes it and sends the stale token to a live
Vertex/Gemini endpoint, which returns 401 ACCESS_TOKEN_EXPIRED. This
broke live partner-model tests whose completion call is not itself
cassette-backed (e.g. test_vertex_ai_llama_tool_calling).

Force credential-exchange hosts to pass through live (never recorded,
never replayed) by returning None from before_record_request, mirroring
the existing telemetry passthrough, so a fresh token is minted each run.

Regression from #28826, which added OAuth-token matcher tolerance plus
TTL-refresh-on-read, so a stale token episode kept matching and, with its
TTL refreshed on every read, never expired.
This commit is contained in:
yuneng-jiang 2026-05-28 17:12:02 -07:00 committed by GitHub
parent 5e2d75d75d
commit 76f56c3283
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 74 additions and 1 deletion

View File

@ -644,6 +644,23 @@ def _should_drop_telemetry_record(request) -> bool:
return not _current_test_records_telemetry()
def _should_passthrough_credential_exchange(request) -> bool:
    """Tell whether ``request`` is a Google OAuth2/STS token mint that must go live.

    A recorded mint would replay an old ``ya29.*`` access token on a later
    run. Because the recorded ``expires_in`` leaves ``credentials.expired``
    False, litellm keeps the cached token instead of refreshing it, and the
    live Vertex/Gemini endpoint rejects it with ``ACCESS_TOKEN_EXPIRED``.
    Tests assert on nothing in the token body, so ``before_record_request``
    returns ``None`` for these requests and vcrpy neither stores nor replays
    them.

    Always ``False`` while ``Cassette._load`` is running, for the same reason
    ``_should_drop_telemetry_record`` is inert there.
    """
    loading_cassette = _vcr_load_in_progress()
    return False if loading_cassette else _is_credential_exchange_request(request)
# Google APIs (Vertex AI, Gemini, OAuth2/STS). Auth is a ``ya29.*`` OAuth2
# access token minted fresh on every run, so the per-request key fingerprint
# rotates and never matches a recording. The logical credential — the GCP
@ -931,6 +948,8 @@ def _before_record_request(request):
# store the interaction; the request passes through live (fire-and-forget).
if _should_drop_telemetry_record(request):
return None
if _should_passthrough_credential_exchange(request):
return None
headers = getattr(request, "headers", None)
if headers is None:
return request

View File

@ -21,11 +21,13 @@ sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "..",
from tests._vcr_conftest_common import ( # noqa: E402
VCR_FIXED_MULTIPART_BOUNDARY,
VCR_IMAGE_B64_PLACEHOLDER,
_before_record_request,
_normalize_multipart_boundary,
_should_passthrough_credential_exchange,
_strip_image_b64_payloads,
_vcr_load_guard,
)
# ---------------------------------------------------------------------------
# Image b64 stripper
# ---------------------------------------------------------------------------
@ -218,3 +220,55 @@ def test_normalize_multipart_handles_quoted_boundary():
_normalize_multipart_boundary(req)
assert b"quoted-boundary" not in req.body
assert VCR_FIXED_MULTIPART_BOUNDARY.encode("utf-8") in req.body
# ---------------------------------------------------------------------------
# Credential-exchange passthrough (Google OAuth2/STS token mint must run live)
# ---------------------------------------------------------------------------
def _oauth_token_request() -> Request:
    """Build a sample form-encoded POST to Google's OAuth2 token endpoint."""
    form_body = b"assertion=eyJhbGciOiJSUzI1NiJ9.signed-jwt&grant_type=urn"
    form_headers = {"content-type": "application/x-www-form-urlencoded"}
    return Request(
        method="POST",
        uri="https://oauth2.googleapis.com/token",
        body=form_body,
        headers=form_headers,
    )
def test_before_record_request_drops_oauth_token_mint():
    """The hook returns ``None`` for the OAuth token mint."""
    # Storing or replaying the mint would send a stale ya29.* token to a live
    # Vertex/Gemini endpoint, which fails with ACCESS_TOKEN_EXPIRED.
    hook_result = _before_record_request(_oauth_token_request())
    assert hook_result is None
def test_before_record_request_keeps_normal_request():
    """A regular provider request comes back from the hook as the same object."""
    chat_request = Request(
        method="POST",
        uri="https://api.openai.com/v1/chat/completions",
        body=b'{"model":"gpt-4o"}',
        headers={"content-type": "application/json"},
    )
    returned = _before_record_request(chat_request)
    assert returned is chat_request
def test_credential_exchange_passthrough_inert_during_cassette_load():
    """With the load guard active, the passthrough must not drop the request."""
    # Cassette._load feeds stored episodes back through this hook; dropping one
    # there would mutate the cassette on read, so the guard disables the drop.
    _vcr_load_guard.active = True
    try:
        passthrough = _should_passthrough_credential_exchange(_oauth_token_request())
        assert passthrough is False
        recorded = _before_record_request(_oauth_token_request())
        assert recorded is not None
    finally:
        # Always clear the guard so later tests see the normal (non-load) state.
        _vcr_load_guard.active = False
def test_credential_exchange_passthrough_covers_sts_and_metadata_hosts():
    """The STS and metadata hosts are each flagged for live passthrough."""
    credential_hosts = (
        "sts.googleapis.com",
        "metadata.google.internal",
        "169.254.169.254",
    )
    for host in credential_hosts:
        token_uri = f"https://{host}/token"
        req = Request(
            method="POST",
            uri=token_uri,
            body=b"grant_type=urn",
            headers={},
        )
        verdict = _should_passthrough_credential_exchange(req)
        assert verdict is True