fix(tests/vcr): mint Google OAuth tokens live to prevent stale-token replay (#29229)
The Redis-backed VCR layer was recording and replaying the Google OAuth2/STS token-mint call. The replayed ya29.* access token is long-expired, but its recorded expires_in keeps credentials.expired False, so litellm never refreshes it and sends the stale token to a live Vertex/Gemini endpoint, which returns 401 ACCESS_TOKEN_EXPIRED. This broke live partner-model tests whose completion call is not itself cassette-backed (e.g. test_vertex_ai_llama_tool_calling). Force credential-exchange hosts to pass through live (never recorded, never replayed) by returning None from before_record_request, mirroring the existing telemetry passthrough, so a fresh token is minted each run. Regression from #28826, which added OAuth-token matcher tolerance plus TTL-refresh-on-read so a stale token episode matched and never expired.
This commit is contained in:
parent
5e2d75d75d
commit
76f56c3283
@ -644,6 +644,23 @@ def _should_drop_telemetry_record(request) -> bool:
|
||||
return not _current_test_records_telemetry()
|
||||
|
||||
|
||||
def _should_passthrough_credential_exchange(request) -> bool:
    """Tell whether ``request`` is a Google OAuth2/STS token mint to run live.

    A token mint hands back a short-lived ``ya29.*`` access token. If that
    response is recorded, a later run replays a *stale* token: the recorded
    ``expires_in`` keeps ``credentials.expired`` False, so litellm caches the
    token without ever refreshing it and forwards it to a live Vertex/Gemini
    endpoint, which answers ``ACCESS_TOKEN_EXPIRED``. No test asserts on the
    token body, so the call is always made live; the caller turns a ``True``
    result into ``None`` from ``before_record_request``, which makes vcrpy
    neither store nor replay the interaction.

    The check is deliberately inert while ``Cassette._load`` is running, for
    the same reason ``_should_drop_telemetry_record`` is.
    """
    # Short-circuit: while a cassette load is in progress the answer is always
    # False; otherwise defer entirely to the credential-exchange host check.
    return not _vcr_load_in_progress() and _is_credential_exchange_request(request)
|
||||
|
||||
|
||||
# Google APIs (Vertex AI, Gemini, OAuth2/STS). Auth is a ``ya29.*`` OAuth2
|
||||
# access token minted fresh on every run, so the per-request key fingerprint
|
||||
# rotates and never matches a recording. The logical credential — the GCP
|
||||
@ -931,6 +948,8 @@ def _before_record_request(request):
|
||||
# store the interaction; the request passes through live (fire-and-forget).
|
||||
if _should_drop_telemetry_record(request):
|
||||
return None
|
||||
if _should_passthrough_credential_exchange(request):
|
||||
return None
|
||||
headers = getattr(request, "headers", None)
|
||||
if headers is None:
|
||||
return request
|
||||
|
||||
@ -21,11 +21,13 @@ sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "..",
|
||||
from tests._vcr_conftest_common import ( # noqa: E402
|
||||
VCR_FIXED_MULTIPART_BOUNDARY,
|
||||
VCR_IMAGE_B64_PLACEHOLDER,
|
||||
_before_record_request,
|
||||
_normalize_multipart_boundary,
|
||||
_should_passthrough_credential_exchange,
|
||||
_strip_image_b64_payloads,
|
||||
_vcr_load_guard,
|
||||
)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Image b64 stripper
|
||||
# ---------------------------------------------------------------------------
|
||||
@ -218,3 +220,55 @@ def test_normalize_multipart_handles_quoted_boundary():
|
||||
_normalize_multipart_boundary(req)
|
||||
assert b"quoted-boundary" not in req.body
|
||||
assert VCR_FIXED_MULTIPART_BOUNDARY.encode("utf-8") in req.body
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Credential-exchange passthrough (Google OAuth2/STS token mint must run live)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _oauth_token_request() -> Request:
    """Build a sample POST to ``oauth2.googleapis.com/token`` with a form-encoded body."""
    form_body = b"assertion=eyJhbGciOiJSUzI1NiJ9.signed-jwt&grant_type=urn"
    form_headers = {"content-type": "application/x-www-form-urlencoded"}
    return Request(
        method="POST",
        uri="https://oauth2.googleapis.com/token",
        body=form_body,
        headers=form_headers,
    )
|
||||
|
||||
|
||||
def test_before_record_request_drops_oauth_token_mint():
    """``_before_record_request`` returns ``None`` for the OAuth token mint."""
    # Storing or replaying the mint would send a stale ya29.* token to a live
    # Vertex/Gemini endpoint, which fails with ACCESS_TOKEN_EXPIRED.
    outcome = _before_record_request(_oauth_token_request())
    assert outcome is None
|
||||
|
||||
|
||||
def test_before_record_request_keeps_normal_request():
    """An ordinary provider request comes back as the very same object."""
    chat_request = Request(
        method="POST",
        uri="https://api.openai.com/v1/chat/completions",
        body=b'{"model":"gpt-4o"}',
        headers={"content-type": "application/json"},
    )
    recorded = _before_record_request(chat_request)
    # Identity, not equality: the hook must hand back this exact request.
    assert recorded is chat_request
|
||||
|
||||
|
||||
def test_credential_exchange_passthrough_inert_during_cassette_load():
    """With the load guard active, the token mint is neither flagged nor dropped."""
    # Cassette._load replays stored episodes through this hook; dropping one
    # there would mutate the cassette on read, so the guard must disable it.
    _vcr_load_guard.active = True
    try:
        flagged = _should_passthrough_credential_exchange(_oauth_token_request())
        assert flagged is False
        # A fresh request object is used for the full hook, as a separate check.
        recorded = _before_record_request(_oauth_token_request())
        assert recorded is not None
    finally:
        # Always clear the guard so later tests see the normal (non-load) state.
        _vcr_load_guard.active = False
|
||||
|
||||
|
||||
def test_credential_exchange_passthrough_covers_sts_and_metadata_hosts():
    """STS and metadata-server hosts are also treated as credential exchanges."""
    credential_hosts = (
        "sts.googleapis.com",
        "metadata.google.internal",
        "169.254.169.254",
    )
    for credential_host in credential_hosts:
        token_request = Request(
            method="POST",
            uri=f"https://{credential_host}/token",
            body=b"grant_type=urn",
            headers={},
        )
        assert _should_passthrough_credential_exchange(token_request) is True
|
||||
|
||||
Loading…
Reference in New Issue
Block a user