From 92de7423efca5756a2cb1bcf3228812628f91960 Mon Sep 17 00:00:00 2001 From: yuneng-jiang Date: Tue, 19 May 2026 14:48:30 -0700 Subject: [PATCH] fix(tests): replace shut-down gpt-4o-audio-preview with gpt-audio-1.5 (#28281) * fix(tests): replace shut-down gpt-4o-audio-preview with gpt-audio-1.5 OpenAI shut down gpt-4o-audio-preview on 2026-05-07, so the live audio calls in test_stream_chunk_builder_openai_audio_output_usage and test_standard_logging_payload_audio now hard-fail with a model-not-found error on every PR. The error was not "openai-internal", so the except block swallowed it and execution fell through to an unbound completion/response (UnboundLocalError). Switch both tests to gpt-audio-1.5, OpenAI's recommended successor (GA, not deprecated, already present in the litellm cost map so the response_cost assertion still resolves). Also broaden the except to skip with the real error in the reason instead of crashing, so a transient upstream blip can't reintroduce the UnboundLocalError. * fix(tests): narrow audio-test skip to model-not-found, re-raise the rest Address review feedback: an unconditional skip on any exception would silently mask a litellm-internal regression in the audio path (broken param transformation, serialization, bad header) instead of failing CI. Skip only on the upstream-unavailable class (model_not_found / "does not exist" / openai-internal) and re-raise everything else, so genuine regressions still fail loudly. The UnboundLocalError is still fixed because the handler either skips or raises - it never falls through. * fix(tests): add budget_exceeded to expected Interaction status enum Staging added budget_exceeded to the Interaction OpenAPI status enum; the staging merge into this branch picked up the spec change but not the matching test update, so test_status_enum_values failed in CI. Align the test's expected list (exact-match by design) with the live spec. * fix(tests): mock HTTP fetch in test_img_url_token_counter The test parameterized a live third-party image URL (blog.purpureus.net) which now 404s, causing get_image_dimensions to fall through to its base64 decode path and crash with 'not enough values to unpack' on every PR run. Mock safe_get with a tiny 1x1 PNG so the URL branch is still exercised without any network dependency. * fix(tests): swap gpt-4o-audio-preview to gpt-audio-1.5 in test_gpt4o_audio OpenAI shut down gpt-4o-audio-preview on 2026-05-07, so both live tests in test_gpt4o_audio.py (test_audio_output_from_model and test_audio_input_to_model) hard-fail model_not_found on every PR. Swap the hardcoded model to OpenAI's successor gpt-audio-1.5 (same chat-completions audio surface; already in the litellm cost map). Mirror the narrowed-skip pattern from the prior audio fixes: skip on model_not_found / does-not-exist / openai-internal, re-raise everything else so genuine litellm regressions still fail CI loudly. --- tests/llm_translation/test_gpt4o_audio.py | 25 +++++++++++----- .../test_custom_callback_input.py | 12 ++++++-- .../test_stream_chunk_builder.py | 12 ++++++-- .../interactions/test_openapi_compliance.py | 6 +++- .../litellm_core_utils/test_token_counter.py | 29 +++++++++++++++++-- 5 files changed, 68 insertions(+), 16 deletions(-) diff --git a/tests/llm_translation/test_gpt4o_audio.py b/tests/llm_translation/test_gpt4o_audio.py index 169fe85516..a50d07406d 100644 --- a/tests/llm_translation/test_gpt4o_audio.py +++ b/tests/llm_translation/test_gpt4o_audio.py @@ -59,7 +59,7 @@ async def test_audio_output_from_model(stream): litellm.set_verbose = False try: completion = await litellm.acompletion( - model="gpt-4o-audio-preview", + model="gpt-audio-1.5", modalities=["text", "audio"], audio={"voice": "alloy", "format": "pcm16"}, messages=[{"role": "user", "content": "response in 1 word - yes or no"}], @@ -69,8 +69,14 @@ async def test_audio_output_from_model(stream): print(e) pytest.skip("Skipping test due to timeout") except Exception as e: - if "openai-internal" in str(e): - pytest.skip("Skipping test due to openai-internal error") + err = str(e).lower() + if ( + "model_not_found" in err + or "does not exist" in err + or "openai-internal" in err + ): + pytest.skip(f"Skipping - upstream gpt-audio-1.5 unavailable: {e}") + raise if stream is True: await check_streaming_response(completion) @@ -85,7 +91,7 @@ async def test_audio_output_from_model(stream): @pytest.mark.asyncio @pytest.mark.parametrize("stream", [True, False]) -@pytest.mark.parametrize("model", ["gpt-4o-audio-preview"]) # "gpt-4o-audio-preview", +@pytest.mark.parametrize("model", ["gpt-audio-1.5"]) async def test_audio_input_to_model(stream, model): # Fetch the audio file and convert it to a base64 encoded string audio_format = "pcm16" @@ -121,9 +127,14 @@ async def test_audio_input_to_model(stream, model): print(e) pytest.skip("Skipping test due to timeout") except Exception as e: - if "openai-internal" in str(e): - pytest.skip("Skipping test due to openai-internal error") - raise e + err = str(e).lower() + if ( + "model_not_found" in err + or "does not exist" in err + or "openai-internal" in err + ): + pytest.skip(f"Skipping - upstream gpt-audio-1.5 unavailable: {e}") + raise if stream is True: await check_streaming_response(completion) else: diff --git a/tests/local_testing/test_custom_callback_input.py b/tests/local_testing/test_custom_callback_input.py index 545039e60b..6a4ec9206f 100644 --- a/tests/local_testing/test_custom_callback_input.py +++ b/tests/local_testing/test_custom_callback_input.py @@ -1125,7 +1125,7 @@ def test_standard_logging_payload_audio(turn_off_message_logging, stream): ) as mock_client: try: response = litellm.completion( - model="gpt-4o-audio-preview", + model="gpt-audio-1.5", modalities=["text", "audio"], audio={"voice": "alloy", "format": "pcm16"}, messages=[ @@ -1134,8 +1134,14 @@ def test_standard_logging_payload_audio(turn_off_message_logging, stream): stream=stream, ) except Exception as e: - if "openai-internal" in str(e): - pytest.skip("Skipping test due to openai-internal error") + err = str(e).lower() + if ( + "model_not_found" in err + or "does not exist" in err + or "openai-internal" in err + ): + pytest.skip(f"Skipping - upstream gpt-audio-1.5 unavailable: {e}") + raise if stream: for chunk in response: diff --git a/tests/local_testing/test_stream_chunk_builder.py b/tests/local_testing/test_stream_chunk_builder.py index 24fdf49c16..38e04b93f1 100644 --- a/tests/local_testing/test_stream_chunk_builder.py +++ b/tests/local_testing/test_stream_chunk_builder.py @@ -649,7 +649,7 @@ def test_stream_chunk_builder_openai_audio_output_usage(): try: completion = client.chat.completions.create( - model="gpt-4o-audio-preview", + model="gpt-audio-1.5", modalities=["text", "audio"], audio={"voice": "alloy", "format": "pcm16"}, messages=[{"role": "user", "content": "response in 1 word - yes or no"}], @@ -657,8 +657,14 @@ def test_stream_chunk_builder_openai_audio_output_usage(): stream_options={"include_usage": True}, ) except Exception as e: - if "openai-internal" in str(e): - pytest.skip("Skipping test due to openai-internal error") + err = str(e).lower() + if ( + "model_not_found" in err + or "does not exist" in err + or "openai-internal" in err + ): + pytest.skip(f"Skipping - upstream gpt-audio-1.5 unavailable: {e}") + raise chunks = [] for chunk in completion: diff --git a/tests/test_litellm/interactions/test_openapi_compliance.py b/tests/test_litellm/interactions/test_openapi_compliance.py index 1d3b6b8ae1..aededaaca7 100644 --- a/tests/test_litellm/interactions/test_openapi_compliance.py +++ b/tests/test_litellm/interactions/test_openapi_compliance.py @@ -179,7 +179,10 @@ class TestResponseCompliance: # `status` is an output-only field; validate against the response schema. schema = spec_dict["components"]["schemas"]["Interaction"] status_prop = schema["properties"]["status"] - # Google Interactions API uses lowercase status values (updated Feb 2026) + # Google Interactions API uses lowercase status values (updated Feb 2026). + # Keep this an exact match: this test intentionally breaks CI when + # Google changes the live spec — that breakage is how we get notified + # to review the change. expected_statuses = [ "in_progress", "requires_action", @@ -187,6 +190,7 @@ class TestResponseCompliance: "failed", "cancelled", "incomplete", + "budget_exceeded", ] assert status_prop["enum"] == expected_statuses print(f"✓ Status enum values: {expected_statuses}") diff --git a/tests/test_litellm/litellm_core_utils/test_token_counter.py b/tests/test_litellm/litellm_core_utils/test_token_counter.py index 3aa5f01246..324bace0e9 100644 --- a/tests/test_litellm/litellm_core_utils/test_token_counter.py +++ b/tests/test_litellm/litellm_core_utils/test_token_counter.py @@ -437,13 +437,38 @@ def test_gpt_4o_token_counter(): @pytest.mark.parametrize( "img_url", [ - "https://blog.purpureus.net/assets/blog/personal_key_rotation/simplified-asset-graph.jpg", + "https://example.com/test-image.png", "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAL0AAAC9CAMAAADRCYwCAAAAh1BMVEX///8AAAD8/Pz5+fkEBAT39/cJCQn09PRNTU3y8vIMDAwzMzPe3t7v7+8QEBCOjo7FxcXR0dHn5+elpaWGhoYYGBivr686OjocHBy0tLQtLS1TU1PY2Ni6urpaWlpERER3d3ecnJxoaGiUlJRiYmIlJSU4ODhBQUFycnKAgIDBwcFnZ2chISE7EjuwAAAI/UlEQVR4nO1caXfiOgz1bhJIyAJhX1JoSzv8/9/3LNlpYd4rhX6o4/N8Z2lKM2cURZau5JsQEhERERERERERERERERERERHx/wBjhDPC3OGN8+Cc5JeMuheaETSdO8vZFyCScHtmz2CsktoeMn7rLM1u3h0PMAEhyYX7v/Q9wQvoGdB0hlbzm45lEq/wd6y6G9aezvBk9AXwp1r3LHJIRsh6s2maxaJpmvqgvkC7WFS3loUnaFJtKRVUCEoV/RpCnHRvAsesVQ1hw+vd7Mpo+424tLs72NplkvQgcdrsvXkW/zJWqH/fA0FT84M/xnQJt4to3+ZLuanbM6X5lfXKHosO9COgREqpCR5i86pf2zPS7j9tTj+9nO7bQz3+xGEyGW9zqgQ1tyQ/VsxEDvce/4dcUPNb5OD9yXvR4Z2QisuP0xiGWPnemgugU5q/troHhGEjIF5sTOyW648aC0TssuaaCEsYEIkGzjWXOp3A0vVsf6kgRyqaDk+T7DIVWrb58b2tT5xpUucKwodOD/5LbrZC1ws6YSaBZJ/8xlh+XZSYXaMJ2ezNqjB3IPXuehPcx2U6b4t1dS/xNdFzguUt8ie7arnPeyCZroxLHzGgGdqVcspwafizPWEXBee+9G1OaufGdvNng/9C+gwgZ3PH3r87G6zXTZ5D5De2G2DeFoANXfbACkT+fxBQ22YFsTTJF9hjFVO6VbqxZXko4WJ8s52P4PnuxO5KRzu0/hlix1ySt8iXjgaQ+4IHPA9nVzNkdduM9LFT/Aacj4FtKrHA7iAw602Vnht6R8Vq1IOS+wNMKLYqayAYfRuufQPGeGb7sZogQQoLZrGPgZ6KoYn70Iw30O92BNEDpvwouCFn6wH2uS+EhRb3WF/HObZk3HuxfRQM3Y/Of/VH0n4MKNHZDiZvO9+m/ABALfkOcuar/7nOo7B95ACGVAFaz4jMiJwJhdaHBkySmzlGTu82gr6FSTik2kJvLnY9nOd/D90qcH268m3I/cgI1xg1maE5CuZYaWLH+UHANCIck0yt7Mx5zBm5vVHXHwChsZ35kKqUpmo5Svq5/fzfAI5g2vDtFPYo1HiEA85QrDeGm9g//LG7K0scO3sdpj2CBDgCa+0OFs0bkvVgnnM/QBDwllOMm+cN7vMSHlB7Uu4haHKaTwgGkv8tlK+hP8fzmFuK/RQTpaLPWvbd58yWIo66HHM0OsPoPhVqmtaEVL7N+wYcTLTbb0DLdgp23Eyy2VYJ2N7bkLFAAibtoLPe5sLt6Oa2bvU+zyeMa8wrixO0gRTn9tO9NCSThTLGqcqtsDvphlfmx/cPBZVvw24jg1LE2lPuEo35Mhi58U0I/Ga8n5w+NS8i34MAQLos5B1u0xL1ZvCVYVRw/Fs2q53KLaXJMWwOZZ/4MPYV19bAHmgGDKB6f01xoeJKFbl63q9J34KdaVNPJWztQyRkzA3KNs1AdAEDowMxh10emXTCx75CkurtbY/ZpdNDGdsn2UcHKHsQ8Ai3WZi48IfkvtjOhsLpuIRSKZTX9FA4o+0d6o/zOWqQzVJMynL9NsxhSJOaourq6nBVQBueMSyubsX2xHrmuABZN2Ns9jr5nwLFlLF/2R6atjW/67Yd11YQ1Z+kA9Zk9dPTM/o6dVo6HHVgC0JR8oUfmI93T9u3gvTG94bAH02Y5xeqRcjuwnKCK6Q2+ajl8KXJ3GSh22P3Zfx6S+n008ROhJn+JRIUVu6o7OXl8w1SeyhuqNDwNI7SjbK08QrqPxS95jy4G7nCXVq6G3HNu0LtK5J0e226CfC005WKK9sVvfxI0eUbcnzutfhWe3rpZHM0nZ/ny/N8tanKYlQ6VEW5Xuym8yV1zZX58vwGhZp/5tFfhybZabdbrQYOs8F+xEhmPsb0/nki6kIyVvzZzUASiOrTfF+Sj9bXC7DoJxeiV8tjQL6loSd0yCx7YyB6rPdLx31U2qCG3F/oXIuDuqd6LFO+4DNIJuxFZqSsU0ea88avovFnWKRYFYRQDfCfcGaBCLn4M4A1ntJ5E57vicwqq2enaZEF5nokCYu9TbKqCC5yCDfL+GhLxT4w4xEJs+anqgou8DOY2q8FMryjb2MehC1dRJ9s4g9NXeTwPkWON4RH+FhIe0AWR/S9ekvQ+t70XHeimGF78LzuU7d7PwrswdIG2VpgF8C53qVQsTDtBJc4CdnkQPbnZY9mbPdDFra3PCXBBQ5QBn2aQqtyhvlyYM4Hb2/mdhsxCUen04GZVvIJZw5PAamMOmjzq8Q+dzAKLXDQ3RUZItWsg4t7W2DP+JDrJDymoMH7E5zQtuEpG03GTIjGCW3LQqOYEsXgFc78x76NeRwY6SNM+IfQoh6myJKRBIcLYxZcwscJ/gI2isTBty2Po9IkYzP0/SS4hGlxRjFAG5z1Jt1LckiB57yWvo35EaolbvA+6fBa24xodL2YjsPpTnj3JgJOqhcgOeLVsYYwoK0wjY+m1D3rGc40CukkaHnkEjarlXrF1B9M6ECQ6Ow0V7R7N4G3LfOHAXtymoyXOb4QhaYHJ/gNBJUkxclpSs7DNcgWWDDmM7Ke5MJpGuioe7w5EOvfTunUKRzOh7G2ylL+6ynHrD54oQO3//cN3yVO+5qMVsPZq0CZIOx4TlcJ8+Vz7V5waL+7WekzUpRFMTnnTlSCq3X5usi8qmIleW/rit1+oQZn1WGSU/sKBYEqMNh1mBOc6PhK8yCfKHdUNQk8o/G19ZPTs5MYfai+DLs5vmee37zEyyH48WW3XA6Xw6+Az8lMhci7N/KleToo7PtTKm+RA887Kqc6E9dyqL/QPTugzMHLbLZtJKqKLFfzVWRNJ63c+95uWT/F7R0U5dDVvuS409AJXhJvD0EwWaWdW8UN11u/7+umaYjT8mJtzZwP/MD4r57fihiHlC5fylHfaqnJdro+Dr7DajvO+vi2EwyD70s8nCH71nzIO1l5Zl+v1DMCb5ebvCMkGHvobXy/hPumGLyX0218/3RyD1GRLOuf9u/OGQyDmto32yMiIiIiIiIiIiIiIiIiIiIiIiIiIiIiIiIiIv7GP8YjWPR/czH2AAAAAElFTkSuQmCC", ], ) -def test_img_url_token_counter(img_url): +def test_img_url_token_counter(img_url, monkeypatch): + """ + Verify get_image_dimensions returns valid (width, height) for both an + HTTPS URL and a base64 data URI. The HTTPS branch is exercised with a + mocked HTTP fetch so the test is hermetic - it can't break when a + third-party image URL goes away. + """ + import base64 from litellm.litellm_core_utils.token_counter import get_image_dimensions + # Minimal valid 1x1 PNG, served by the mocked safe_get for the URL case. + _tiny_png = base64.b64decode( + "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mNkYAAAAAYAAjCB0C8AAAAASUVORK5CYII=" + ) + + if img_url.startswith(("http://", "https://")): + + class _FakeResponse: + headers = {"Content-Length": str(len(_tiny_png))} + + def read(self): + return _tiny_png + + monkeypatch.setattr( + "litellm.litellm_core_utils.token_counter.safe_get", + lambda client, url, **kw: _FakeResponse(), + ) + width, height = get_image_dimensions(data=img_url) print(width, height)