From 92de7423efca5756a2cb1bcf3228812628f91960 Mon Sep 17 00:00:00 2001
From: yuneng-jiang <yuneng@berri.ai>
Date: Tue, 19 May 2026 14:48:30 -0700
Subject: [PATCH] fix(tests): replace shut-down gpt-4o-audio-preview with
 gpt-audio-1.5 (#28281)

* fix(tests): replace shut-down gpt-4o-audio-preview with gpt-audio-1.5

OpenAI shut down gpt-4o-audio-preview on 2026-05-07, so the live audio
calls in test_stream_chunk_builder_openai_audio_output_usage and
test_standard_logging_payload_audio now hard-fail with a model-not-found
error on every PR. The error message did not contain "openai-internal",
so the except block neither skipped nor re-raised: it swallowed the
exception, and execution fell through to an unbound completion/response
variable (UnboundLocalError).

Switch both tests to gpt-audio-1.5, OpenAI's recommended successor
(GA, not deprecated, already present in the litellm cost map so the
response_cost assertion still resolves). Also broaden the except to
skip with the real error in the reason instead of crashing, so a
transient upstream blip can't reintroduce the UnboundLocalError.

* fix(tests): narrow audio-test skip to model-not-found, re-raise the rest

Address review feedback: an unconditional skip on any exception would
silently mask a litellm-internal regression in the audio path (broken
param transformation, serialization, bad header) instead of failing CI.

Skip only on the upstream-unavailable class (model_not_found / "does not
exist" / openai-internal) and re-raise everything else, so genuine
regressions still fail loudly. The UnboundLocalError is still fixed
because the handler either skips or raises - it never falls through.

* fix(tests): add budget_exceeded to expected Interaction status enum

Staging added budget_exceeded to the Interaction OpenAPI status enum; the staging merge into this branch picked up the spec change but not the matching test update, so test_status_enum_values failed in CI. Align the test's expected list (exact-match by design) with the live spec.

* fix(tests): mock HTTP fetch in test_img_url_token_counter

The test was parametrized with a live third-party image URL (blog.purpureus.net) that now returns 404, causing get_image_dimensions to fall through to its base64 decode path and crash with 'not enough values to unpack' on every PR run. Mock safe_get with a tiny 1x1 PNG so the URL branch is still exercised without any network dependency.

* fix(tests): swap gpt-4o-audio-preview to gpt-audio-1.5 in test_gpt4o_audio

OpenAI shut down gpt-4o-audio-preview on 2026-05-07, so both live tests in test_gpt4o_audio.py (test_audio_output_from_model and test_audio_input_to_model) hard-fail with model_not_found on every PR. Swap the hardcoded model to OpenAI's successor gpt-audio-1.5 (same chat-completions audio surface; already in the litellm cost map). Mirror the narrowed-skip pattern from the prior audio fixes: skip on model_not_found / "does not exist" / openai-internal, and re-raise everything else so genuine litellm regressions still fail CI loudly.
---
 tests/llm_translation/test_gpt4o_audio.py     | 25 +++++++++++-----
 .../test_custom_callback_input.py             | 12 ++++++--
 .../test_stream_chunk_builder.py              | 12 ++++++--
 .../interactions/test_openapi_compliance.py   |  6 +++-
 .../litellm_core_utils/test_token_counter.py  | 29 +++++++++++++++++--
 5 files changed, 68 insertions(+), 16 deletions(-)

diff --git a/tests/llm_translation/test_gpt4o_audio.py b/tests/llm_translation/test_gpt4o_audio.py
index 169fe85516..a50d07406d 100644
--- a/tests/llm_translation/test_gpt4o_audio.py
+++ b/tests/llm_translation/test_gpt4o_audio.py
@@ -59,7 +59,7 @@ async def test_audio_output_from_model(stream):
     litellm.set_verbose = False
     try:
         completion = await litellm.acompletion(
-            model="gpt-4o-audio-preview",
+            model="gpt-audio-1.5",
             modalities=["text", "audio"],
             audio={"voice": "alloy", "format": "pcm16"},
             messages=[{"role": "user", "content": "response in 1 word - yes or no"}],
@@ -69,8 +69,14 @@ async def test_audio_output_from_model(stream):
         print(e)
         pytest.skip("Skipping test due to timeout")
     except Exception as e:
-        if "openai-internal" in str(e):
-            pytest.skip("Skipping test due to openai-internal error")
+        err = str(e).lower()
+        if (
+            "model_not_found" in err
+            or "does not exist" in err
+            or "openai-internal" in err
+        ):
+            pytest.skip(f"Skipping - upstream gpt-audio-1.5 unavailable: {e}")
+        raise
 
     if stream is True:
         await check_streaming_response(completion)
@@ -85,7 +91,7 @@ async def test_audio_output_from_model(stream):
 
 @pytest.mark.asyncio
 @pytest.mark.parametrize("stream", [True, False])
-@pytest.mark.parametrize("model", ["gpt-4o-audio-preview"])  # "gpt-4o-audio-preview",
+@pytest.mark.parametrize("model", ["gpt-audio-1.5"])
 async def test_audio_input_to_model(stream, model):
     # Fetch the audio file and convert it to a base64 encoded string
     audio_format = "pcm16"
@@ -121,9 +127,14 @@ async def test_audio_input_to_model(stream, model):
         print(e)
         pytest.skip("Skipping test due to timeout")
     except Exception as e:
-        if "openai-internal" in str(e):
-            pytest.skip("Skipping test due to openai-internal error")
-        raise e
+        err = str(e).lower()
+        if (
+            "model_not_found" in err
+            or "does not exist" in err
+            or "openai-internal" in err
+        ):
+            pytest.skip(f"Skipping - upstream gpt-audio-1.5 unavailable: {e}")
+        raise
     if stream is True:
         await check_streaming_response(completion)
     else:
diff --git a/tests/local_testing/test_custom_callback_input.py b/tests/local_testing/test_custom_callback_input.py
index 545039e60b..6a4ec9206f 100644
--- a/tests/local_testing/test_custom_callback_input.py
+++ b/tests/local_testing/test_custom_callback_input.py
@@ -1125,7 +1125,7 @@ def test_standard_logging_payload_audio(turn_off_message_logging, stream):
     ) as mock_client:
         try:
             response = litellm.completion(
-                model="gpt-4o-audio-preview",
+                model="gpt-audio-1.5",
                 modalities=["text", "audio"],
                 audio={"voice": "alloy", "format": "pcm16"},
                 messages=[
@@ -1134,8 +1134,14 @@ def test_standard_logging_payload_audio(turn_off_message_logging, stream):
                 stream=stream,
             )
         except Exception as e:
-            if "openai-internal" in str(e):
-                pytest.skip("Skipping test due to openai-internal error")
+            err = str(e).lower()
+            if (
+                "model_not_found" in err
+                or "does not exist" in err
+                or "openai-internal" in err
+            ):
+                pytest.skip(f"Skipping - upstream gpt-audio-1.5 unavailable: {e}")
+            raise
 
         if stream:
             for chunk in response:
diff --git a/tests/local_testing/test_stream_chunk_builder.py b/tests/local_testing/test_stream_chunk_builder.py
index 24fdf49c16..38e04b93f1 100644
--- a/tests/local_testing/test_stream_chunk_builder.py
+++ b/tests/local_testing/test_stream_chunk_builder.py
@@ -649,7 +649,7 @@ def test_stream_chunk_builder_openai_audio_output_usage():
 
     try:
         completion = client.chat.completions.create(
-            model="gpt-4o-audio-preview",
+            model="gpt-audio-1.5",
             modalities=["text", "audio"],
             audio={"voice": "alloy", "format": "pcm16"},
             messages=[{"role": "user", "content": "response in 1 word - yes or no"}],
@@ -657,8 +657,14 @@ def test_stream_chunk_builder_openai_audio_output_usage():
             stream_options={"include_usage": True},
         )
     except Exception as e:
-        if "openai-internal" in str(e):
-            pytest.skip("Skipping test due to openai-internal error")
+        err = str(e).lower()
+        if (
+            "model_not_found" in err
+            or "does not exist" in err
+            or "openai-internal" in err
+        ):
+            pytest.skip(f"Skipping - upstream gpt-audio-1.5 unavailable: {e}")
+        raise
 
     chunks = []
     for chunk in completion:
diff --git a/tests/test_litellm/interactions/test_openapi_compliance.py b/tests/test_litellm/interactions/test_openapi_compliance.py
index 1d3b6b8ae1..aededaaca7 100644
--- a/tests/test_litellm/interactions/test_openapi_compliance.py
+++ b/tests/test_litellm/interactions/test_openapi_compliance.py
@@ -179,7 +179,10 @@ class TestResponseCompliance:
         # `status` is an output-only field; validate against the response schema.
         schema = spec_dict["components"]["schemas"]["Interaction"]
         status_prop = schema["properties"]["status"]
-        # Google Interactions API uses lowercase status values (updated Feb 2026)
+        # Google Interactions API uses lowercase status values (updated Feb 2026).
+        # Keep this an exact match: this test intentionally breaks CI when
+        # Google changes the live spec — that breakage is how we get notified
+        # to review the change.
         expected_statuses = [
             "in_progress",
             "requires_action",
@@ -187,6 +190,7 @@ class TestResponseCompliance:
             "failed",
             "cancelled",
             "incomplete",
+            "budget_exceeded",
         ]
         assert status_prop["enum"] == expected_statuses
         print(f"✓ Status enum values: {expected_statuses}")
diff --git a/tests/test_litellm/litellm_core_utils/test_token_counter.py b/tests/test_litellm/litellm_core_utils/test_token_counter.py
index 3aa5f01246..324bace0e9 100644
--- a/tests/test_litellm/litellm_core_utils/test_token_counter.py
+++ b/tests/test_litellm/litellm_core_utils/test_token_counter.py
@@ -437,13 +437,38 @@ def test_gpt_4o_token_counter():
 @pytest.mark.parametrize(
     "img_url",
     [
-        "https://blog.purpureus.net/assets/blog/personal_key_rotation/simplified-asset-graph.jpg",
+        "https://example.com/test-image.png",
         "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAL0AAAC9CAMAAADRCYwCAAAAh1BMVEX///8AAAD8/Pz5+fkEBAT39/cJCQn09PRNTU3y8vIMDAwzMzPe3t7v7+8QEBCOjo7FxcXR0dHn5+elpaWGhoYYGBivr686OjocHBy0tLQtLS1TU1PY2Ni6urpaWlpERER3d3ecnJxoaGiUlJRiYmIlJSU4ODhBQUFycnKAgIDBwcFnZ2chISE7EjuwAAAI/UlEQVR4nO1caXfiOgz1bhJIyAJhX1JoSzv8/9/3LNlpYd4rhX6o4/N8Z2lKM2cURZau5JsQEhERERERERERERERERERERHx/wBjhDPC3OGN8+Cc5JeMuheaETSdO8vZFyCScHtmz2CsktoeMn7rLM1u3h0PMAEhyYX7v/Q9wQvoGdB0hlbzm45lEq/wd6y6G9aezvBk9AXwp1r3LHJIRsh6s2maxaJpmvqgvkC7WFS3loUnaFJtKRVUCEoV/RpCnHRvAsesVQ1hw+vd7Mpo+424tLs72NplkvQgcdrsvXkW/zJWqH/fA0FT84M/xnQJt4to3+ZLuanbM6X5lfXKHosO9COgREqpCR5i86pf2zPS7j9tTj+9nO7bQz3+xGEyGW9zqgQ1tyQ/VsxEDvce/4dcUPNb5OD9yXvR4Z2QisuP0xiGWPnemgugU5q/troHhGEjIF5sTOyW648aC0TssuaaCEsYEIkGzjWXOp3A0vVsf6kgRyqaDk+T7DIVWrb58b2tT5xpUucKwodOD/5LbrZC1ws6YSaBZJ/8xlh+XZSYXaMJ2ezNqjB3IPXuehPcx2U6b4t1dS/xNdFzguUt8ie7arnPeyCZroxLHzGgGdqVcspwafizPWEXBee+9G1OaufGdvNng/9C+gwgZ3PH3r87G6zXTZ5D5De2G2DeFoANXfbACkT+fxBQ22YFsTTJF9hjFVO6VbqxZXko4WJ8s52P4PnuxO5KRzu0/hlix1ySt8iXjgaQ+4IHPA9nVzNkdduM9LFT/Aacj4FtKrHA7iAw602Vnht6R8Vq1IOS+wNMKLYqayAYfRuufQPGeGb7sZogQQoLZrGPgZ6KoYn70Iw30O92BNEDpvwouCFn6wH2uS+EhRb3WF/HObZk3HuxfRQM3Y/Of/VH0n4MKNHZDiZvO9+m/ABALfkOcuar/7nOo7B95ACGVAFaz4jMiJwJhdaHBkySmzlGTu82gr6FSTik2kJvLnY9nOd/D90qcH268m3I/cgI1xg1maE5CuZYaWLH+UHANCIck0yt7Mx5zBm5vVHXHwChsZ35kKqUpmo5Svq5/fzfAI5g2vDtFPYo1HiEA85QrDeGm9g//LG7K0scO3sdpj2CBDgCa+0OFs0bkvVgnnM/QBDwllOMm+cN7vMSHlB7Uu4haHKaTwgGkv8tlK+hP8fzmFuK/RQTpaLPWvbd58yWIo66HHM0OsPoPhVqmtaEVL7N+wYcTLTbb0DLdgp23Eyy2VYJ2N7bkLFAAibtoLPe5sLt6Oa2bvU+zyeMa8wrixO0gRTn9tO9NCSThTLGqcqtsDvphlfmx/cPBZVvw24jg1LE2lPuEo35Mhi58U0I/Ga8n5w+NS8i34MAQLos5B1u0xL1ZvCVYVRw/Fs2q53KLaXJMWwOZZ/4MPYV19bAHmgGDKB6f01xoeJKFbl63q9J34KdaVNPJWztQyRkzA3KNs1AdAEDowMxh10emXTCx75CkurtbY/ZpdNDGdsn2UcHKHsQ8Ai3WZi48IfkvtjOhsLpuIRSKZTX9FA4o+0d6o/zOWqQzVJMynL9NsxhSJOaourq6nBVQBueMSyubsX2xHrmuABZN2Ns9jr5nwLFlLF/2R6atjW/67Yd11YQ1Z+kA9Zk9dPTM/o6dVo6HHVgC0JR8oUfmI93T9u3gvTG94bAH02Y5xeqRcjuwnKCK6Q2+ajl8KXJ3GSh22P3Zfx6S+n008RO
hJn+JRIUVu6o7OXl8w1SeyhuqNDwNI7SjbK08QrqPxS95jy4G7nCXVq6G3HNu0LtK5J0e226CfC005WKK9sVvfxI0eUbcnzutfhWe3rpZHM0nZ/ny/N8tanKYlQ6VEW5Xuym8yV1zZX58vwGhZp/5tFfhybZabdbrQYOs8F+xEhmPsb0/nki6kIyVvzZzUASiOrTfF+Sj9bXC7DoJxeiV8tjQL6loSd0yCx7YyB6rPdLx31U2qCG3F/oXIuDuqd6LFO+4DNIJuxFZqSsU0ea88avovFnWKRYFYRQDfCfcGaBCLn4M4A1ntJ5E57vicwqq2enaZEF5nokCYu9TbKqCC5yCDfL+GhLxT4w4xEJs+anqgou8DOY2q8FMryjb2MehC1dRJ9s4g9NXeTwPkWON4RH+FhIe0AWR/S9ekvQ+t70XHeimGF78LzuU7d7PwrswdIG2VpgF8C53qVQsTDtBJc4CdnkQPbnZY9mbPdDFra3PCXBBQ5QBn2aQqtyhvlyYM4Hb2/mdhsxCUen04GZVvIJZw5PAamMOmjzq8Q+dzAKLXDQ3RUZItWsg4t7W2DP+JDrJDymoMH7E5zQtuEpG03GTIjGCW3LQqOYEsXgFc78x76NeRwY6SNM+IfQoh6myJKRBIcLYxZcwscJ/gI2isTBty2Po9IkYzP0/SS4hGlxRjFAG5z1Jt1LckiB57yWvo35EaolbvA+6fBa24xodL2YjsPpTnj3JgJOqhcgOeLVsYYwoK0wjY+m1D3rGc40CukkaHnkEjarlXrF1B9M6ECQ6Ow0V7R7N4G3LfOHAXtymoyXOb4QhaYHJ/gNBJUkxclpSs7DNcgWWDDmM7Ke5MJpGuioe7w5EOvfTunUKRzOh7G2ylL+6ynHrD54oQO3//cN3yVO+5qMVsPZq0CZIOx4TlcJ8+Vz7V5waL+7WekzUpRFMTnnTlSCq3X5usi8qmIleW/rit1+oQZn1WGSU/sKBYEqMNh1mBOc6PhK8yCfKHdUNQk8o/G19ZPTs5MYfai+DLs5vmee37zEyyH48WW3XA6Xw6+Az8lMhci7N/KleToo7PtTKm+RA887Kqc6E9dyqL/QPTugzMHLbLZtJKqKLFfzVWRNJ63c+95uWT/F7R0U5dDVvuS409AJXhJvD0EwWaWdW8UN11u/7+umaYjT8mJtzZwP/MD4r57fihiHlC5fylHfaqnJdro+Dr7DajvO+vi2EwyD70s8nCH71nzIO1l5Zl+v1DMCb5ebvCMkGHvobXy/hPumGLyX0218/3RyD1GRLOuf9u/OGQyDmto32yMiIiIiIiIiIiIiIiIiIiIiIiIiIiIiIiIiIv7GP8YjWPR/czH2AAAAAElFTkSuQmCC",
     ],
 )
-def test_img_url_token_counter(img_url):
+def test_img_url_token_counter(img_url, monkeypatch):
+    """
+    Verify get_image_dimensions returns valid (width, height) for both an
+    HTTPS URL and a base64 data URI. The HTTPS branch is exercised with a
+    mocked HTTP fetch so the test is hermetic - it can't break when a
+    third-party image URL goes away.
+    """
+    import base64
     from litellm.litellm_core_utils.token_counter import get_image_dimensions
 
+    # Minimal valid 1x1 PNG, served by the mocked safe_get for the URL case.
+    _tiny_png = base64.b64decode(
+        "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mNkYAAAAAYAAjCB0C8AAAAASUVORK5CYII="
+    )
+
+    if img_url.startswith(("http://", "https://")):
+
+        class _FakeResponse:
+            headers = {"Content-Length": str(len(_tiny_png))}
+
+            def read(self):
+                return _tiny_png
+
+        monkeypatch.setattr(
+            "litellm.litellm_core_utils.token_counter.safe_get",
+            lambda client, url, **kw: _FakeResponse(),
+        )
+
     width, height = get_image_dimensions(data=img_url)
 
     print(width, height)