From bd2d0ad519cbd95e5ee60bd9b1c1ea60f46f5336 Mon Sep 17 00:00:00 2001 From: Shivam Rawat Date: Tue, 26 May 2026 11:40:44 -0700 Subject: [PATCH] Litellm OpenAI double prefix bug (#28661) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Fix token cost lookup when deployment ids repeat the provider prefix. Router configs may expose models like openai/openai/<model>; normalize those strings before joining provider/model so model_cost resolves correctly. Co-authored-by: Cursor * scope duplicate-prefix cost fix to explicit providers; isolate test patch state Co-authored-by: Cursor * fix(mypy): narrow custom_llm_provider after resolution in cost_per_token Co-authored-by: Cursor * fix(cost_calculator): guard provider-prefix dedup against non-string model The provider-prefix dedup loop assumed `model` is always a string. When a non-string is passed (e.g. a MagicMock from a mocked transport in router tests), `model.startswith(...)` is always truthy and each slice returns a new object, so the loop never terminates — it spins and OOM-kills the test worker (observed as the litellm_router_testing CI regression, e.g. test_router_pattern_match_e2e). Only run the string-based dedup and prefix-join when `model` is actually a str, preserving the previous graceful behavior for non-string inputs. 
--------- Co-authored-by: Cursor Co-authored-by: mateo-berri <277851410+mateo-berri@users.noreply.github.com> --- litellm/cost_calculator.py | 34 ++++++++++++++- tests/test_litellm/test_cost_calculator.py | 50 ++++++++++++++++++++++ 2 files changed, 82 insertions(+), 2 deletions(-) diff --git a/litellm/cost_calculator.py b/litellm/cost_calculator.py index ab882559d3..1d4c57df42 100644 --- a/litellm/cost_calculator.py +++ b/litellm/cost_calculator.py @@ -417,9 +417,36 @@ def cost_per_token( # noqa: PLR0915 prompt_tokens_cost_usd_dollar: float = 0 completion_tokens_cost_usd_dollar: float = 0 model_cost_ref = litellm.model_cost + # Only callers that explicitly pass `custom_llm_provider` get the + # dedup/prefix-join treatment. When provider is omitted, preserve legacy + # behavior: `model_with_provider` stays equal to the raw `model` string + # (provider is detected below for downstream use only). + caller_supplied_provider = custom_llm_provider is not None + + # `model` is normally a string, but callers that mock the transport can pass + # non-string objects. Only run the string-based dedup/prefix-join when it is + # actually a string — e.g. a MagicMock's `.startswith()` is always truthy and + # its slices return new mocks, which would spin the dedup loop forever. + model_is_str = isinstance(model, str) + + # Router/proxy deployments may repeat the provider segment (e.g. model_name + # "openai/openai/gpt-5.5"). Strip duplicated `{provider}/` chains before joining. 
+ if caller_supplied_provider and model_is_str: + _dup_prefix = f"{custom_llm_provider}/" + while model.startswith(_dup_prefix): + _remainder = model[len(_dup_prefix) :] + if _remainder.startswith(_dup_prefix): + model = _remainder + else: + break + model_with_provider = model - if custom_llm_provider is not None: - model_with_provider = custom_llm_provider + "/" + model + if caller_supplied_provider: + _prov_prefix = f"{custom_llm_provider}/" + if model_is_str and model.startswith(_prov_prefix): + model_with_provider = model + else: + model_with_provider = f"{custom_llm_provider}/{model}" if region_name is not None: model_with_provider_and_region = ( f"{custom_llm_provider}/{region_name}/{model}" @@ -430,6 +457,9 @@ def cost_per_token( # noqa: PLR0915 model_with_provider = model_with_provider_and_region else: _, custom_llm_provider, _, _ = litellm.get_llm_provider(model=model) + + assert custom_llm_provider is not None # caller-supplied or get_llm_provider + model_without_prefix = model model_parts = model.split("/", 1) if len(model_parts) > 1: diff --git a/tests/test_litellm/test_cost_calculator.py b/tests/test_litellm/test_cost_calculator.py index 00902890da..1a9bf5a942 100644 --- a/tests/test_litellm/test_cost_calculator.py +++ b/tests/test_litellm/test_cost_calculator.py @@ -13,6 +13,7 @@ from pydantic import BaseModel import litellm from litellm.cost_calculator import ( completion_cost, + cost_per_token, handle_realtime_stream_cost_calculation, response_cost_calculator, ) @@ -21,6 +22,55 @@ from litellm.types.utils import ModelResponse, PromptTokensDetailsWrapper, Usage from litellm.utils import TranscriptionResponse +def test_cost_per_token_duplicate_openai_prefix_matches_model_cost(monkeypatch): + """ + Router/proxy configs may use deployment ids like openai/openai/. Cost lookup must + resolve to model_prices keys (e.g. gpt-5.5), not fail or multiply prefixes. 
+ """ + monkeypatch.setenv("LITELLM_LOCAL_MODEL_COST_MAP", "True") + monkeypatch.setattr(litellm, "model_cost", litellm.get_model_cost_map(url="")) + + prompt_usd, completion_usd = cost_per_token( + model="openai/openai/gpt-5.5", + prompt_tokens=100, + completion_tokens=50, + custom_llm_provider="openai", + ) + + assert prompt_usd + completion_usd > 0 + + +def test_cost_per_token_non_string_model_does_not_hang(): + """ + The provider-prefix dedup loop must not spin forever when `model` is a + non-string object (e.g. a MagicMock from a mocked transport). It should + return or raise promptly instead of looping on a truthy `.startswith()`. + """ + import threading + from unittest.mock import MagicMock + + result: dict = {} + + def _run(): + try: + cost_per_token( + model=MagicMock(), + prompt_tokens=10, + completion_tokens=5, + custom_llm_provider="anthropic", + ) + result["status"] = "returned" + except Exception: + result["status"] = "raised" + + worker = threading.Thread(target=_run, daemon=True) + worker.start() + worker.join(timeout=10) + + assert not worker.is_alive(), "cost_per_token hung on a non-string model" + assert result.get("status") in ("returned", "raised") + + def test_completion_cost_uses_response_model_for_dynamic_routing(): """ Test that completion_cost uses the model from the response object