Litellm OpenAI double prefix bug (#28661)

* Fix token cost lookup when deployment ids repeat the provider prefix.

Router configs may expose models like openai/openai/<model>; normalize those
strings before joining provider/model so model_cost resolves correctly.

Co-authored-by: Cursor <cursoragent@cursor.com>

* scope duplicate-prefix cost fix to explicit providers; isolate test patch state

Co-authored-by: Cursor <cursoragent@cursor.com>

* fix(mypy): narrow custom_llm_provider after resolution in cost_per_token

Co-authored-by: Cursor <cursoragent@cursor.com>

* fix(cost_calculator): guard provider-prefix dedup against non-string model

The provider-prefix dedup loop assumed `model` is always a string. When a
non-string is passed (e.g. a MagicMock from a mocked transport in router
tests), `model.startswith(...)` is always truthy and each slice returns a new
object, so the loop never terminates — it spins and OOM-kills the test worker
(observed as the litellm_router_testing CI regression, e.g.
test_router_pattern_match_e2e). Only run the string-based dedup and prefix-join
when `model` is actually a str, preserving the previous graceful behavior for
non-string inputs.

---------

Co-authored-by: Cursor <cursoragent@cursor.com>
Co-authored-by: mateo-berri <277851410+mateo-berri@users.noreply.github.com>
This commit is contained in:
Shivam Rawat 2026-05-26 11:40:44 -07:00 committed by GitHub
parent fbff60e9d9
commit bd2d0ad519
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 82 additions and 2 deletions

View File

@@ -417,9 +417,36 @@ def cost_per_token( # noqa: PLR0915
prompt_tokens_cost_usd_dollar: float = 0
completion_tokens_cost_usd_dollar: float = 0
model_cost_ref = litellm.model_cost
# Only callers that explicitly pass `custom_llm_provider` get the
# dedup/prefix-join treatment. When provider is omitted, preserve legacy
# behavior: `model_with_provider` stays equal to the raw `model` string
# (provider is detected below for downstream use only).
caller_supplied_provider = custom_llm_provider is not None
# `model` is normally a string, but callers that mock the transport can pass
# non-string objects. Only run the string-based dedup/prefix-join when it is
# actually a string — e.g. a MagicMock's `.startswith()` is always truthy and
# its slices return new mocks, which would spin the dedup loop forever.
model_is_str = isinstance(model, str)
# Router/proxy deployments may repeat the provider segment (e.g. model_name
# "openai/openai/gpt-5.5"). Strip duplicated `{provider}/` chains before joining.
if caller_supplied_provider and model_is_str:
_dup_prefix = f"{custom_llm_provider}/"
while model.startswith(_dup_prefix):
_remainder = model[len(_dup_prefix) :]
if _remainder.startswith(_dup_prefix):
model = _remainder
else:
break
model_with_provider = model
if custom_llm_provider is not None:
model_with_provider = custom_llm_provider + "/" + model
if caller_supplied_provider:
_prov_prefix = f"{custom_llm_provider}/"
if model_is_str and model.startswith(_prov_prefix):
model_with_provider = model
else:
model_with_provider = f"{custom_llm_provider}/{model}"
if region_name is not None:
model_with_provider_and_region = (
f"{custom_llm_provider}/{region_name}/{model}"
@@ -430,6 +457,9 @@ def cost_per_token( # noqa: PLR0915
model_with_provider = model_with_provider_and_region
else:
_, custom_llm_provider, _, _ = litellm.get_llm_provider(model=model)
assert custom_llm_provider is not None # caller-supplied or get_llm_provider
model_without_prefix = model
model_parts = model.split("/", 1)
if len(model_parts) > 1:

View File

@@ -13,6 +13,7 @@ from pydantic import BaseModel
import litellm
from litellm.cost_calculator import (
completion_cost,
cost_per_token,
handle_realtime_stream_cost_calculation,
response_cost_calculator,
)
@@ -21,6 +22,55 @@ from litellm.types.utils import ModelResponse, PromptTokensDetailsWrapper, Usage
from litellm.utils import TranscriptionResponse
def test_cost_per_token_duplicate_openai_prefix_matches_model_cost(monkeypatch):
    """
    A deployment id that repeats the provider segment (``openai/openai/<model>``)
    must still yield a positive total cost when the provider is passed explicitly.

    Both the environment variable and ``litellm.model_cost`` are set through
    ``monkeypatch`` so the changes are undone when the test finishes.
    """
    monkeypatch.setenv("LITELLM_LOCAL_MODEL_COST_MAP", "True")
    # Presumably url="" plus the env flag above selects the bundled local cost
    # map rather than a remote fetch — confirm against get_model_cost_map.
    local_cost_map = litellm.get_model_cost_map(url="")
    monkeypatch.setattr(litellm, "model_cost", local_cost_map)

    prompt_cost, completion_usd_cost = cost_per_token(
        custom_llm_provider="openai",
        model="openai/openai/gpt-5.5",
        prompt_tokens=100,
        completion_tokens=50,
    )

    total_cost = prompt_cost + completion_usd_cost
    assert total_cost > 0
def test_cost_per_token_non_string_model_does_not_hang():
    """
    ``cost_per_token`` must finish promptly when ``model`` is not a string.

    A ``MagicMock`` is passed as ``model`` together with an explicit provider.
    The call runs on a daemon thread and is given 10 seconds; either returning
    or raising is acceptable, but the thread must not still be running.
    """
    import threading
    from unittest.mock import MagicMock

    outcome: dict = {}

    def _invoke():
        # Any exception counts as a prompt exit; only a hang is a failure.
        try:
            cost_per_token(
                model=MagicMock(),
                prompt_tokens=10,
                completion_tokens=5,
                custom_llm_provider="anthropic",
            )
        except Exception:
            outcome["status"] = "raised"
        else:
            outcome["status"] = "returned"

    # Daemon thread so a genuinely hung call cannot block interpreter exit.
    runner = threading.Thread(target=_invoke, daemon=True)
    runner.start()
    runner.join(timeout=10)

    assert not runner.is_alive(), "cost_per_token hung on a non-string model"
    assert outcome.get("status") in ("returned", "raised")
def test_completion_cost_uses_response_model_for_dynamic_routing():
"""
Test that completion_cost uses the model from the response object