* fix: coerce server_tool_use dict to ServerToolUse in Usage.__init__ (#26153) * fix: coerce server_tool_use to ServerToolUse in stream_chunk_builder (#26153) * fix: dict/pydantic-tolerant access in tool_call_cost_tracking (#26153) * fix: dict/pydantic-tolerant access in anthropic cost_calculation (#26153) * test: assert ServerToolUse type in existing stream_chunk_builder anthropic web search test * test: regression test for #26153 (stream_chunk_builder server_tool_use type) * test: dict/pydantic safety for tool_call_cost_tracking helper * test: dict/pydantic safety for anthropic web_search cost * refactor: consolidate _get_web_search_requests into shared cost-calc utils * test(realtime): use gpt-realtime; openai retired gpt-4o-realtime-preview OpenAI shut down the gpt-4o-realtime-preview family (incl. the undated alias) on 2026-05-07, causing the live realtime test to fail with a 4000 invalid_request_error.invalid_model close. gpt-realtime is the GA successor; switch the live-call tests to it, matching the base branch. * refactor(types): drop redundant server_tool_use coercion in Usage.__init__ --------- Co-authored-by: mateo-berri <277851410+mateo-berri@users.noreply.github.com>
This commit is contained in:
parent
6068bb7781
commit
4a3860df1f
@ -6,6 +6,7 @@ from typing import Any, Dict, List, Literal, Optional, Tuple
|
||||
|
||||
import litellm
|
||||
from litellm.constants import OPENAI_FILE_SEARCH_COST_PER_1K_CALLS
|
||||
from litellm.litellm_core_utils.llm_cost_calc.utils import _get_web_search_requests
|
||||
from litellm.types.llms.openai import (
|
||||
FileSearchTool,
|
||||
ResponsesAPIResponse,
|
||||
@ -339,8 +340,7 @@ class StandardBuiltInToolCostTracking:
|
||||
# and _handle_web_search_cost() is never called.
|
||||
if (
|
||||
hasattr(usage, "server_tool_use")
|
||||
and usage.server_tool_use is not None
|
||||
and usage.server_tool_use.web_search_requests is not None
|
||||
and _get_web_search_requests(usage.server_tool_use) is not None
|
||||
):
|
||||
return True
|
||||
return False
|
||||
@ -352,8 +352,7 @@ class StandardBuiltInToolCostTracking:
|
||||
elif usage is not None:
|
||||
if (
|
||||
hasattr(usage, "server_tool_use")
|
||||
and usage.server_tool_use is not None
|
||||
and usage.server_tool_use.web_search_requests is not None
|
||||
and _get_web_search_requests(usage.server_tool_use) is not None
|
||||
):
|
||||
return True
|
||||
elif (
|
||||
|
||||
@ -1,7 +1,7 @@
|
||||
# What is this?
|
||||
## Helper utilities for cost_per_token()
|
||||
|
||||
from typing import Literal, Optional, Tuple, TypedDict, cast
|
||||
from typing import Any, Literal, Optional, Tuple, TypedDict, cast
|
||||
|
||||
import litellm
|
||||
from litellm._logging import verbose_logger
|
||||
@ -42,6 +42,26 @@ def _get_token_detail_value(details: object, key: str) -> Optional[int]:
|
||||
return value if isinstance(value, int) else None
|
||||
|
||||
|
||||
def _get_web_search_requests(server_tool_use: Any) -> Optional[int]:
    """
    Tolerantly read ``web_search_requests`` from a ``server_tool_use`` value.

    Args:
        server_tool_use: ``None``, a ``dict``, a ``ServerToolUse`` pydantic
            instance, or any other object supporting attribute access.

    Returns:
        The integer request count, or ``None`` when it is absent or is not an
        ``int``. Callers can distinguish "absent" from "zero" using
        ``is None``.

    See https://github.com/BerriAI/litellm/issues/26153 -- ``stream_chunk_builder``
    historically left this as a plain ``dict``, which broke direct attribute
    access in cost calculation.
    """
    if server_tool_use is None:
        return None
    if isinstance(server_tool_use, dict):
        value = server_tool_use.get("web_search_requests")
    else:
        value = getattr(server_tool_use, "web_search_requests", None)
    # Same guard as _get_token_detail_value: honour the declared Optional[int]
    # so a malformed payload can never reach the cost multiplication.
    return value if isinstance(value, int) else None
|
||||
|
||||
|
||||
def _is_above_128k(tokens: float) -> bool:
|
||||
if tokens > 128000:
|
||||
return True
|
||||
|
||||
@ -637,7 +637,18 @@ class ChunkProcessor:
|
||||
hasattr(usage_chunk, "server_tool_use")
|
||||
and usage_chunk.server_tool_use is not None
|
||||
):
|
||||
server_tool_use = usage_chunk.server_tool_use
|
||||
# Coerce dict to ServerToolUse so downstream cost-calc code
|
||||
# (which accesses .web_search_requests as an attribute)
|
||||
# doesn't raise AttributeError. Some providers / streaming
|
||||
# paths leave server_tool_use as a plain dict on the chunk.
|
||||
if isinstance(usage_chunk.server_tool_use, dict):
|
||||
server_tool_use = ServerToolUse(**usage_chunk.server_tool_use)
|
||||
elif isinstance(usage_chunk.server_tool_use, ServerToolUse):
|
||||
server_tool_use = usage_chunk.server_tool_use
|
||||
else:
|
||||
server_tool_use = ServerToolUse.model_validate(
|
||||
usage_chunk.server_tool_use
|
||||
)
|
||||
if (
|
||||
usage_chunk_dict["prompt_tokens_details"] is not None
|
||||
and getattr(
|
||||
|
||||
@ -7,6 +7,7 @@ from typing import TYPE_CHECKING, Optional, Tuple
|
||||
|
||||
from litellm.litellm_core_utils.llm_cost_calc.utils import (
|
||||
_get_token_base_cost,
|
||||
_get_web_search_requests,
|
||||
_parse_prompt_tokens_details,
|
||||
calculate_cache_writing_cost,
|
||||
generic_cost_per_token,
|
||||
@ -110,11 +111,12 @@ def get_cost_for_anthropic_web_search(
|
||||
if model_info is None:
|
||||
return 0.0
|
||||
|
||||
if (
|
||||
usage is None
|
||||
or usage.server_tool_use is None
|
||||
or usage.server_tool_use.web_search_requests is None
|
||||
):
|
||||
if usage is None:
|
||||
return 0.0
|
||||
web_search_requests = _get_web_search_requests(
|
||||
getattr(usage, "server_tool_use", None)
|
||||
)
|
||||
if web_search_requests is None:
|
||||
return 0.0
|
||||
|
||||
## Get the cost per web search request
|
||||
@ -128,5 +130,5 @@ def get_cost_for_anthropic_web_search(
|
||||
return 0.0
|
||||
|
||||
## Calculate the total cost
|
||||
total_cost = cost_per_web_search_request * usage.server_tool_use.web_search_requests
|
||||
total_cost = cost_per_web_search_request * web_search_requests
|
||||
return total_cost
|
||||
|
||||
@ -0,0 +1,88 @@
|
||||
"""
|
||||
Tests that the cost-tracking call sites tolerate ``server_tool_use`` being
|
||||
either a ``dict`` or a ``ServerToolUse`` pydantic instance.
|
||||
|
||||
See https://github.com/BerriAI/litellm/issues/26153.
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
|
||||
import pytest
|
||||
|
||||
sys.path.insert(0, os.path.abspath("../../../.."))
|
||||
|
||||
from litellm.litellm_core_utils.llm_cost_calc.tool_call_cost_tracking import (
|
||||
StandardBuiltInToolCostTracking,
|
||||
_get_web_search_requests,
|
||||
)
|
||||
from litellm.types.utils import ModelResponse, ServerToolUse, Usage
|
||||
|
||||
|
||||
class _UsageWithDictServerToolUse:
    """
    Tiny usage stand-in that stores ``server_tool_use`` exactly as given.

    Its main purpose is to mimic the broken streaming-rebuild shape, where
    ``server_tool_use`` is a plain dict, but the tests in this module also
    pass a ``ServerToolUse`` instance or ``None`` through it.
    """

    def __init__(self, server_tool_use):
        # No coercion on purpose: the test decides the shape under test.
        self.server_tool_use = server_tool_use
        self.prompt_tokens_details = None
|
||||
|
||||
|
||||
def test_get_web_search_requests_handles_none():
    """A ``None`` ``server_tool_use`` resolves to ``None``."""
    resolved = _get_web_search_requests(None)
    assert resolved is None
|
||||
|
||||
|
||||
def test_get_web_search_requests_handles_dict():
    """The count is read by key when ``server_tool_use`` is a dict."""
    as_dict = {"web_search_requests": 5}
    assert _get_web_search_requests(as_dict) == 5
|
||||
|
||||
|
||||
def test_get_web_search_requests_handles_dict_missing_key():
    """A dict without the ``web_search_requests`` key resolves to ``None``."""
    resolved = _get_web_search_requests({})
    assert resolved is None
|
||||
|
||||
|
||||
def test_get_web_search_requests_handles_pydantic():
    """The count is read by attribute from a ``ServerToolUse`` instance."""
    resolved = _get_web_search_requests(ServerToolUse(web_search_requests=7))
    assert resolved == 7
|
||||
|
||||
|
||||
def test_get_web_search_requests_handles_pydantic_with_none_value():
    """A ``ServerToolUse`` built with no arguments resolves to ``None``."""
    resolved = _get_web_search_requests(ServerToolUse())
    assert resolved is None
|
||||
|
||||
|
||||
def test_response_object_includes_web_search_call_with_dict_server_tool_use():
    """
    Regression for the reported crash: ``usage.server_tool_use`` is a plain
    dict, and ``response_object_includes_web_search_call`` used to raise
    ``AttributeError`` when checking it.
    """
    dict_usage = _UsageWithDictServerToolUse({"web_search_requests": 2})

    # The call must not raise, and the web search must still be detected.
    detected = StandardBuiltInToolCostTracking.response_object_includes_web_search_call(
        response_object=ModelResponse(),
        usage=dict_usage,  # type: ignore[arg-type]
    )
    assert detected is True
|
||||
|
||||
|
||||
def test_response_object_includes_web_search_call_with_pydantic_server_tool_use():
    """A ``ServerToolUse`` instance with a count set is detected as a web search call."""
    pydantic_usage = _UsageWithDictServerToolUse(ServerToolUse(web_search_requests=2))

    detected = StandardBuiltInToolCostTracking.response_object_includes_web_search_call(
        response_object=ModelResponse(),
        usage=pydantic_usage,  # type: ignore[arg-type]
    )
    assert detected is True
|
||||
|
||||
|
||||
def test_response_object_includes_web_search_call_with_none_server_tool_use():
    """With ``server_tool_use`` set to ``None`` no web search call is reported."""
    empty_usage = _UsageWithDictServerToolUse(None)

    detected = StandardBuiltInToolCostTracking.response_object_includes_web_search_call(
        response_object=ModelResponse(),
        usage=empty_usage,  # type: ignore[arg-type]
    )
    assert detected is False
|
||||
@ -0,0 +1,130 @@
|
||||
"""
|
||||
Regression tests for https://github.com/BerriAI/litellm/issues/26153
|
||||
|
||||
``stream_chunk_builder`` used to leave ``usage.server_tool_use`` as a plain
|
||||
``dict`` when reconstructing a streaming response. Downstream cost-calculation
|
||||
code (``StandardBuiltInToolCostTracking.response_object_includes_web_search_call``
|
||||
and ``get_cost_for_anthropic_web_search``) accesses
|
||||
``usage.server_tool_use.web_search_requests`` as an attribute, which raised
|
||||
``AttributeError: 'dict' object has no attribute 'web_search_requests'``.
|
||||
|
||||
These tests reconstruct streaming chunks for an Anthropic-style web_search
|
||||
response and assert:
|
||||
|
||||
1. ``stream_chunk_builder`` returns ``ServerToolUse`` (not ``dict``) for
|
||||
``usage.server_tool_use``.
|
||||
2. ``completion_cost`` runs end-to-end on the rebuilt response without
|
||||
raising ``AttributeError``.
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
|
||||
import pytest
|
||||
|
||||
sys.path.insert(0, os.path.abspath("../../.."))
|
||||
|
||||
from litellm import completion_cost, stream_chunk_builder
|
||||
from litellm.types.utils import (
|
||||
Delta,
|
||||
ModelResponseStream,
|
||||
ServerToolUse,
|
||||
StreamingChoices,
|
||||
Usage,
|
||||
)
|
||||
|
||||
|
||||
def _make_text_chunk(text: str) -> ModelResponseStream:
    """Build a non-final assistant chunk whose delta content is ``text``."""
    text_delta = Delta(role="assistant", content=text)
    only_choice = StreamingChoices(finish_reason=None, index=0, delta=text_delta)
    return ModelResponseStream(
        id="chatcmpl-test-26153",
        created=1700000000,
        model="claude-3-haiku-20240307",
        object="chat.completion.chunk",
        choices=[only_choice],
    )
|
||||
|
||||
|
||||
def _make_finish_chunk_with_usage_dict_server_tool_use() -> ModelResponseStream:
    """Final chunk where server_tool_use is a *dict* — reproduces the bug shape."""
    # NOTE: server_tool_use is passed as a dict on purpose — this is the shape
    # that historically slipped through stream_chunk_builder unchanged.
    final_usage = Usage(
        prompt_tokens=42,
        completion_tokens=11,
        total_tokens=53,
        server_tool_use={"web_search_requests": 3},
    )
    stop_choice = StreamingChoices(finish_reason="stop", index=0, delta=Delta())
    return ModelResponseStream(
        id="chatcmpl-test-26153",
        created=1700000000,
        model="claude-3-haiku-20240307",
        object="chat.completion.chunk",
        choices=[stop_choice],
        usage=final_usage,
    )
|
||||
|
||||
|
||||
def test_stream_chunk_builder_coerces_server_tool_use_to_pydantic():
    """
    Regression: the response rebuilt by stream_chunk_builder must carry a
    ServerToolUse instance, not a dict, in ``usage.server_tool_use``.
    """
    rebuilt = stream_chunk_builder(
        [
            _make_text_chunk("Otters "),
            _make_text_chunk("are great."),
            _make_finish_chunk_with_usage_dict_server_tool_use(),
        ]
    )

    assert rebuilt is not None
    assert rebuilt.usage is not None  # type: ignore[attr-defined]
    server_tool_use = rebuilt.usage.server_tool_use  # type: ignore[attr-defined]

    assert (
        server_tool_use is not None
    ), "server_tool_use should be carried through from the final chunk"
    assert isinstance(server_tool_use, ServerToolUse), (
        f"expected ServerToolUse, got {type(server_tool_use).__name__}: "
        f"{server_tool_use!r}"
    )
    # Attribute access is exactly what used to fail on the dict shape.
    assert server_tool_use.web_search_requests == 3
|
||||
|
||||
|
||||
def test_completion_cost_does_not_raise_on_streaming_web_search_response():
    """
    Regression: completion_cost(...) must not raise AttributeError for a
    response that stream_chunk_builder rebuilt from streaming chunks whose
    final usage carries ``server_tool_use`` as a dict.
    """
    rebuilt = stream_chunk_builder(
        [
            _make_text_chunk("hello"),
            _make_finish_chunk_with_usage_dict_server_tool_use(),
        ]
    )
    assert rebuilt is not None

    # The dollar amount depends on the model-pricing table, so only the type
    # of the result is checked; the point is that no AttributeError
    # (`dict has no attribute 'web_search_requests'`) escapes.
    try:
        cost = completion_cost(completion_response=rebuilt)
    except AttributeError as e:  # pragma: no cover - regression guard
        pytest.fail(
            "completion_cost raised AttributeError after stream_chunk_builder "
            f"(issue #26153 regression): {e}"
        )
    else:
        assert isinstance(cost, (int, float))
|
||||
@ -520,7 +520,10 @@ def test_stream_chunk_builder_anthropic_web_search():
|
||||
assert usage.prompt_tokens == 50
|
||||
assert usage.completion_tokens == 27
|
||||
assert usage.total_tokens == 77
|
||||
assert usage.server_tool_use["web_search_requests"] == 2
|
||||
# server_tool_use must be a ServerToolUse pydantic so downstream cost-calc
|
||||
# (which uses attribute access) works. See issue #26153.
|
||||
assert isinstance(usage.server_tool_use, ServerToolUse)
|
||||
assert usage.server_tool_use.web_search_requests == 2
|
||||
|
||||
|
||||
def test_sort_chunks_handles_dict_hidden_params_created_at():
|
||||
|
||||
@ -0,0 +1,94 @@
|
||||
"""
|
||||
Tests that ``get_cost_for_anthropic_web_search`` tolerates ``server_tool_use``
|
||||
being either a ``dict`` or a ``ServerToolUse`` pydantic instance.
|
||||
|
||||
See https://github.com/BerriAI/litellm/issues/26153.
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
|
||||
import pytest
|
||||
|
||||
sys.path.insert(0, os.path.abspath("../../../.."))
|
||||
|
||||
from litellm.llms.anthropic.cost_calculation import (
|
||||
_get_web_search_requests,
|
||||
get_cost_for_anthropic_web_search,
|
||||
)
|
||||
from litellm.types.utils import ModelInfo, ServerToolUse
|
||||
|
||||
|
||||
class _UsageWithServerToolUse:
    """
    Minimal usage stand-in exposing only ``server_tool_use``.

    The value is stored exactly as given so each test controls its shape
    (plain dict, ``ServerToolUse`` instance, or ``None``).
    """

    def __init__(self, server_tool_use):
        # No coercion on purpose: the raw shape is what the tests exercise.
        self.server_tool_use = server_tool_use
|
||||
|
||||
|
||||
def _make_model_info(cost_per_query: float = 0.01) -> ModelInfo:
    """Build a model-info mapping that prices every search context size at ``cost_per_query``."""
    size_keys = (
        "search_context_size_low",
        "search_context_size_medium",
        "search_context_size_high",
    )
    info: ModelInfo = {  # type: ignore[typeddict-item]
        "search_context_cost_per_query": {key: cost_per_query for key in size_keys}
    }
    return info
|
||||
|
||||
|
||||
def test_get_web_search_requests_handles_none():
    """A ``None`` ``server_tool_use`` resolves to ``None``."""
    resolved = _get_web_search_requests(None)
    assert resolved is None
|
||||
|
||||
|
||||
def test_get_web_search_requests_handles_dict():
    """The count is read by key when ``server_tool_use`` is a dict."""
    as_dict = {"web_search_requests": 4}
    assert _get_web_search_requests(as_dict) == 4
|
||||
|
||||
|
||||
def test_get_web_search_requests_handles_dict_missing_key():
    """A dict without the ``web_search_requests`` key resolves to ``None``."""
    resolved = _get_web_search_requests({})
    assert resolved is None
|
||||
|
||||
|
||||
def test_get_web_search_requests_handles_pydantic():
    """The count is read by attribute from a ``ServerToolUse`` instance."""
    as_model = ServerToolUse(web_search_requests=2)
    assert _get_web_search_requests(as_model) == 2
|
||||
|
||||
|
||||
def test_get_cost_for_anthropic_web_search_with_dict_server_tool_use():
    """
    Regression: ``server_tool_use`` arrived as a dict from
    ``stream_chunk_builder`` and direct attribute access on it raised
    ``AttributeError``. Three requests at 0.01 each must cost 0.03.
    """
    dict_usage = _UsageWithServerToolUse({"web_search_requests": 3})

    cost = get_cost_for_anthropic_web_search(
        model_info=_make_model_info(cost_per_query=0.01),
        usage=dict_usage,  # type: ignore[arg-type]
    )

    assert cost == pytest.approx(0.03)
|
||||
|
||||
|
||||
def test_get_cost_for_anthropic_web_search_with_pydantic_server_tool_use():
    """Three requests on a ``ServerToolUse`` instance at 0.01 each cost 0.03."""
    pydantic_usage = _UsageWithServerToolUse(ServerToolUse(web_search_requests=3))

    cost = get_cost_for_anthropic_web_search(
        model_info=_make_model_info(cost_per_query=0.01),
        usage=pydantic_usage,  # type: ignore[arg-type]
    )

    assert cost == pytest.approx(0.03)
|
||||
|
||||
|
||||
def test_get_cost_for_anthropic_web_search_with_none_server_tool_use():
    """With ``server_tool_use`` set to ``None`` the web search cost is zero."""
    empty_usage = _UsageWithServerToolUse(None)

    cost = get_cost_for_anthropic_web_search(
        model_info=_make_model_info(cost_per_query=0.01),
        usage=empty_usage,  # type: ignore[arg-type]
    )

    assert cost == 0.0
|
||||
|
||||
|
||||
def test_get_cost_for_anthropic_web_search_with_no_usage():
    """With ``usage=None`` the web search cost is zero."""
    cost = get_cost_for_anthropic_web_search(
        model_info=_make_model_info(cost_per_query=0.01),
        usage=None,
    )
    assert cost == 0.0
|
||||
Loading…
Reference in New Issue
Block a user