fix: completion_cost AttributeError on streaming Anthropic web_search responses (#26153) (#27346)

* fix: coerce server_tool_use dict to ServerToolUse in Usage.__init__ (#26153)

* fix: coerce server_tool_use to ServerToolUse in stream_chunk_builder (#26153)

* fix: dict/pydantic-tolerant access in tool_call_cost_tracking (#26153)

* fix: dict/pydantic-tolerant access in anthropic cost_calculation (#26153)

* test: assert ServerToolUse type in existing stream_chunk_builder anthropic web search test

* test: regression test for #26153 (stream_chunk_builder server_tool_use type)

* test: dict/pydantic safety for tool_call_cost_tracking helper

* test: dict/pydantic safety for anthropic web_search cost

* refactor: consolidate _get_web_search_requests into shared cost-calc utils

* test(realtime): use gpt-realtime; openai retired gpt-4o-realtime-preview

OpenAI shut down the gpt-4o-realtime-preview family (incl. the undated
alias) on 2026-05-07, causing the live realtime test to fail with a
4000 invalid_request_error.invalid_model close. gpt-realtime is the GA
successor; switch the live-call tests to it, matching the base branch.

* refactor(types): drop redundant server_tool_use coercion in Usage.__init__

---------

Co-authored-by: mateo-berri <277851410+mateo-berri@users.noreply.github.com>
This commit is contained in:
ishaan-berri 2026-06-10 21:20:11 -07:00 committed by GitHub
parent 6068bb7781
commit 4a3860df1f
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
8 changed files with 360 additions and 13 deletions

View File

@ -6,6 +6,7 @@ from typing import Any, Dict, List, Literal, Optional, Tuple
import litellm import litellm
from litellm.constants import OPENAI_FILE_SEARCH_COST_PER_1K_CALLS from litellm.constants import OPENAI_FILE_SEARCH_COST_PER_1K_CALLS
from litellm.litellm_core_utils.llm_cost_calc.utils import _get_web_search_requests
from litellm.types.llms.openai import ( from litellm.types.llms.openai import (
FileSearchTool, FileSearchTool,
ResponsesAPIResponse, ResponsesAPIResponse,
@ -339,8 +340,7 @@ class StandardBuiltInToolCostTracking:
# and _handle_web_search_cost() is never called. # and _handle_web_search_cost() is never called.
if ( if (
hasattr(usage, "server_tool_use") hasattr(usage, "server_tool_use")
and usage.server_tool_use is not None and _get_web_search_requests(usage.server_tool_use) is not None
and usage.server_tool_use.web_search_requests is not None
): ):
return True return True
return False return False
@ -352,8 +352,7 @@ class StandardBuiltInToolCostTracking:
elif usage is not None: elif usage is not None:
if ( if (
hasattr(usage, "server_tool_use") hasattr(usage, "server_tool_use")
and usage.server_tool_use is not None and _get_web_search_requests(usage.server_tool_use) is not None
and usage.server_tool_use.web_search_requests is not None
): ):
return True return True
elif ( elif (

View File

@ -1,7 +1,7 @@
# What is this? # What is this?
## Helper utilities for cost_per_token() ## Helper utilities for cost_per_token()
from typing import Literal, Optional, Tuple, TypedDict, cast from typing import Any, Literal, Optional, Tuple, TypedDict, cast
import litellm import litellm
from litellm._logging import verbose_logger from litellm._logging import verbose_logger
@ -42,6 +42,26 @@ def _get_token_detail_value(details: object, key: str) -> Optional[int]:
return value if isinstance(value, int) else None return value if isinstance(value, int) else None
def _get_web_search_requests(server_tool_use: Any) -> Optional[int]:
    """
    Read ``web_search_requests`` off a ``server_tool_use`` value, whatever
    its shape.

    Accepts ``None``, a plain ``dict``, or any object exposing the value as
    an attribute (e.g. a ``ServerToolUse`` pydantic instance).

    Returns ``None`` when the value cannot be resolved, so callers can tell
    "absent" apart from "zero" with an ``is None`` check.

    See https://github.com/BerriAI/litellm/issues/26153 - ``stream_chunk_builder``
    historically left this as a plain ``dict``, which broke direct attribute
    access in cost calculation.
    """
    if isinstance(server_tool_use, dict):
        resolved = server_tool_use.get("web_search_requests")
    else:
        # ``getattr`` with a default also covers ``None`` and objects that
        # do not carry the attribute at all.
        resolved = getattr(server_tool_use, "web_search_requests", None)
    return resolved
def _is_above_128k(tokens: float) -> bool: def _is_above_128k(tokens: float) -> bool:
if tokens > 128000: if tokens > 128000:
return True return True

View File

@ -637,7 +637,18 @@ class ChunkProcessor:
hasattr(usage_chunk, "server_tool_use") hasattr(usage_chunk, "server_tool_use")
and usage_chunk.server_tool_use is not None and usage_chunk.server_tool_use is not None
): ):
server_tool_use = usage_chunk.server_tool_use # Coerce dict to ServerToolUse so downstream cost-calc code
# (which accesses .web_search_requests as an attribute)
# doesn't raise AttributeError. Some providers / streaming
# paths leave server_tool_use as a plain dict on the chunk.
if isinstance(usage_chunk.server_tool_use, dict):
server_tool_use = ServerToolUse(**usage_chunk.server_tool_use)
elif isinstance(usage_chunk.server_tool_use, ServerToolUse):
server_tool_use = usage_chunk.server_tool_use
else:
server_tool_use = ServerToolUse.model_validate(
usage_chunk.server_tool_use
)
if ( if (
usage_chunk_dict["prompt_tokens_details"] is not None usage_chunk_dict["prompt_tokens_details"] is not None
and getattr( and getattr(

View File

@ -7,6 +7,7 @@ from typing import TYPE_CHECKING, Optional, Tuple
from litellm.litellm_core_utils.llm_cost_calc.utils import ( from litellm.litellm_core_utils.llm_cost_calc.utils import (
_get_token_base_cost, _get_token_base_cost,
_get_web_search_requests,
_parse_prompt_tokens_details, _parse_prompt_tokens_details,
calculate_cache_writing_cost, calculate_cache_writing_cost,
generic_cost_per_token, generic_cost_per_token,
@ -110,11 +111,12 @@ def get_cost_for_anthropic_web_search(
if model_info is None: if model_info is None:
return 0.0 return 0.0
if ( if usage is None:
usage is None return 0.0
or usage.server_tool_use is None web_search_requests = _get_web_search_requests(
or usage.server_tool_use.web_search_requests is None getattr(usage, "server_tool_use", None)
): )
if web_search_requests is None:
return 0.0 return 0.0
## Get the cost per web search request ## Get the cost per web search request
@ -128,5 +130,5 @@ def get_cost_for_anthropic_web_search(
return 0.0 return 0.0
## Calculate the total cost ## Calculate the total cost
total_cost = cost_per_web_search_request * usage.server_tool_use.web_search_requests total_cost = cost_per_web_search_request * web_search_requests
return total_cost return total_cost

View File

@ -0,0 +1,88 @@
"""
Tests that the cost-tracking call sites tolerate ``server_tool_use`` being
either a ``dict`` or a ``ServerToolUse`` pydantic instance.
See https://github.com/BerriAI/litellm/issues/26153.
"""
import os
import sys
import pytest
sys.path.insert(0, os.path.abspath("../../../.."))
from litellm.litellm_core_utils.llm_cost_calc.tool_call_cost_tracking import (
StandardBuiltInToolCostTracking,
_get_web_search_requests,
)
from litellm.types.utils import ModelResponse, ServerToolUse, Usage
class _UsageWithDictServerToolUse:
    """
    Minimal usage stand-in for the cost-tracking tests.

    It stores whatever ``server_tool_use`` value it is given (the broken
    streaming-rebuild shape was a plain dict) and always exposes
    ``prompt_tokens_details`` as ``None``.
    """

    def __init__(self, server_tool_use):
        self.prompt_tokens_details = None
        self.server_tool_use = server_tool_use
def test_get_web_search_requests_handles_none():
    """A ``None`` server_tool_use resolves to ``None``."""
    resolved = _get_web_search_requests(None)
    assert resolved is None
def test_get_web_search_requests_handles_dict():
    """A dict carrying the key yields the stored count."""
    as_dict = {"web_search_requests": 5}
    assert _get_web_search_requests(as_dict) == 5
def test_get_web_search_requests_handles_dict_missing_key():
    """A dict without the key resolves to ``None``."""
    resolved = _get_web_search_requests({})
    assert resolved is None
def test_get_web_search_requests_handles_pydantic():
    """A ``ServerToolUse`` instance yields its ``web_search_requests`` value."""
    resolved = _get_web_search_requests(ServerToolUse(web_search_requests=7))
    assert resolved == 7
def test_get_web_search_requests_handles_pydantic_with_none_value():
    """A ``ServerToolUse`` built with no arguments resolves to ``None``."""
    resolved = _get_web_search_requests(ServerToolUse())
    assert resolved is None
def test_response_object_includes_web_search_call_with_dict_server_tool_use():
    """
    Regression for the original crash: with ``usage.server_tool_use`` held as
    a plain dict, ``response_object_includes_web_search_call`` used to raise
    ``AttributeError``.
    """
    dict_usage = _UsageWithDictServerToolUse({"web_search_requests": 2})

    # The call must complete without raising and must report the web search.
    detected = StandardBuiltInToolCostTracking.response_object_includes_web_search_call(
        response_object=ModelResponse(), usage=dict_usage  # type: ignore[arg-type]
    )
    assert detected is True
def test_response_object_includes_web_search_call_with_pydantic_server_tool_use():
    """A ``ServerToolUse`` instance on the usage object is detected as a web search call."""
    pydantic_usage = _UsageWithDictServerToolUse(ServerToolUse(web_search_requests=2))
    detected = StandardBuiltInToolCostTracking.response_object_includes_web_search_call(
        response_object=ModelResponse(), usage=pydantic_usage  # type: ignore[arg-type]
    )
    assert detected is True
def test_response_object_includes_web_search_call_with_none_server_tool_use():
    """With ``server_tool_use`` set to ``None`` no web search call is reported."""
    empty_usage = _UsageWithDictServerToolUse(None)
    detected = StandardBuiltInToolCostTracking.response_object_includes_web_search_call(
        response_object=ModelResponse(), usage=empty_usage  # type: ignore[arg-type]
    )
    assert detected is False

View File

@ -0,0 +1,130 @@
"""
Regression tests for https://github.com/BerriAI/litellm/issues/26153
``stream_chunk_builder`` used to leave ``usage.server_tool_use`` as a plain
``dict`` when reconstructing a streaming response. Downstream cost-calculation
code (``StandardBuiltInToolCostTracking.response_object_includes_web_search_call``
and ``get_cost_for_anthropic_web_search``) accesses
``usage.server_tool_use.web_search_requests`` as an attribute, which raised
``AttributeError: 'dict' object has no attribute 'web_search_requests'``.
These tests reconstruct streaming chunks for an Anthropic-style web_search
response and assert:
1. ``stream_chunk_builder`` returns ``ServerToolUse`` (not ``dict``) for
``usage.server_tool_use``.
2. ``completion_cost`` runs end-to-end on the rebuilt response without
raising ``AttributeError``.
"""
import os
import sys
import pytest
sys.path.insert(0, os.path.abspath("../../.."))
from litellm import completion_cost, stream_chunk_builder
from litellm.types.utils import (
Delta,
ModelResponseStream,
ServerToolUse,
StreamingChoices,
Usage,
)
def _make_text_chunk(text: str) -> ModelResponseStream:
    """Build a non-final streaming chunk whose single assistant delta carries ``text``."""
    text_choice = StreamingChoices(
        finish_reason=None,
        index=0,
        delta=Delta(role="assistant", content=text),
    )
    return ModelResponseStream(
        id="chatcmpl-test-26153",
        created=1700000000,
        model="claude-3-haiku-20240307",
        object="chat.completion.chunk",
        choices=[text_choice],
    )
def _make_finish_chunk_with_usage_dict_server_tool_use() -> ModelResponseStream:
    """Build the closing chunk whose usage receives ``server_tool_use`` as a *dict* (the bug shape)."""
    stop_choice = StreamingChoices(
        finish_reason="stop",
        index=0,
        delta=Delta(),
    )
    # NOTE: server_tool_use is deliberately handed over as a dict: this is
    # the shape that historically slipped through stream_chunk_builder
    # unchanged.
    final_usage = Usage(
        prompt_tokens=42,
        completion_tokens=11,
        total_tokens=53,
        server_tool_use={"web_search_requests": 3},
    )
    return ModelResponseStream(
        id="chatcmpl-test-26153",
        created=1700000000,
        model="claude-3-haiku-20240307",
        object="chat.completion.chunk",
        choices=[stop_choice],
        usage=final_usage,
    )
def test_stream_chunk_builder_coerces_server_tool_use_to_pydantic():
    """
    Regression: the response rebuilt by stream_chunk_builder must expose
    ``usage.server_tool_use`` as a ServerToolUse, not a dict.
    """
    rebuilt = stream_chunk_builder(
        [
            _make_text_chunk("Otters "),
            _make_text_chunk("are great."),
            _make_finish_chunk_with_usage_dict_server_tool_use(),
        ]
    )

    assert rebuilt is not None
    assert rebuilt.usage is not None  # type: ignore[attr-defined]

    tool_use = rebuilt.usage.server_tool_use  # type: ignore[attr-defined]
    assert (
        tool_use is not None
    ), "server_tool_use should be carried through from the final chunk"
    assert isinstance(tool_use, ServerToolUse), (
        f"expected ServerToolUse, got {type(tool_use).__name__}: "
        f"{tool_use!r}"
    )
    # Plain attribute access is exactly what used to blow up.
    assert tool_use.web_search_requests == 3
def test_completion_cost_does_not_raise_on_streaming_web_search_response():
    """
    Regression: running completion_cost(...) on a response rebuilt by
    stream_chunk_builder from a streaming Anthropic web_search call must not
    raise AttributeError.
    """
    rebuilt = stream_chunk_builder(
        [
            _make_text_chunk("hello"),
            _make_finish_chunk_with_usage_dict_server_tool_use(),
        ]
    )
    assert rebuilt is not None

    # The dollar figure depends on the model-pricing table, so only the
    # absence of the `dict has no attribute 'web_search_requests'`
    # AttributeError and the numeric type of the result are checked.
    try:
        cost = completion_cost(completion_response=rebuilt)
    except AttributeError as e:  # pragma: no cover - regression guard
        pytest.fail(
            "completion_cost raised AttributeError after stream_chunk_builder "
            f"(issue #26153 regression): {e}"
        )
    else:
        assert isinstance(cost, (int, float))

View File

@ -520,7 +520,10 @@ def test_stream_chunk_builder_anthropic_web_search():
assert usage.prompt_tokens == 50 assert usage.prompt_tokens == 50
assert usage.completion_tokens == 27 assert usage.completion_tokens == 27
assert usage.total_tokens == 77 assert usage.total_tokens == 77
assert usage.server_tool_use["web_search_requests"] == 2 # server_tool_use must be a ServerToolUse pydantic so downstream cost-calc
# (which uses attribute access) works. See issue #26153.
assert isinstance(usage.server_tool_use, ServerToolUse)
assert usage.server_tool_use.web_search_requests == 2
def test_sort_chunks_handles_dict_hidden_params_created_at(): def test_sort_chunks_handles_dict_hidden_params_created_at():

View File

@ -0,0 +1,94 @@
"""
Tests that ``get_cost_for_anthropic_web_search`` tolerates ``server_tool_use``
being either a ``dict`` or a ``ServerToolUse`` pydantic instance.
See https://github.com/BerriAI/litellm/issues/26153.
"""
import os
import sys
import pytest
sys.path.insert(0, os.path.abspath("../../../.."))
from litellm.llms.anthropic.cost_calculation import (
_get_web_search_requests,
get_cost_for_anthropic_web_search,
)
from litellm.types.utils import ModelInfo, ServerToolUse
class _UsageWithServerToolUse:
    """Bare usage stand-in that only carries the ``server_tool_use`` value it is given."""

    def __init__(self, server_tool_use):
        # Stored untouched so tests can supply a dict, a pydantic object or None.
        self.server_tool_use = server_tool_use
def _make_model_info(cost_per_query: float = 0.01) -> ModelInfo:
    """
    Build a minimal ``ModelInfo`` mapping that prices the low, medium and high
    search-context sizes at the same ``cost_per_query``.
    """
    return {  # type: ignore[typeddict-item]
        "search_context_cost_per_query": {
            "search_context_size_low": cost_per_query,
            "search_context_size_medium": cost_per_query,
            "search_context_size_high": cost_per_query,
        }
    }
def test_get_web_search_requests_handles_none():
    """A ``None`` server_tool_use resolves to ``None``."""
    resolved = _get_web_search_requests(None)
    assert resolved is None
def test_get_web_search_requests_handles_dict():
    """A dict carrying the key yields the stored count."""
    as_dict = {"web_search_requests": 4}
    assert _get_web_search_requests(as_dict) == 4
def test_get_web_search_requests_handles_dict_missing_key():
    """A dict without the key resolves to ``None``."""
    resolved = _get_web_search_requests({})
    assert resolved is None
def test_get_web_search_requests_handles_pydantic():
    """A ``ServerToolUse`` instance yields its ``web_search_requests`` value."""
    resolved = _get_web_search_requests(ServerToolUse(web_search_requests=2))
    assert resolved == 2
def test_get_cost_for_anthropic_web_search_with_dict_server_tool_use():
    """
    Regression: ``stream_chunk_builder`` handed over ``server_tool_use`` as a
    dict, and direct attribute access on it raised ``AttributeError``.
    """
    pricing = _make_model_info(cost_per_query=0.01)
    dict_usage = _UsageWithServerToolUse({"web_search_requests": 3})

    total = get_cost_for_anthropic_web_search(
        model_info=pricing, usage=dict_usage  # type: ignore[arg-type]
    )

    assert total == pytest.approx(0.03)
def test_get_cost_for_anthropic_web_search_with_pydantic_server_tool_use():
    """Three requests on a ``ServerToolUse`` instance at 0.01 each cost about 0.03."""
    pricing = _make_model_info(cost_per_query=0.01)
    pydantic_usage = _UsageWithServerToolUse(ServerToolUse(web_search_requests=3))

    total = get_cost_for_anthropic_web_search(
        model_info=pricing, usage=pydantic_usage  # type: ignore[arg-type]
    )

    assert total == pytest.approx(0.03)
def test_get_cost_for_anthropic_web_search_with_none_server_tool_use():
    """A usage object whose ``server_tool_use`` is ``None`` costs exactly 0.0."""
    pricing = _make_model_info(cost_per_query=0.01)
    empty_usage = _UsageWithServerToolUse(None)

    total = get_cost_for_anthropic_web_search(
        model_info=pricing, usage=empty_usage  # type: ignore[arg-type]
    )

    assert total == 0.0
def test_get_cost_for_anthropic_web_search_with_no_usage():
    """Passing ``usage=None`` costs exactly 0.0."""
    pricing = _make_model_info(cost_per_query=0.01)
    total = get_cost_for_anthropic_web_search(model_info=pricing, usage=None)
    assert total == 0.0