From 4a3860df1f148486d76093cf95b631e39f888510 Mon Sep 17 00:00:00 2001 From: ishaan-berri <155045088+ishaan-berri@users.noreply.github.com> Date: Wed, 10 Jun 2026 21:20:11 -0700 Subject: [PATCH] fix: completion_cost AttributeError on streaming Anthropic web_search responses (#26153) (#27346) * fix: coerce server_tool_use dict to ServerToolUse in Usage.__init__ (#26153) * fix: coerce server_tool_use to ServerToolUse in stream_chunk_builder (#26153) * fix: dict/pydantic-tolerant access in tool_call_cost_tracking (#26153) * fix: dict/pydantic-tolerant access in anthropic cost_calculation (#26153) * test: assert ServerToolUse type in existing stream_chunk_builder anthropic web search test * test: regression test for #26153 (stream_chunk_builder server_tool_use type) * test: dict/pydantic safety for tool_call_cost_tracking helper * test: dict/pydantic safety for anthropic web_search cost * refactor: consolidate _get_web_search_requests into shared cost-calc utils * test(realtime): use gpt-realtime; openai retired gpt-4o-realtime-preview OpenAI shut down the gpt-4o-realtime-preview family (incl. the undated alias) on 2026-05-07, causing the live realtime test to fail with a 4000 invalid_request_error.invalid_model close. gpt-realtime is the GA successor; switch the live-call tests to it, matching the base branch. * refactor(types): drop redundant server_tool_use coercion in Usage.__init__ --------- Co-authored-by: mateo-berri <277851410+mateo-berri@users.noreply.github.com> --- .../llm_cost_calc/tool_call_cost_tracking.py | 7 +- .../litellm_core_utils/llm_cost_calc/utils.py | 22 ++- .../streaming_chunk_builder_utils.py | 13 +- litellm/llms/anthropic/cost_calculation.py | 14 +- ...est_tool_call_cost_tracking_dict_safety.py | 88 ++++++++++++ ...streaming_chunk_builder_server_tool_use.py | 130 ++++++++++++++++++ .../test_streaming_chunk_builder_utils.py | 5 +- .../test_cost_calculation_dict_safety.py | 94 +++++++++++++ 8 files changed, 360 insertions(+), 13 deletions(-) create mode 100644 tests/test_litellm/litellm_core_utils/llm_cost_calc/test_tool_call_cost_tracking_dict_safety.py create mode 100644 tests/test_litellm/litellm_core_utils/test_streaming_chunk_builder_server_tool_use.py create mode 100644 tests/test_litellm/llms/anthropic/test_cost_calculation_dict_safety.py diff --git a/litellm/litellm_core_utils/llm_cost_calc/tool_call_cost_tracking.py b/litellm/litellm_core_utils/llm_cost_calc/tool_call_cost_tracking.py index 8da66d4600..413ddb71bf 100644 --- a/litellm/litellm_core_utils/llm_cost_calc/tool_call_cost_tracking.py +++ b/litellm/litellm_core_utils/llm_cost_calc/tool_call_cost_tracking.py @@ -6,6 +6,7 @@ from typing import Any, Dict, List, Literal, Optional, Tuple import litellm from litellm.constants import OPENAI_FILE_SEARCH_COST_PER_1K_CALLS +from litellm.litellm_core_utils.llm_cost_calc.utils import _get_web_search_requests from litellm.types.llms.openai import ( FileSearchTool, ResponsesAPIResponse, @@ -339,8 +340,7 @@ class StandardBuiltInToolCostTracking: # and _handle_web_search_cost() is never called. if ( hasattr(usage, "server_tool_use") - and usage.server_tool_use is not None - and usage.server_tool_use.web_search_requests is not None + and _get_web_search_requests(usage.server_tool_use) is not None ): return True return False @@ -352,8 +352,7 @@ class StandardBuiltInToolCostTracking: elif usage is not None: if ( hasattr(usage, "server_tool_use") - and usage.server_tool_use is not None - and usage.server_tool_use.web_search_requests is not None + and _get_web_search_requests(usage.server_tool_use) is not None ): return True elif ( diff --git a/litellm/litellm_core_utils/llm_cost_calc/utils.py b/litellm/litellm_core_utils/llm_cost_calc/utils.py index f39c942f90..93049adf75 100644 --- a/litellm/litellm_core_utils/llm_cost_calc/utils.py +++ b/litellm/litellm_core_utils/llm_cost_calc/utils.py @@ -1,7 +1,7 @@ # What is this? ## Helper utilities for cost_per_token() -from typing import Literal, Optional, Tuple, TypedDict, cast +from typing import Any, Literal, Optional, Tuple, TypedDict, cast import litellm from litellm._logging import verbose_logger @@ -42,6 +42,26 @@ def _get_token_detail_value(details: object, key: str) -> Optional[int]: return value if isinstance(value, int) else None +def _get_web_search_requests(server_tool_use: Any) -> Optional[int]: + """ + Tolerantly read ``web_search_requests`` from a ``server_tool_use`` value + that may be ``None``, a ``dict``, a ``ServerToolUse`` pydantic instance, + or any other object supporting attribute access. + + Returns ``None`` when the value cannot be resolved — callers can + distinguish "absent" from "zero" using ``is None``. + + See https://github.com/BerriAI/litellm/issues/26153 — ``stream_chunk_builder`` + historically left this as a plain ``dict``, which broke direct attribute + access in cost calculation. + """ + if server_tool_use is None: + return None + if isinstance(server_tool_use, dict): + return server_tool_use.get("web_search_requests") + return getattr(server_tool_use, "web_search_requests", None) + + def _is_above_128k(tokens: float) -> bool: if tokens > 128000: return True diff --git a/litellm/litellm_core_utils/streaming_chunk_builder_utils.py b/litellm/litellm_core_utils/streaming_chunk_builder_utils.py index 6257cce9ae..b495b183ec 100644 --- a/litellm/litellm_core_utils/streaming_chunk_builder_utils.py +++ b/litellm/litellm_core_utils/streaming_chunk_builder_utils.py @@ -637,7 +637,18 @@ class ChunkProcessor: hasattr(usage_chunk, "server_tool_use") and usage_chunk.server_tool_use is not None ): - server_tool_use = usage_chunk.server_tool_use + # Coerce dict to ServerToolUse so downstream cost-calc code + # (which accesses .web_search_requests as an attribute) + # doesn't raise AttributeError. Some providers / streaming + # paths leave server_tool_use as a plain dict on the chunk. + if isinstance(usage_chunk.server_tool_use, dict): + server_tool_use = ServerToolUse(**usage_chunk.server_tool_use) + elif isinstance(usage_chunk.server_tool_use, ServerToolUse): + server_tool_use = usage_chunk.server_tool_use + else: + server_tool_use = ServerToolUse.model_validate( + usage_chunk.server_tool_use + ) if ( usage_chunk_dict["prompt_tokens_details"] is not None and getattr( diff --git a/litellm/llms/anthropic/cost_calculation.py b/litellm/llms/anthropic/cost_calculation.py index 3882d8f978..6a031498da 100644 --- a/litellm/llms/anthropic/cost_calculation.py +++ b/litellm/llms/anthropic/cost_calculation.py @@ -7,6 +7,7 @@ from typing import TYPE_CHECKING, Optional, Tuple from litellm.litellm_core_utils.llm_cost_calc.utils import ( _get_token_base_cost, + _get_web_search_requests, _parse_prompt_tokens_details, calculate_cache_writing_cost, generic_cost_per_token, @@ -110,11 +111,12 @@ def get_cost_for_anthropic_web_search( if model_info is None: return 0.0 - if ( - usage is None - or usage.server_tool_use is None - or usage.server_tool_use.web_search_requests is None - ): + if usage is None: + return 0.0 + web_search_requests = _get_web_search_requests( + getattr(usage, "server_tool_use", None) + ) + if web_search_requests is None: return 0.0 ## Get the cost per web search request @@ -128,5 +130,5 @@ def get_cost_for_anthropic_web_search( return 0.0 ## Calculate the total cost - total_cost = cost_per_web_search_request * usage.server_tool_use.web_search_requests + total_cost = cost_per_web_search_request * web_search_requests return total_cost diff --git a/tests/test_litellm/litellm_core_utils/llm_cost_calc/test_tool_call_cost_tracking_dict_safety.py b/tests/test_litellm/litellm_core_utils/llm_cost_calc/test_tool_call_cost_tracking_dict_safety.py new file mode 100644 index 0000000000..4eee6b59d3 --- /dev/null +++ b/tests/test_litellm/litellm_core_utils/llm_cost_calc/test_tool_call_cost_tracking_dict_safety.py @@ -0,0 +1,88 @@ +""" +Tests that the cost-tracking call sites tolerate ``server_tool_use`` being +either a ``dict`` or a ``ServerToolUse`` pydantic instance. + +See https://github.com/BerriAI/litellm/issues/26153. +""" + +import os +import sys + +import pytest + +sys.path.insert(0, os.path.abspath("../../../..")) + +from litellm.litellm_core_utils.llm_cost_calc.tool_call_cost_tracking import ( + StandardBuiltInToolCostTracking, + _get_web_search_requests, +) +from litellm.types.utils import ModelResponse, ServerToolUse, Usage + + +class _UsageWithDictServerToolUse: + """ + Tiny stand-in that mimics the broken streaming-rebuild shape: + ``server_tool_use`` is a plain dict. + """ + + def __init__(self, server_tool_use): + self.server_tool_use = server_tool_use + self.prompt_tokens_details = None + + +def test_get_web_search_requests_handles_none(): + assert _get_web_search_requests(None) is None + + +def test_get_web_search_requests_handles_dict(): + assert _get_web_search_requests({"web_search_requests": 5}) == 5 + + +def test_get_web_search_requests_handles_dict_missing_key(): + assert _get_web_search_requests({}) is None + + +def test_get_web_search_requests_handles_pydantic(): + stu = ServerToolUse(web_search_requests=7) + assert _get_web_search_requests(stu) == 7 + + +def test_get_web_search_requests_handles_pydantic_with_none_value(): + stu = ServerToolUse() + assert _get_web_search_requests(stu) is None + + +def test_response_object_includes_web_search_call_with_dict_server_tool_use(): + """ + The exact bug: ``usage.server_tool_use`` is a dict and the check in + ``response_object_includes_web_search_call`` used to crash with + ``AttributeError``. + """ + response = ModelResponse() + usage = _UsageWithDictServerToolUse({"web_search_requests": 2}) + + # Must not raise — and must correctly detect the web search call. + result = StandardBuiltInToolCostTracking.response_object_includes_web_search_call( + response_object=response, usage=usage # type: ignore[arg-type] + ) + assert result is True + + +def test_response_object_includes_web_search_call_with_pydantic_server_tool_use(): + response = ModelResponse() + usage = _UsageWithDictServerToolUse(ServerToolUse(web_search_requests=2)) + + result = StandardBuiltInToolCostTracking.response_object_includes_web_search_call( + response_object=response, usage=usage # type: ignore[arg-type] + ) + assert result is True + + +def test_response_object_includes_web_search_call_with_none_server_tool_use(): + response = ModelResponse() + usage = _UsageWithDictServerToolUse(None) + + result = StandardBuiltInToolCostTracking.response_object_includes_web_search_call( + response_object=response, usage=usage # type: ignore[arg-type] + ) + assert result is False diff --git a/tests/test_litellm/litellm_core_utils/test_streaming_chunk_builder_server_tool_use.py b/tests/test_litellm/litellm_core_utils/test_streaming_chunk_builder_server_tool_use.py new file mode 100644 index 0000000000..4e28d5ba7d --- /dev/null +++ b/tests/test_litellm/litellm_core_utils/test_streaming_chunk_builder_server_tool_use.py @@ -0,0 +1,130 @@ +""" +Regression tests for https://github.com/BerriAI/litellm/issues/26153 + +``stream_chunk_builder`` used to leave ``usage.server_tool_use`` as a plain +``dict`` when reconstructing a streaming response. Downstream cost-calculation +code (``StandardBuiltInToolCostTracking.response_object_includes_web_search_call`` +and ``get_cost_for_anthropic_web_search``) accesses +``usage.server_tool_use.web_search_requests`` as an attribute, which raised +``AttributeError: 'dict' object has no attribute 'web_search_requests'``. + +These tests reconstruct streaming chunks for an Anthropic-style web_search +response and assert: + +1. ``stream_chunk_builder`` returns ``ServerToolUse`` (not ``dict``) for + ``usage.server_tool_use``. +2. ``completion_cost`` runs end-to-end on the rebuilt response without + raising ``AttributeError``. +""" + +import os +import sys + +import pytest + +sys.path.insert(0, os.path.abspath("../../..")) + +from litellm import completion_cost, stream_chunk_builder +from litellm.types.utils import ( + Delta, + ModelResponseStream, + ServerToolUse, + StreamingChoices, + Usage, +) + + +def _make_text_chunk(text: str) -> ModelResponseStream: + return ModelResponseStream( + id="chatcmpl-test-26153", + created=1700000000, + model="claude-3-haiku-20240307", + object="chat.completion.chunk", + choices=[ + StreamingChoices( + finish_reason=None, + index=0, + delta=Delta(role="assistant", content=text), + ) + ], + ) + + +def _make_finish_chunk_with_usage_dict_server_tool_use() -> ModelResponseStream: + """Final chunk where server_tool_use is a *dict* — reproduces the bug shape.""" + return ModelResponseStream( + id="chatcmpl-test-26153", + created=1700000000, + model="claude-3-haiku-20240307", + object="chat.completion.chunk", + choices=[ + StreamingChoices( + finish_reason="stop", + index=0, + delta=Delta(), + ) + ], + usage=Usage( + prompt_tokens=42, + completion_tokens=11, + total_tokens=53, + # NOTE: passed as a dict on purpose — this is the shape that + # historically slipped through stream_chunk_builder unchanged. + server_tool_use={"web_search_requests": 3}, + ), + ) + + +def test_stream_chunk_builder_coerces_server_tool_use_to_pydantic(): + """ + Regression: stream_chunk_builder must produce ServerToolUse, not dict. + """ + chunks = [ + _make_text_chunk("Otters "), + _make_text_chunk("are great."), + _make_finish_chunk_with_usage_dict_server_tool_use(), + ] + + rebuilt = stream_chunk_builder(chunks) + + assert rebuilt is not None + assert rebuilt.usage is not None # type: ignore[attr-defined] + server_tool_use = rebuilt.usage.server_tool_use # type: ignore[attr-defined] + + assert ( + server_tool_use is not None + ), "server_tool_use should be carried through from the final chunk" + assert isinstance(server_tool_use, ServerToolUse), ( + f"expected ServerToolUse, got {type(server_tool_use).__name__}: " + f"{server_tool_use!r}" + ) + # Attribute access must not raise (this is exactly what was broken). + assert server_tool_use.web_search_requests == 3 + + +def test_completion_cost_does_not_raise_on_streaming_web_search_response(): + """ + Regression: completion_cost(...) must not raise AttributeError when the + response was reconstructed by stream_chunk_builder from a streaming + Anthropic web_search call. + """ + chunks = [ + _make_text_chunk("hello"), + _make_finish_chunk_with_usage_dict_server_tool_use(), + ] + + rebuilt = stream_chunk_builder(chunks) + assert rebuilt is not None + + # The exact dollar amount depends on the model-pricing table; what matters + # for this regression is that it does NOT raise AttributeError on + # `dict has no attribute 'web_search_requests'`. + try: + cost = completion_cost(completion_response=rebuilt) + except AttributeError as e: # pragma: no cover - regression guard + pytest.fail( + "completion_cost raised AttributeError after stream_chunk_builder " + f"(issue #26153 regression): {e}" + ) + + assert isinstance(cost, (int, float)) diff --git a/tests/test_litellm/litellm_core_utils/test_streaming_chunk_builder_utils.py b/tests/test_litellm/litellm_core_utils/test_streaming_chunk_builder_utils.py index 77765340c6..c579419452 100644 --- a/tests/test_litellm/litellm_core_utils/test_streaming_chunk_builder_utils.py +++ b/tests/test_litellm/litellm_core_utils/test_streaming_chunk_builder_utils.py @@ -520,7 +520,10 @@ def test_stream_chunk_builder_anthropic_web_search(): assert usage.prompt_tokens == 50 assert usage.completion_tokens == 27 assert usage.total_tokens == 77 - assert usage.server_tool_use["web_search_requests"] == 2 + # server_tool_use must be a ServerToolUse pydantic so downstream cost-calc + # (which uses attribute access) works. See issue #26153. + assert isinstance(usage.server_tool_use, ServerToolUse) + assert usage.server_tool_use.web_search_requests == 2 def test_sort_chunks_handles_dict_hidden_params_created_at(): diff --git a/tests/test_litellm/llms/anthropic/test_cost_calculation_dict_safety.py b/tests/test_litellm/llms/anthropic/test_cost_calculation_dict_safety.py new file mode 100644 index 0000000000..70fef0162e --- /dev/null +++ b/tests/test_litellm/llms/anthropic/test_cost_calculation_dict_safety.py @@ -0,0 +1,94 @@ +""" +Tests that ``get_cost_for_anthropic_web_search`` tolerates ``server_tool_use`` +being either a ``dict`` or a ``ServerToolUse`` pydantic instance. + +See https://github.com/BerriAI/litellm/issues/26153. +""" + +import os +import sys + +import pytest + +sys.path.insert(0, os.path.abspath("../../../..")) + +from litellm.llms.anthropic.cost_calculation import ( + _get_web_search_requests, + get_cost_for_anthropic_web_search, +) +from litellm.types.utils import ModelInfo, ServerToolUse + + +class _UsageWithServerToolUse: + def __init__(self, server_tool_use): + self.server_tool_use = server_tool_use + + +def _make_model_info(cost_per_query: float = 0.01) -> ModelInfo: + info: ModelInfo = { # type: ignore[typeddict-item] + "search_context_cost_per_query": { + "search_context_size_low": cost_per_query, + "search_context_size_medium": cost_per_query, + "search_context_size_high": cost_per_query, + } + } + return info + + +def test_get_web_search_requests_handles_none(): + assert _get_web_search_requests(None) is None + + +def test_get_web_search_requests_handles_dict(): + assert _get_web_search_requests({"web_search_requests": 4}) == 4 + + +def test_get_web_search_requests_handles_dict_missing_key(): + assert _get_web_search_requests({}) is None + + +def test_get_web_search_requests_handles_pydantic(): + assert _get_web_search_requests(ServerToolUse(web_search_requests=2)) == 2 + + +def test_get_cost_for_anthropic_web_search_with_dict_server_tool_use(): + """ + Regression: ``server_tool_use`` was a dict from ``stream_chunk_builder`` and + direct attribute access on it raised ``AttributeError``. + """ + usage = _UsageWithServerToolUse({"web_search_requests": 3}) + info = _make_model_info(cost_per_query=0.01) + + cost = get_cost_for_anthropic_web_search( + model_info=info, usage=usage # type: ignore[arg-type] + ) + + assert cost == pytest.approx(0.03) + + +def test_get_cost_for_anthropic_web_search_with_pydantic_server_tool_use(): + usage = _UsageWithServerToolUse(ServerToolUse(web_search_requests=3)) + info = _make_model_info(cost_per_query=0.01) + + cost = get_cost_for_anthropic_web_search( + model_info=info, usage=usage # type: ignore[arg-type] + ) + + assert cost == pytest.approx(0.03) + + +def test_get_cost_for_anthropic_web_search_with_none_server_tool_use(): + usage = _UsageWithServerToolUse(None) + info = _make_model_info(cost_per_query=0.01) + + cost = get_cost_for_anthropic_web_search( + model_info=info, usage=usage # type: ignore[arg-type] + ) + + assert cost == 0.0 + + +def test_get_cost_for_anthropic_web_search_with_no_usage(): + info = _make_model_info(cost_per_query=0.01) + cost = get_cost_for_anthropic_web_search(model_info=info, usage=None) + assert cost == 0.0