fix: completion_cost AttributeError on streaming Anthropic web_search responses (#26153) (#27346)

* fix: coerce server_tool_use dict to ServerToolUse in Usage.__init__ (#26153)

* fix: coerce server_tool_use to ServerToolUse in stream_chunk_builder (#26153)

* fix: dict/pydantic-tolerant access in tool_call_cost_tracking (#26153)

* fix: dict/pydantic-tolerant access in anthropic cost_calculation (#26153)

* test: assert ServerToolUse type in existing stream_chunk_builder anthropic web search test

* test: regression test for #26153 (stream_chunk_builder server_tool_use type)

* test: dict/pydantic safety for tool_call_cost_tracking helper

* test: dict/pydantic safety for anthropic web_search cost

* refactor: consolidate _get_web_search_requests into shared cost-calc utils

* test(realtime): use gpt-realtime; openai retired gpt-4o-realtime-preview

  OpenAI shut down the gpt-4o-realtime-preview family (incl. the undated
  alias) on 2026-05-07, causing the live realtime test to fail with a
  4000 invalid_request_error.invalid_model close. gpt-realtime is the GA
  successor; switch the live-call tests to it, matching the base branch.

* refactor(types): drop redundant server_tool_use coercion in Usage.__init__

---------

Co-authored-by: mateo-berri <277851410+mateo-berri@users.noreply.github.com>
2026-06-10 21:20:11 -07:00 · 2026-06-10 21:20:11 -07:00 · 4a3860df1f
commit 4a3860df1f
parent 6068bb7781
8 changed files with 360 additions and 13 deletions
--- a/litellm/litellm_core_utils/llm_cost_calc/tool_call_cost_tracking.py
+++ b/litellm/litellm_core_utils/llm_cost_calc/tool_call_cost_tracking.py
@@ -6,6 +6,7 @@ from typing import Any, Dict, List, Literal, Optional, Tuple

 import litellm
 from litellm.constants import OPENAI_FILE_SEARCH_COST_PER_1K_CALLS
+from litellm.litellm_core_utils.llm_cost_calc.utils import _get_web_search_requests
 from litellm.types.llms.openai import (
    FileSearchTool,
    ResponsesAPIResponse,
@@ -339,8 +340,7 @@ class StandardBuiltInToolCostTracking:
                # and _handle_web_search_cost() is never called.
                if (
                    hasattr(usage, "server_tool_use")
-                    and usage.server_tool_use is not None
-                    and usage.server_tool_use.web_search_requests is not None
+                    and _get_web_search_requests(usage.server_tool_use) is not None
                ):
                    return True
            return False
@@ -352,8 +352,7 @@ class StandardBuiltInToolCostTracking:
        elif usage is not None:
            if (
                hasattr(usage, "server_tool_use")
-                and usage.server_tool_use is not None
-                and usage.server_tool_use.web_search_requests is not None
+                and _get_web_search_requests(usage.server_tool_use) is not None
            ):
                return True
            elif (
--- a/litellm/litellm_core_utils/llm_cost_calc/utils.py
+++ b/litellm/litellm_core_utils/llm_cost_calc/utils.py
@@ -1,7 +1,7 @@
 # What is this?
 ## Helper utilities for cost_per_token()

-from typing import Literal, Optional, Tuple, TypedDict, cast
+from typing import Any, Literal, Optional, Tuple, TypedDict, cast

 import litellm
 from litellm._logging import verbose_logger
@@ -42,6 +42,26 @@ def _get_token_detail_value(details: object, key: str) -> Optional[int]:
    return value if isinstance(value, int) else None


+def _get_web_search_requests(server_tool_use: Any) -> Optional[int]:
+    """
+    Tolerantly read ``web_search_requests`` from a ``server_tool_use`` value
+    that may be ``None``, a ``dict``, a ``ServerToolUse`` pydantic instance,
+    or any other object supporting attribute access.
+
+    Returns ``None`` when the value cannot be resolved — callers can
+    distinguish "absent" from "zero" using ``is None``.
+
+    See https://github.com/BerriAI/litellm/issues/26153 — ``stream_chunk_builder``
+    historically left this as a plain ``dict``, which broke direct attribute
+    access in cost calculation.
+    """
+    if server_tool_use is None:
+        return None
+    if isinstance(server_tool_use, dict):
+        return server_tool_use.get("web_search_requests")
+    return getattr(server_tool_use, "web_search_requests", None)
+
+
 def _is_above_128k(tokens: float) -> bool:
    if tokens > 128000:
        return True
--- a/litellm/litellm_core_utils/streaming_chunk_builder_utils.py
+++ b/litellm/litellm_core_utils/streaming_chunk_builder_utils.py
@@ -637,7 +637,18 @@ class ChunkProcessor:
                    hasattr(usage_chunk, "server_tool_use")
                    and usage_chunk.server_tool_use is not None
                ):
-                    server_tool_use = usage_chunk.server_tool_use
+                    # Coerce dict to ServerToolUse so downstream cost-calc code
+                    # (which accesses .web_search_requests as an attribute)
+                    # doesn't raise AttributeError. Some providers / streaming
+                    # paths leave server_tool_use as a plain dict on the chunk.
+                    if isinstance(usage_chunk.server_tool_use, dict):
+                        server_tool_use = ServerToolUse(**usage_chunk.server_tool_use)
+                    elif isinstance(usage_chunk.server_tool_use, ServerToolUse):
+                        server_tool_use = usage_chunk.server_tool_use
+                    else:
+                        server_tool_use = ServerToolUse.model_validate(
+                            usage_chunk.server_tool_use
+                        )
                if (
                    usage_chunk_dict["prompt_tokens_details"] is not None
                    and getattr(
--- a/litellm/llms/anthropic/cost_calculation.py
+++ b/litellm/llms/anthropic/cost_calculation.py
@@ -7,6 +7,7 @@ from typing import TYPE_CHECKING, Optional, Tuple

 from litellm.litellm_core_utils.llm_cost_calc.utils import (
    _get_token_base_cost,
+    _get_web_search_requests,
    _parse_prompt_tokens_details,
    calculate_cache_writing_cost,
    generic_cost_per_token,
@@ -110,11 +111,12 @@ def get_cost_for_anthropic_web_search(
    if model_info is None:
        return 0.0

-    if (
-        usage is None
-        or usage.server_tool_use is None
-        or usage.server_tool_use.web_search_requests is None
-    ):
+    if usage is None:
+        return 0.0
+    web_search_requests = _get_web_search_requests(
+        getattr(usage, "server_tool_use", None)
+    )
+    if web_search_requests is None:
        return 0.0

    ## Get the cost per web search request
@@ -128,5 +130,5 @@ def get_cost_for_anthropic_web_search(
        return 0.0

    ## Calculate the total cost
-    total_cost = cost_per_web_search_request * usage.server_tool_use.web_search_requests
+    total_cost = cost_per_web_search_request * web_search_requests
    return total_cost
--- a/tests/test_litellm/litellm_core_utils/llm_cost_calc/test_tool_call_cost_tracking_dict_safety.py
+++ b/tests/test_litellm/litellm_core_utils/llm_cost_calc/test_tool_call_cost_tracking_dict_safety.py
@@ -0,0 +1,88 @@
+"""
+Tests that the cost-tracking call sites tolerate ``server_tool_use`` being
+either a ``dict`` or a ``ServerToolUse`` pydantic instance.
+
+See https://github.com/BerriAI/litellm/issues/26153.
+"""
+
+import os
+import sys
+
+import pytest
+
+sys.path.insert(0, os.path.abspath("../../../.."))
+
+from litellm.litellm_core_utils.llm_cost_calc.tool_call_cost_tracking import (
+    StandardBuiltInToolCostTracking,
+    _get_web_search_requests,
+)
+from litellm.types.utils import ModelResponse, ServerToolUse, Usage
+
+
+class _UsageWithDictServerToolUse:
+    """
+    Tiny stand-in that mimics the broken streaming-rebuild shape:
+    ``server_tool_use`` is a plain dict.
+    """
+
+    def __init__(self, server_tool_use):
+        self.server_tool_use = server_tool_use
+        self.prompt_tokens_details = None
+
+
+def test_get_web_search_requests_handles_none():
+    assert _get_web_search_requests(None) is None
+
+
+def test_get_web_search_requests_handles_dict():
+    assert _get_web_search_requests({"web_search_requests": 5}) == 5
+
+
+def test_get_web_search_requests_handles_dict_missing_key():
+    assert _get_web_search_requests({}) is None
+
+
+def test_get_web_search_requests_handles_pydantic():
+    stu = ServerToolUse(web_search_requests=7)
+    assert _get_web_search_requests(stu) == 7
+
+
+def test_get_web_search_requests_handles_pydantic_with_none_value():
+    stu = ServerToolUse()
+    assert _get_web_search_requests(stu) is None
+
+
+def test_response_object_includes_web_search_call_with_dict_server_tool_use():
+    """
+    The exact bug: ``usage.server_tool_use`` is a dict and the check in
+    ``response_object_includes_web_search_call`` used to crash with
+    ``AttributeError``.
+    """
+    response = ModelResponse()
+    usage = _UsageWithDictServerToolUse({"web_search_requests": 2})
+
+    # Must not raise — and must correctly detect the web search call.
+    result = StandardBuiltInToolCostTracking.response_object_includes_web_search_call(
+        response_object=response, usage=usage  # type: ignore[arg-type]
+    )
+    assert result is True
+
+
+def test_response_object_includes_web_search_call_with_pydantic_server_tool_use():
+    response = ModelResponse()
+    usage = _UsageWithDictServerToolUse(ServerToolUse(web_search_requests=2))
+
+    result = StandardBuiltInToolCostTracking.response_object_includes_web_search_call(
+        response_object=response, usage=usage  # type: ignore[arg-type]
+    )
+    assert result is True
+
+
+def test_response_object_includes_web_search_call_with_none_server_tool_use():
+    response = ModelResponse()
+    usage = _UsageWithDictServerToolUse(None)
+
+    result = StandardBuiltInToolCostTracking.response_object_includes_web_search_call(
+        response_object=response, usage=usage  # type: ignore[arg-type]
+    )
+    assert result is False
--- a/tests/test_litellm/litellm_core_utils/test_streaming_chunk_builder_server_tool_use.py
+++ b/tests/test_litellm/litellm_core_utils/test_streaming_chunk_builder_server_tool_use.py
@@ -0,0 +1,130 @@
+"""
+Regression tests for https://github.com/BerriAI/litellm/issues/26153
+
+``stream_chunk_builder`` used to leave ``usage.server_tool_use`` as a plain
+``dict`` when reconstructing a streaming response. Downstream cost-calculation
+code (``StandardBuiltInToolCostTracking.response_object_includes_web_search_call``
+and ``get_cost_for_anthropic_web_search``) accesses
+``usage.server_tool_use.web_search_requests`` as an attribute, which raised
+``AttributeError: 'dict' object has no attribute 'web_search_requests'``.
+
+These tests reconstruct streaming chunks for an Anthropic-style web_search
+response and assert:
+
+1. ``stream_chunk_builder`` returns ``ServerToolUse`` (not ``dict``) for
+   ``usage.server_tool_use``.
+2. ``completion_cost`` runs end-to-end on the rebuilt response without
+   raising ``AttributeError``.
+"""
+
+import os
+import sys
+
+import pytest
+
+sys.path.insert(0, os.path.abspath("../../.."))
+
+from litellm import completion_cost, stream_chunk_builder
+from litellm.types.utils import (
+    Delta,
+    ModelResponseStream,
+    ServerToolUse,
+    StreamingChoices,
+    Usage,
+)
+
+
+def _make_text_chunk(text: str) -> ModelResponseStream:
+    return ModelResponseStream(
+        id="chatcmpl-test-26153",
+        created=1700000000,
+        model="claude-3-haiku-20240307",
+        object="chat.completion.chunk",
+        choices=[
+            StreamingChoices(
+                finish_reason=None,
+                index=0,
+                delta=Delta(role="assistant", content=text),
+            )
+        ],
+    )
+
+
+def _make_finish_chunk_with_usage_dict_server_tool_use() -> ModelResponseStream:
+    """Final chunk where server_tool_use is a *dict* — reproduces the bug shape."""
+    return ModelResponseStream(
+        id="chatcmpl-test-26153",
+        created=1700000000,
+        model="claude-3-haiku-20240307",
+        object="chat.completion.chunk",
+        choices=[
+            StreamingChoices(
+                finish_reason="stop",
+                index=0,
+                delta=Delta(),
+            )
+        ],
+        usage=Usage(
+            prompt_tokens=42,
+            completion_tokens=11,
+            total_tokens=53,
+            # NOTE: passed as a dict on purpose — this is the shape that
+            # historically slipped through stream_chunk_builder unchanged.
+            server_tool_use={"web_search_requests": 3},
+        ),
+    )
+
+
+def test_stream_chunk_builder_coerces_server_tool_use_to_pydantic():
+    """
+    Regression: stream_chunk_builder must produce ServerToolUse, not dict.
+    """
+    chunks = [
+        _make_text_chunk("Otters "),
+        _make_text_chunk("are great."),
+        _make_finish_chunk_with_usage_dict_server_tool_use(),
+    ]
+
+    rebuilt = stream_chunk_builder(chunks)
+
+    assert rebuilt is not None
+    assert rebuilt.usage is not None  # type: ignore[attr-defined]
+    server_tool_use = rebuilt.usage.server_tool_use  # type: ignore[attr-defined]
+
+    assert (
+        server_tool_use is not None
+    ), "server_tool_use should be carried through from the final chunk"
+    assert isinstance(server_tool_use, ServerToolUse), (
+        f"expected ServerToolUse, got {type(server_tool_use).__name__}: "
+        f"{server_tool_use!r}"
+    )
+    # Attribute access must not raise (this is exactly what was broken).
+    assert server_tool_use.web_search_requests == 3
+
+
+def test_completion_cost_does_not_raise_on_streaming_web_search_response():
+    """
+    Regression: completion_cost(...) must not raise AttributeError when the
+    response was reconstructed by stream_chunk_builder from a streaming
+    Anthropic web_search call.
+    """
+    chunks = [
+        _make_text_chunk("hello"),
+        _make_finish_chunk_with_usage_dict_server_tool_use(),
+    ]
+
+    rebuilt = stream_chunk_builder(chunks)
+    assert rebuilt is not None
+
+    # The exact dollar amount depends on the model-pricing table; what matters
+    # for this regression is that it does NOT raise AttributeError on
+    # `dict has no attribute 'web_search_requests'`.
+    try:
+        cost = completion_cost(completion_response=rebuilt)
+    except AttributeError as e:  # pragma: no cover - regression guard
+        pytest.fail(
+            "completion_cost raised AttributeError after stream_chunk_builder "
+            f"(issue #26153 regression): {e}"
+        )
+
+    assert isinstance(cost, (int, float))
--- a/tests/test_litellm/litellm_core_utils/test_streaming_chunk_builder_utils.py
+++ b/tests/test_litellm/litellm_core_utils/test_streaming_chunk_builder_utils.py
@@ -520,7 +520,10 @@ def test_stream_chunk_builder_anthropic_web_search():
    assert usage.prompt_tokens == 50
    assert usage.completion_tokens == 27
    assert usage.total_tokens == 77
-    assert usage.server_tool_use["web_search_requests"] == 2
+    # server_tool_use must be a ServerToolUse pydantic so downstream cost-calc
+    # (which uses attribute access) works. See issue #26153.
+    assert isinstance(usage.server_tool_use, ServerToolUse)
+    assert usage.server_tool_use.web_search_requests == 2


 def test_sort_chunks_handles_dict_hidden_params_created_at():
--- a/tests/test_litellm/llms/anthropic/test_cost_calculation_dict_safety.py
+++ b/tests/test_litellm/llms/anthropic/test_cost_calculation_dict_safety.py
@@ -0,0 +1,94 @@
+"""
+Tests that ``get_cost_for_anthropic_web_search`` tolerates ``server_tool_use``
+being either a ``dict`` or a ``ServerToolUse`` pydantic instance.
+
+See https://github.com/BerriAI/litellm/issues/26153.
+"""
+
+import os
+import sys
+
+import pytest
+
+sys.path.insert(0, os.path.abspath("../../../.."))
+
+from litellm.llms.anthropic.cost_calculation import (
+    _get_web_search_requests,
+    get_cost_for_anthropic_web_search,
+)
+from litellm.types.utils import ModelInfo, ServerToolUse
+
+
+class _UsageWithServerToolUse:
+    def __init__(self, server_tool_use):
+        self.server_tool_use = server_tool_use
+
+
+def _make_model_info(cost_per_query: float = 0.01) -> ModelInfo:
+    info: ModelInfo = {  # type: ignore[typeddict-item]
+        "search_context_cost_per_query": {
+            "search_context_size_low": cost_per_query,
+            "search_context_size_medium": cost_per_query,
+            "search_context_size_high": cost_per_query,
+        }
+    }
+    return info
+
+
+def test_get_web_search_requests_handles_none():
+    assert _get_web_search_requests(None) is None
+
+
+def test_get_web_search_requests_handles_dict():
+    assert _get_web_search_requests({"web_search_requests": 4}) == 4
+
+
+def test_get_web_search_requests_handles_dict_missing_key():
+    assert _get_web_search_requests({}) is None
+
+
+def test_get_web_search_requests_handles_pydantic():
+    assert _get_web_search_requests(ServerToolUse(web_search_requests=2)) == 2
+
+
+def test_get_cost_for_anthropic_web_search_with_dict_server_tool_use():
+    """
+    Regression: ``server_tool_use`` was a dict from ``stream_chunk_builder`` and
+    direct attribute access on it raised ``AttributeError``.
+    """
+    usage = _UsageWithServerToolUse({"web_search_requests": 3})
+    info = _make_model_info(cost_per_query=0.01)
+
+    cost = get_cost_for_anthropic_web_search(
+        model_info=info, usage=usage  # type: ignore[arg-type]
+    )
+
+    assert cost == pytest.approx(0.03)
+
+
+def test_get_cost_for_anthropic_web_search_with_pydantic_server_tool_use():
+    usage = _UsageWithServerToolUse(ServerToolUse(web_search_requests=3))
+    info = _make_model_info(cost_per_query=0.01)
+
+    cost = get_cost_for_anthropic_web_search(
+        model_info=info, usage=usage  # type: ignore[arg-type]
+    )
+
+    assert cost == pytest.approx(0.03)
+
+
+def test_get_cost_for_anthropic_web_search_with_none_server_tool_use():
+    usage = _UsageWithServerToolUse(None)
+    info = _make_model_info(cost_per_query=0.01)
+
+    cost = get_cost_for_anthropic_web_search(
+        model_info=info, usage=usage  # type: ignore[arg-type]
+    )
+
+    assert cost == 0.0
+
+
+def test_get_cost_for_anthropic_web_search_with_no_usage():
+    info = _make_model_info(cost_per_query=0.01)
+    cost = get_cost_for_anthropic_web_search(model_info=info, usage=None)
+    assert cost == 0.0