fix(bedrock-mantle): use /anthropic/v1/messages path for Mantle endpoint (#27976)

* fix(bedrock-mantle): use /anthropic/v1/messages path for Mantle endpoint (#27943) * docs: add one-line docstring to _disable_debugging (#27894) Squash-merged by litellm-agent from oss-agent-shin's PR. * Add jp. Bedrock cross-region inference profile for claude-sonnet-4-6 (#27831) Squash-merged by litellm-agent from Cyberfilo's PR. * Sanitize empty text content blocks on /v1/messages (#27832) Squash-merged by litellm-agent from Cyberfilo's PR. * fix(bedrock-mantle): use /anthropic/v1/messages path for Mantle endpoint The bedrock-mantle gateway (Claude Mythos Preview) serves the Anthropic Messages API at /anthropic/v1/messages; /v1/messages returns 404 Not Found. Both AmazonMantleConfig (chat/completions caller route) and AmazonMantleMessagesConfig (anthropic-messages caller route) hardcoded the wrong path, so every Mantle request 404'd before reaching the model. Per the Anthropic docs: "[Claude in Amazon Bedrock] uses the Messages API at /anthropic/v1/messages with SSE streaming." https://platform.claude.com/docs/en/api/claude-on-amazon-bedrock Confirmed independently against the live endpoint: /v1/chat/completions -> 200 OK /v1/messages -> 404 Not Found (what litellm used) /anthropic/v1/messages -> 200 OK (Claude only) Adds a regression test asserting both Mantle configs build the /anthropic/v1/messages path, and updates the existing assertions that encoded the wrong path. --------- Co-authored-by: oss-agent-shin <ext-agent-shin@berri.ai> Co-authored-by: Filippo Menghi <113345637+Cyberfilo@users.noreply.github.com> * fix: sanitize empty text blocks in sync anthropic_messages_handler path Co-authored-by: Yassin Kortam <yassin@berri.ai> --------- Co-authored-by: João Costa <13508071+jpv-costa@users.noreply.github.com> Co-authored-by: oss-agent-shin <ext-agent-shin@berri.ai> Co-authored-by: Filippo Menghi <113345637+Cyberfilo@users.noreply.github.com> Co-authored-by: Cursor Agent <cursoragent@cursor.com> Co-authored-by: Yassin Kortam <yassin@berri.ai>
2026-05-15 13:31:59 -07:00 · 2026-05-15 13:31:59 -07:00 · f9ba70d357
commit f9ba70d357
parent 50df072d95
11 changed files with 328 additions and 8 deletions
--- a/litellm/_logging.py
+++ b/litellm/_logging.py
@ -404,6 +404,7 @@ def _turn_on_debug():


 def _disable_debugging():
+    """Disable the package, router, and proxy verbose loggers."""
    verbose_logger.disabled = True
    verbose_router_logger.disabled = True
    verbose_proxy_logger.disabled = True
--- a/litellm/llms/anthropic/common_utils.py
+++ b/litellm/llms/anthropic/common_utils.py
@ -832,6 +832,49 @@ def strip_thinking_blocks_from_anthropic_messages_request_dict(
    data.pop("thinking", None)


+def strip_empty_text_blocks_from_anthropic_messages(
+    messages: List[Any],
+) -> List[Any]:
+    """
+    Return a new message list with empty or whitespace-only ``{"type": "text"}``
+    content blocks removed.
+
+    Anthropic's API rejects requests containing such blocks with
+    ``"messages: text content blocks must be non-empty"``, but assistant
+    messages from Anthropic routinely arrive with ``{"type": "text", "text": ""}``
+    alongside ``tool_use`` blocks (see anthropics/anthropic-sdk-python#461).
+    Multi-turn tool-use clients (e.g. Claude Code) loop these prior responses
+    back as conversation history, which then causes the next request to 400
+    on the unified ``/v1/messages`` path.  ``/v1/chat/completions`` already
+    handles this in ``anthropic_messages_pt``; this helper provides the
+    equivalent guarantee for the native Anthropic Messages path.
+
+    Messages whose content is a list and becomes empty after stripping are
+    omitted, matching :func:`strip_thinking_blocks_from_anthropic_messages`.
+    The caller's list and its content blocks are never mutated; modified
+    messages are returned as shallow copies with a fresh content list.
+    """
+    out: List[Any] = []
+    for m in messages:
+        # Non-dict entries and messages whose content is not a list (for
+        # example a plain string) have no blocks to filter; pass them through.
+        if not isinstance(m, dict) or not isinstance(m.get("content"), list):
+            out.append(m)
+            continue
+        content = m["content"]
+        filtered = [b for b in content if not _is_empty_text_block(b)]
+        if len(filtered) == len(content):
+            # Nothing was removed: reuse the caller's message object as-is.
+            out.append(m)
+        elif filtered:
+            # Some blocks were removed: emit a shallow copy with a new list.
+            out.append({**m, "content": filtered})
+        # Otherwise every block was empty text, so the message is omitted.
+    return out
+
+
+def _is_empty_text_block(block: Any) -> bool:
+    """
+    Return True when ``block`` is a ``{"type": "text"}`` dict whose ``text``
+    is missing, not a string, or empty once surrounding whitespace is stripped.
+
+    Anything that is not a dict, or whose ``type`` is not ``"text"``, returns
+    False.
+    """
+    if not isinstance(block, dict) or block.get("type") != "text":
+        return False
+    # ``get`` yields None for a missing key, which the isinstance check below
+    # treats the same as any other non-string value: empty.
+    text = block.get("text")
+    return not isinstance(text, str) or not text.strip()
+
+
 def process_anthropic_headers(headers: Union[httpx.Headers, dict]) -> dict:
    openai_headers = {}
    if "anthropic-ratelimit-requests-limit" in headers:
--- a/litellm/llms/anthropic/experimental_pass_through/messages/handler.py
+++ b/litellm/llms/anthropic/experimental_pass_through/messages/handler.py
@ -12,6 +12,9 @@ from typing import Any, AsyncIterator, Coroutine, Dict, List, Optional, Union, c

 import litellm
 from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj
+from litellm.llms.anthropic.common_utils import (
+    strip_empty_text_blocks_from_anthropic_messages,
+)
 from litellm.llms.base_llm.anthropic_messages.transformation import (
    BaseAnthropicMessagesConfig,
 )
@ -188,8 +191,20 @@ async def anthropic_messages(
    **kwargs,
 ) -> Union[AnthropicMessagesResponse, AsyncIterator]:
    """
-    Async: Make llm api request in Anthropic /messages API spec
+    Async: Make llm api request in Anthropic /messages API spec.
+
+    Runs the empty-text-block sanitizer before any backend dispatch.
    """
+    # Anthropic's API rejects requests containing empty / whitespace-only
+    # text content blocks with "messages: text content blocks must be
+    # non-empty".  Multi-turn tool-use clients (e.g. Claude Code) routinely
+    # loop assistant responses that contain {"type": "text", "text": ""}
+    # alongside tool_use blocks back as conversation history, which then
+    # causes the next /v1/messages call to 400.  /v1/chat/completions
+    # already handles this in anthropic_messages_pt; sanitize the native
+    # Anthropic Messages path here for the same guarantee.  See #22930.
+    messages = strip_empty_text_blocks_from_anthropic_messages(messages)
+
    original_stream = stream or kwargs.get(
        "_websearch_interception_converted_stream", False
    )
@ -336,6 +351,11 @@ def anthropic_messages_handler(
    """
    from litellm.types.utils import LlmProviders

+    # Sanitize empty text blocks here too so the sync entry point
+    # (litellm.messages.create -> anthropic_messages_handler) gets the same
+    # protection as the async wrapper.  Idempotent when called twice.
+    messages = strip_empty_text_blocks_from_anthropic_messages(messages)
+
    metadata = validate_anthropic_api_metadata(metadata)

    local_vars = locals()
--- a/litellm/llms/bedrock/chat/mantle/transformation.py
+++ b/litellm/llms/bedrock/chat/mantle/transformation.py
@ -21,7 +21,9 @@ if TYPE_CHECKING:
 else:
    LiteLLMLoggingObj = Any

-MANTLE_ENDPOINT_TEMPLATE = "https://bedrock-mantle.{region}.api.aws/v1/messages"
+MANTLE_ENDPOINT_TEMPLATE = (
+    "https://bedrock-mantle.{region}.api.aws/anthropic/v1/messages"
+)


 class AmazonMantleConfig(AmazonAnthropicClaudeConfig):
--- a/litellm/llms/bedrock/messages/mantle_transformation.py
+++ b/litellm/llms/bedrock/messages/mantle_transformation.py
@ -20,7 +20,9 @@ if TYPE_CHECKING:
 else:
    LiteLLMLoggingObj = Any

-MANTLE_ENDPOINT_TEMPLATE = "https://bedrock-mantle.{region}.api.aws/v1/messages"
+MANTLE_ENDPOINT_TEMPLATE = (
+    "https://bedrock-mantle.{region}.api.aws/anthropic/v1/messages"
+)


 class AmazonMantleMessagesConfig(AmazonAnthropicClaudeMessagesConfig):
--- a/model_prices_and_context_window.json
+++ b/model_prices_and_context_window.json
@ -1448,6 +1448,35 @@
        "supports_native_structured_output": true,
        "supports_minimal_reasoning_effort": true
    },
+    "jp.anthropic.claude-sonnet-4-6": {
+        "cache_creation_input_token_cost": 4.125e-06,
+        "cache_read_input_token_cost": 3.3e-07,
+        "input_cost_per_token": 3.3e-06,
+        "litellm_provider": "bedrock_converse",
+        "max_input_tokens": 1000000,
+        "max_output_tokens": 64000,
+        "max_tokens": 64000,
+        "mode": "chat",
+        "output_cost_per_token": 1.65e-05,
+        "search_context_cost_per_query": {
+            "search_context_size_high": 0.01,
+            "search_context_size_low": 0.01,
+            "search_context_size_medium": 0.01
+        },
+        "supports_assistant_prefill": true,
+        "supports_computer_use": true,
+        "supports_function_calling": true,
+        "supports_pdf_input": true,
+        "supports_prompt_caching": true,
+        "supports_reasoning": true,
+        "supports_response_schema": true,
+        "supports_max_reasoning_effort": true,
+        "supports_tool_choice": true,
+        "supports_vision": true,
+        "tool_use_system_prompt_tokens": 346,
+        "supports_native_structured_output": true,
+        "supports_minimal_reasoning_effort": true
+    },
    "anthropic.claude-sonnet-4-20250514-v1:0": {
        "cache_creation_input_token_cost": 3.75e-06,
        "cache_read_input_token_cost": 3e-07,
--- a/tests/llm_translation/test_bedrock_mantle.py
+++ b/tests/llm_translation/test_bedrock_mantle.py
@ -23,7 +23,7 @@ from litellm.llms.custom_httpx.http_handler import HTTPHandler

 MODEL = "bedrock/mantle/anthropic.claude-mythos-preview"
 REGION = "us-east-1"
-EXPECTED_URL = f"https://bedrock-mantle.{REGION}.api.aws/v1/messages"
+EXPECTED_URL = f"https://bedrock-mantle.{REGION}.api.aws/anthropic/v1/messages"

 FAKE_ANTHROPIC_RESPONSE = {
    "id": "msg_fake123",
@ -143,7 +143,7 @@ def test_mantle_region_reflected_in_url():
                pass

            call_kwargs = mock_post.call_args.kwargs
-            expected = f"https://bedrock-mantle.{region}.api.aws/v1/messages"
+            expected = f"https://bedrock-mantle.{region}.api.aws/anthropic/v1/messages"
            assert (
                call_kwargs["url"] == expected
            ), f"region={region}: expected URL {expected}, got {call_kwargs['url']}"
--- a/tests/test_litellm/llms/anthropic/experimental_pass_through/messages/test_anthropic_experimental_pass_through_messages_handler.py
+++ b/tests/test_litellm/llms/anthropic/experimental_pass_through/messages/test_anthropic_experimental_pass_through_messages_handler.py
@ -64,6 +64,51 @@ def test_anthropic_experimental_pass_through_messages_handler_dynamic_api_key_an
        assert mock_completion.call_args.kwargs["custom_key"] == "custom_value"


+@pytest.mark.asyncio
+async def test_anthropic_messages_sanitizes_empty_text_blocks_before_dispatch():
+    """Regression test for #22930.  The unified /v1/messages path must
+    strip empty text blocks before forwarding, otherwise Anthropic
+    returns 400 "text content blocks must be non-empty"."""
+    from litellm.llms.anthropic.experimental_pass_through.messages import handler
+
+    # Assistant turn carrying an empty text block next to a tool_use block.
+    msgs = [
+        {
+            "role": "assistant",
+            "content": [
+                {"type": "text", "text": ""},
+                {"type": "tool_use", "id": "t", "name": "B", "input": {}},
+            ],
+        }
+    ]
+    captured = {}
+
+    # Stand-in for the sync handler: records the ``messages`` kwarg it is given.
+    def fake_handler(*args, **kwargs):
+        captured["messages"] = kwargs.get("messages")
+        return "stub"
+
+    # Fake loop whose run_in_executor calls ``func`` inline and hands back an
+    # awaitable of the result, so no real executor is involved.
+    # NOTE(review): presumably anthropic_messages dispatches through
+    # asyncio.get_event_loop().run_in_executor(...) -- confirm in handler.py.
+    fake_loop = MagicMock()
+    fake_loop.run_in_executor = lambda _e, func: _async_return(func())
+
+    with (
+        patch.object(handler, "anthropic_messages_handler", side_effect=fake_handler),
+        patch("asyncio.get_event_loop", return_value=fake_loop),
+    ):
+        await handler.anthropic_messages(
+            max_tokens=100,
+            messages=msgs,
+            model="anthropic/claude-sonnet-4-5-20250929",
+            custom_llm_provider="anthropic",
+            api_key="k",
+        )
+
+    # Only the tool_use block may reach the handler; the input list keeps both.
+    assert [b["type"] for b in captured["messages"][0]["content"]] == ["tool_use"]
+    assert len(msgs[0]["content"]) == 2  # caller untouched
+
+
+async def _async_return(value):
+    """Coroutine that returns ``value`` unchanged, giving tests an awaitable."""
+    return value
+
+
 def test_anthropic_experimental_pass_through_messages_handler_custom_llm_provider():
    """
    Test that litellm.completion is called when a custom LLM provider is given
--- a/tests/test_litellm/llms/anthropic/test_anthropic_common_utils.py
+++ b/tests/test_litellm/llms/anthropic/test_anthropic_common_utils.py
@ -1229,6 +1229,104 @@ class TestAnthropicThinkingSignatureSelfHeal:
        assert "thinking" not in data
        assert data["messages"] == []

+    def test_strip_empty_text_blocks_from_anthropic_messages(self):
+        """Covers #22930.  The core regression scenario: an assistant message
+        with an empty text block alongside ``tool_use`` loses the empty block
+        and keeps the ``tool_use``; a whole message that reduces to no blocks
+        is dropped; whitespace-only text counts as empty; the caller's list
+        is never mutated."""
+        from litellm.llms.anthropic.common_utils import (
+            strip_empty_text_blocks_from_anthropic_messages,
+        )
+
+        tu = {"type": "tool_use", "id": "x", "name": "Bash", "input": {}}
+        # Three cases in one list: string content (passed through), a
+        # whitespace-only text block next to tool_use, and a text-only message.
+        msgs = [
+            {"role": "user", "content": "hello"},
+            {"role": "assistant", "content": [{"type": "text", "text": "  \n "}, tu]},
+            {"role": "assistant", "content": [{"type": "text", "text": ""}]},
+        ]
+        out = strip_empty_text_blocks_from_anthropic_messages(msgs)
+        # The third message is dropped; the first is returned by identity.
+        assert len(out) == 2 and out[0] is msgs[0]
+        assert [b["type"] for b in out[1]["content"]] == ["tool_use"]
+        assert len(msgs[1]["content"]) == 2  # caller's content unchanged
+
+    def test_strip_empty_text_blocks_preserves_thinking_blocks(self):
+        """A ``thinking`` block survives when the empty text block beside it
+        is stripped."""
+        from litellm.llms.anthropic.common_utils import (
+            strip_empty_text_blocks_from_anthropic_messages,
+        )
+
+        msgs = [
+            {
+                "role": "assistant",
+                "content": [
+                    {"type": "thinking", "thinking": "plan", "signature": "sig"},
+                    {"type": "text", "text": ""},
+                ],
+            }
+        ]
+        out = strip_empty_text_blocks_from_anthropic_messages(msgs)
+        assert [b["type"] for b in out[0]["content"]] == ["thinking"]
+
+    def test_strip_empty_text_blocks_treats_null_text_as_empty(self):
+        """A text block whose ``text`` is ``None`` is removed; the neighbouring
+        ``tool_result`` block is kept."""
+        from litellm.llms.anthropic.common_utils import (
+            strip_empty_text_blocks_from_anthropic_messages,
+        )
+
+        msgs = [
+            {
+                "role": "user",
+                "content": [
+                    {"type": "text", "text": None},
+                    {"type": "tool_result", "tool_use_id": "x", "content": "y"},
+                ],
+            }
+        ]
+        out = strip_empty_text_blocks_from_anthropic_messages(msgs)
+        assert [b["type"] for b in out[0]["content"]] == ["tool_result"]
+
+    def test_strip_empty_text_blocks_treats_missing_text_key_as_empty(self):
+        """A text block with no ``text`` key at all is removed; the neighbouring
+        ``tool_result`` block is kept."""
+        from litellm.llms.anthropic.common_utils import (
+            strip_empty_text_blocks_from_anthropic_messages,
+        )
+
+        msgs = [
+            {
+                "role": "user",
+                "content": [
+                    {"type": "text"},
+                    {"type": "tool_result", "tool_use_id": "x", "content": "y"},
+                ],
+            }
+        ]
+        out = strip_empty_text_blocks_from_anthropic_messages(msgs)
+        assert [b["type"] for b in out[0]["content"]] == ["tool_result"]
+
+    def test_strip_empty_text_blocks_leaves_non_empty_text_alone(self):
+        """A message with only non-empty text is returned as the same object,
+        not a copy."""
+        from litellm.llms.anthropic.common_utils import (
+            strip_empty_text_blocks_from_anthropic_messages,
+        )
+
+        msgs = [{"role": "assistant", "content": [{"type": "text", "text": "hi"}]}]
+        out = strip_empty_text_blocks_from_anthropic_messages(msgs)
+        assert out[0] is msgs[0]  # untouched messages keep identity
+
+    def test_strip_empty_text_blocks_treats_non_string_text_value_as_empty(self):
+        """A text block whose ``text`` is a non-string value (here an int) is
+        removed; the neighbouring ``tool_result`` block is kept."""
+        from litellm.llms.anthropic.common_utils import (
+            strip_empty_text_blocks_from_anthropic_messages,
+        )
+
+        msgs = [
+            {
+                "role": "user",
+                "content": [
+                    {"type": "text", "text": 123},
+                    {"type": "tool_result", "tool_use_id": "x", "content": "y"},
+                ],
+            }
+        ]
+        out = strip_empty_text_blocks_from_anthropic_messages(msgs)
+        assert [b["type"] for b in out[0]["content"]] == ["tool_result"]
+
    def test_anthropic_messages_config_http_retry_helpers(self):
        import httpx

--- a/tests/test_litellm/llms/bedrock/test_mantle.py
+++ b/tests/test_litellm/llms/bedrock/test_mantle.py
@ -53,7 +53,7 @@ def test_mantle_url_construction():
        optional_params={"aws_region_name": "us-east-1"},
        litellm_params={},
    )
-    assert url == "https://bedrock-mantle.us-east-1.api.aws/v1/messages"
+    assert url == "https://bedrock-mantle.us-east-1.api.aws/anthropic/v1/messages"


 def test_mantle_url_construction_different_region():
@ -65,7 +65,7 @@ def test_mantle_url_construction_different_region():
        optional_params={"aws_region_name": "us-west-2"},
        litellm_params={},
    )
-    assert url == "https://bedrock-mantle.us-west-2.api.aws/v1/messages"
+    assert url == "https://bedrock-mantle.us-west-2.api.aws/anthropic/v1/messages"


 def test_get_bedrock_chat_config_returns_mantle_config():
@ -89,7 +89,7 @@ def test_mantle_messages_url_construction():
        optional_params={"aws_region_name": "us-east-1"},
        litellm_params={},
    )
-    assert url == "https://bedrock-mantle.us-east-1.api.aws/v1/messages"
+    assert url == "https://bedrock-mantle.us-east-1.api.aws/anthropic/v1/messages"


 def test_mantle_transform_request_strips_prefix_and_adds_model():
--- a/tests/test_litellm/test_claude_sonnet_4_6_config.py
+++ b/tests/test_litellm/test_claude_sonnet_4_6_config.py
@ -0,0 +1,80 @@
+"""
+Test Claude Sonnet 4.6 model configurations for Bedrock cross-region inference.
+
+Pins the set of region-prefixed entries in model_prices_and_context_window.json
+so future drops of a region (or pricing drift between regions) are caught.
+
+https://github.com/BerriAI/litellm/issues/22972
+"""
+
+import json
+import os
+
+
+def test_bedrock_sonnet_4_6_region_prefixes():
+    """All documented Bedrock cross-region inference prefixes for
+    claude-sonnet-4-6 must be present in model_prices_and_context_window.json.
+    """
+    # Path is resolved relative to this test file, two directories up.
+    json_path = os.path.join(
+        os.path.dirname(__file__), "../../model_prices_and_context_window.json"
+    )
+    with open(json_path) as f:
+        model_data = json.load(f)
+
+    # Base entry plus every region-prefixed variant pinned by this test.
+    bedrock_sonnet_4_6_models = [
+        "anthropic.claude-sonnet-4-6",
+        "global.anthropic.claude-sonnet-4-6",
+        "us.anthropic.claude-sonnet-4-6",
+        "eu.anthropic.claude-sonnet-4-6",
+        "au.anthropic.claude-sonnet-4-6",
+        "jp.anthropic.claude-sonnet-4-6",
+    ]
+
+    for model in bedrock_sonnet_4_6_models:
+        assert model in model_data, f"Model {model} not found in config"
+        model_info = model_data[model]
+
+        # Provider, mode, and token limits must be identical across variants.
+        assert (
+            model_info["litellm_provider"] == "bedrock_converse"
+        ), f"{model} should use bedrock_converse, got {model_info['litellm_provider']}"
+        assert model_info["mode"] == "chat"
+        assert model_info["max_input_tokens"] == 1000000
+        assert model_info["max_output_tokens"] == 64000
+        assert model_info["max_tokens"] == 64000
+        # Capability flags: ``.get(...) is True`` also fails on a missing key.
+        assert model_info.get("supports_vision") is True
+        assert model_info.get("supports_computer_use") is True
+        assert model_info.get("supports_function_calling") is True
+        assert model_info.get("supports_tool_choice") is True
+        assert model_info.get("supports_prompt_caching") is True
+        assert model_info.get("supports_response_schema") is True
+        assert model_info.get("supports_pdf_input") is True
+        assert model_info.get("supports_assistant_prefill") is True
+        assert model_info.get("supports_reasoning") is True
+        assert model_info.get("tool_use_system_prompt_tokens") == 346
+
+
+def test_bedrock_sonnet_4_6_jp_matches_other_regional_pricing():
+    """The jp. cross-region inference profile shares pricing with the other
+    regional profiles (us./eu./au.), which carry a 10% premium over the
+    base/global entries.
+    """
+    # Path is resolved relative to this test file, two directories up.
+    json_path = os.path.join(
+        os.path.dirname(__file__), "../../model_prices_and_context_window.json"
+    )
+    with open(json_path) as f:
+        model_data = json.load(f)
+
+    # Only the au. entry is compared here; us. and eu. are not checked.
+    jp_info = model_data["jp.anthropic.claude-sonnet-4-6"]
+    au_info = model_data["au.anthropic.claude-sonnet-4-6"]
+
+    pricing_fields = [
+        "input_cost_per_token",
+        "output_cost_per_token",
+        "cache_creation_input_token_cost",
+        "cache_read_input_token_cost",
+    ]
+    for field in pricing_fields:
+        assert jp_info[field] == au_info[field], (
+            f"{field} mismatch between jp. and au. variants: "
+            f"jp={jp_info[field]}, au={au_info[field]}"
+        )