fix(bedrock-mantle): use /anthropic/v1/messages path for Mantle endpoint (#27976)

* fix(bedrock-mantle): use /anthropic/v1/messages path for Mantle endpoint (#27943)

* docs: add one-line docstring to _disable_debugging (#27894)

Squash-merged by litellm-agent from oss-agent-shin's PR.

* Add jp. Bedrock cross-region inference profile for claude-sonnet-4-6 (#27831)

Squash-merged by litellm-agent from Cyberfilo's PR.

* Sanitize empty text content blocks on /v1/messages (#27832)

Squash-merged by litellm-agent from Cyberfilo's PR.

* fix(bedrock-mantle): use /anthropic/v1/messages path for Mantle endpoint

The bedrock-mantle gateway (Claude Mythos Preview) serves the Anthropic
Messages API at /anthropic/v1/messages; /v1/messages returns 404 Not
Found. Both AmazonMantleConfig (chat/completions caller route) and
AmazonMantleMessagesConfig (anthropic-messages caller route) hardcoded
the wrong path, so every Mantle request 404'd before reaching the model.

Per the Anthropic docs: "[Claude in Amazon Bedrock] uses the Messages
API at /anthropic/v1/messages with SSE streaming."
https://platform.claude.com/docs/en/api/claude-on-amazon-bedrock

Confirmed independently against the live endpoint:
  /v1/chat/completions      -> 200 OK
  /v1/messages              -> 404 Not Found  (what litellm used)
  /anthropic/v1/messages    -> 200 OK         (Claude only)

Adds a regression test asserting both Mantle configs build the
/anthropic/v1/messages path, and updates the existing assertions that
encoded the wrong path.

---------

Co-authored-by: oss-agent-shin <ext-agent-shin@berri.ai>
Co-authored-by: Filippo Menghi <113345637+Cyberfilo@users.noreply.github.com>

* fix: sanitize empty text blocks in sync anthropic_messages_handler path

Co-authored-by: Yassin Kortam <yassin@berri.ai>

---------

Co-authored-by: João Costa <13508071+jpv-costa@users.noreply.github.com>
Co-authored-by: oss-agent-shin <ext-agent-shin@berri.ai>
Co-authored-by: Filippo Menghi <113345637+Cyberfilo@users.noreply.github.com>
Co-authored-by: Cursor Agent <cursoragent@cursor.com>
Co-authored-by: Yassin Kortam <yassin@berri.ai>
This commit is contained in:
ishaan-berri 2026-05-15 13:31:59 -07:00 committed by GitHub
parent 50df072d95
commit f9ba70d357
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
11 changed files with 328 additions and 8 deletions

View File

@ -404,6 +404,7 @@ def _turn_on_debug():
def _disable_debugging():
    """Turn off verbose logging by flagging the package, router, and proxy
    loggers as disabled."""
    for logger_obj in (
        verbose_logger,
        verbose_router_logger,
        verbose_proxy_logger,
    ):
        logger_obj.disabled = True

View File

@ -832,6 +832,49 @@ def strip_thinking_blocks_from_anthropic_messages_request_dict(
data.pop("thinking", None)
def strip_empty_text_blocks_from_anthropic_messages(
    messages: List[Any],
) -> List[Any]:
    """
    Build a sanitized copy of ``messages`` in which every empty or
    whitespace-only ``{"type": "text"}`` content block has been dropped.

    Why: Anthropic's API answers such blocks with
    ``"messages: text content blocks must be non-empty"``. Assistant turns
    frequently carry ``{"type": "text", "text": ""}`` next to ``tool_use``
    blocks (see anthropics/anthropic-sdk-python#461), and multi-turn tool-use
    clients (e.g. Claude Code) replay those turns as history, so the next
    request on the unified ``/v1/messages`` path fails with a 400.
    ``/v1/chat/completions`` already covers this in ``anthropic_messages_pt``;
    this helper gives the native Anthropic Messages path the same guarantee.

    Behavior:
    - Entries that are not dicts, or whose ``content`` is not a list, are
      passed through unchanged.
    - Messages with nothing to strip are passed through as the same object.
    - Messages whose list content ends up empty after stripping are left out
      of the result, matching
      :func:`strip_thinking_blocks_from_anthropic_messages`.
    - The input list and its content blocks are never mutated; a changed
      message is emitted as a shallow copy holding a new content list.
    """
    sanitized: List[Any] = []
    for message in messages:
        blocks = message.get("content") if isinstance(message, dict) else None
        if not isinstance(blocks, list):
            # String content, missing content, or a non-dict entry: nothing to do.
            sanitized.append(message)
            continue

        kept = [blk for blk in blocks if not _is_empty_text_block(blk)]
        if len(kept) == len(blocks):
            # Nothing was removed, so keep the caller's object as-is.
            sanitized.append(message)
        elif kept:
            sanitized.append({**message, "content": kept})
        # else: every block was empty text, so the message is omitted.
    return sanitized
def _is_empty_text_block(block: Any) -> bool:
    """Tell whether ``block`` is a ``{"type": "text"}`` dict with no usable text.

    Anything that is not a dict of type ``"text"`` is reported as not empty.
    A text block counts as empty when its ``"text"`` value is missing, is not
    a string, or contains only whitespace.
    """
    is_text_block = isinstance(block, dict) and block.get("type") == "text"
    if not is_text_block:
        return False

    value = block.get("text")
    if isinstance(value, str):
        return not value.strip()
    # Missing, None, or any other non-string payload is treated as empty.
    return True
def process_anthropic_headers(headers: Union[httpx.Headers, dict]) -> dict:
openai_headers = {}
if "anthropic-ratelimit-requests-limit" in headers:

View File

@ -12,6 +12,9 @@ from typing import Any, AsyncIterator, Coroutine, Dict, List, Optional, Union, c
import litellm
from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj
from litellm.llms.anthropic.common_utils import (
strip_empty_text_blocks_from_anthropic_messages,
)
from litellm.llms.base_llm.anthropic_messages.transformation import (
BaseAnthropicMessagesConfig,
)
@ -188,8 +191,20 @@ async def anthropic_messages(
**kwargs,
) -> Union[AnthropicMessagesResponse, AsyncIterator]:
"""
Async: Make llm api request in Anthropic /messages API spec
Async: Make llm api request in Anthropic /messages API spec.
Runs the empty-text-block sanitizer before any backend dispatch.
"""
# Anthropic's API rejects requests containing empty / whitespace-only
# text content blocks with "messages: text content blocks must be
# non-empty". Multi-turn tool-use clients (e.g. Claude Code) routinely
# loop assistant responses that contain {"type": "text", "text": ""}
# alongside tool_use blocks back as conversation history, which then
# causes the next /v1/messages call to 400. /v1/chat/completions
# already handles this in anthropic_messages_pt; sanitize the native
# Anthropic Messages path here for the same guarantee. See #22930.
messages = strip_empty_text_blocks_from_anthropic_messages(messages)
original_stream = stream or kwargs.get(
"_websearch_interception_converted_stream", False
)
@ -336,6 +351,11 @@ def anthropic_messages_handler(
"""
from litellm.types.utils import LlmProviders
# Sanitize empty text blocks here too so the sync entry point
# (litellm.messages.create -> anthropic_messages_handler) gets the same
# protection as the async wrapper. Idempotent when called twice.
messages = strip_empty_text_blocks_from_anthropic_messages(messages)
metadata = validate_anthropic_api_metadata(metadata)
local_vars = locals()

View File

@ -21,7 +21,9 @@ if TYPE_CHECKING:
else:
LiteLLMLoggingObj = Any
MANTLE_ENDPOINT_TEMPLATE = "https://bedrock-mantle.{region}.api.aws/v1/messages"
# The bedrock-mantle gateway serves the Anthropic Messages API under
# /anthropic/v1/messages; the bare /v1/messages path returns 404 there.
MANTLE_ENDPOINT_TEMPLATE = (
"https://bedrock-mantle.{region}.api.aws/anthropic/v1/messages"
)
class AmazonMantleConfig(AmazonAnthropicClaudeConfig):

View File

@ -20,7 +20,9 @@ if TYPE_CHECKING:
else:
LiteLLMLoggingObj = Any
MANTLE_ENDPOINT_TEMPLATE = "https://bedrock-mantle.{region}.api.aws/v1/messages"
# The bedrock-mantle gateway serves the Anthropic Messages API under
# /anthropic/v1/messages; the bare /v1/messages path returns 404 there.
MANTLE_ENDPOINT_TEMPLATE = (
"https://bedrock-mantle.{region}.api.aws/anthropic/v1/messages"
)
class AmazonMantleMessagesConfig(AmazonAnthropicClaudeMessagesConfig):

View File

@ -1448,6 +1448,35 @@
"supports_native_structured_output": true,
"supports_minimal_reasoning_effort": true
},
"jp.anthropic.claude-sonnet-4-6": {
"cache_creation_input_token_cost": 4.125e-06,
"cache_read_input_token_cost": 3.3e-07,
"input_cost_per_token": 3.3e-06,
"litellm_provider": "bedrock_converse",
"max_input_tokens": 1000000,
"max_output_tokens": 64000,
"max_tokens": 64000,
"mode": "chat",
"output_cost_per_token": 1.65e-05,
"search_context_cost_per_query": {
"search_context_size_high": 0.01,
"search_context_size_low": 0.01,
"search_context_size_medium": 0.01
},
"supports_assistant_prefill": true,
"supports_computer_use": true,
"supports_function_calling": true,
"supports_pdf_input": true,
"supports_prompt_caching": true,
"supports_reasoning": true,
"supports_response_schema": true,
"supports_max_reasoning_effort": true,
"supports_tool_choice": true,
"supports_vision": true,
"tool_use_system_prompt_tokens": 346,
"supports_native_structured_output": true,
"supports_minimal_reasoning_effort": true
},
"anthropic.claude-sonnet-4-20250514-v1:0": {
"cache_creation_input_token_cost": 3.75e-06,
"cache_read_input_token_cost": 3e-07,

View File

@ -23,7 +23,7 @@ from litellm.llms.custom_httpx.http_handler import HTTPHandler
MODEL = "bedrock/mantle/anthropic.claude-mythos-preview"
REGION = "us-east-1"
EXPECTED_URL = f"https://bedrock-mantle.{REGION}.api.aws/v1/messages"
EXPECTED_URL = f"https://bedrock-mantle.{REGION}.api.aws/anthropic/v1/messages"
FAKE_ANTHROPIC_RESPONSE = {
"id": "msg_fake123",
@ -143,7 +143,7 @@ def test_mantle_region_reflected_in_url():
pass
call_kwargs = mock_post.call_args.kwargs
expected = f"https://bedrock-mantle.{region}.api.aws/v1/messages"
expected = f"https://bedrock-mantle.{region}.api.aws/anthropic/v1/messages"
assert (
call_kwargs["url"] == expected
), f"region={region}: expected URL {expected}, got {call_kwargs['url']}"

View File

@ -64,6 +64,51 @@ def test_anthropic_experimental_pass_through_messages_handler_dynamic_api_key_an
assert mock_completion.call_args.kwargs["custom_key"] == "custom_value"
@pytest.mark.asyncio
async def test_anthropic_messages_sanitizes_empty_text_blocks_before_dispatch():
    """Regression guard for #22930: the unified /v1/messages path has to drop
    empty text blocks before it forwards the request, because Anthropic
    otherwise responds 400 "text content blocks must be non-empty". The
    caller's own message list must stay as it was."""
    from litellm.llms.anthropic.experimental_pass_through.messages import handler

    blank_text = {"type": "text", "text": ""}
    tool_call = {"type": "tool_use", "id": "t", "name": "B", "input": {}}
    msgs = [{"role": "assistant", "content": [blank_text, tool_call]}]

    captured = {}

    def fake_handler(*args, **kwargs):
        # Record the messages the async wrapper hands to the patched handler.
        captured["messages"] = kwargs.get("messages")
        return "stub"

    def run_inline(_e, func):
        # Run the callable immediately and wrap its result in an awaitable.
        return _async_return(func())

    fake_loop = MagicMock()
    fake_loop.run_in_executor = run_inline

    handler_patch = patch.object(
        handler, "anthropic_messages_handler", side_effect=fake_handler
    )
    loop_patch = patch("asyncio.get_event_loop", return_value=fake_loop)
    with handler_patch:
        with loop_patch:
            await handler.anthropic_messages(
                max_tokens=100,
                messages=msgs,
                model="anthropic/claude-sonnet-4-5-20250929",
                custom_llm_provider="anthropic",
                api_key="k",
            )

    forwarded_types = [b["type"] for b in captured["messages"][0]["content"]]
    assert forwarded_types == ["tool_use"]
    assert len(msgs[0]["content"]) == 2  # caller untouched
async def _async_return(value):
    """Coroutine helper for tests: awaiting it yields ``value`` unchanged."""
    resolved = value
    return resolved
def test_anthropic_experimental_pass_through_messages_handler_custom_llm_provider():
"""
Test that litellm.completion is called when a custom LLM provider is given

View File

@ -1229,6 +1229,104 @@ class TestAnthropicThinkingSignatureSelfHeal:
assert "thinking" not in data
assert data["messages"] == []
def test_strip_empty_text_blocks_from_anthropic_messages(self):
    """Covers #22930, the core regression scenario:

    - an assistant message holding a whitespace-only text block plus a
      ``tool_use`` block keeps only the ``tool_use``;
    - a message that is left with no blocks at all is dropped;
    - string-content messages pass through as the same object;
    - the caller's list is never mutated.
    """
    from litellm.llms.anthropic.common_utils import (
        strip_empty_text_blocks_from_anthropic_messages,
    )

    tool_use = {"type": "tool_use", "id": "x", "name": "Bash", "input": {}}
    plain_user = {"role": "user", "content": "hello"}
    mixed_assistant = {
        "role": "assistant",
        "content": [{"type": "text", "text": " \n "}, tool_use],
    }
    blank_only_assistant = {
        "role": "assistant",
        "content": [{"type": "text", "text": ""}],
    }
    msgs = [plain_user, mixed_assistant, blank_only_assistant]

    out = strip_empty_text_blocks_from_anthropic_messages(msgs)

    assert len(out) == 2
    assert out[0] is msgs[0]
    surviving_types = [b["type"] for b in out[1]["content"]]
    assert surviving_types == ["tool_use"]
    assert len(msgs[1]["content"]) == 2  # caller's content unchanged
def test_strip_empty_text_blocks_preserves_thinking_blocks(self):
    """An empty text block is removed while the ``thinking`` block beside it
    is kept."""
    from litellm.llms.anthropic.common_utils import (
        strip_empty_text_blocks_from_anthropic_messages,
    )

    thinking_block = {"type": "thinking", "thinking": "plan", "signature": "sig"}
    blank_text = {"type": "text", "text": ""}
    msgs = [{"role": "assistant", "content": [thinking_block, blank_text]}]

    out = strip_empty_text_blocks_from_anthropic_messages(msgs)

    remaining_types = [b["type"] for b in out[0]["content"]]
    assert remaining_types == ["thinking"]
def test_strip_empty_text_blocks_treats_null_text_as_empty(self):
    """A text block whose ``text`` is ``None`` is stripped; the neighbouring
    ``tool_result`` block remains."""
    from litellm.llms.anthropic.common_utils import (
        strip_empty_text_blocks_from_anthropic_messages,
    )

    null_text = {"type": "text", "text": None}
    tool_result = {"type": "tool_result", "tool_use_id": "x", "content": "y"}
    msgs = [{"role": "user", "content": [null_text, tool_result]}]

    out = strip_empty_text_blocks_from_anthropic_messages(msgs)

    remaining_types = [b["type"] for b in out[0]["content"]]
    assert remaining_types == ["tool_result"]
def test_strip_empty_text_blocks_treats_missing_text_key_as_empty(self):
    """A text block with no ``text`` key at all is stripped; the neighbouring
    ``tool_result`` block remains."""
    from litellm.llms.anthropic.common_utils import (
        strip_empty_text_blocks_from_anthropic_messages,
    )

    keyless_text = {"type": "text"}
    tool_result = {"type": "tool_result", "tool_use_id": "x", "content": "y"}
    msgs = [{"role": "user", "content": [keyless_text, tool_result]}]

    out = strip_empty_text_blocks_from_anthropic_messages(msgs)

    remaining_types = [b["type"] for b in out[0]["content"]]
    assert remaining_types == ["tool_result"]
def test_strip_empty_text_blocks_leaves_non_empty_text_alone(self):
    """A message with only non-empty text comes back as the very same object."""
    from litellm.llms.anthropic.common_utils import (
        strip_empty_text_blocks_from_anthropic_messages,
    )

    original = {"role": "assistant", "content": [{"type": "text", "text": "hi"}]}
    msgs = [original]

    out = strip_empty_text_blocks_from_anthropic_messages(msgs)

    assert out[0] is msgs[0]  # untouched messages keep identity
def test_strip_empty_text_blocks_treats_non_string_text_value_as_empty(self):
    """A text block whose ``text`` is not a string (here an int) is stripped;
    the neighbouring ``tool_result`` block remains."""
    from litellm.llms.anthropic.common_utils import (
        strip_empty_text_blocks_from_anthropic_messages,
    )

    numeric_text = {"type": "text", "text": 123}
    tool_result = {"type": "tool_result", "tool_use_id": "x", "content": "y"}
    msgs = [{"role": "user", "content": [numeric_text, tool_result]}]

    out = strip_empty_text_blocks_from_anthropic_messages(msgs)

    remaining_types = [b["type"] for b in out[0]["content"]]
    assert remaining_types == ["tool_result"]
def test_anthropic_messages_config_http_retry_helpers(self):
import httpx

View File

@ -53,7 +53,7 @@ def test_mantle_url_construction():
optional_params={"aws_region_name": "us-east-1"},
litellm_params={},
)
assert url == "https://bedrock-mantle.us-east-1.api.aws/v1/messages"
assert url == "https://bedrock-mantle.us-east-1.api.aws/anthropic/v1/messages"
def test_mantle_url_construction_different_region():
@ -65,7 +65,7 @@ def test_mantle_url_construction_different_region():
optional_params={"aws_region_name": "us-west-2"},
litellm_params={},
)
assert url == "https://bedrock-mantle.us-west-2.api.aws/v1/messages"
assert url == "https://bedrock-mantle.us-west-2.api.aws/anthropic/v1/messages"
def test_get_bedrock_chat_config_returns_mantle_config():
@ -89,7 +89,7 @@ def test_mantle_messages_url_construction():
optional_params={"aws_region_name": "us-east-1"},
litellm_params={},
)
assert url == "https://bedrock-mantle.us-east-1.api.aws/v1/messages"
assert url == "https://bedrock-mantle.us-east-1.api.aws/anthropic/v1/messages"
def test_mantle_transform_request_strips_prefix_and_adds_model():

View File

@ -0,0 +1,80 @@
"""
Test Claude Sonnet 4.6 model configurations for Bedrock cross-region inference.
Pins the set of region-prefixed entries in model_prices_and_context_window.json
so future drops of a region (or pricing drift between regions) are caught.
https://github.com/BerriAI/litellm/issues/22972
"""
import json
import os
def test_bedrock_sonnet_4_6_region_prefixes():
    """Each documented Bedrock cross-region inference prefix for
    claude-sonnet-4-6 must have an entry in
    model_prices_and_context_window.json carrying the expected provider,
    mode, token limits, and capability flags.
    """
    here = os.path.dirname(__file__)
    json_path = os.path.join(here, "../../model_prices_and_context_window.json")
    with open(json_path) as f:
        model_data = json.load(f)

    expected_models = (
        "anthropic.claude-sonnet-4-6",
        "global.anthropic.claude-sonnet-4-6",
        "us.anthropic.claude-sonnet-4-6",
        "eu.anthropic.claude-sonnet-4-6",
        "au.anthropic.claude-sonnet-4-6",
        "jp.anthropic.claude-sonnet-4-6",
    )
    # Keys read with [] so a missing limit fails loudly with KeyError.
    expected_limits = {
        "max_input_tokens": 1000000,
        "max_output_tokens": 64000,
        "max_tokens": 64000,
    }
    # Capability flags that must be exactly True on every entry.
    required_flags = (
        "supports_vision",
        "supports_computer_use",
        "supports_function_calling",
        "supports_tool_choice",
        "supports_prompt_caching",
        "supports_response_schema",
        "supports_pdf_input",
        "supports_assistant_prefill",
        "supports_reasoning",
    )

    for model in expected_models:
        assert model in model_data, f"Model {model} not found in config"
        model_info = model_data[model]

        assert (
            model_info["litellm_provider"] == "bedrock_converse"
        ), f"{model} should use bedrock_converse, got {model_info['litellm_provider']}"
        assert model_info["mode"] == "chat"

        for limit_key, limit_value in expected_limits.items():
            assert model_info[limit_key] == limit_value

        for flag in required_flags:
            assert model_info.get(flag) is True

        assert model_info.get("tool_use_system_prompt_tokens") == 346
def test_bedrock_sonnet_4_6_jp_matches_other_regional_pricing():
    """The jp. cross-region inference profile is priced like the other
    regional profiles (us./eu./au.), which carry a 10% premium over the
    base/global entries. Only jp. versus au. equality is asserted here.
    """
    here = os.path.dirname(__file__)
    json_path = os.path.join(here, "../../model_prices_and_context_window.json")
    with open(json_path) as f:
        model_data = json.load(f)

    jp_info = model_data["jp.anthropic.claude-sonnet-4-6"]
    au_info = model_data["au.anthropic.claude-sonnet-4-6"]

    for field in (
        "input_cost_per_token",
        "output_cost_per_token",
        "cache_creation_input_token_cost",
        "cache_read_input_token_cost",
    ):
        assert jp_info[field] == au_info[field], (
            f"{field} mismatch between jp. and au. variants: "
            f"jp={jp_info[field]}, au={au_info[field]}"
        )