fix(bedrock-mantle): use /anthropic/v1/messages path for Mantle endpoint (#27976)
* fix(bedrock-mantle): use /anthropic/v1/messages path for Mantle endpoint (#27943) * docs: add one-line docstring to _disable_debugging (#27894) Squash-merged by litellm-agent from oss-agent-shin's PR. * Add jp. Bedrock cross-region inference profile for claude-sonnet-4-6 (#27831) Squash-merged by litellm-agent from Cyberfilo's PR. * Sanitize empty text content blocks on /v1/messages (#27832) Squash-merged by litellm-agent from Cyberfilo's PR. * fix(bedrock-mantle): use /anthropic/v1/messages path for Mantle endpoint The bedrock-mantle gateway (Claude Mythos Preview) serves the Anthropic Messages API at /anthropic/v1/messages; /v1/messages returns 404 Not Found. Both AmazonMantleConfig (chat/completions caller route) and AmazonMantleMessagesConfig (anthropic-messages caller route) hardcoded the wrong path, so every Mantle request 404'd before reaching the model. Per the Anthropic docs: "[Claude in Amazon Bedrock] uses the Messages API at /anthropic/v1/messages with SSE streaming." https://platform.claude.com/docs/en/api/claude-on-amazon-bedrock Confirmed independently against the live endpoint: /v1/chat/completions -> 200 OK /v1/messages -> 404 Not Found (what litellm used) /anthropic/v1/messages -> 200 OK (Claude only) Adds a regression test asserting both Mantle configs build the /anthropic/v1/messages path, and updates the existing assertions that encoded the wrong path. --------- Co-authored-by: oss-agent-shin <ext-agent-shin@berri.ai> Co-authored-by: Filippo Menghi <113345637+Cyberfilo@users.noreply.github.com> * fix: sanitize empty text blocks in sync anthropic_messages_handler path Co-authored-by: Yassin Kortam <yassin@berri.ai> --------- Co-authored-by: João Costa <13508071+jpv-costa@users.noreply.github.com> Co-authored-by: oss-agent-shin <ext-agent-shin@berri.ai> Co-authored-by: Filippo Menghi <113345637+Cyberfilo@users.noreply.github.com> Co-authored-by: Cursor Agent <cursoragent@cursor.com> Co-authored-by: Yassin Kortam <yassin@berri.ai>
This commit is contained in:
parent
50df072d95
commit
f9ba70d357
@ -404,6 +404,7 @@ def _turn_on_debug():
|
||||
|
||||
|
||||
def _disable_debugging():
    """Mark the package, router, and proxy verbose loggers as disabled."""
    loggers = (
        verbose_logger,
        verbose_router_logger,
        verbose_proxy_logger,
    )
    for logger in loggers:
        logger.disabled = True
|
||||
|
||||
@ -832,6 +832,49 @@ def strip_thinking_blocks_from_anthropic_messages_request_dict(
|
||||
data.pop("thinking", None)
|
||||
|
||||
|
||||
def strip_empty_text_blocks_from_anthropic_messages(
    messages: List[Any],
) -> List[Any]:
    """
    Build a copy of ``messages`` without empty or whitespace-only
    ``{"type": "text"}`` content blocks.

    Anthropic's API rejects such blocks with
    ``"messages: text content blocks must be non-empty"``, yet assistant
    responses routinely carry ``{"type": "text", "text": ""}`` next to
    ``tool_use`` blocks (see anthropics/anthropic-sdk-python#461). Multi-turn
    tool-use clients (e.g. Claude Code) replay those responses as history, so
    the next request on the unified ``/v1/messages`` path would 400.
    ``/v1/chat/completions`` already covers this in ``anthropic_messages_pt``;
    this helper gives the native Anthropic Messages path the same guarantee.

    Rules applied per entry:

    * Entries that are not dicts, or whose ``content`` is not a list, pass
      through unchanged.
    * A message in which no block was removed is passed through as the very
      same object.
    * A message that lost some blocks is replaced by a shallow copy holding a
      fresh ``content`` list.
    * A message whose list content consisted solely of empty text blocks is
      omitted, matching :func:`strip_thinking_blocks_from_anthropic_messages`.

    Neither the caller's list nor any of its content lists is mutated.
    """
    sanitized: List[Any] = []
    for message in messages:
        blocks = message.get("content") if isinstance(message, dict) else None
        if not isinstance(blocks, list):
            sanitized.append(message)
            continue

        kept = [block for block in blocks if not _is_empty_text_block(block)]
        removed_count = len(blocks) - len(kept)
        if not removed_count:
            # Nothing stripped: hand back the original object untouched.
            sanitized.append(message)
        elif kept:
            sanitized.append(dict(message, content=kept))
        # Otherwise every block was an empty text block: drop the message.
    return sanitized


def _is_empty_text_block(block: Any) -> bool:
    """Return True when ``block`` is a ``text`` block with no usable text.

    The ``text`` value counts as unusable when it is missing, is not a
    string, or consists only of whitespace. Non-dict values and blocks of any
    other ``type`` are never reported as empty.
    """
    if isinstance(block, dict) and block.get("type") == "text":
        text = block.get("text")
        return not (isinstance(text, str) and text.strip())
    return False
|
||||
|
||||
|
||||
def process_anthropic_headers(headers: Union[httpx.Headers, dict]) -> dict:
|
||||
openai_headers = {}
|
||||
if "anthropic-ratelimit-requests-limit" in headers:
|
||||
|
||||
@ -12,6 +12,9 @@ from typing import Any, AsyncIterator, Coroutine, Dict, List, Optional, Union, c
|
||||
|
||||
import litellm
|
||||
from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj
|
||||
from litellm.llms.anthropic.common_utils import (
|
||||
strip_empty_text_blocks_from_anthropic_messages,
|
||||
)
|
||||
from litellm.llms.base_llm.anthropic_messages.transformation import (
|
||||
BaseAnthropicMessagesConfig,
|
||||
)
|
||||
@ -188,8 +191,20 @@ async def anthropic_messages(
|
||||
**kwargs,
|
||||
) -> Union[AnthropicMessagesResponse, AsyncIterator]:
|
||||
"""
|
||||
Async: Make llm api request in Anthropic /messages API spec
|
||||
Async: Make llm api request in Anthropic /messages API spec.
|
||||
|
||||
Runs the empty-text-block sanitizer before any backend dispatch.
|
||||
"""
|
||||
# Anthropic's API rejects requests containing empty / whitespace-only
|
||||
# text content blocks with "messages: text content blocks must be
|
||||
# non-empty". Multi-turn tool-use clients (e.g. Claude Code) routinely
|
||||
# loop assistant responses that contain {"type": "text", "text": ""}
|
||||
# alongside tool_use blocks back as conversation history, which then
|
||||
# causes the next /v1/messages call to 400. /v1/chat/completions
|
||||
# already handles this in anthropic_messages_pt; sanitize the native
|
||||
# Anthropic Messages path here for the same guarantee. See #22930.
|
||||
messages = strip_empty_text_blocks_from_anthropic_messages(messages)
|
||||
|
||||
original_stream = stream or kwargs.get(
|
||||
"_websearch_interception_converted_stream", False
|
||||
)
|
||||
@ -336,6 +351,11 @@ def anthropic_messages_handler(
|
||||
"""
|
||||
from litellm.types.utils import LlmProviders
|
||||
|
||||
# Sanitize empty text blocks here too so the sync entry point
|
||||
# (litellm.messages.create -> anthropic_messages_handler) gets the same
|
||||
# protection as the async wrapper. Idempotent when called twice.
|
||||
messages = strip_empty_text_blocks_from_anthropic_messages(messages)
|
||||
|
||||
metadata = validate_anthropic_api_metadata(metadata)
|
||||
|
||||
local_vars = locals()
|
||||
|
||||
@ -21,7 +21,9 @@ if TYPE_CHECKING:
|
||||
else:
|
||||
LiteLLMLoggingObj = Any
|
||||
|
||||
MANTLE_ENDPOINT_TEMPLATE = "https://bedrock-mantle.{region}.api.aws/v1/messages"
|
||||
MANTLE_ENDPOINT_TEMPLATE = (
|
||||
"https://bedrock-mantle.{region}.api.aws/anthropic/v1/messages"
|
||||
)
|
||||
|
||||
|
||||
class AmazonMantleConfig(AmazonAnthropicClaudeConfig):
|
||||
|
||||
@ -20,7 +20,9 @@ if TYPE_CHECKING:
|
||||
else:
|
||||
LiteLLMLoggingObj = Any
|
||||
|
||||
MANTLE_ENDPOINT_TEMPLATE = "https://bedrock-mantle.{region}.api.aws/v1/messages"
|
||||
MANTLE_ENDPOINT_TEMPLATE = (
|
||||
"https://bedrock-mantle.{region}.api.aws/anthropic/v1/messages"
|
||||
)
|
||||
|
||||
|
||||
class AmazonMantleMessagesConfig(AmazonAnthropicClaudeMessagesConfig):
|
||||
|
||||
@ -1448,6 +1448,35 @@
|
||||
"supports_native_structured_output": true,
|
||||
"supports_minimal_reasoning_effort": true
|
||||
},
|
||||
"jp.anthropic.claude-sonnet-4-6": {
|
||||
"cache_creation_input_token_cost": 4.125e-06,
|
||||
"cache_read_input_token_cost": 3.3e-07,
|
||||
"input_cost_per_token": 3.3e-06,
|
||||
"litellm_provider": "bedrock_converse",
|
||||
"max_input_tokens": 1000000,
|
||||
"max_output_tokens": 64000,
|
||||
"max_tokens": 64000,
|
||||
"mode": "chat",
|
||||
"output_cost_per_token": 1.65e-05,
|
||||
"search_context_cost_per_query": {
|
||||
"search_context_size_high": 0.01,
|
||||
"search_context_size_low": 0.01,
|
||||
"search_context_size_medium": 0.01
|
||||
},
|
||||
"supports_assistant_prefill": true,
|
||||
"supports_computer_use": true,
|
||||
"supports_function_calling": true,
|
||||
"supports_pdf_input": true,
|
||||
"supports_prompt_caching": true,
|
||||
"supports_reasoning": true,
|
||||
"supports_response_schema": true,
|
||||
"supports_max_reasoning_effort": true,
|
||||
"supports_tool_choice": true,
|
||||
"supports_vision": true,
|
||||
"tool_use_system_prompt_tokens": 346,
|
||||
"supports_native_structured_output": true,
|
||||
"supports_minimal_reasoning_effort": true
|
||||
},
|
||||
"anthropic.claude-sonnet-4-20250514-v1:0": {
|
||||
"cache_creation_input_token_cost": 3.75e-06,
|
||||
"cache_read_input_token_cost": 3e-07,
|
||||
|
||||
@ -23,7 +23,7 @@ from litellm.llms.custom_httpx.http_handler import HTTPHandler
|
||||
|
||||
MODEL = "bedrock/mantle/anthropic.claude-mythos-preview"
|
||||
REGION = "us-east-1"
|
||||
EXPECTED_URL = f"https://bedrock-mantle.{REGION}.api.aws/v1/messages"
|
||||
EXPECTED_URL = f"https://bedrock-mantle.{REGION}.api.aws/anthropic/v1/messages"
|
||||
|
||||
FAKE_ANTHROPIC_RESPONSE = {
|
||||
"id": "msg_fake123",
|
||||
@ -143,7 +143,7 @@ def test_mantle_region_reflected_in_url():
|
||||
pass
|
||||
|
||||
call_kwargs = mock_post.call_args.kwargs
|
||||
expected = f"https://bedrock-mantle.{region}.api.aws/v1/messages"
|
||||
expected = f"https://bedrock-mantle.{region}.api.aws/anthropic/v1/messages"
|
||||
assert (
|
||||
call_kwargs["url"] == expected
|
||||
), f"region={region}: expected URL {expected}, got {call_kwargs['url']}"
|
||||
|
||||
@ -64,6 +64,51 @@ def test_anthropic_experimental_pass_through_messages_handler_dynamic_api_key_an
|
||||
assert mock_completion.call_args.kwargs["custom_key"] == "custom_value"
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_anthropic_messages_sanitizes_empty_text_blocks_before_dispatch():
    """Regression test for #22930.

    ``anthropic_messages`` must strip empty text blocks before calling
    ``anthropic_messages_handler``; otherwise Anthropic returns
    400 "text content blocks must be non-empty".
    """
    from litellm.llms.anthropic.experimental_pass_through.messages import handler

    empty_text = {"type": "text", "text": ""}
    tool_use = {"type": "tool_use", "id": "t", "name": "B", "input": {}}
    msgs = [{"role": "assistant", "content": [empty_text, tool_use]}]
    captured = {}

    def fake_handler(*args, **kwargs):
        # Record what the async wrapper forwards instead of calling a backend.
        captured["messages"] = kwargs.get("messages")
        return "stub"

    # Stand-in event loop: run_in_executor invokes the callable immediately
    # and wraps its result in an awaitable.
    fake_loop = MagicMock()
    fake_loop.run_in_executor = lambda _e, func: _async_return(func())

    handler_patch = patch.object(
        handler, "anthropic_messages_handler", side_effect=fake_handler
    )
    loop_patch = patch("asyncio.get_event_loop", return_value=fake_loop)
    with handler_patch:
        with loop_patch:
            await handler.anthropic_messages(
                max_tokens=100,
                messages=msgs,
                model="anthropic/claude-sonnet-4-5-20250929",
                custom_llm_provider="anthropic",
                api_key="k",
            )

    forwarded_types = [b["type"] for b in captured["messages"][0]["content"]]
    assert forwarded_types == ["tool_use"]
    assert len(msgs[0]["content"]) == 2  # caller untouched


async def _async_return(value):
    """Coroutine that resolves to ``value`` unchanged."""
    return value
|
||||
|
||||
|
||||
def test_anthropic_experimental_pass_through_messages_handler_custom_llm_provider():
|
||||
"""
|
||||
Test that litellm.completion is called when a custom LLM provider is given
|
||||
|
||||
@ -1229,6 +1229,104 @@ class TestAnthropicThinkingSignatureSelfHeal:
|
||||
assert "thinking" not in data
|
||||
assert data["messages"] == []
|
||||
|
||||
def test_strip_empty_text_blocks_from_anthropic_messages(self):
    """Covers #22930, the core regression scenario.

    * An assistant message holding a whitespace-only text block next to a
      ``tool_use`` block keeps only the ``tool_use`` block.
    * A message that reduces to no blocks is dropped.
    * String-content messages come back as the same object.
    * The caller's content list is never mutated.
    """
    from litellm.llms.anthropic.common_utils import (
        strip_empty_text_blocks_from_anthropic_messages,
    )

    tool_use = {"type": "tool_use", "id": "x", "name": "Bash", "input": {}}
    plain_user = {"role": "user", "content": "hello"}
    whitespace_with_tool = {
        "role": "assistant",
        "content": [{"type": "text", "text": " \n "}, tool_use],
    }
    only_empty_text = {
        "role": "assistant",
        "content": [{"type": "text", "text": ""}],
    }
    msgs = [plain_user, whitespace_with_tool, only_empty_text]

    out = strip_empty_text_blocks_from_anthropic_messages(msgs)

    assert len(out) == 2
    assert out[0] is msgs[0]
    kept_types = [b["type"] for b in out[1]["content"]]
    assert kept_types == ["tool_use"]
    assert len(msgs[1]["content"]) == 2  # caller's content unchanged
|
||||
|
||||
def test_strip_empty_text_blocks_preserves_thinking_blocks(self):
    """An empty text block is removed while the ``thinking`` block beside it
    is kept."""
    from litellm.llms.anthropic.common_utils import (
        strip_empty_text_blocks_from_anthropic_messages,
    )

    thinking = {"type": "thinking", "thinking": "plan", "signature": "sig"}
    empty_text = {"type": "text", "text": ""}
    msgs = [{"role": "assistant", "content": [thinking, empty_text]}]

    out = strip_empty_text_blocks_from_anthropic_messages(msgs)

    kept_types = [b["type"] for b in out[0]["content"]]
    assert kept_types == ["thinking"]
|
||||
|
||||
def test_strip_empty_text_blocks_treats_null_text_as_empty(self):
    """A text block whose ``text`` is ``None`` is stripped; the sibling
    ``tool_result`` block survives."""
    from litellm.llms.anthropic.common_utils import (
        strip_empty_text_blocks_from_anthropic_messages,
    )

    null_text = {"type": "text", "text": None}
    tool_result = {"type": "tool_result", "tool_use_id": "x", "content": "y"}
    msgs = [{"role": "user", "content": [null_text, tool_result]}]

    out = strip_empty_text_blocks_from_anthropic_messages(msgs)

    kept_types = [b["type"] for b in out[0]["content"]]
    assert kept_types == ["tool_result"]
|
||||
|
||||
def test_strip_empty_text_blocks_treats_missing_text_key_as_empty(self):
    """A text block with no ``text`` key at all is stripped; the sibling
    ``tool_result`` block survives."""
    from litellm.llms.anthropic.common_utils import (
        strip_empty_text_blocks_from_anthropic_messages,
    )

    keyless_text = {"type": "text"}
    tool_result = {"type": "tool_result", "tool_use_id": "x", "content": "y"}
    msgs = [{"role": "user", "content": [keyless_text, tool_result]}]

    out = strip_empty_text_blocks_from_anthropic_messages(msgs)

    kept_types = [b["type"] for b in out[0]["content"]]
    assert kept_types == ["tool_result"]
|
||||
|
||||
def test_strip_empty_text_blocks_leaves_non_empty_text_alone(self):
    """A message with only non-empty text comes back as the same object."""
    from litellm.llms.anthropic.common_utils import (
        strip_empty_text_blocks_from_anthropic_messages,
    )

    message = {"role": "assistant", "content": [{"type": "text", "text": "hi"}]}
    msgs = [message]

    out = strip_empty_text_blocks_from_anthropic_messages(msgs)

    assert out[0] is msgs[0]  # untouched messages keep identity
|
||||
|
||||
def test_strip_empty_text_blocks_treats_non_string_text_value_as_empty(self):
    """A text block whose ``text`` is not a string (here an int) is stripped;
    the sibling ``tool_result`` block survives."""
    from litellm.llms.anthropic.common_utils import (
        strip_empty_text_blocks_from_anthropic_messages,
    )

    int_text = {"type": "text", "text": 123}
    tool_result = {"type": "tool_result", "tool_use_id": "x", "content": "y"}
    msgs = [{"role": "user", "content": [int_text, tool_result]}]

    out = strip_empty_text_blocks_from_anthropic_messages(msgs)

    kept_types = [b["type"] for b in out[0]["content"]]
    assert kept_types == ["tool_result"]
|
||||
|
||||
def test_anthropic_messages_config_http_retry_helpers(self):
|
||||
import httpx
|
||||
|
||||
|
||||
@ -53,7 +53,7 @@ def test_mantle_url_construction():
|
||||
optional_params={"aws_region_name": "us-east-1"},
|
||||
litellm_params={},
|
||||
)
|
||||
assert url == "https://bedrock-mantle.us-east-1.api.aws/v1/messages"
|
||||
assert url == "https://bedrock-mantle.us-east-1.api.aws/anthropic/v1/messages"
|
||||
|
||||
|
||||
def test_mantle_url_construction_different_region():
|
||||
@ -65,7 +65,7 @@ def test_mantle_url_construction_different_region():
|
||||
optional_params={"aws_region_name": "us-west-2"},
|
||||
litellm_params={},
|
||||
)
|
||||
assert url == "https://bedrock-mantle.us-west-2.api.aws/v1/messages"
|
||||
assert url == "https://bedrock-mantle.us-west-2.api.aws/anthropic/v1/messages"
|
||||
|
||||
|
||||
def test_get_bedrock_chat_config_returns_mantle_config():
|
||||
@ -89,7 +89,7 @@ def test_mantle_messages_url_construction():
|
||||
optional_params={"aws_region_name": "us-east-1"},
|
||||
litellm_params={},
|
||||
)
|
||||
assert url == "https://bedrock-mantle.us-east-1.api.aws/v1/messages"
|
||||
assert url == "https://bedrock-mantle.us-east-1.api.aws/anthropic/v1/messages"
|
||||
|
||||
|
||||
def test_mantle_transform_request_strips_prefix_and_adds_model():
|
||||
|
||||
80
tests/test_litellm/test_claude_sonnet_4_6_config.py
Normal file
80
tests/test_litellm/test_claude_sonnet_4_6_config.py
Normal file
@ -0,0 +1,80 @@
|
||||
"""
|
||||
Test Claude Sonnet 4.6 model configurations for Bedrock cross-region inference.
|
||||
|
||||
Pins the set of region-prefixed entries in model_prices_and_context_window.json
|
||||
so future drops of a region (or pricing drift between regions) are caught.
|
||||
|
||||
https://github.com/BerriAI/litellm/issues/22972
|
||||
"""
|
||||
|
||||
import json
|
||||
import os
|
||||
|
||||
|
||||
def test_bedrock_sonnet_4_6_region_prefixes():
    """All documented Bedrock cross-region inference prefixes for
    claude-sonnet-4-6 must be present in model_prices_and_context_window.json.

    Every listed entry must also carry the same provider, mode, token limits,
    and capability flags; each assertion names the offending model and field.
    """
    json_path = os.path.join(
        os.path.dirname(__file__), "../../model_prices_and_context_window.json"
    )
    # Read as UTF-8 explicitly so the test does not depend on the platform's
    # locale encoding.
    with open(json_path, encoding="utf-8") as f:
        model_data = json.load(f)

    bedrock_sonnet_4_6_models = [
        "anthropic.claude-sonnet-4-6",
        "global.anthropic.claude-sonnet-4-6",
        "us.anthropic.claude-sonnet-4-6",
        "eu.anthropic.claude-sonnet-4-6",
        "au.anthropic.claude-sonnet-4-6",
        "jp.anthropic.claude-sonnet-4-6",
    ]

    # Scalar fields that must match exactly in every entry.
    expected_values = {
        "mode": "chat",
        "max_input_tokens": 1000000,
        "max_output_tokens": 64000,
        "max_tokens": 64000,
        "tool_use_system_prompt_tokens": 346,
    }
    # Capability flags that must be literally ``true`` in the JSON.
    required_flags = (
        "supports_vision",
        "supports_computer_use",
        "supports_function_calling",
        "supports_tool_choice",
        "supports_prompt_caching",
        "supports_response_schema",
        "supports_pdf_input",
        "supports_assistant_prefill",
        "supports_reasoning",
    )

    for model in bedrock_sonnet_4_6_models:
        assert model in model_data, f"Model {model} not found in config"
        model_info = model_data[model]

        assert (
            model_info["litellm_provider"] == "bedrock_converse"
        ), f"{model} should use bedrock_converse, got {model_info['litellm_provider']}"

        for field, expected in expected_values.items():
            actual = model_info.get(field)
            assert (
                actual == expected
            ), f"{model}: expected {field}={expected!r}, got {actual!r}"

        for flag in required_flags:
            assert (
                model_info.get(flag) is True
            ), f"{model}: expected {flag} to be true, got {model_info.get(flag)!r}"
|
||||
|
||||
|
||||
def test_bedrock_sonnet_4_6_jp_matches_other_regional_pricing():
    """The jp. cross-region inference profile shares pricing with the other
    regional profiles (us./eu./au.), which carry a 10% premium over the
    base/global entries.

    Only the au. entry is compared here, as the representative regional
    profile; us. and eu. pricing is not checked by this test.
    """
    json_path = os.path.join(
        os.path.dirname(__file__), "../../model_prices_and_context_window.json"
    )
    # Read as UTF-8 explicitly so the test does not depend on the platform's
    # locale encoding.
    with open(json_path, encoding="utf-8") as f:
        model_data = json.load(f)

    jp_info = model_data["jp.anthropic.claude-sonnet-4-6"]
    au_info = model_data["au.anthropic.claude-sonnet-4-6"]

    pricing_fields = [
        "input_cost_per_token",
        "output_cost_per_token",
        "cache_creation_input_token_cost",
        "cache_read_input_token_cost",
    ]
    for field in pricing_fields:
        assert jp_info[field] == au_info[field], (
            f"{field} mismatch between jp. and au. variants: "
            f"jp={jp_info[field]}, au={au_info[field]}"
        )
|
||||
Loading…
Reference in New Issue
Block a user