fix(vertex): strip output_config.effort for Vertex Claude models that reject it (Haiku 4.5) (#29585)

* fix(vertex): strip output_config.effort for models that reject it

Haiku 4.5 on Vertex AI does not support output_config.effort and 400s with
"output_config.effort: Extra inputs are not permitted". PR #27074 emptied
VERTEX_UNSUPPORTED_OUTPUT_CONFIG_KEYS so effort would forward for Opus/Sonnet
4.6+, but that made the forwarding unconditional across every Vertex Anthropic
model, including ones that don't support it. Claude Code injects effort into
its default Messages payload, so `claude --model claude-haiku-4.5` started
failing.

Make the sanitizer model-aware: drop output_config.effort for models that
don't advertise output_config support (or any reasoning effort level) while
forwarding it for those that do. The fix covers both the chat-completion and
Messages pass-through transformation paths since they share the helper.

* chore(vertex): log at debug when dropping unsupported output_config.effort

Operators pointing at an unregistered Vertex Claude alias that does support
effort would otherwise see it stripped with no signal. Debug level keeps it
out of normal logs since Claude Code sends effort on every request.
This commit is contained in:
Mateo Wang 2026-06-03 11:34:04 -07:00 committed by GitHub
parent 34293fa80a
commit cc55662e5f
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 112 additions and 23 deletions

View File

@ -159,6 +159,6 @@ class VertexAIPartnerModelsAnthropicMessagesConfig(AnthropicMessagesConfig, Vert
"model", None
) # do not pass model in request body to vertex ai
sanitize_vertex_anthropic_output_params(anthropic_messages_request)
sanitize_vertex_anthropic_output_params(anthropic_messages_request, model)
return anthropic_messages_request

View File

@ -10,23 +10,38 @@ import; extracting the helper into a leaf module resolves the warning and
keeps the parent module's import surface narrow.
"""
# Keys inside ``output_config`` that Vertex AI Claude does not accept.
# Add an entry only when a 400 "Extra inputs are not permitted" is
# reproducible against the live Vertex endpoint.
# Keys inside ``output_config`` that Vertex AI Claude rejects regardless of
# the target model. Add an entry only when a 400 "Extra inputs are not
# permitted" is reproducible against the live Vertex endpoint for every model.
VERTEX_UNSUPPORTED_OUTPUT_CONFIG_KEYS: frozenset = frozenset()
def sanitize_vertex_anthropic_output_params(data: dict) -> None:
def _model_accepts_output_config_effort(model: str) -> bool:
    """Report whether ``output_config.effort`` may be sent to ``model`` on Vertex.

    The answer is delegated to ``AnthropicConfig._model_supports_effort_param``.
    Opus/Sonnet 4.6+ advertise ``supports_output_config`` (or a reasoning
    effort level) and accept the key; Haiku 4.5 advertises neither and fails
    with a 400 ``output_config.effort: Extra inputs are not permitted``.

    Args:
        model: Model name handed to the Anthropic capability check.

    Returns:
        The result of the capability check for ``model``.
    """
    # Imported inside the function so this module stays a leaf module
    # (see the module docstring).
    from litellm.llms.anthropic.chat.transformation import AnthropicConfig

    accepts_effort = AnthropicConfig._model_supports_effort_param(model)
    return accepts_effort
def sanitize_vertex_anthropic_output_params(data: dict, model: str) -> None:
"""
Strip Vertex-unsupported keys from ``output_config`` /
``output_format`` in-place; forward whatever remains.
Behavior:
* ``output_config`` containing only unsupported keys (e.g. ``effort``
alone) is removed entirely so the request body has no empty dict.
* ``output_config`` containing a mix of supported + unsupported keys
has the unsupported subset filtered out and the rest forwarded.
* ``output_config`` that is supported in full passes through unchanged.
* ``output_config.effort`` is dropped for models that don't accept it
(e.g. Haiku 4.5) and forwarded for those that do (Opus/Sonnet 4.6+).
Clients like Claude Code inject it into every Messages payload, so the
gate has to live here rather than rely on the caller.
* Keys in ``VERTEX_UNSUPPORTED_OUTPUT_CONFIG_KEYS`` are always filtered.
* ``output_config`` left empty after filtering is removed so the request
body has no empty dict.
* ``output_format`` is forwarded as-is (Vertex AI Claude accepts it).
* Non-dict values for ``output_config`` are dropped to avoid sending
malformed payloads downstream.
@ -37,11 +52,19 @@ def sanitize_vertex_anthropic_output_params(data: dict) -> None:
if not isinstance(output_config, dict):
data.pop("output_config", None)
return
sanitized = {
k: v
for k, v in output_config.items()
if k not in VERTEX_UNSUPPORTED_OUTPUT_CONFIG_KEYS
}
drop_keys = set(VERTEX_UNSUPPORTED_OUTPUT_CONFIG_KEYS)
if "effort" in output_config and not _model_accepts_output_config_effort(model):
from litellm._logging import verbose_logger
verbose_logger.debug(
"Dropping unsupported output_config.effort for vertex_ai model=%s "
"(no supports_output_config in the model map)",
model,
)
drop_keys.add("effort")
sanitized = {k: v for k, v in output_config.items() if k not in drop_keys}
if sanitized:
data["output_config"] = sanitized
else:

View File

@ -106,7 +106,7 @@ class VertexAIAnthropicConfig(AnthropicConfig):
data.pop("model", None) # vertex anthropic doesn't accept 'model' parameter
sanitize_vertex_anthropic_output_params(data)
sanitize_vertex_anthropic_output_params(data, model)
tools = optional_params.get("tools")
tool_search_used = self.is_tool_search_used(tools)

View File

@ -313,6 +313,40 @@ def test_transform_anthropic_messages_request_removes_scope_from_cache_control()
assert result["messages"][0]["content"][0]["cache_control"]["type"] == "ephemeral"
def test_messages_request_strips_effort_for_haiku_45():
    """Regression: the Messages pass-through gates ``output_config.effort`` by model.

    Claude Code (``claude --model claude-haiku-4.5``) sends
    ``output_config.effort`` in its default Messages payload. Haiku 4.5 on
    Vertex rejects it with 400 ``output_config.effort: Extra inputs are not
    permitted``, so the transformation must strip it for Haiku while keeping
    it for Opus/Sonnet 4.6+.
    """
    config = VertexAIPartnerModelsAnthropicMessagesConfig()
    shared_messages = [{"role": "user", "content": "Hello"}]

    def run_transform(model_name):
        # Build a fresh optional-params payload per call; only the model varies.
        return config.transform_anthropic_messages_request(
            model=model_name,
            messages=shared_messages,
            anthropic_messages_optional_request_params={
                "max_tokens": 1024,
                "output_config": {"effort": "high"},
            },
            litellm_params=GenericLiteLLMParams(),
            headers={},
        )

    # Haiku 4.5: effort was the only key, so output_config disappears entirely.
    stripped_request = run_transform("claude-haiku-4-5@20251001")
    assert "output_config" not in stripped_request

    # Opus 4.6: effort is forwarded untouched.
    forwarded_request = run_transform("claude-opus-4-6")
    assert forwarded_request["output_config"] == {"effort": "high"}
def test_provider_config_manager_reuses_vertex_anthropic_messages_config_instance():
"""
Regression test: repeated provider config lookups for the same Vertex Claude model

View File

@ -675,28 +675,60 @@ def test_sanitize_vertex_anthropic_output_params_unit():
sanitize_vertex_anthropic_output_params,
)
supported = "claude-opus-4-6"
# No-op when output_config absent.
data: dict = {"max_tokens": 8}
sanitize_vertex_anthropic_output_params(data)
sanitize_vertex_anthropic_output_params(data, supported)
assert data == {"max_tokens": 8}
# Effort-only → preserved (Vertex 4.6/4.7 accept it on rawPredict).
# Effort-only on a supporting model → preserved (Vertex 4.6/4.7 accept it).
data = {"output_config": {"effort": "high"}}
sanitize_vertex_anthropic_output_params(data)
sanitize_vertex_anthropic_output_params(data, supported)
assert data["output_config"] == {"effort": "high"}
# Format-only → preserved unchanged.
fmt = {"format": {"type": "json_schema", "schema": {"type": "object"}}}
data = {"output_config": dict(fmt)}
sanitize_vertex_anthropic_output_params(data)
sanitize_vertex_anthropic_output_params(data, supported)
assert data["output_config"] == fmt
# Mixed → both effort and format kept (no current Vertex-unsupported keys).
# Mixed on a supporting model → both effort and format kept.
data = {"output_config": {"format": fmt["format"], "effort": "high"}}
sanitize_vertex_anthropic_output_params(data)
sanitize_vertex_anthropic_output_params(data, supported)
assert data["output_config"] == {"format": fmt["format"], "effort": "high"}
# Non-dict → dropped defensively.
data = {"output_config": "garbage"}
sanitize_vertex_anthropic_output_params(data)
sanitize_vertex_anthropic_output_params(data, supported)
assert "output_config" not in data
def test_sanitize_strips_effort_for_haiku_45():
    """Regression: the sanitizer drops ``output_config.effort`` for Haiku 4.5.

    Haiku 4.5 on Vertex does not support ``output_config.effort`` and 400s
    with ``Extra inputs are not permitted``. Claude Code injects ``effort``
    into every Messages payload, so the helper must strip it for models that
    don't advertise output_config support while leaving it intact for
    Opus/Sonnet 4.6+.
    """
    from litellm.llms.vertex_ai.vertex_ai_partner_models.anthropic.output_params_utils import (
        sanitize_vertex_anthropic_output_params,
    )

    unsupported_model = "claude-haiku-4-5@20251001"
    json_format = {"type": "json_schema", "schema": {"type": "object"}}

    # Effort-only: output_config is removed entirely (no empty dict on the wire).
    effort_only: dict = {"output_config": {"effort": "high"}, "max_tokens": 8}
    sanitize_vertex_anthropic_output_params(effort_only, unsupported_model)
    assert "output_config" not in effort_only
    assert effort_only["max_tokens"] == 8

    # Mixed: effort is stripped, format is preserved.
    mixed = {"output_config": {"effort": "high", "format": json_format}}
    sanitize_vertex_anthropic_output_params(mixed, unsupported_model)
    assert mixed["output_config"] == {"format": json_format}

    # The same effort-only payload on a supporting model keeps effort untouched.
    supported_payload = {"output_config": {"effort": "high"}}
    sanitize_vertex_anthropic_output_params(
        supported_payload, "vertex_ai/claude-opus-4-6"
    )
    assert supported_payload["output_config"] == {"effort": "high"}