fix(vertex): strip output_config.effort for Vertex Claude models that reject it (Haiku 4.5) (#29585)

* fix(vertex): strip output_config.effort for models that reject it

  Haiku 4.5 on Vertex AI does not support output_config.effort and 400s with "output_config.effort: Extra inputs are not permitted". PR #27074 emptied VERTEX_UNSUPPORTED_OUTPUT_CONFIG_KEYS so effort would forward for Opus/Sonnet 4.6+, but that made forwarding unconditional across every Vertex Anthropic model, including ones that don't support it. Claude Code injects effort into its default Messages payload, so `claude --model claude-haiku-4.5` started failing.

  Make the sanitizer model-aware: drop output_config.effort for models that don't advertise output_config support (or any reasoning effort level) while forwarding it for those that do. The fix covers both the chat-completion and Messages pass-through transformation paths since they share the helper.

* chore(vertex): log at debug when dropping unsupported output_config.effort

  Operators pointing at an unregistered Vertex Claude alias that does support effort would otherwise see effort stripped with no signal. Debug level keeps the message out of normal logs, since Claude Code sends effort on every request.
2026-06-03 11:34:04 -07:00 · 2026-06-03 11:34:04 -07:00 · cc55662e5f
commit cc55662e5f
parent 34293fa80a
5 changed files with 112 additions and 23 deletions
--- a/litellm/llms/vertex_ai/vertex_ai_partner_models/anthropic/experimental_pass_through/transformation.py
+++ b/litellm/llms/vertex_ai/vertex_ai_partner_models/anthropic/experimental_pass_through/transformation.py
@ -159,6 +159,6 @@ class VertexAIPartnerModelsAnthropicMessagesConfig(AnthropicMessagesConfig, Vert
            "model", None
        )  # do not pass model in request body to vertex ai

-        sanitize_vertex_anthropic_output_params(anthropic_messages_request)
+        sanitize_vertex_anthropic_output_params(anthropic_messages_request, model)

        return anthropic_messages_request
--- a/litellm/llms/vertex_ai/vertex_ai_partner_models/anthropic/output_params_utils.py
+++ b/litellm/llms/vertex_ai/vertex_ai_partner_models/anthropic/output_params_utils.py
@ -10,23 +10,38 @@ import; extracting the helper into a leaf module resolves the warning and
 keeps the parent module's import surface narrow.
 """

-# Keys inside ``output_config`` that Vertex AI Claude does not accept.
-# Add an entry only when a 400 "Extra inputs are not permitted" is
-# reproducible against the live Vertex endpoint.
+# Keys inside ``output_config`` that Vertex AI Claude rejects regardless of
+# the target model. Add an entry only when a 400 "Extra inputs are not
+# permitted" is reproducible against the live Vertex endpoint for every model.
 VERTEX_UNSUPPORTED_OUTPUT_CONFIG_KEYS: frozenset = frozenset()


-def sanitize_vertex_anthropic_output_params(data: dict) -> None:
+def _model_accepts_output_config_effort(model: str) -> bool:
+    """Whether ``model`` accepts ``output_config.effort`` on Vertex.
+
+    Opus/Sonnet 4.6+ advertise ``supports_output_config`` (or a reasoning
+    effort level) and accept it; Haiku 4.5 advertises neither and 400s on
+    ``output_config.effort: Extra inputs are not permitted``. Imported lazily
+    so this stays a leaf module (see module docstring).
+    """
+    from litellm.llms.anthropic.chat.transformation import AnthropicConfig
+
+    return AnthropicConfig._model_supports_effort_param(model)
+
+
+def sanitize_vertex_anthropic_output_params(data: dict, model: str) -> None:
    """
    Strip Vertex-unsupported keys from ``output_config`` /
    ``output_format`` in-place; forward whatever remains.

    Behavior:
-      * ``output_config`` containing only unsupported keys (e.g. ``effort``
-        alone) is removed entirely so the request body has no empty dict.
-      * ``output_config`` containing a mix of supported + unsupported keys
-        has the unsupported subset filtered out and the rest forwarded.
-      * ``output_config`` that is supported in full passes through unchanged.
+      * ``output_config.effort`` is dropped for models that don't accept it
+        (e.g. Haiku 4.5) and forwarded for those that do (Opus/Sonnet 4.6+).
+        Clients like Claude Code inject it into every Messages payload, so the
+        gate has to live here rather than rely on the caller.
+      * Keys in ``VERTEX_UNSUPPORTED_OUTPUT_CONFIG_KEYS`` are always filtered.
+      * ``output_config`` left empty after filtering is removed so the request
+        body has no empty dict.
      * ``output_format`` is forwarded as-is (Vertex AI Claude accepts it).
      * Non-dict values for ``output_config`` are dropped to avoid sending
        malformed payloads downstream.
@ -37,11 +52,19 @@ def sanitize_vertex_anthropic_output_params(data: dict) -> None:
    if not isinstance(output_config, dict):
        data.pop("output_config", None)
        return
-    sanitized = {
-        k: v
-        for k, v in output_config.items()
-        if k not in VERTEX_UNSUPPORTED_OUTPUT_CONFIG_KEYS
-    }
+
+    drop_keys = set(VERTEX_UNSUPPORTED_OUTPUT_CONFIG_KEYS)
+    if "effort" in output_config and not _model_accepts_output_config_effort(model):
+        from litellm._logging import verbose_logger
+
+        verbose_logger.debug(
+            "Dropping unsupported output_config.effort for vertex_ai model=%s "
+            "(no supports_output_config in the model map)",
+            model,
+        )
+        drop_keys.add("effort")
+
+    sanitized = {k: v for k, v in output_config.items() if k not in drop_keys}
    if sanitized:
        data["output_config"] = sanitized
    else:
--- a/litellm/llms/vertex_ai/vertex_ai_partner_models/anthropic/transformation.py
+++ b/litellm/llms/vertex_ai/vertex_ai_partner_models/anthropic/transformation.py
@ -106,7 +106,7 @@ class VertexAIAnthropicConfig(AnthropicConfig):

        data.pop("model", None)  # vertex anthropic doesn't accept 'model' parameter

-        sanitize_vertex_anthropic_output_params(data)
+        sanitize_vertex_anthropic_output_params(data, model)

        tools = optional_params.get("tools")
        tool_search_used = self.is_tool_search_used(tools)
--- a/tests/test_litellm/llms/vertex_ai/vertex_ai_partner_models/anthropic/test_vertex_ai_partner_models_anthropic_messages_config.py
+++ b/tests/test_litellm/llms/vertex_ai/vertex_ai_partner_models/anthropic/test_vertex_ai_partner_models_anthropic_messages_config.py
@ -313,6 +313,40 @@ def test_transform_anthropic_messages_request_removes_scope_from_cache_control()
    assert result["messages"][0]["content"][0]["cache_control"]["type"] == "ephemeral"


+def test_messages_request_strips_effort_for_haiku_45():
+    """Regression: Claude Code (``claude --model claude-haiku-4.5``) sends
+    ``output_config.effort`` in its default Messages payload. Haiku 4.5 on
+    Vertex rejects it with 400 ``output_config.effort: Extra inputs are not
+    permitted``, so the pass-through must strip it for Haiku while keeping it
+    for Opus/Sonnet 4.6+."""
+    config = VertexAIPartnerModelsAnthropicMessagesConfig()
+    messages = [{"role": "user", "content": "Hello"}]
+
+    haiku_result = config.transform_anthropic_messages_request(
+        model="claude-haiku-4-5@20251001",
+        messages=messages,
+        anthropic_messages_optional_request_params={
+            "max_tokens": 1024,
+            "output_config": {"effort": "high"},
+        },
+        litellm_params=GenericLiteLLMParams(),
+        headers={},
+    )
+    assert "output_config" not in haiku_result
+
+    opus_result = config.transform_anthropic_messages_request(
+        model="claude-opus-4-6",
+        messages=messages,
+        anthropic_messages_optional_request_params={
+            "max_tokens": 1024,
+            "output_config": {"effort": "high"},
+        },
+        litellm_params=GenericLiteLLMParams(),
+        headers={},
+    )
+    assert opus_result["output_config"] == {"effort": "high"}
+
+
 def test_provider_config_manager_reuses_vertex_anthropic_messages_config_instance():
    """
    Regression test: repeated provider config lookups for the same Vertex Claude model
--- a/tests/test_litellm/llms/vertex_ai/vertex_ai_partner_models/anthropic/test_vertex_ai_partner_models_anthropic_transformation.py
+++ b/tests/test_litellm/llms/vertex_ai/vertex_ai_partner_models/anthropic/test_vertex_ai_partner_models_anthropic_transformation.py
@ -675,28 +675,60 @@ def test_sanitize_vertex_anthropic_output_params_unit():
        sanitize_vertex_anthropic_output_params,
    )

+    supported = "claude-opus-4-6"
+
    # No-op when output_config absent.
    data: dict = {"max_tokens": 8}
-    sanitize_vertex_anthropic_output_params(data)
+    sanitize_vertex_anthropic_output_params(data, supported)
    assert data == {"max_tokens": 8}

-    # Effort-only → preserved (Vertex 4.6/4.7 accept it on rawPredict).
+    # Effort-only on a supporting model → preserved (Vertex 4.6/4.7 accept it).
    data = {"output_config": {"effort": "high"}}
-    sanitize_vertex_anthropic_output_params(data)
+    sanitize_vertex_anthropic_output_params(data, supported)
    assert data["output_config"] == {"effort": "high"}

    # Format-only → preserved unchanged.
    fmt = {"format": {"type": "json_schema", "schema": {"type": "object"}}}
    data = {"output_config": dict(fmt)}
-    sanitize_vertex_anthropic_output_params(data)
+    sanitize_vertex_anthropic_output_params(data, supported)
    assert data["output_config"] == fmt

-    # Mixed → both effort and format kept (no current Vertex-unsupported keys).
+    # Mixed on a supporting model → both effort and format kept.
    data = {"output_config": {"format": fmt["format"], "effort": "high"}}
-    sanitize_vertex_anthropic_output_params(data)
+    sanitize_vertex_anthropic_output_params(data, supported)
    assert data["output_config"] == {"format": fmt["format"], "effort": "high"}

    # Non-dict → dropped defensively.
    data = {"output_config": "garbage"}
-    sanitize_vertex_anthropic_output_params(data)
+    sanitize_vertex_anthropic_output_params(data, supported)
    assert "output_config" not in data
+
+
+def test_sanitize_strips_effort_for_haiku_45():
+    """Regression: Haiku 4.5 on Vertex does not support ``output_config.effort``
+    and 400s with ``Extra inputs are not permitted``. Claude Code injects
+    ``effort`` into every Messages payload, so the helper must strip it for
+    models that don't advertise output_config support while leaving it intact
+    for Opus/Sonnet 4.6+."""
+    from litellm.llms.vertex_ai.vertex_ai_partner_models.anthropic.output_params_utils import (
+        sanitize_vertex_anthropic_output_params,
+    )
+
+    haiku = "claude-haiku-4-5@20251001"
+
+    # Effort-only → output_config removed entirely (no empty dict on the wire).
+    data: dict = {"output_config": {"effort": "high"}, "max_tokens": 8}
+    sanitize_vertex_anthropic_output_params(data, haiku)
+    assert "output_config" not in data
+    assert data["max_tokens"] == 8
+
+    # Mixed → effort stripped, format preserved.
+    fmt = {"type": "json_schema", "schema": {"type": "object"}}
+    data = {"output_config": {"effort": "high", "format": fmt}}
+    sanitize_vertex_anthropic_output_params(data, haiku)
+    assert data["output_config"] == {"format": fmt}
+
+    # Same payload on a supporting model keeps effort untouched.
+    data = {"output_config": {"effort": "high"}}
+    sanitize_vertex_anthropic_output_params(data, "vertex_ai/claude-opus-4-6")
+    assert data["output_config"] == {"effort": "high"}