From cc55662e5fdc6af4f118a1f3ff885068b75450d9 Mon Sep 17 00:00:00 2001
From: Mateo Wang <277851410+mateo-berri@users.noreply.github.com>
Date: Wed, 3 Jun 2026 11:34:04 -0700
Subject: [PATCH] fix(vertex): strip output_config.effort for Vertex Claude
 models that reject it (Haiku 4.5) (#29585)

* fix(vertex): strip output_config.effort for models that reject it

Haiku 4.5 on Vertex AI does not support output_config.effort and 400s with
"output_config.effort: Extra inputs are not permitted". PR #27074 emptied
VERTEX_UNSUPPORTED_OUTPUT_CONFIG_KEYS so effort would forward for Opus/Sonnet
4.6+, but that made forwarding unconditional across every Vertex Anthropic
model, including ones that don't support it. Claude Code injects effort into
its default Messages payload, so `claude --model claude-haiku-4.5` started
failing.

Make the sanitizer model-aware: drop output_config.effort for models that
advertise neither output_config support nor any reasoning effort level, while
forwarding it for those that do. The fix covers both the chat-completion and
Messages pass-through transformation paths since they share the helper.

* chore(vertex): log at debug when dropping unsupported output_config.effort

Operators using an unregistered Vertex Claude alias whose model does support
effort would otherwise see it stripped with no signal. Debug level keeps it
out of normal logs since Claude Code sends effort on every request.
---
 .../transformation.py                         |  2 +-
 .../anthropic/output_params_utils.py          | 51 ++++++++++++++-----
 .../anthropic/transformation.py               |  2 +-
 ...artner_models_anthropic_messages_config.py | 34 +++++++++++++
 ...partner_models_anthropic_transformation.py | 46 ++++++++++++++---
 5 files changed, 112 insertions(+), 23 deletions(-)

diff --git a/litellm/llms/vertex_ai/vertex_ai_partner_models/anthropic/experimental_pass_through/transformation.py b/litellm/llms/vertex_ai/vertex_ai_partner_models/anthropic/experimental_pass_through/transformation.py
index 4be4c2d5e7..1e92754857 100644
--- a/litellm/llms/vertex_ai/vertex_ai_partner_models/anthropic/experimental_pass_through/transformation.py
+++ b/litellm/llms/vertex_ai/vertex_ai_partner_models/anthropic/experimental_pass_through/transformation.py
@@ -159,6 +159,6 @@ class VertexAIPartnerModelsAnthropicMessagesConfig(AnthropicMessagesConfig, Vert
             "model", None
         )  # do not pass model in request body to vertex ai
 
-        sanitize_vertex_anthropic_output_params(anthropic_messages_request)
+        sanitize_vertex_anthropic_output_params(anthropic_messages_request, model)
 
         return anthropic_messages_request
diff --git a/litellm/llms/vertex_ai/vertex_ai_partner_models/anthropic/output_params_utils.py b/litellm/llms/vertex_ai/vertex_ai_partner_models/anthropic/output_params_utils.py
index a33ad67778..280cc1c888 100644
--- a/litellm/llms/vertex_ai/vertex_ai_partner_models/anthropic/output_params_utils.py
+++ b/litellm/llms/vertex_ai/vertex_ai_partner_models/anthropic/output_params_utils.py
@@ -10,23 +10,38 @@ import; extracting the helper into a leaf module resolves the warning and
 keeps the parent module's import surface narrow.
 """
 
-# Keys inside ``output_config`` that Vertex AI Claude does not accept.
-# Add an entry only when a 400 "Extra inputs are not permitted" is
-# reproducible against the live Vertex endpoint.
+# Keys inside ``output_config`` that Vertex AI Claude rejects regardless of
+# the target model. Add an entry only when a 400 "Extra inputs are not
+# permitted" is reproducible against the live Vertex endpoint for every model.
 VERTEX_UNSUPPORTED_OUTPUT_CONFIG_KEYS: frozenset = frozenset()
 
 
-def sanitize_vertex_anthropic_output_params(data: dict) -> None:
+def _model_accepts_output_config_effort(model: str) -> bool:
+    """Whether ``model`` accepts ``output_config.effort`` on Vertex.
+
+    Opus/Sonnet 4.6+ advertise ``supports_output_config`` (or a reasoning
+    effort level) and accept it; Haiku 4.5 advertises neither and 400s on
+    ``output_config.effort: Extra inputs are not permitted``. Imported lazily
+    so this stays a leaf module (see module docstring).
+    """
+    from litellm.llms.anthropic.chat.transformation import AnthropicConfig
+
+    return AnthropicConfig._model_supports_effort_param(model)
+
+
+def sanitize_vertex_anthropic_output_params(data: dict, model: str) -> None:
     """
     Strip Vertex-unsupported keys from ``output_config`` /
     ``output_format`` in-place; forward whatever remains.
 
     Behavior:
-      * ``output_config`` containing only unsupported keys (e.g. ``effort``
-        alone) is removed entirely so the request body has no empty dict.
-      * ``output_config`` containing a mix of supported + unsupported keys
-        has the unsupported subset filtered out and the rest forwarded.
-      * ``output_config`` that is supported in full passes through unchanged.
+      * ``output_config.effort`` is dropped for models that don't accept it
+        (e.g. Haiku 4.5) and forwarded for those that do (Opus/Sonnet 4.6+).
+        Clients like Claude Code inject it into every Messages payload, so the
+        gate has to live here rather than rely on the caller.
+      * Keys in ``VERTEX_UNSUPPORTED_OUTPUT_CONFIG_KEYS`` are always filtered.
+      * ``output_config`` left empty after filtering is removed so the request
+        body has no empty dict.
       * ``output_format`` is forwarded as-is (Vertex AI Claude accepts it).
       * Non-dict values for ``output_config`` are dropped to avoid sending
         malformed payloads downstream.
@@ -37,11 +52,19 @@ def sanitize_vertex_anthropic_output_params(data: dict) -> None:
     if not isinstance(output_config, dict):
         data.pop("output_config", None)
         return
-    sanitized = {
-        k: v
-        for k, v in output_config.items()
-        if k not in VERTEX_UNSUPPORTED_OUTPUT_CONFIG_KEYS
-    }
+
+    drop_keys = set(VERTEX_UNSUPPORTED_OUTPUT_CONFIG_KEYS)
+    if "effort" in output_config and not _model_accepts_output_config_effort(model):
+        from litellm._logging import verbose_logger
+
+        verbose_logger.debug(
+            "Dropping unsupported output_config.effort for vertex_ai model=%s "
+            "(no supports_output_config in the model map)",
+            model,
+        )
+        drop_keys.add("effort")
+
+    sanitized = {k: v for k, v in output_config.items() if k not in drop_keys}
     if sanitized:
         data["output_config"] = sanitized
     else:
diff --git a/litellm/llms/vertex_ai/vertex_ai_partner_models/anthropic/transformation.py b/litellm/llms/vertex_ai/vertex_ai_partner_models/anthropic/transformation.py
index 4627d9f6df..c852909d47 100644
--- a/litellm/llms/vertex_ai/vertex_ai_partner_models/anthropic/transformation.py
+++ b/litellm/llms/vertex_ai/vertex_ai_partner_models/anthropic/transformation.py
@@ -106,7 +106,7 @@ class VertexAIAnthropicConfig(AnthropicConfig):
 
         data.pop("model", None)  # vertex anthropic doesn't accept 'model' parameter
 
-        sanitize_vertex_anthropic_output_params(data)
+        sanitize_vertex_anthropic_output_params(data, model)
 
         tools = optional_params.get("tools")
         tool_search_used = self.is_tool_search_used(tools)
diff --git a/tests/test_litellm/llms/vertex_ai/vertex_ai_partner_models/anthropic/test_vertex_ai_partner_models_anthropic_messages_config.py b/tests/test_litellm/llms/vertex_ai/vertex_ai_partner_models/anthropic/test_vertex_ai_partner_models_anthropic_messages_config.py
index b8cd65d3c9..6f4bb4e59c 100644
--- a/tests/test_litellm/llms/vertex_ai/vertex_ai_partner_models/anthropic/test_vertex_ai_partner_models_anthropic_messages_config.py
+++ b/tests/test_litellm/llms/vertex_ai/vertex_ai_partner_models/anthropic/test_vertex_ai_partner_models_anthropic_messages_config.py
@@ -313,6 +313,40 @@ def test_transform_anthropic_messages_request_removes_scope_from_cache_control()
     assert result["messages"][0]["content"][0]["cache_control"]["type"] == "ephemeral"
 
 
+def test_messages_request_strips_effort_for_haiku_45():
+    """Regression: Claude Code (``claude --model claude-haiku-4.5``) sends
+    ``output_config.effort`` in its default Messages payload. Haiku 4.5 on
+    Vertex rejects it with 400 ``output_config.effort: Extra inputs are not
+    permitted``, so the pass-through must strip it for Haiku while keeping it
+    for Opus/Sonnet 4.6+."""
+    config = VertexAIPartnerModelsAnthropicMessagesConfig()
+    messages = [{"role": "user", "content": "Hello"}]
+
+    haiku_result = config.transform_anthropic_messages_request(
+        model="claude-haiku-4-5@20251001",
+        messages=messages,
+        anthropic_messages_optional_request_params={
+            "max_tokens": 1024,
+            "output_config": {"effort": "high"},
+        },
+        litellm_params=GenericLiteLLMParams(),
+        headers={},
+    )
+    assert "output_config" not in haiku_result
+
+    opus_result = config.transform_anthropic_messages_request(
+        model="claude-opus-4-6",
+        messages=messages,
+        anthropic_messages_optional_request_params={
+            "max_tokens": 1024,
+            "output_config": {"effort": "high"},
+        },
+        litellm_params=GenericLiteLLMParams(),
+        headers={},
+    )
+    assert opus_result["output_config"] == {"effort": "high"}
+
+
 def test_provider_config_manager_reuses_vertex_anthropic_messages_config_instance():
     """
     Regression test: repeated provider config lookups for the same Vertex Claude model
diff --git a/tests/test_litellm/llms/vertex_ai/vertex_ai_partner_models/anthropic/test_vertex_ai_partner_models_anthropic_transformation.py b/tests/test_litellm/llms/vertex_ai/vertex_ai_partner_models/anthropic/test_vertex_ai_partner_models_anthropic_transformation.py
index d89d09a4e6..ac2368130d 100644
--- a/tests/test_litellm/llms/vertex_ai/vertex_ai_partner_models/anthropic/test_vertex_ai_partner_models_anthropic_transformation.py
+++ b/tests/test_litellm/llms/vertex_ai/vertex_ai_partner_models/anthropic/test_vertex_ai_partner_models_anthropic_transformation.py
@@ -675,28 +675,60 @@ def test_sanitize_vertex_anthropic_output_params_unit():
         sanitize_vertex_anthropic_output_params,
     )
 
+    supported = "claude-opus-4-6"
+
     # No-op when output_config absent.
     data: dict = {"max_tokens": 8}
-    sanitize_vertex_anthropic_output_params(data)
+    sanitize_vertex_anthropic_output_params(data, supported)
     assert data == {"max_tokens": 8}
 
-    # Effort-only → preserved (Vertex 4.6/4.7 accept it on rawPredict).
+    # Effort-only on a supporting model → preserved (Vertex 4.6/4.7 accept it).
     data = {"output_config": {"effort": "high"}}
-    sanitize_vertex_anthropic_output_params(data)
+    sanitize_vertex_anthropic_output_params(data, supported)
     assert data["output_config"] == {"effort": "high"}
 
     # Format-only → preserved unchanged.
     fmt = {"format": {"type": "json_schema", "schema": {"type": "object"}}}
     data = {"output_config": dict(fmt)}
-    sanitize_vertex_anthropic_output_params(data)
+    sanitize_vertex_anthropic_output_params(data, supported)
     assert data["output_config"] == fmt
 
-    # Mixed → both effort and format kept (no current Vertex-unsupported keys).
+    # Mixed on a supporting model → both effort and format kept.
     data = {"output_config": {"format": fmt["format"], "effort": "high"}}
-    sanitize_vertex_anthropic_output_params(data)
+    sanitize_vertex_anthropic_output_params(data, supported)
     assert data["output_config"] == {"format": fmt["format"], "effort": "high"}
 
     # Non-dict → dropped defensively.
     data = {"output_config": "garbage"}
-    sanitize_vertex_anthropic_output_params(data)
+    sanitize_vertex_anthropic_output_params(data, supported)
     assert "output_config" not in data
+
+
+def test_sanitize_strips_effort_for_haiku_45():
+    """Regression: Haiku 4.5 on Vertex does not support ``output_config.effort``
+    and 400s with ``Extra inputs are not permitted``. Claude Code injects
+    ``effort`` into every Messages payload, so the helper must strip it for
+    models that don't advertise output_config support while leaving it intact
+    for Opus/Sonnet 4.6+."""
+    from litellm.llms.vertex_ai.vertex_ai_partner_models.anthropic.output_params_utils import (
+        sanitize_vertex_anthropic_output_params,
+    )
+
+    haiku = "claude-haiku-4-5@20251001"
+
+    # Effort-only → output_config removed entirely (no empty dict on the wire).
+    data: dict = {"output_config": {"effort": "high"}, "max_tokens": 8}
+    sanitize_vertex_anthropic_output_params(data, haiku)
+    assert "output_config" not in data
+    assert data["max_tokens"] == 8
+
+    # Mixed → effort stripped, format preserved.
+    fmt = {"type": "json_schema", "schema": {"type": "object"}}
+    data = {"output_config": {"effort": "high", "format": fmt}}
+    sanitize_vertex_anthropic_output_params(data, haiku)
+    assert data["output_config"] == {"format": fmt}
+
+    # Same payload on a supporting model keeps effort untouched.
+    data = {"output_config": {"effort": "high"}}
+    sanitize_vertex_anthropic_output_params(data, "vertex_ai/claude-opus-4-6")
+    assert data["output_config"] == {"effort": "high"}