diff --git a/litellm/llms/vertex_ai/vertex_ai_partner_models/anthropic/experimental_pass_through/transformation.py b/litellm/llms/vertex_ai/vertex_ai_partner_models/anthropic/experimental_pass_through/transformation.py index 4be4c2d5e7..1e92754857 100644 --- a/litellm/llms/vertex_ai/vertex_ai_partner_models/anthropic/experimental_pass_through/transformation.py +++ b/litellm/llms/vertex_ai/vertex_ai_partner_models/anthropic/experimental_pass_through/transformation.py @@ -159,6 +159,6 @@ class VertexAIPartnerModelsAnthropicMessagesConfig(AnthropicMessagesConfig, Vert "model", None ) # do not pass model in request body to vertex ai - sanitize_vertex_anthropic_output_params(anthropic_messages_request) + sanitize_vertex_anthropic_output_params(anthropic_messages_request, model) return anthropic_messages_request diff --git a/litellm/llms/vertex_ai/vertex_ai_partner_models/anthropic/output_params_utils.py b/litellm/llms/vertex_ai/vertex_ai_partner_models/anthropic/output_params_utils.py index a33ad67778..280cc1c888 100644 --- a/litellm/llms/vertex_ai/vertex_ai_partner_models/anthropic/output_params_utils.py +++ b/litellm/llms/vertex_ai/vertex_ai_partner_models/anthropic/output_params_utils.py @@ -10,23 +10,38 @@ import; extracting the helper into a leaf module resolves the warning and keeps the parent module's import surface narrow. """ -# Keys inside ``output_config`` that Vertex AI Claude does not accept. -# Add an entry only when a 400 "Extra inputs are not permitted" is -# reproducible against the live Vertex endpoint. +# Keys inside ``output_config`` that Vertex AI Claude rejects regardless of +# the target model. Add an entry only when a 400 "Extra inputs are not +# permitted" is reproducible against the live Vertex endpoint for every model. VERTEX_UNSUPPORTED_OUTPUT_CONFIG_KEYS: frozenset = frozenset() -def sanitize_vertex_anthropic_output_params(data: dict) -> None: +def _model_accepts_output_config_effort(model: str) -> bool: + """Whether ``model`` accepts ``output_config.effort`` on Vertex. + + Opus/Sonnet 4.6+ advertise ``supports_output_config`` (or a reasoning + effort level) and accept it; Haiku 4.5 advertises neither and 400s on + ``output_config.effort: Extra inputs are not permitted``. Imported lazily + so this stays a leaf module (see module docstring). + """ + from litellm.llms.anthropic.chat.transformation import AnthropicConfig + + return AnthropicConfig._model_supports_effort_param(model) + + +def sanitize_vertex_anthropic_output_params(data: dict, model: str) -> None: """ Strip Vertex-unsupported keys from ``output_config`` / ``output_format`` in-place; forward whatever remains. Behavior: - * ``output_config`` containing only unsupported keys (e.g. ``effort`` - alone) is removed entirely so the request body has no empty dict. - * ``output_config`` containing a mix of supported + unsupported keys - has the unsupported subset filtered out and the rest forwarded. - * ``output_config`` that is supported in full passes through unchanged. + * ``output_config.effort`` is dropped for models that don't accept it + (e.g. Haiku 4.5) and forwarded for those that do (Opus/Sonnet 4.6+). + Clients like Claude Code inject it into every Messages payload, so the + gate has to live here rather than rely on the caller. + * Keys in ``VERTEX_UNSUPPORTED_OUTPUT_CONFIG_KEYS`` are always filtered. + * ``output_config`` left empty after filtering is removed so the request + body has no empty dict. * ``output_format`` is forwarded as-is (Vertex AI Claude accepts it). * Non-dict values for ``output_config`` are dropped to avoid sending malformed payloads downstream. @@ -37,11 +52,19 @@ def sanitize_vertex_anthropic_output_params(data: dict) -> None: if not isinstance(output_config, dict): data.pop("output_config", None) return - sanitized = { - k: v - for k, v in output_config.items() - if k not in VERTEX_UNSUPPORTED_OUTPUT_CONFIG_KEYS - } + + drop_keys = set(VERTEX_UNSUPPORTED_OUTPUT_CONFIG_KEYS) + if "effort" in output_config and not _model_accepts_output_config_effort(model): + from litellm._logging import verbose_logger + + verbose_logger.debug( + "Dropping unsupported output_config.effort for vertex_ai model=%s " + "(no supports_output_config in the model map)", + model, + ) + drop_keys.add("effort") + + sanitized = {k: v for k, v in output_config.items() if k not in drop_keys} if sanitized: data["output_config"] = sanitized else: diff --git a/litellm/llms/vertex_ai/vertex_ai_partner_models/anthropic/transformation.py b/litellm/llms/vertex_ai/vertex_ai_partner_models/anthropic/transformation.py index 4627d9f6df..c852909d47 100644 --- a/litellm/llms/vertex_ai/vertex_ai_partner_models/anthropic/transformation.py +++ b/litellm/llms/vertex_ai/vertex_ai_partner_models/anthropic/transformation.py @@ -106,7 +106,7 @@ class VertexAIAnthropicConfig(AnthropicConfig): data.pop("model", None) # vertex anthropic doesn't accept 'model' parameter - sanitize_vertex_anthropic_output_params(data) + sanitize_vertex_anthropic_output_params(data, model) tools = optional_params.get("tools") tool_search_used = self.is_tool_search_used(tools) diff --git a/tests/test_litellm/llms/vertex_ai/vertex_ai_partner_models/anthropic/test_vertex_ai_partner_models_anthropic_messages_config.py b/tests/test_litellm/llms/vertex_ai/vertex_ai_partner_models/anthropic/test_vertex_ai_partner_models_anthropic_messages_config.py index b8cd65d3c9..6f4bb4e59c 100644 --- a/tests/test_litellm/llms/vertex_ai/vertex_ai_partner_models/anthropic/test_vertex_ai_partner_models_anthropic_messages_config.py +++ b/tests/test_litellm/llms/vertex_ai/vertex_ai_partner_models/anthropic/test_vertex_ai_partner_models_anthropic_messages_config.py @@ -313,6 +313,40 @@ def test_transform_anthropic_messages_request_removes_scope_from_cache_control() assert result["messages"][0]["content"][0]["cache_control"]["type"] == "ephemeral" +def test_messages_request_strips_effort_for_haiku_45(): + """Regression: Claude Code (``claude --model claude-haiku-4.5``) sends + ``output_config.effort`` in its default Messages payload. Haiku 4.5 on + Vertex rejects it with 400 ``output_config.effort: Extra inputs are not + permitted``, so the pass-through must strip it for Haiku while keeping it + for Opus/Sonnet 4.6+.""" + config = VertexAIPartnerModelsAnthropicMessagesConfig() + messages = [{"role": "user", "content": "Hello"}] + + haiku_result = config.transform_anthropic_messages_request( + model="claude-haiku-4-5@20251001", + messages=messages, + anthropic_messages_optional_request_params={ + "max_tokens": 1024, + "output_config": {"effort": "high"}, + }, + litellm_params=GenericLiteLLMParams(), + headers={}, + ) + assert "output_config" not in haiku_result + + opus_result = config.transform_anthropic_messages_request( + model="claude-opus-4-6", + messages=messages, + anthropic_messages_optional_request_params={ + "max_tokens": 1024, + "output_config": {"effort": "high"}, + }, + litellm_params=GenericLiteLLMParams(), + headers={}, + ) + assert opus_result["output_config"] == {"effort": "high"} + + def test_provider_config_manager_reuses_vertex_anthropic_messages_config_instance(): """ Regression test: repeated provider config lookups for the same Vertex Claude model diff --git a/tests/test_litellm/llms/vertex_ai/vertex_ai_partner_models/anthropic/test_vertex_ai_partner_models_anthropic_transformation.py b/tests/test_litellm/llms/vertex_ai/vertex_ai_partner_models/anthropic/test_vertex_ai_partner_models_anthropic_transformation.py index d89d09a4e6..ac2368130d 100644 --- a/tests/test_litellm/llms/vertex_ai/vertex_ai_partner_models/anthropic/test_vertex_ai_partner_models_anthropic_transformation.py +++ b/tests/test_litellm/llms/vertex_ai/vertex_ai_partner_models/anthropic/test_vertex_ai_partner_models_anthropic_transformation.py @@ -675,28 +675,60 @@ def test_sanitize_vertex_anthropic_output_params_unit(): sanitize_vertex_anthropic_output_params, ) + supported = "claude-opus-4-6" + # No-op when output_config absent. data: dict = {"max_tokens": 8} - sanitize_vertex_anthropic_output_params(data) + sanitize_vertex_anthropic_output_params(data, supported) assert data == {"max_tokens": 8} - # Effort-only → preserved (Vertex 4.6/4.7 accept it on rawPredict). + # Effort-only on a supporting model → preserved (Vertex 4.6/4.7 accept it). data = {"output_config": {"effort": "high"}} - sanitize_vertex_anthropic_output_params(data) + sanitize_vertex_anthropic_output_params(data, supported) assert data["output_config"] == {"effort": "high"} # Format-only → preserved unchanged. fmt = {"format": {"type": "json_schema", "schema": {"type": "object"}}} data = {"output_config": dict(fmt)} - sanitize_vertex_anthropic_output_params(data) + sanitize_vertex_anthropic_output_params(data, supported) assert data["output_config"] == fmt - # Mixed → both effort and format kept (no current Vertex-unsupported keys). + # Mixed on a supporting model → both effort and format kept. data = {"output_config": {"format": fmt["format"], "effort": "high"}} - sanitize_vertex_anthropic_output_params(data) + sanitize_vertex_anthropic_output_params(data, supported) assert data["output_config"] == {"format": fmt["format"], "effort": "high"} # Non-dict → dropped defensively. data = {"output_config": "garbage"} - sanitize_vertex_anthropic_output_params(data) + sanitize_vertex_anthropic_output_params(data, supported) assert "output_config" not in data + + +def test_sanitize_strips_effort_for_haiku_45(): + """Regression: Haiku 4.5 on Vertex does not support ``output_config.effort`` + and 400s with ``Extra inputs are not permitted``. Claude Code injects + ``effort`` into every Messages payload, so the helper must strip it for + models that don't advertise output_config support while leaving it intact + for Opus/Sonnet 4.6+.""" + from litellm.llms.vertex_ai.vertex_ai_partner_models.anthropic.output_params_utils import ( + sanitize_vertex_anthropic_output_params, + ) + + haiku = "claude-haiku-4-5@20251001" + + # Effort-only → output_config removed entirely (no empty dict on the wire). + data: dict = {"output_config": {"effort": "high"}, "max_tokens": 8} + sanitize_vertex_anthropic_output_params(data, haiku) + assert "output_config" not in data + assert data["max_tokens"] == 8 + + # Mixed → effort stripped, format preserved. + fmt = {"type": "json_schema", "schema": {"type": "object"}} + data = {"output_config": {"effort": "high", "format": fmt}} + sanitize_vertex_anthropic_output_params(data, haiku) + assert data["output_config"] == {"format": fmt} + + # Same payload on a supporting model keeps effort untouched. + data = {"output_config": {"effort": "high"}} + sanitize_vertex_anthropic_output_params(data, "vertex_ai/claude-opus-4-6") + assert data["output_config"] == {"effort": "high"}