feat(anthropic): add Claude Opus 4.8 and prune reasoning-effort flags (#29238)

* feat(anthropic): add Claude Opus 4.8 and prune reasoning-effort flags

  Register claude-opus-4-8 across the anthropic/bedrock/vertex/azure cost-map entries, BEDROCK_CONVERSE_MODELS, and the setup-wizard provider list.

  Prune two reasoning-effort fields from the cost map:

  - Drop supports_minimal_reasoning_effort from the Claude fleet (58 entries). "minimal" is not a real Anthropic effort level (the API accepts only low/medium/high/xhigh/max), so LiteLLM degrades it to "low" regardless; the flag was inert and misleading on Anthropic.
  - Remove tool_use_system_prompt_tokens everywhere (103 entries). It is not in the ModelInfo type and is read by no production code.

  Update the affected config/schema tests; the reasoning-effort registry tests now assert the Claude fleet omits supports_minimal_reasoning_effort.

* fix(anthropic): recognize output_config effort after minimal-flag prune

  Pruning supports_minimal_reasoning_effort from the Claude fleet removed the only "supports effort param" marker from 11 Opus 4.5 / mythos-preview map entries that lack supports_output_config. _model_supports_effort_param then returned False for them, so output_config was wrongly dropped under drop_params=True -- regressing test_anthropic_model_supports_effort_param_recognizes_supporting_models for claude-opus-4-5-20251101 and the mythos preview.

  - _model_supports_effort_param now treats supports_output_config as a sufficient signal, matching the bedrock-invoke call sites that already check supports_output_config OR a reasoning-effort flag. Shared map lookup extracted into _supports_model_capability.
  - Add supports_output_config: true to the 11 Opus 4.5 / mythos entries that lost their only marker, restoring prior effort-forwarding behavior without re-adding the inert minimal flag.
2026-05-28 18:50:33 -07:00 · 2026-05-28 18:50:33 -07:00 · bae04591b2
commit bae04591b2
parent ffc113b428
11 changed files with 913 additions and 446 deletions
--- a/litellm/constants.py
+++ b/litellm/constants.py
@ -1147,6 +1147,7 @@ BEDROCK_CONVERSE_MODELS = [
    "openai.gpt-oss-120b-1:0",
    "anthropic.claude-haiku-4-5-20251001-v1:0",
    "anthropic.claude-sonnet-4-5-20250929-v1:0",
+    "anthropic.claude-opus-4-8",
    "anthropic.claude-opus-4-7",
    "anthropic.claude-opus-4-6-v1:0",
    "anthropic.claude-opus-4-6-v1",
--- a/litellm/llms/anthropic/chat/transformation.py
+++ b/litellm/llms/anthropic/chat/transformation.py
@ -337,13 +337,12 @@ class AnthropicConfig(AnthropicModelInfo, BaseConfig):
        )

    @staticmethod
-    def _supports_effort_level(model: str, level: str) -> bool:
-        """Check ``supports_{level}_reasoning_effort`` in the model map.
+    def _supports_model_capability(model: str, key: str) -> bool:
+        """Check a boolean capability ``key`` in the model map.

        Strips bedrock/vertex prefixes so a provider-routed Claude still
        resolves to the Anthropic model-map entry.
        """
-        key = f"supports_{level}_reasoning_effort"
        try:
            if _supports_factory(
                model=model,
@ -372,8 +371,6 @@ class AnthropicConfig(AnthropicModelInfo, BaseConfig):
        except Exception:
            pass
        try:
-            import litellm
-
            for cand in candidates:
                if cand in litellm.model_cost and (
                    litellm.model_cost[cand].get(key) is True
@ -383,6 +380,13 @@ class AnthropicConfig(AnthropicModelInfo, BaseConfig):
            pass
        return False

+    @staticmethod
+    def _supports_effort_level(model: str, level: str) -> bool:
+        """Check ``supports_{level}_reasoning_effort`` in the model map."""
+        return AnthropicConfig._supports_model_capability(
+            model, f"supports_{level}_reasoning_effort"
+        )
+
    @staticmethod
    def _validate_effort_for_model(model: str, effort: Optional[str]) -> Optional[str]:
        """Return ``None`` if ``effort`` is allowed on ``model``, else an error message."""
@ -400,7 +404,15 @@ class AnthropicConfig(AnthropicModelInfo, BaseConfig):

    @staticmethod
    def _model_supports_effort_param(model: str) -> bool:
-        """Whether the model accepts ``output_config.effort`` at all."""
+        """Whether the model accepts ``output_config.effort`` at all.
+
+        A model qualifies if its map entry advertises ``supports_output_config``
+        or any ``supports_*_reasoning_effort`` flag. The two are independent
+        signals: e.g. Claude Opus 4.5 supports ``output_config`` without
+        advertising a non-default (max/xhigh) effort level.
+        """
+        if AnthropicConfig._supports_model_capability(model, "supports_output_config"):
+            return True
        return any(
            AnthropicConfig._supports_effort_level(model, level)
            for level in ("low", "minimal", "medium", "high", "xhigh", "max")
--- a/litellm/model_prices_and_context_window_backup.json
+++ b/litellm/model_prices_and_context_window_backup.json
--- a/litellm/setup_wizard.py
+++ b/litellm/setup_wizard.py
@ -52,11 +52,12 @@ PROVIDERS: List[Dict] = [
    {
        "id": "anthropic",
        "name": "Anthropic",
-        "description": "Claude Opus 4.7, Opus 4.6, Sonnet 4.6, Haiku 4.5",
+        "description": "Claude Opus 4.8, Opus 4.7, Opus 4.6, Sonnet 4.6, Haiku 4.5",
        "env_key": "ANTHROPIC_API_KEY",
        "key_hint": "sk-ant-...",
        "test_model": "claude-haiku-4-5-20251001",
        "models": [
+            "claude-opus-4-8",
            "claude-opus-4-7",
            "claude-opus-4-6",
            "claude-sonnet-4-6",
--- a/model_prices_and_context_window.json
+++ b/model_prices_and_context_window.json
--- a/tests/test_litellm/llms/anthropic/experimental_pass_through/test_reasoning_effort_fields.py
+++ b/tests/test_litellm/llms/anthropic/experimental_pass_through/test_reasoning_effort_fields.py
@ -63,7 +63,13 @@ class TestGetModelInfoReasoningEffortFields:

 class TestModelRegistryReasoningEffortFields:
    """Verify specific models have the expected reasoning effort capability
-    values in the JSON registry file."""
+    values in the JSON registry file.
+
+    Claude models intentionally OMIT ``supports_minimal_reasoning_effort``:
+    ``minimal`` is not a real Anthropic effort level (the API accepts only
+    low/medium/high/xhigh/max), so LiteLLM degrades ``minimal`` to ``low``
+    regardless of the flag. These tests guard against the flag being
+    re-added to the Claude fleet."""

    @pytest.fixture(autouse=True)
    def _load_registry(self):
@ -77,41 +83,41 @@ class TestModelRegistryReasoningEffortFields:
        entry = self.registry["claude-opus-4-6"]
        assert entry.get("supports_max_reasoning_effort") is True

-    def test_opus_4_7_supports_minimal(self):
+    def test_opus_4_7_omits_minimal(self):
        entry = self.registry["claude-opus-4-7"]
-        assert entry.get("supports_minimal_reasoning_effort") is True
+        assert "supports_minimal_reasoning_effort" not in entry

-    def test_opus_4_6_supports_minimal(self):
+    def test_opus_4_6_omits_minimal(self):
        entry = self.registry["claude-opus-4-6"]
-        assert entry.get("supports_minimal_reasoning_effort") is True
+        assert "supports_minimal_reasoning_effort" not in entry

-    def test_sonnet_4_6_supports_minimal(self):
+    def test_sonnet_4_6_omits_minimal(self):
        entry = self.registry["anthropic.claude-sonnet-4-6"]
-        assert entry.get("supports_minimal_reasoning_effort") is True
+        assert "supports_minimal_reasoning_effort" not in entry

    def test_bedrock_opus_4_7_supports_max(self):
        entry = self.registry["anthropic.claude-opus-4-7"]
        assert entry.get("supports_max_reasoning_effort") is True
-        assert entry.get("supports_minimal_reasoning_effort") is True
+        assert "supports_minimal_reasoning_effort" not in entry

    def test_vertex_opus_4_7_supports_max(self):
        entry = self.registry["vertex_ai/claude-opus-4-7"]
        assert entry.get("supports_max_reasoning_effort") is True
-        assert entry.get("supports_minimal_reasoning_effort") is True
+        assert "supports_minimal_reasoning_effort" not in entry

    def test_vertex_opus_4_6_supports_max(self):
        entry = self.registry["vertex_ai/claude-opus-4-6"]
        assert entry.get("supports_max_reasoning_effort") is True
-        assert entry.get("supports_minimal_reasoning_effort") is True
+        assert "supports_minimal_reasoning_effort" not in entry

-    def test_azure_ai_opus_4_6_supports_minimal(self):
+    def test_azure_ai_opus_4_6_omits_minimal(self):
        entry = self.registry["azure_ai/claude-opus-4-6"]
-        assert entry.get("supports_minimal_reasoning_effort") is True
+        assert "supports_minimal_reasoning_effort" not in entry

    def test_azure_ai_opus_4_7_supports_max(self):
        entry = self.registry["azure_ai/claude-opus-4-7"]
        assert entry.get("supports_max_reasoning_effort") is True
-        assert entry.get("supports_minimal_reasoning_effort") is True
+        assert "supports_minimal_reasoning_effort" not in entry


 # ---------------------------------------------------------------------------
--- a/tests/test_litellm/test_claude_haiku_4_5_config.py
+++ b/tests/test_litellm/test_claude_haiku_4_5_config.py
@ -42,11 +42,6 @@ def test_bedrock_haiku_4_5_configuration():
            model_info.get("supports_vision") is True
        ), f"{model} should support vision"

-        # Verify tool use system prompt tokens
-        assert (
-            model_info.get("tool_use_system_prompt_tokens") == 346
-        ), f"{model} should have tool_use_system_prompt_tokens set to 346"
-
        # Verify core capabilities
        assert model_info.get("supports_computer_use") is True
        assert model_info.get("supports_function_calling") is True
@ -96,7 +91,6 @@ def test_bedrock_haiku_4_5_matches_sonnet_capabilities():
        "supports_pdf_input",
        "supports_assistant_prefill",
        "supports_reasoning",
-        "tool_use_system_prompt_tokens",
    ]

    for capability in shared_capabilities:
--- a/tests/test_litellm/test_claude_opus_4_6_config.py
+++ b/tests/test_litellm/test_claude_opus_4_6_config.py
@ -82,31 +82,26 @@ def test_opus_4_6_model_pricing_and_capabilities():
        "claude-opus-4-6": {
            "provider": "anthropic",
            "has_long_context_pricing": False,
-            "tool_use_system_prompt_tokens": 346,
            "max_input_tokens": 1000000,
        },
        "claude-opus-4-6-20260205": {
            "provider": "anthropic",
            "has_long_context_pricing": False,
-            "tool_use_system_prompt_tokens": 346,
            "max_input_tokens": 1000000,
        },
        "anthropic.claude-opus-4-6-v1": {
            "provider": "bedrock_converse",
            "has_long_context_pricing": False,
-            "tool_use_system_prompt_tokens": 346,
            "max_input_tokens": 1000000,
        },
        "vertex_ai/claude-opus-4-6": {
            "provider": "vertex_ai-anthropic_models",
            "has_long_context_pricing": False,
-            "tool_use_system_prompt_tokens": 346,
            "max_input_tokens": 1000000,
        },
        "azure_ai/claude-opus-4-6": {
            "provider": "azure_ai",
            "has_long_context_pricing": False,
-            "tool_use_system_prompt_tokens": 159,
            "max_input_tokens": 200000,
        },
    }
@ -143,10 +138,6 @@ def test_opus_4_6_model_pricing_and_capabilities():
        assert info["supports_reasoning"] is True
        assert info["supports_tool_choice"] is True
        assert info["supports_vision"] is True
-        assert (
-            info["tool_use_system_prompt_tokens"]
-            == config["tool_use_system_prompt_tokens"]
-        )


 def test_opus_4_6_bedrock_regional_model_pricing():
@ -191,7 +182,6 @@ def test_opus_4_6_bedrock_regional_model_pricing():
        assert info["max_output_tokens"] == 128000
        assert info["max_tokens"] == 128000
        assert info["supports_assistant_prefill"] is False
-        assert info["tool_use_system_prompt_tokens"] == 346
        assert "input_cost_per_token_above_200k_tokens" not in info
        assert "output_cost_per_token_above_200k_tokens" not in info
        assert "cache_creation_input_token_cost_above_200k_tokens" not in info
@ -220,7 +210,6 @@ def test_opus_4_6_alias_and_dated_metadata_match():
        "cache_creation_input_token_cost_above_1hr",
        "cache_read_input_token_cost",
        "supports_assistant_prefill",
-        "tool_use_system_prompt_tokens",
    ]
    for key in keys_to_match:
        assert alias[key] == dated[key], f"Mismatch for {key}"
--- a/tests/test_litellm/test_claude_opus_4_8_config.py
+++ b/tests/test_litellm/test_claude_opus_4_8_config.py
@ -0,0 +1,184 @@
+"""
+Validate Claude Opus 4.8 model configuration entries.
+
+Regression coverage for the wildcard-routing failure where a bare model name
+(``claude-opus-4-8``) could not match an ``anthropic/*`` deployment because
+LiteLLM could not infer its provider — the model was simply missing from the
+model cost map, so ``get_llm_provider`` raised and the router returned
+"no healthy deployments for this model". The fix is the cost-map entries added
+for Anthropic, Bedrock, Vertex AI, and Azure AI; those entries are what populate
+``litellm.anthropic_models`` at import time, which is what the bare-name lookup
+in ``get_llm_provider`` consumes.
+"""
+
+import json
+import os
+
+import pytest
+
+import litellm
+from litellm.constants import BEDROCK_CONVERSE_MODELS
+from litellm.litellm_core_utils.get_model_cost_map import GetModelCostMap
+
+REPO_ROOT = os.path.join(os.path.dirname(__file__), "../..")
+
+
+def _load_root_cost_map() -> dict:
+    json_path = os.path.join(REPO_ROOT, "model_prices_and_context_window.json")
+    with open(json_path) as f:
+        return json.load(f)
+
+
+@pytest.fixture
+def local_model_cost_map(monkeypatch):
+    """Force the bundled backup cost map so assertions don't depend on the
+    network-fetched ``main`` copy (which lags this branch until merge)."""
+    original_model_cost = litellm.model_cost
+    monkeypatch.setenv("LITELLM_LOCAL_MODEL_COST_MAP", "True")
+    litellm.model_cost = litellm.get_model_cost_map(url="")
+    litellm.get_model_info.cache_clear()
+    try:
+        yield
+    finally:
+        litellm.model_cost = original_model_cost
+        litellm.get_model_info.cache_clear()
+
+
+def test_opus_4_8_model_pricing_and_capabilities():
+    model_data = _load_root_cost_map()
+
+    expected_models = {
+        "claude-opus-4-8": {
+            "provider": "anthropic",
+            "max_input_tokens": 1000000,
+        },
+        "anthropic.claude-opus-4-8": {
+            "provider": "bedrock_converse",
+            "max_input_tokens": 1000000,
+        },
+        "vertex_ai/claude-opus-4-8": {
+            "provider": "vertex_ai-anthropic_models",
+            "max_input_tokens": 1000000,
+        },
+        # Microsoft Foundry / Azure caps Opus 4.8 at a 200k context window.
+        "azure_ai/claude-opus-4-8": {
+            "provider": "azure_ai",
+            "max_input_tokens": 200000,
+        },
+    }
+
+    for model_name, config in expected_models.items():
+        assert model_name in model_data, f"Missing model entry: {model_name}"
+        info = model_data[model_name]
+
+        assert info["litellm_provider"] == config["provider"]
+        assert info["mode"] == "chat"
+        assert info["max_input_tokens"] == config["max_input_tokens"]
+        assert info["max_output_tokens"] == 128000
+        assert info["max_tokens"] == 128000
+
+        # Base pricing matches Opus 4.7: $5 / $25 per MTok, with the standard
+        # 1.25x cache-write and 0.1x cache-read multipliers.
+        assert info["input_cost_per_token"] == 5e-06
+        assert info["output_cost_per_token"] == 2.5e-05
+        assert info["cache_creation_input_token_cost"] == 6.25e-06
+        assert info["cache_read_input_token_cost"] == 5e-07
+
+        # Opus 4.x flagships are flat-rate across the full context window.
+        assert "input_cost_per_token_above_200k_tokens" not in info
+        assert "output_cost_per_token_above_200k_tokens" not in info
+
+        assert info["supports_assistant_prefill"] is False
+        assert info["supports_function_calling"] is True
+        assert info["supports_prompt_caching"] is True
+        assert info["supports_reasoning"] is True
+        assert info["supports_tool_choice"] is True
+        assert info["supports_vision"] is True
+
+
+def test_opus_4_8_bedrock_regional_model_pricing():
+    model_data = _load_root_cost_map()
+
+    # Global endpoints use base pricing; regional endpoints carry a 10% premium.
+    expected_models = {
+        "global.anthropic.claude-opus-4-8": {
+            "input_cost_per_token": 5e-06,
+            "output_cost_per_token": 2.5e-05,
+            "cache_creation_input_token_cost": 6.25e-06,
+            "cache_read_input_token_cost": 5e-07,
+        },
+        "us.anthropic.claude-opus-4-8": {
+            "input_cost_per_token": 5.5e-06,
+            "output_cost_per_token": 2.75e-05,
+            "cache_creation_input_token_cost": 6.875e-06,
+            "cache_read_input_token_cost": 5.5e-07,
+        },
+        "eu.anthropic.claude-opus-4-8": {
+            "input_cost_per_token": 5.5e-06,
+            "output_cost_per_token": 2.75e-05,
+            "cache_creation_input_token_cost": 6.875e-06,
+            "cache_read_input_token_cost": 5.5e-07,
+        },
+        "au.anthropic.claude-opus-4-8": {
+            "input_cost_per_token": 5.5e-06,
+            "output_cost_per_token": 2.75e-05,
+            "cache_creation_input_token_cost": 6.875e-06,
+            "cache_read_input_token_cost": 5.5e-07,
+        },
+    }
+
+    for model_name, expected in expected_models.items():
+        assert model_name in model_data, f"Missing model entry: {model_name}"
+        info = model_data[model_name]
+        assert info["litellm_provider"] == "bedrock_converse"
+        assert info["max_input_tokens"] == 1000000
+        assert info["max_output_tokens"] == 128000
+        assert info["bedrock_output_config_effort_ceiling"] == "xhigh"
+        for key, value in expected.items():
+            assert info[key] == value
+
+
+def test_opus_4_8_fast_mode_multiplier():
+    """Opus 4.8 dropped fast-mode pricing to 2x base ($10/$50 per MTok);
+    Opus 4.7 was 6x ($30/$150)."""
+    model_data = _load_root_cost_map()
+    entry = model_data["claude-opus-4-8"]["provider_specific_entry"]
+    assert entry["us"] == 1.1
+    assert entry["fast"] == 2.0
+
+
+def test_opus_4_8_present_in_bundled_backup():
+    """The bundled backup is the runtime fallback (and what tests load with
+    ``LITELLM_LOCAL_MODEL_COST_MAP=True``) — it must carry the same entries as
+    the root cost map, otherwise the model resolves on one path but not the
+    other."""
+    backup = GetModelCostMap.load_local_model_cost_map()
+    for model_name in (
+        "claude-opus-4-8",
+        "anthropic.claude-opus-4-8",
+        "global.anthropic.claude-opus-4-8",
+        "us.anthropic.claude-opus-4-8",
+        "eu.anthropic.claude-opus-4-8",
+        "au.anthropic.claude-opus-4-8",
+        "vertex_ai/claude-opus-4-8",
+        "vertex_ai/claude-opus-4-8@default",
+        "azure_ai/claude-opus-4-8",
+    ):
+        assert model_name in backup, f"Missing from backup cost map: {model_name}"
+
+
+def test_opus_4_8_registered_for_bedrock_converse():
+    assert "anthropic.claude-opus-4-8" in BEDROCK_CONVERSE_MODELS
+
+
+def test_opus_4_8_provider_resolves_via_model_info(local_model_cost_map):
+    """Regression: ``claude-opus-4-8`` must resolve to provider ``anthropic``.
+
+    Before the cost-map entry existed, the model was unknown to LiteLLM, so it
+    could not be tied to the ``anthropic`` provider and an ``anthropic/*``
+    wildcard deployment would not match it.
+    """
+    info = litellm.get_model_info(model="claude-opus-4-8")
+    assert info["litellm_provider"] == "anthropic"
+    assert info["max_input_tokens"] == 1000000
+    assert info["max_output_tokens"] == 128000
--- a/tests/test_litellm/test_claude_sonnet_4_6_config.py
+++ b/tests/test_litellm/test_claude_sonnet_4_6_config.py
@ -50,7 +50,6 @@ def test_bedrock_sonnet_4_6_region_prefixes():
        assert model_info.get("supports_pdf_input") is True
        assert model_info.get("supports_assistant_prefill") is True
        assert model_info.get("supports_reasoning") is True
-        assert model_info.get("tool_use_system_prompt_tokens") == 346


 def test_bedrock_sonnet_4_6_jp_matches_other_regional_pricing():
--- a/tests/test_litellm/test_utils.py
+++ b/tests/test_litellm/test_utils.py
@ -864,7 +864,6 @@ def test_aaamodel_prices_and_context_window_json_is_valid():
                    "type": "string",
                    "enum": ["low", "medium", "high", "max", "xhigh"],
                },
-                "tool_use_system_prompt_tokens": {"type": "number"},
                "tpm": {"type": "number"},
                "provider_specific_entry": {"type": "object"},
                "supported_endpoints": {