feat(anthropic): add Claude Opus 4.8 and prune reasoning-effort flags (#29238)

* feat(anthropic): add Claude Opus 4.8 and prune reasoning-effort flags

Register claude-opus-4-8 across the anthropic/bedrock/vertex/azure cost-map
entries, BEDROCK_CONVERSE_MODELS, and the setup-wizard provider list.

Prune two reasoning-effort fields from the cost map:
- Drop supports_minimal_reasoning_effort from the Claude fleet (58 entries).
  "minimal" is not a real Anthropic effort level (the API accepts only
  low/medium/high/xhigh/max), so LiteLLM degrades it to "low" regardless;
  the flag was inert and misleading on Anthropic.
- Remove tool_use_system_prompt_tokens everywhere (103 entries). It is not in
  the ModelInfo type and is read by no production code.

Update the affected config/schema tests; the reasoning-effort registry tests
now assert the Claude fleet omits supports_minimal.

* fix(anthropic): recognize output_config effort after minimal-flag prune

Pruning supports_minimal_reasoning_effort from the Claude fleet removed the
only "supports effort param" marker from 11 Opus 4.5 / mythos-preview map
entries that lack supports_output_config. _model_supports_effort_param then
returned False for them, so output_config was wrongly dropped under
drop_params=True -- regressing
test_anthropic_model_supports_effort_param_recognizes_supporting_models for
claude-opus-4-5-20251101 and the mythos preview.

- _model_supports_effort_param now treats supports_output_config as a
  sufficient signal, matching the bedrock-invoke call sites that already
  check supports_output_config OR a reasoning-effort flag. Shared map lookup
  extracted into _supports_model_capability.
- Add supports_output_config: true to the 11 Opus 4.5 / mythos entries that
  lost their only marker, restoring prior effort-forwarding behavior without
  re-adding the inert minimal flag.
This commit is contained in:
Mateo Wang 2026-05-28 18:50:33 -07:00 committed by GitHub
parent ffc113b428
commit bae04591b2
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
11 changed files with 913 additions and 446 deletions

View File

@ -1147,6 +1147,7 @@ BEDROCK_CONVERSE_MODELS = [
"openai.gpt-oss-120b-1:0",
"anthropic.claude-haiku-4-5-20251001-v1:0",
"anthropic.claude-sonnet-4-5-20250929-v1:0",
"anthropic.claude-opus-4-8",
"anthropic.claude-opus-4-7",
"anthropic.claude-opus-4-6-v1:0",
"anthropic.claude-opus-4-6-v1",

View File

@ -337,13 +337,12 @@ class AnthropicConfig(AnthropicModelInfo, BaseConfig):
)
@staticmethod
def _supports_effort_level(model: str, level: str) -> bool:
"""Check ``supports_{level}_reasoning_effort`` in the model map.
def _supports_model_capability(model: str, key: str) -> bool:
"""Check a boolean capability ``key`` in the model map.
Strips bedrock/vertex prefixes so a provider-routed Claude still
resolves to the Anthropic model-map entry.
"""
key = f"supports_{level}_reasoning_effort"
try:
if _supports_factory(
model=model,
@ -372,8 +371,6 @@ class AnthropicConfig(AnthropicModelInfo, BaseConfig):
except Exception:
pass
try:
import litellm
for cand in candidates:
if cand in litellm.model_cost and (
litellm.model_cost[cand].get(key) is True
@ -383,6 +380,13 @@ class AnthropicConfig(AnthropicModelInfo, BaseConfig):
pass
return False
@staticmethod
def _supports_effort_level(model: str, level: str) -> bool:
    """Report whether ``model`` has ``supports_{level}_reasoning_effort`` set.

    Thin wrapper: builds the flag name from ``level`` and hands the lookup
    to ``_supports_model_capability``.
    """
    capability_key = f"supports_{level}_reasoning_effort"
    return AnthropicConfig._supports_model_capability(model, capability_key)
@staticmethod
def _validate_effort_for_model(model: str, effort: Optional[str]) -> Optional[str]:
"""Return ``None`` if ``effort`` is allowed on ``model``, else an error message."""
@ -400,7 +404,15 @@ class AnthropicConfig(AnthropicModelInfo, BaseConfig):
@staticmethod
def _model_supports_effort_param(model: str) -> bool:
"""Whether the model accepts ``output_config.effort`` at all."""
"""Whether the model accepts ``output_config.effort`` at all.
A model qualifies if its map entry advertises ``supports_output_config``
or any ``supports_*_reasoning_effort`` flag. The two are independent
signals: e.g. Claude Opus 4.5 supports ``output_config`` without
advertising a non-default (max/xhigh) effort level.
"""
if AnthropicConfig._supports_model_capability(model, "supports_output_config"):
return True
return any(
AnthropicConfig._supports_effort_level(model, level)
for level in ("low", "minimal", "medium", "high", "xhigh", "max")

File diff suppressed because it is too large Load Diff

View File

@ -52,11 +52,12 @@ PROVIDERS: List[Dict] = [
{
"id": "anthropic",
"name": "Anthropic",
"description": "Claude Opus 4.7, Opus 4.6, Sonnet 4.6, Haiku 4.5",
"description": "Claude Opus 4.8, Opus 4.7, Opus 4.6, Sonnet 4.6, Haiku 4.5",
"env_key": "ANTHROPIC_API_KEY",
"key_hint": "sk-ant-...",
"test_model": "claude-haiku-4-5-20251001",
"models": [
"claude-opus-4-8",
"claude-opus-4-7",
"claude-opus-4-6",
"claude-sonnet-4-6",

File diff suppressed because it is too large Load Diff

View File

@ -63,7 +63,13 @@ class TestGetModelInfoReasoningEffortFields:
class TestModelRegistryReasoningEffortFields:
"""Verify specific models have the expected reasoning effort capability
values in the JSON registry file."""
values in the JSON registry file.
Claude models intentionally OMIT ``supports_minimal_reasoning_effort``:
``minimal`` is not a real Anthropic effort level (the API accepts only
low/medium/high/xhigh/max), so LiteLLM degrades ``minimal`` to ``low``
regardless of the flag. These tests guard against the flag being
re-added to the Claude fleet."""
@pytest.fixture(autouse=True)
def _load_registry(self):
@ -77,41 +83,41 @@ class TestModelRegistryReasoningEffortFields:
entry = self.registry["claude-opus-4-6"]
assert entry.get("supports_max_reasoning_effort") is True
def test_opus_4_7_supports_minimal(self):
def test_opus_4_7_omits_minimal(self):
entry = self.registry["claude-opus-4-7"]
assert entry.get("supports_minimal_reasoning_effort") is True
assert "supports_minimal_reasoning_effort" not in entry
def test_opus_4_6_supports_minimal(self):
def test_opus_4_6_omits_minimal(self):
entry = self.registry["claude-opus-4-6"]
assert entry.get("supports_minimal_reasoning_effort") is True
assert "supports_minimal_reasoning_effort" not in entry
def test_sonnet_4_6_supports_minimal(self):
def test_sonnet_4_6_omits_minimal(self):
entry = self.registry["anthropic.claude-sonnet-4-6"]
assert entry.get("supports_minimal_reasoning_effort") is True
assert "supports_minimal_reasoning_effort" not in entry
def test_bedrock_opus_4_7_supports_max(self):
entry = self.registry["anthropic.claude-opus-4-7"]
assert entry.get("supports_max_reasoning_effort") is True
assert entry.get("supports_minimal_reasoning_effort") is True
assert "supports_minimal_reasoning_effort" not in entry
def test_vertex_opus_4_7_supports_max(self):
entry = self.registry["vertex_ai/claude-opus-4-7"]
assert entry.get("supports_max_reasoning_effort") is True
assert entry.get("supports_minimal_reasoning_effort") is True
assert "supports_minimal_reasoning_effort" not in entry
def test_vertex_opus_4_6_supports_max(self):
entry = self.registry["vertex_ai/claude-opus-4-6"]
assert entry.get("supports_max_reasoning_effort") is True
assert entry.get("supports_minimal_reasoning_effort") is True
assert "supports_minimal_reasoning_effort" not in entry
def test_azure_ai_opus_4_6_supports_minimal(self):
def test_azure_ai_opus_4_6_omits_minimal(self):
entry = self.registry["azure_ai/claude-opus-4-6"]
assert entry.get("supports_minimal_reasoning_effort") is True
assert "supports_minimal_reasoning_effort" not in entry
def test_azure_ai_opus_4_7_supports_max(self):
entry = self.registry["azure_ai/claude-opus-4-7"]
assert entry.get("supports_max_reasoning_effort") is True
assert entry.get("supports_minimal_reasoning_effort") is True
assert "supports_minimal_reasoning_effort" not in entry
# ---------------------------------------------------------------------------

View File

@ -42,11 +42,6 @@ def test_bedrock_haiku_4_5_configuration():
model_info.get("supports_vision") is True
), f"{model} should support vision"
# Verify tool use system prompt tokens
assert (
model_info.get("tool_use_system_prompt_tokens") == 346
), f"{model} should have tool_use_system_prompt_tokens set to 346"
# Verify core capabilities
assert model_info.get("supports_computer_use") is True
assert model_info.get("supports_function_calling") is True
@ -96,7 +91,6 @@ def test_bedrock_haiku_4_5_matches_sonnet_capabilities():
"supports_pdf_input",
"supports_assistant_prefill",
"supports_reasoning",
"tool_use_system_prompt_tokens",
]
for capability in shared_capabilities:

View File

@ -82,31 +82,26 @@ def test_opus_4_6_model_pricing_and_capabilities():
"claude-opus-4-6": {
"provider": "anthropic",
"has_long_context_pricing": False,
"tool_use_system_prompt_tokens": 346,
"max_input_tokens": 1000000,
},
"claude-opus-4-6-20260205": {
"provider": "anthropic",
"has_long_context_pricing": False,
"tool_use_system_prompt_tokens": 346,
"max_input_tokens": 1000000,
},
"anthropic.claude-opus-4-6-v1": {
"provider": "bedrock_converse",
"has_long_context_pricing": False,
"tool_use_system_prompt_tokens": 346,
"max_input_tokens": 1000000,
},
"vertex_ai/claude-opus-4-6": {
"provider": "vertex_ai-anthropic_models",
"has_long_context_pricing": False,
"tool_use_system_prompt_tokens": 346,
"max_input_tokens": 1000000,
},
"azure_ai/claude-opus-4-6": {
"provider": "azure_ai",
"has_long_context_pricing": False,
"tool_use_system_prompt_tokens": 159,
"max_input_tokens": 200000,
},
}
@ -143,10 +138,6 @@ def test_opus_4_6_model_pricing_and_capabilities():
assert info["supports_reasoning"] is True
assert info["supports_tool_choice"] is True
assert info["supports_vision"] is True
assert (
info["tool_use_system_prompt_tokens"]
== config["tool_use_system_prompt_tokens"]
)
def test_opus_4_6_bedrock_regional_model_pricing():
@ -191,7 +182,6 @@ def test_opus_4_6_bedrock_regional_model_pricing():
assert info["max_output_tokens"] == 128000
assert info["max_tokens"] == 128000
assert info["supports_assistant_prefill"] is False
assert info["tool_use_system_prompt_tokens"] == 346
assert "input_cost_per_token_above_200k_tokens" not in info
assert "output_cost_per_token_above_200k_tokens" not in info
assert "cache_creation_input_token_cost_above_200k_tokens" not in info
@ -220,7 +210,6 @@ def test_opus_4_6_alias_and_dated_metadata_match():
"cache_creation_input_token_cost_above_1hr",
"cache_read_input_token_cost",
"supports_assistant_prefill",
"tool_use_system_prompt_tokens",
]
for key in keys_to_match:
assert alias[key] == dated[key], f"Mismatch for {key}"

View File

@ -0,0 +1,184 @@
"""
Validate Claude Opus 4.8 model configuration entries.
Regression coverage for the wildcard-routing failure where a bare model name
(``claude-opus-4-8``) could not match an ``anthropic/*`` deployment because
LiteLLM could not infer its provider: the model was simply missing from the
model cost map, so ``get_llm_provider`` raised and the router returned
"no healthy deployments for this model". The fix is the cost-map entries added
for Anthropic, Bedrock, Vertex AI, and Azure AI; those entries are what populate
``litellm.anthropic_models`` at import time, which is what the bare-name lookup
in ``get_llm_provider`` consumes.
"""
import json
import os
import pytest
import litellm
from litellm.constants import BEDROCK_CONVERSE_MODELS
from litellm.litellm_core_utils.get_model_cost_map import GetModelCostMap
REPO_ROOT = os.path.join(os.path.dirname(__file__), "../..")
def _load_root_cost_map() -> dict:
    """Load the repository-root model cost map and return it as a dict.

    Reads ``model_prices_and_context_window.json`` from ``REPO_ROOT``. The
    file is opened explicitly as UTF-8 so that parsing does not depend on the
    platform's locale encoding (JSON interchange text is UTF-8).
    """
    json_path = os.path.join(REPO_ROOT, "model_prices_and_context_window.json")
    with open(json_path, encoding="utf-8") as f:
        return json.load(f)
@pytest.fixture
def local_model_cost_map(monkeypatch):
    """Pin ``litellm.model_cost`` to the locally loaded cost map for one test.

    Sets ``LITELLM_LOCAL_MODEL_COST_MAP=True`` and reloads the cost map with
    an empty URL, so assertions use the bundled backup rather than the
    network-fetched ``main`` copy (which lags this branch until merge). The
    ``get_model_info`` cache is cleared on setup and again on teardown, after
    the previous cost map has been put back.
    """
    saved_cost_map = litellm.model_cost
    monkeypatch.setenv("LITELLM_LOCAL_MODEL_COST_MAP", "True")
    litellm.model_cost = litellm.get_model_cost_map(url="")
    litellm.get_model_info.cache_clear()
    try:
        yield
    finally:
        # Restore the map first, then drop entries cached from the local map.
        litellm.model_cost = saved_cost_map
        litellm.get_model_info.cache_clear()
def test_opus_4_8_model_pricing_and_capabilities():
    """Check the root cost map's Opus 4.8 entry for each provider."""
    cost_map = _load_root_cost_map()

    # model name -> (litellm_provider, max_input_tokens)
    # Microsoft Foundry / Azure caps Opus 4.8 at a 200k context window.
    per_model = {
        "claude-opus-4-8": ("anthropic", 1000000),
        "anthropic.claude-opus-4-8": ("bedrock_converse", 1000000),
        "vertex_ai/claude-opus-4-8": ("vertex_ai-anthropic_models", 1000000),
        "azure_ai/claude-opus-4-8": ("azure_ai", 200000),
    }

    # Values every entry shares. Base pricing matches Opus 4.7: $5 / $25 per
    # MTok, with the standard 1.25x cache-write and 0.1x cache-read
    # multipliers.
    shared_values = {
        "max_output_tokens": 128000,
        "max_tokens": 128000,
        "input_cost_per_token": 5e-06,
        "output_cost_per_token": 2.5e-05,
        "cache_creation_input_token_cost": 6.25e-06,
        "cache_read_input_token_cost": 5e-07,
    }

    # Opus 4.x flagships are flat-rate across the full context window, so the
    # long-context pricing keys must be absent.
    absent_keys = (
        "input_cost_per_token_above_200k_tokens",
        "output_cost_per_token_above_200k_tokens",
    )

    capability_flags = {
        "supports_assistant_prefill": False,
        "supports_function_calling": True,
        "supports_prompt_caching": True,
        "supports_reasoning": True,
        "supports_tool_choice": True,
        "supports_vision": True,
    }

    for name, (provider, max_input) in per_model.items():
        assert name in cost_map, f"Missing model entry: {name}"
        entry = cost_map[name]

        assert entry["litellm_provider"] == provider
        assert entry["mode"] == "chat"
        assert entry["max_input_tokens"] == max_input

        for field, wanted in shared_values.items():
            assert entry[field] == wanted

        for field in absent_keys:
            assert field not in entry

        for flag, wanted in capability_flags.items():
            assert entry[flag] is wanted
def test_opus_4_8_bedrock_regional_model_pricing():
    """Check pricing and limits of the Bedrock global/regional Opus 4.8 entries."""
    cost_map = _load_root_cost_map()

    # Global endpoints use base pricing; regional endpoints carry a 10% premium.
    base_pricing = {
        "input_cost_per_token": 5e-06,
        "output_cost_per_token": 2.5e-05,
        "cache_creation_input_token_cost": 6.25e-06,
        "cache_read_input_token_cost": 5e-07,
    }
    regional_pricing = {
        "input_cost_per_token": 5.5e-06,
        "output_cost_per_token": 2.75e-05,
        "cache_creation_input_token_cost": 6.875e-06,
        "cache_read_input_token_cost": 5.5e-07,
    }
    expected_pricing = {
        "global.anthropic.claude-opus-4-8": base_pricing,
        "us.anthropic.claude-opus-4-8": regional_pricing,
        "eu.anthropic.claude-opus-4-8": regional_pricing,
        "au.anthropic.claude-opus-4-8": regional_pricing,
    }

    for name, pricing in expected_pricing.items():
        assert name in cost_map, f"Missing model entry: {name}"
        entry = cost_map[name]

        assert entry["litellm_provider"] == "bedrock_converse"
        assert entry["max_input_tokens"] == 1000000
        assert entry["max_output_tokens"] == 128000
        assert entry["bedrock_output_config_effort_ceiling"] == "xhigh"

        for field, wanted in pricing.items():
            assert entry[field] == wanted
def test_opus_4_8_fast_mode_multiplier():
    """Check the ``us`` and ``fast`` multipliers on the Anthropic entry.

    Opus 4.8 dropped fast-mode pricing to 2x base ($10/$50 per MTok);
    Opus 4.7 was 6x ($30/$150).
    """
    multipliers = _load_root_cost_map()["claude-opus-4-8"]["provider_specific_entry"]
    assert multipliers["us"] == 1.1
    assert multipliers["fast"] == 2.0
def test_opus_4_8_present_in_bundled_backup():
    """Every Opus 4.8 model name must exist in the bundled backup cost map.

    The bundled backup is the runtime fallback (and what tests load with
    ``LITELLM_LOCAL_MODEL_COST_MAP=True``), so it must carry the same entries
    as the root cost map; otherwise the model resolves on one path but not
    the other.
    """
    required_names = (
        "claude-opus-4-8",
        "anthropic.claude-opus-4-8",
        "global.anthropic.claude-opus-4-8",
        "us.anthropic.claude-opus-4-8",
        "eu.anthropic.claude-opus-4-8",
        "au.anthropic.claude-opus-4-8",
        "vertex_ai/claude-opus-4-8",
        "vertex_ai/claude-opus-4-8@default",
        "azure_ai/claude-opus-4-8",
    )
    bundled = GetModelCostMap.load_local_model_cost_map()
    for name in required_names:
        assert name in bundled, f"Missing from backup cost map: {name}"
def test_opus_4_8_registered_for_bedrock_converse():
    """The Bedrock Opus 4.8 model id must be listed in ``BEDROCK_CONVERSE_MODELS``."""
    bedrock_model_id = "anthropic.claude-opus-4-8"
    assert bedrock_model_id in BEDROCK_CONVERSE_MODELS
def test_opus_4_8_provider_resolves_via_model_info(local_model_cost_map):
    """Regression: bare ``claude-opus-4-8`` must resolve to provider ``anthropic``.

    Before the cost-map entry existed, the model was unknown to LiteLLM, so
    it could not be tied to the ``anthropic`` provider and an ``anthropic/*``
    wildcard deployment would not match it.
    """
    resolved = litellm.get_model_info(model="claude-opus-4-8")

    expected_fields = {
        "litellm_provider": "anthropic",
        "max_input_tokens": 1000000,
        "max_output_tokens": 128000,
    }
    for field, wanted in expected_fields.items():
        assert resolved[field] == wanted

View File

@ -50,7 +50,6 @@ def test_bedrock_sonnet_4_6_region_prefixes():
assert model_info.get("supports_pdf_input") is True
assert model_info.get("supports_assistant_prefill") is True
assert model_info.get("supports_reasoning") is True
assert model_info.get("tool_use_system_prompt_tokens") == 346
def test_bedrock_sonnet_4_6_jp_matches_other_regional_pricing():

View File

@ -864,7 +864,6 @@ def test_aaamodel_prices_and_context_window_json_is_valid():
"type": "string",
"enum": ["low", "medium", "high", "max", "xhigh"],
},
"tool_use_system_prompt_tokens": {"type": "number"},
"tpm": {"type": "number"},
"provider_specific_entry": {"type": "object"},
"supported_endpoints": {