feat(anthropic): add Claude Opus 4.8 and prune reasoning-effort flags (#29238)
* feat(anthropic): add Claude Opus 4.8 and prune reasoning-effort flags

  Register claude-opus-4-8 across the anthropic/bedrock/vertex/azure cost-map entries, BEDROCK_CONVERSE_MODELS, and the setup-wizard provider list.

  Prune two reasoning-effort fields from the cost map:
  - Drop supports_minimal_reasoning_effort from the Claude fleet (58 entries). "minimal" is not a real Anthropic effort level (the API accepts only low/medium/high/xhigh/max), so LiteLLM degrades it to "low" regardless; the flag was inert and misleading on Anthropic.
  - Remove tool_use_system_prompt_tokens everywhere (103 entries). It is not in the ModelInfo type and is read by no production code.

  Update the affected config/schema tests; the reasoning-effort registry tests now assert the Claude fleet omits supports_minimal.

* fix(anthropic): recognize output_config effort after minimal-flag prune

  Pruning supports_minimal_reasoning_effort from the Claude fleet removed the only "supports effort param" marker from 11 Opus 4.5 / mythos-preview map entries that lack supports_output_config. _model_supports_effort_param then returned False for them, so output_config was wrongly dropped under drop_params=True -- regressing test_anthropic_model_supports_effort_param_recognizes_supporting_models for claude-opus-4-5-20251101 and the mythos preview.
  - _model_supports_effort_param now treats supports_output_config as a sufficient signal, matching the bedrock-invoke call sites that already check supports_output_config OR a reasoning-effort flag. Shared map lookup extracted into _supports_model_capability.
  - Add supports_output_config: true to the 11 Opus 4.5 / mythos entries that lost their only marker, restoring prior effort-forwarding behavior without re-adding the inert minimal flag.
This commit is contained in:
parent
ffc113b428
commit
bae04591b2
@ -1147,6 +1147,7 @@ BEDROCK_CONVERSE_MODELS = [
|
||||
"openai.gpt-oss-120b-1:0",
|
||||
"anthropic.claude-haiku-4-5-20251001-v1:0",
|
||||
"anthropic.claude-sonnet-4-5-20250929-v1:0",
|
||||
"anthropic.claude-opus-4-8",
|
||||
"anthropic.claude-opus-4-7",
|
||||
"anthropic.claude-opus-4-6-v1:0",
|
||||
"anthropic.claude-opus-4-6-v1",
|
||||
|
||||
@ -337,13 +337,12 @@ class AnthropicConfig(AnthropicModelInfo, BaseConfig):
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
def _supports_effort_level(model: str, level: str) -> bool:
|
||||
"""Check ``supports_{level}_reasoning_effort`` in the model map.
|
||||
def _supports_model_capability(model: str, key: str) -> bool:
|
||||
"""Check a boolean capability ``key`` in the model map.
|
||||
|
||||
Strips bedrock/vertex prefixes so a provider-routed Claude still
|
||||
resolves to the Anthropic model-map entry.
|
||||
"""
|
||||
key = f"supports_{level}_reasoning_effort"
|
||||
try:
|
||||
if _supports_factory(
|
||||
model=model,
|
||||
@ -372,8 +371,6 @@ class AnthropicConfig(AnthropicModelInfo, BaseConfig):
|
||||
except Exception:
|
||||
pass
|
||||
try:
|
||||
import litellm
|
||||
|
||||
for cand in candidates:
|
||||
if cand in litellm.model_cost and (
|
||||
litellm.model_cost[cand].get(key) is True
|
||||
@ -383,6 +380,13 @@ class AnthropicConfig(AnthropicModelInfo, BaseConfig):
|
||||
pass
|
||||
return False
|
||||
|
||||
@staticmethod
def _supports_effort_level(model: str, level: str) -> bool:
    """Report whether the model map sets ``supports_{level}_reasoning_effort``.

    Thin wrapper that builds the per-level capability key and delegates the
    lookup to ``_supports_model_capability``.
    """
    capability_key = f"supports_{level}_reasoning_effort"
    return AnthropicConfig._supports_model_capability(model, capability_key)
|
||||
|
||||
@staticmethod
|
||||
def _validate_effort_for_model(model: str, effort: Optional[str]) -> Optional[str]:
|
||||
"""Return ``None`` if ``effort`` is allowed on ``model``, else an error message."""
|
||||
@ -400,7 +404,15 @@ class AnthropicConfig(AnthropicModelInfo, BaseConfig):
|
||||
|
||||
@staticmethod
|
||||
def _model_supports_effort_param(model: str) -> bool:
|
||||
"""Whether the model accepts ``output_config.effort`` at all."""
|
||||
"""Whether the model accepts ``output_config.effort`` at all.
|
||||
|
||||
A model qualifies if its map entry advertises ``supports_output_config``
|
||||
or any ``supports_*_reasoning_effort`` flag. The two are independent
|
||||
signals: e.g. Claude Opus 4.5 supports ``output_config`` without
|
||||
advertising a non-default (max/xhigh) effort level.
|
||||
"""
|
||||
if AnthropicConfig._supports_model_capability(model, "supports_output_config"):
|
||||
return True
|
||||
return any(
|
||||
AnthropicConfig._supports_effort_level(model, level)
|
||||
for level in ("low", "minimal", "medium", "high", "xhigh", "max")
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@ -52,11 +52,12 @@ PROVIDERS: List[Dict] = [
|
||||
{
|
||||
"id": "anthropic",
|
||||
"name": "Anthropic",
|
||||
"description": "Claude Opus 4.7, Opus 4.6, Sonnet 4.6, Haiku 4.5",
|
||||
"description": "Claude Opus 4.8, Opus 4.7, Opus 4.6, Sonnet 4.6, Haiku 4.5",
|
||||
"env_key": "ANTHROPIC_API_KEY",
|
||||
"key_hint": "sk-ant-...",
|
||||
"test_model": "claude-haiku-4-5-20251001",
|
||||
"models": [
|
||||
"claude-opus-4-8",
|
||||
"claude-opus-4-7",
|
||||
"claude-opus-4-6",
|
||||
"claude-sonnet-4-6",
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@ -63,7 +63,13 @@ class TestGetModelInfoReasoningEffortFields:
|
||||
|
||||
class TestModelRegistryReasoningEffortFields:
|
||||
"""Verify specific models have the expected reasoning effort capability
|
||||
values in the JSON registry file."""
|
||||
values in the JSON registry file.
|
||||
|
||||
Claude models intentionally OMIT ``supports_minimal_reasoning_effort``:
|
||||
``minimal`` is not a real Anthropic effort level (the API accepts only
|
||||
low/medium/high/xhigh/max), so LiteLLM degrades ``minimal`` to ``low``
|
||||
regardless of the flag. These tests guard against the flag being
|
||||
re-added to the Claude fleet."""
|
||||
|
||||
@pytest.fixture(autouse=True)
|
||||
def _load_registry(self):
|
||||
@ -77,41 +83,41 @@ class TestModelRegistryReasoningEffortFields:
|
||||
entry = self.registry["claude-opus-4-6"]
|
||||
assert entry.get("supports_max_reasoning_effort") is True
|
||||
|
||||
def test_opus_4_7_supports_minimal(self):
|
||||
def test_opus_4_7_omits_minimal(self):
|
||||
entry = self.registry["claude-opus-4-7"]
|
||||
assert entry.get("supports_minimal_reasoning_effort") is True
|
||||
assert "supports_minimal_reasoning_effort" not in entry
|
||||
|
||||
def test_opus_4_6_supports_minimal(self):
|
||||
def test_opus_4_6_omits_minimal(self):
|
||||
entry = self.registry["claude-opus-4-6"]
|
||||
assert entry.get("supports_minimal_reasoning_effort") is True
|
||||
assert "supports_minimal_reasoning_effort" not in entry
|
||||
|
||||
def test_sonnet_4_6_supports_minimal(self):
|
||||
def test_sonnet_4_6_omits_minimal(self):
|
||||
entry = self.registry["anthropic.claude-sonnet-4-6"]
|
||||
assert entry.get("supports_minimal_reasoning_effort") is True
|
||||
assert "supports_minimal_reasoning_effort" not in entry
|
||||
|
||||
def test_bedrock_opus_4_7_supports_max(self):
|
||||
entry = self.registry["anthropic.claude-opus-4-7"]
|
||||
assert entry.get("supports_max_reasoning_effort") is True
|
||||
assert entry.get("supports_minimal_reasoning_effort") is True
|
||||
assert "supports_minimal_reasoning_effort" not in entry
|
||||
|
||||
def test_vertex_opus_4_7_supports_max(self):
|
||||
entry = self.registry["vertex_ai/claude-opus-4-7"]
|
||||
assert entry.get("supports_max_reasoning_effort") is True
|
||||
assert entry.get("supports_minimal_reasoning_effort") is True
|
||||
assert "supports_minimal_reasoning_effort" not in entry
|
||||
|
||||
def test_vertex_opus_4_6_supports_max(self):
|
||||
entry = self.registry["vertex_ai/claude-opus-4-6"]
|
||||
assert entry.get("supports_max_reasoning_effort") is True
|
||||
assert entry.get("supports_minimal_reasoning_effort") is True
|
||||
assert "supports_minimal_reasoning_effort" not in entry
|
||||
|
||||
def test_azure_ai_opus_4_6_supports_minimal(self):
|
||||
def test_azure_ai_opus_4_6_omits_minimal(self):
|
||||
entry = self.registry["azure_ai/claude-opus-4-6"]
|
||||
assert entry.get("supports_minimal_reasoning_effort") is True
|
||||
assert "supports_minimal_reasoning_effort" not in entry
|
||||
|
||||
def test_azure_ai_opus_4_7_supports_max(self):
|
||||
entry = self.registry["azure_ai/claude-opus-4-7"]
|
||||
assert entry.get("supports_max_reasoning_effort") is True
|
||||
assert entry.get("supports_minimal_reasoning_effort") is True
|
||||
assert "supports_minimal_reasoning_effort" not in entry
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
@ -42,11 +42,6 @@ def test_bedrock_haiku_4_5_configuration():
|
||||
model_info.get("supports_vision") is True
|
||||
), f"{model} should support vision"
|
||||
|
||||
# Verify tool use system prompt tokens
|
||||
assert (
|
||||
model_info.get("tool_use_system_prompt_tokens") == 346
|
||||
), f"{model} should have tool_use_system_prompt_tokens set to 346"
|
||||
|
||||
# Verify core capabilities
|
||||
assert model_info.get("supports_computer_use") is True
|
||||
assert model_info.get("supports_function_calling") is True
|
||||
@ -96,7 +91,6 @@ def test_bedrock_haiku_4_5_matches_sonnet_capabilities():
|
||||
"supports_pdf_input",
|
||||
"supports_assistant_prefill",
|
||||
"supports_reasoning",
|
||||
"tool_use_system_prompt_tokens",
|
||||
]
|
||||
|
||||
for capability in shared_capabilities:
|
||||
|
||||
@ -82,31 +82,26 @@ def test_opus_4_6_model_pricing_and_capabilities():
|
||||
"claude-opus-4-6": {
|
||||
"provider": "anthropic",
|
||||
"has_long_context_pricing": False,
|
||||
"tool_use_system_prompt_tokens": 346,
|
||||
"max_input_tokens": 1000000,
|
||||
},
|
||||
"claude-opus-4-6-20260205": {
|
||||
"provider": "anthropic",
|
||||
"has_long_context_pricing": False,
|
||||
"tool_use_system_prompt_tokens": 346,
|
||||
"max_input_tokens": 1000000,
|
||||
},
|
||||
"anthropic.claude-opus-4-6-v1": {
|
||||
"provider": "bedrock_converse",
|
||||
"has_long_context_pricing": False,
|
||||
"tool_use_system_prompt_tokens": 346,
|
||||
"max_input_tokens": 1000000,
|
||||
},
|
||||
"vertex_ai/claude-opus-4-6": {
|
||||
"provider": "vertex_ai-anthropic_models",
|
||||
"has_long_context_pricing": False,
|
||||
"tool_use_system_prompt_tokens": 346,
|
||||
"max_input_tokens": 1000000,
|
||||
},
|
||||
"azure_ai/claude-opus-4-6": {
|
||||
"provider": "azure_ai",
|
||||
"has_long_context_pricing": False,
|
||||
"tool_use_system_prompt_tokens": 159,
|
||||
"max_input_tokens": 200000,
|
||||
},
|
||||
}
|
||||
@ -143,10 +138,6 @@ def test_opus_4_6_model_pricing_and_capabilities():
|
||||
assert info["supports_reasoning"] is True
|
||||
assert info["supports_tool_choice"] is True
|
||||
assert info["supports_vision"] is True
|
||||
assert (
|
||||
info["tool_use_system_prompt_tokens"]
|
||||
== config["tool_use_system_prompt_tokens"]
|
||||
)
|
||||
|
||||
|
||||
def test_opus_4_6_bedrock_regional_model_pricing():
|
||||
@ -191,7 +182,6 @@ def test_opus_4_6_bedrock_regional_model_pricing():
|
||||
assert info["max_output_tokens"] == 128000
|
||||
assert info["max_tokens"] == 128000
|
||||
assert info["supports_assistant_prefill"] is False
|
||||
assert info["tool_use_system_prompt_tokens"] == 346
|
||||
assert "input_cost_per_token_above_200k_tokens" not in info
|
||||
assert "output_cost_per_token_above_200k_tokens" not in info
|
||||
assert "cache_creation_input_token_cost_above_200k_tokens" not in info
|
||||
@ -220,7 +210,6 @@ def test_opus_4_6_alias_and_dated_metadata_match():
|
||||
"cache_creation_input_token_cost_above_1hr",
|
||||
"cache_read_input_token_cost",
|
||||
"supports_assistant_prefill",
|
||||
"tool_use_system_prompt_tokens",
|
||||
]
|
||||
for key in keys_to_match:
|
||||
assert alias[key] == dated[key], f"Mismatch for {key}"
|
||||
|
||||
184
tests/test_litellm/test_claude_opus_4_8_config.py
Normal file
184
tests/test_litellm/test_claude_opus_4_8_config.py
Normal file
@ -0,0 +1,184 @@
|
||||
"""
|
||||
Validate Claude Opus 4.8 model configuration entries.
|
||||
|
||||
Regression coverage for the wildcard-routing failure where a bare model name
|
||||
(``claude-opus-4-8``) could not match an ``anthropic/*`` deployment because
|
||||
LiteLLM could not infer its provider — the model was simply missing from the
|
||||
model cost map, so ``get_llm_provider`` raised and the router returned
|
||||
"no healthy deployments for this model". The fix is the cost-map entries added
|
||||
for Anthropic, Bedrock, Vertex AI, and Azure AI; those entries are what populate
|
||||
``litellm.anthropic_models`` at import time, which is what the bare-name lookup
|
||||
in ``get_llm_provider`` consumes.
|
||||
"""
|
||||
|
||||
import json
|
||||
import os
|
||||
|
||||
import pytest
|
||||
|
||||
import litellm
|
||||
from litellm.constants import BEDROCK_CONVERSE_MODELS
|
||||
from litellm.litellm_core_utils.get_model_cost_map import GetModelCostMap
|
||||
|
||||
REPO_ROOT = os.path.join(os.path.dirname(__file__), "../..")
|
||||
|
||||
|
||||
def _load_root_cost_map() -> dict:
|
||||
json_path = os.path.join(REPO_ROOT, "model_prices_and_context_window.json")
|
||||
with open(json_path) as f:
|
||||
return json.load(f)
|
||||
|
||||
|
||||
@pytest.fixture
def local_model_cost_map(monkeypatch):
    """Swap ``litellm.model_cost`` for the locally loaded cost map.

    Sets ``LITELLM_LOCAL_MODEL_COST_MAP=True`` before reloading the map so
    assertions don't depend on the network-fetched ``main`` copy (which lags
    this branch until merge). The previous map is put back on teardown, and
    the ``get_model_info`` cache is cleared on both sides of the swap.
    """
    monkeypatch.setenv("LITELLM_LOCAL_MODEL_COST_MAP", "True")
    previous_cost_map = litellm.model_cost
    litellm.model_cost = litellm.get_model_cost_map(url="")
    # Drop results cached against the previous map.
    litellm.get_model_info.cache_clear()
    try:
        yield
    finally:
        # Restore the original map even if the test body raised.
        litellm.model_cost = previous_cost_map
        litellm.get_model_info.cache_clear()
|
||||
|
||||
|
||||
def test_opus_4_8_model_pricing_and_capabilities():
    """Check each provider's Opus 4.8 entry in the root cost map.

    Every entry must carry the expected provider, context window, output
    limits, flat-rate pricing, and core capability flags.
    """
    cost_map = _load_root_cost_map()

    # (model name, litellm_provider, max_input_tokens)
    expected_rows = [
        ("claude-opus-4-8", "anthropic", 1000000),
        ("anthropic.claude-opus-4-8", "bedrock_converse", 1000000),
        ("vertex_ai/claude-opus-4-8", "vertex_ai-anthropic_models", 1000000),
        # Microsoft Foundry / Azure caps Opus 4.8 at a 200k context window.
        ("azure_ai/claude-opus-4-8", "azure_ai", 200000),
    ]

    # Base pricing matches Opus 4.7: $5 / $25 per MTok, with the standard
    # 1.25x cache-write and 0.1x cache-read multipliers.
    expected_pricing = {
        "input_cost_per_token": 5e-06,
        "output_cost_per_token": 2.5e-05,
        "cache_creation_input_token_cost": 6.25e-06,
        "cache_read_input_token_cost": 5e-07,
    }

    # Opus 4.x flagships are flat-rate across the full context window, so
    # these tiered-pricing keys must be absent.
    tiered_pricing_keys = (
        "input_cost_per_token_above_200k_tokens",
        "output_cost_per_token_above_200k_tokens",
    )

    enabled_capabilities = (
        "supports_function_calling",
        "supports_prompt_caching",
        "supports_reasoning",
        "supports_tool_choice",
        "supports_vision",
    )

    for name, provider, max_input_tokens in expected_rows:
        assert name in cost_map, f"Missing model entry: {name}"
        entry = cost_map[name]

        assert entry["litellm_provider"] == provider
        assert entry["mode"] == "chat"
        assert entry["max_input_tokens"] == max_input_tokens
        assert entry["max_output_tokens"] == 128000
        assert entry["max_tokens"] == 128000

        for price_key, price in expected_pricing.items():
            assert entry[price_key] == price

        for tiered_key in tiered_pricing_keys:
            assert tiered_key not in entry

        assert entry["supports_assistant_prefill"] is False
        for capability in enabled_capabilities:
            assert entry[capability] is True
|
||||
|
||||
|
||||
def test_opus_4_8_bedrock_regional_model_pricing():
    """Check pricing and limits of the Bedrock global/regional Opus 4.8 entries."""
    cost_map = _load_root_cost_map()

    # Global endpoints use base pricing; regional endpoints carry a 10% premium.
    global_pricing = {
        "input_cost_per_token": 5e-06,
        "output_cost_per_token": 2.5e-05,
        "cache_creation_input_token_cost": 6.25e-06,
        "cache_read_input_token_cost": 5e-07,
    }
    regional_pricing = {
        "input_cost_per_token": 5.5e-06,
        "output_cost_per_token": 2.75e-05,
        "cache_creation_input_token_cost": 6.875e-06,
        "cache_read_input_token_cost": 5.5e-07,
    }

    expected_by_model = {"global.anthropic.claude-opus-4-8": global_pricing}
    for region in ("us", "eu", "au"):
        expected_by_model[f"{region}.anthropic.claude-opus-4-8"] = regional_pricing

    for name, pricing in expected_by_model.items():
        assert name in cost_map, f"Missing model entry: {name}"
        entry = cost_map[name]
        assert entry["litellm_provider"] == "bedrock_converse"
        assert entry["max_input_tokens"] == 1000000
        assert entry["max_output_tokens"] == 128000
        assert entry["bedrock_output_config_effort_ceiling"] == "xhigh"
        for price_key, price in pricing.items():
            assert entry[price_key] == price
|
||||
|
||||
|
||||
def test_opus_4_8_fast_mode_multiplier():
    """Check the ``provider_specific_entry`` multipliers on ``claude-opus-4-8``.

    Opus 4.8 dropped fast-mode pricing to 2x base ($10/$50 per MTok);
    Opus 4.7 was 6x ($30/$150).
    """
    opus_entry = _load_root_cost_map()["claude-opus-4-8"]
    multipliers = opus_entry["provider_specific_entry"]
    for multiplier_key, expected in (("us", 1.1), ("fast", 2.0)):
        assert multipliers[multiplier_key] == expected
|
||||
|
||||
|
||||
def test_opus_4_8_present_in_bundled_backup():
    """Every Opus 4.8 model name must exist in the bundled backup cost map.

    The bundled backup is the runtime fallback (and what tests load with
    ``LITELLM_LOCAL_MODEL_COST_MAP=True``) — it must carry the same entries as
    the root cost map, otherwise the model resolves on one path but not the
    other.
    """
    backup = GetModelCostMap.load_local_model_cost_map()
    required_names = (
        "claude-opus-4-8",
        "anthropic.claude-opus-4-8",
        "global.anthropic.claude-opus-4-8",
        "us.anthropic.claude-opus-4-8",
        "eu.anthropic.claude-opus-4-8",
        "au.anthropic.claude-opus-4-8",
        "vertex_ai/claude-opus-4-8",
        "vertex_ai/claude-opus-4-8@default",
        "azure_ai/claude-opus-4-8",
    )
    # Report the first absent name, in declaration order.
    first_missing = next(
        (name for name in required_names if name not in backup), None
    )
    assert first_missing is None, f"Missing from backup cost map: {first_missing}"
|
||||
|
||||
|
||||
def test_opus_4_8_registered_for_bedrock_converse():
    """The Bedrock Opus 4.8 model id must be listed in ``BEDROCK_CONVERSE_MODELS``."""
    bedrock_model_id = "anthropic.claude-opus-4-8"
    assert bedrock_model_id in BEDROCK_CONVERSE_MODELS
|
||||
|
||||
|
||||
def test_opus_4_8_provider_resolves_via_model_info(local_model_cost_map):
    """Regression: ``claude-opus-4-8`` must resolve to provider ``anthropic``.

    Before the cost-map entry existed, the model was unknown to LiteLLM, so it
    could not be tied to the ``anthropic`` provider and an ``anthropic/*``
    wildcard deployment would not match it.
    """
    resolved = litellm.get_model_info(model="claude-opus-4-8")
    expected_fields = {
        "litellm_provider": "anthropic",
        "max_input_tokens": 1000000,
        "max_output_tokens": 128000,
    }
    for field, expected in expected_fields.items():
        assert resolved[field] == expected
|
||||
@ -50,7 +50,6 @@ def test_bedrock_sonnet_4_6_region_prefixes():
|
||||
assert model_info.get("supports_pdf_input") is True
|
||||
assert model_info.get("supports_assistant_prefill") is True
|
||||
assert model_info.get("supports_reasoning") is True
|
||||
assert model_info.get("tool_use_system_prompt_tokens") == 346
|
||||
|
||||
|
||||
def test_bedrock_sonnet_4_6_jp_matches_other_regional_pricing():
|
||||
|
||||
@ -864,7 +864,6 @@ def test_aaamodel_prices_and_context_window_json_is_valid():
|
||||
"type": "string",
|
||||
"enum": ["low", "medium", "high", "max", "xhigh"],
|
||||
},
|
||||
"tool_use_system_prompt_tokens": {"type": "number"},
|
||||
"tpm": {"type": "number"},
|
||||
"provider_specific_entry": {"type": "object"},
|
||||
"supported_endpoints": {
|
||||
|
||||
Loading…
Reference in New Issue
Block a user