diff --git a/litellm/constants.py b/litellm/constants.py index f10cec034f..57f55e6c17 100644 --- a/litellm/constants.py +++ b/litellm/constants.py @@ -1158,6 +1158,7 @@ BEDROCK_CONVERSE_MODELS = [ "openai.gpt-oss-120b-1:0", "anthropic.claude-haiku-4-5-20251001-v1:0", "anthropic.claude-sonnet-4-5-20250929-v1:0", + "anthropic.claude-fable-5", "anthropic.claude-opus-4-8", "anthropic.claude-opus-4-7", "anthropic.claude-opus-4-6-v1:0", diff --git a/litellm/llms/anthropic/chat/transformation.py b/litellm/llms/anthropic/chat/transformation.py index 3f30d5d680..9ecd0df0cb 100644 --- a/litellm/llms/anthropic/chat/transformation.py +++ b/litellm/llms/anthropic/chat/transformation.py @@ -1455,10 +1455,15 @@ class AnthropicConfig(AnthropicModelInfo, BaseConfig): _value = self._map_stop_sequences(value) if _value is not None: optional_params["stop_sequences"] = _value - elif param == "temperature": - optional_params["temperature"] = value - elif param == "top_p": - optional_params["top_p"] = value + elif param == "temperature" or param == "top_p": + AnthropicConfig._apply_sampling_param( + optional_params=optional_params, + model=model, + param=param, + value=value, + drop_params=drop_params, + output_key=param, + ) elif param == "response_format" and isinstance(value, dict): if any( substring in model @@ -1975,6 +1980,20 @@ class AnthropicConfig(AnthropicModelInfo, BaseConfig): optional_params.pop("is_vertex_request", None) optional_params.pop("client_metadata", None) + # ``top_k`` is a provider-specific kwarg that bypasses + # ``map_openai_params``; gate it here, the single boundary shared by + # the direct Anthropic, Bedrock invoke, Vertex, and Azure paths. + top_k = optional_params.pop("top_k", None) + if top_k is not None: + AnthropicConfig._apply_sampling_param( + optional_params=optional_params, + model=model, + param="top_k", + value=top_k, + drop_params=litellm_params.get("drop_params") is True, + output_key="top_k", + ) + data = { "model": model, "messages": anthropic_messages, diff --git a/litellm/llms/anthropic/common_utils.py b/litellm/llms/anthropic/common_utils.py index 3f002d73cb..5741513903 100644 --- a/litellm/llms/anthropic/common_utils.py +++ b/litellm/llms/anthropic/common_utils.py @@ -272,23 +272,68 @@ class AnthropicModelInfo(BaseLLMModelInfo): ) @staticmethod - def _supports_model_capability(model: str, key: str) -> bool: - """Check a boolean capability ``key`` in the model map. + def _supports_sampling_params(model: str) -> bool: + """Claude 4.7+ (Opus 4.7/4.8, Fable 5) removed sampling params: the API + rejects ``top_p``, ``top_k``, and any ``temperature`` other than 1 with + a 400 ("`temperature` is deprecated for this model"). - Strips bedrock/vertex prefixes so a provider-routed Claude still - resolves to the Anthropic model-map entry. - """ - from litellm.utils import _supports_factory + Driven by the ``supports_sampling_params`` flag in the model map; the + name check remains only as a fallback for provider-routed ids whose + map entries predate the flag.""" + flag = AnthropicModelInfo._get_model_capability( + model, "supports_sampling_params" + ) + if flag is not None: + return flag + model_lower = model.lower() + return not any( + v in model_lower + for v in ( + "fable", + "opus-4-7", + "opus_4_7", + "opus-4.7", + "opus_4.7", + "opus-4-8", + "opus_4_8", + "opus-4.8", + "opus_4.8", + ) + ) - try: - if _supports_factory( - model=model, - custom_llm_provider="anthropic", - key=key, - ): - return True - except Exception: - pass + @staticmethod + def _apply_sampling_param( + optional_params: dict, + model: str, + param: str, + value: Any, + drop_params: bool, + output_key: str, + ) -> None: + """Forward ``temperature``/``top_p``/``top_k`` to + ``optional_params[output_key]`` unless the model removed sampling + params, in which case drop the param (with drop_params) or raise a + clean client-side 400.""" + if AnthropicModelInfo._supports_sampling_params(model) or ( + param == "temperature" and value == 1 + ): + optional_params[output_key] = value + elif not (litellm.drop_params or drop_params): + supported_hint = ( + "Only temperature=1 is supported. " if param == "temperature" else "" + ) + raise litellm.utils.UnsupportedParamsError( + message=( + f"{model} does not support {param}={value}. {supported_hint}" + "To drop unsupported params, set `litellm.drop_params = True`." + ), + status_code=400, + ) + + @staticmethod + def _model_map_lookup_candidates(model: str) -> List[str]: + """Model-map keys to try for ``model``, stripping bedrock/vertex + prefixes so a provider-routed Claude still resolves to its entry.""" candidates = [model] for prefix in ( "bedrock/converse/", @@ -307,15 +352,40 @@ class AnthropicModelInfo(BaseLLMModelInfo): candidates.append(f"bedrock/{base}") except Exception: pass + return candidates + + @staticmethod + def _get_model_capability(model: str, key: str) -> Optional[bool]: + """Read boolean capability ``key`` from the model map, or None when + no entry declares it.""" try: - for cand in candidates: - if cand in litellm.model_cost and ( - litellm.model_cost[cand].get(key) is True - ): - return True + for cand in AnthropicModelInfo._model_map_lookup_candidates(model): + value = litellm.model_cost.get(cand, {}).get(key) + if isinstance(value, bool): + return value except Exception: pass - return False + return None + + @staticmethod + def _supports_model_capability(model: str, key: str) -> bool: + """Check a boolean capability ``key`` in the model map. + + Strips bedrock/vertex prefixes so a provider-routed Claude still + resolves to the Anthropic model-map entry. + """ + from litellm.utils import _supports_factory + + try: + if _supports_factory( + model=model, + custom_llm_provider="anthropic", + key=key, + ): + return True + except Exception: + pass + return AnthropicModelInfo._get_model_capability(model, key) is True @staticmethod def _is_adaptive_thinking_model(model: str) -> bool: diff --git a/litellm/llms/bedrock/chat/converse_transformation.py b/litellm/llms/bedrock/chat/converse_transformation.py index ea0326dffd..b5e5e4de6f 100644 --- a/litellm/llms/bedrock/chat/converse_transformation.py +++ b/litellm/llms/bedrock/chat/converse_transformation.py @@ -920,10 +920,15 @@ class AmazonConverseConfig(BaseConfig): continue value = [value] optional_params["stopSequences"] = value - if param == "temperature": - optional_params["temperature"] = value - if param == "top_p": - optional_params["topP"] = value + if param == "temperature" or param == "top_p": + AnthropicConfig._apply_sampling_param( + optional_params=optional_params, + model=model, + param=param, + value=value, + drop_params=drop_params, + output_key="topP" if param == "top_p" else param, + ) if param == "tools" and isinstance(value, list): self._apply_tool_call_transformation( tools=cast(List[OpenAIChatCompletionToolParam], value), @@ -1221,7 +1226,9 @@ class AmazonConverseConfig(BaseConfig): inference_params["topK"] = inference_params.pop("top_k") return InferenceConfig(**inference_params) - def _handle_top_k_value(self, model: str, inference_params: dict) -> dict: + def _handle_top_k_value( + self, model: str, inference_params: dict, drop_params: bool = False + ) -> dict: base_model = BedrockModelInfo.get_base_model(model) val_top_k = None @@ -1230,16 +1237,25 @@ class AmazonConverseConfig(BaseConfig): elif "top_k" in inference_params: val_top_k = inference_params.pop("top_k") - if val_top_k: + if val_top_k is not None: if base_model.startswith("anthropic"): - return {"top_k": val_top_k} + top_k_params: dict = {} + AnthropicConfig._apply_sampling_param( + optional_params=top_k_params, + model=model, + param="top_k", + value=val_top_k, + drop_params=drop_params, + output_key="top_k", + ) + return top_k_params if base_model.startswith("amazon.nova"): return {"inferenceConfig": {"topK": val_top_k}} return {} def _prepare_request_params( - self, optional_params: dict, model: str + self, optional_params: dict, model: str, drop_params: bool = False ) -> Tuple[dict, dict, dict, Optional[OutputConfigBlock]]: """Prepare and separate request parameters.""" # Consume the internal ``_output_config_normalized`` marker set by @@ -1338,7 +1354,7 @@ class AmazonConverseConfig(BaseConfig): # Only set the topK value in for models that support it additional_request_params.update( - self._handle_top_k_value(model, inference_params) + self._handle_top_k_value(model, inference_params, drop_params) ) # Filter out internal/MCP-related parameters that shouldn't be sent to the API @@ -1572,6 +1588,7 @@ class AmazonConverseConfig(BaseConfig): optional_params: dict, messages: Optional[List[AllMessageValues]] = None, headers: Optional[dict] = None, + drop_params: bool = False, ) -> CommonRequestObject: ## VALIDATE REQUEST """ @@ -1618,7 +1635,7 @@ class AmazonConverseConfig(BaseConfig): additional_request_params, request_metadata, output_config, - ) = self._prepare_request_params(optional_params, model) + ) = self._prepare_request_params(optional_params, model, drop_params) original_tools = inference_params.pop("tools", []) @@ -1701,6 +1718,7 @@ class AmazonConverseConfig(BaseConfig): optional_params=optional_params, messages=messages, headers=headers, + drop_params=litellm_params.get("drop_params") is True, ) bedrock_messages = ( @@ -1758,6 +1776,7 @@ class AmazonConverseConfig(BaseConfig): optional_params=optional_params, messages=messages, headers=headers, + drop_params=litellm_params.get("drop_params") is True, ) ## TRANSFORMATION ## diff --git a/litellm/model_prices_and_context_window_backup.json b/litellm/model_prices_and_context_window_backup.json index 282a292ab1..3782da1350 100644 --- a/litellm/model_prices_and_context_window_backup.json +++ b/litellm/model_prices_and_context_window_backup.json @@ -1156,6 +1156,7 @@ "supports_prompt_caching": true, "supports_reasoning": true, "supports_response_schema": true, + "supports_sampling_params": false, "supports_tool_choice": true, "supports_vision": true, "supports_xhigh_reasoning_effort": true, @@ -1202,6 +1203,7 @@ "supports_prompt_caching": true, "supports_reasoning": true, "supports_response_schema": true, + "supports_sampling_params": false, "supports_tool_choice": true, "supports_vision": true, "supports_xhigh_reasoning_effort": true, @@ -1233,6 +1235,7 @@ "supports_prompt_caching": true, "supports_reasoning": true, "supports_response_schema": true, + "supports_sampling_params": false, "supports_tool_choice": true, "supports_vision": true, "supports_xhigh_reasoning_effort": true, @@ -1264,6 +1267,7 @@ "supports_prompt_caching": true, "supports_reasoning": true, "supports_response_schema": true, + "supports_sampling_params": false, "supports_tool_choice": true, "supports_vision": true, "supports_xhigh_reasoning_effort": true, @@ -1295,6 +1299,139 @@ "supports_prompt_caching": true, "supports_reasoning": true, "supports_response_schema": true, + "supports_sampling_params": false, + "supports_tool_choice": true, + "supports_vision": true, + "supports_xhigh_reasoning_effort": true, + "supports_native_structured_output": true, + "supports_max_reasoning_effort": true, + "supports_output_config": true, + "bedrock_output_config_effort_ceiling": "xhigh" + }, + "anthropic.claude-fable-5": { + "cache_creation_input_token_cost": 1.25e-05, + "cache_creation_input_token_cost_above_1hr": 2e-05, + "cache_read_input_token_cost": 1e-06, + "input_cost_per_token": 1e-05, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 1000000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 5e-05, + "search_context_cost_per_query": { + "search_context_size_high": 0.01, + "search_context_size_low": 0.01, + "search_context_size_medium": 0.01 + }, + "supports_adaptive_thinking": true, + "supports_assistant_prefill": false, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_sampling_params": false, + "supports_tool_choice": true, + "supports_vision": true, + "supports_xhigh_reasoning_effort": true, + "supports_native_structured_output": true, + "supports_max_reasoning_effort": true, + "supports_output_config": true, + "bedrock_output_config_effort_ceiling": "xhigh" + }, + "global.anthropic.claude-fable-5": { + "cache_creation_input_token_cost": 1.25e-05, + "cache_creation_input_token_cost_above_1hr": 2e-05, + "cache_read_input_token_cost": 1e-06, + "input_cost_per_token": 1e-05, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 1000000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 5e-05, + "search_context_cost_per_query": { + "search_context_size_high": 0.01, + "search_context_size_low": 0.01, + "search_context_size_medium": 0.01 + }, + "supports_adaptive_thinking": true, + "supports_assistant_prefill": false, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_sampling_params": false, + "supports_tool_choice": true, + "supports_vision": true, + "supports_xhigh_reasoning_effort": true, + "supports_native_structured_output": true, + "supports_max_reasoning_effort": true, + "supports_output_config": true, + "bedrock_output_config_effort_ceiling": "xhigh" + }, + "us.anthropic.claude-fable-5": { + "cache_creation_input_token_cost": 1.375e-05, + "cache_creation_input_token_cost_above_1hr": 2.2e-05, + "cache_read_input_token_cost": 1.1e-06, + "input_cost_per_token": 1.1e-05, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 1000000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 5.5e-05, + "search_context_cost_per_query": { + "search_context_size_high": 0.01, + "search_context_size_low": 0.01, + "search_context_size_medium": 0.01 + }, + "supports_adaptive_thinking": true, + "supports_assistant_prefill": false, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_sampling_params": false, + "supports_tool_choice": true, + "supports_vision": true, + "supports_xhigh_reasoning_effort": true, + "supports_native_structured_output": true, + "supports_max_reasoning_effort": true, + "supports_output_config": true, + "bedrock_output_config_effort_ceiling": "xhigh" + }, + "eu.anthropic.claude-fable-5": { + "cache_creation_input_token_cost": 1.375e-05, + "cache_creation_input_token_cost_above_1hr": 2.2e-05, + "cache_read_input_token_cost": 1.1e-06, + "input_cost_per_token": 1.1e-05, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 1000000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 5.5e-05, + "search_context_cost_per_query": { + "search_context_size_high": 0.01, + "search_context_size_low": 0.01, + "search_context_size_medium": 0.01 + }, + "supports_adaptive_thinking": true, + "supports_assistant_prefill": false, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_sampling_params": false, "supports_tool_choice": true, "supports_vision": true, "supports_xhigh_reasoning_effort": true, @@ -1327,6 +1464,7 @@ "supports_prompt_caching": true, "supports_reasoning": true, "supports_response_schema": true, + "supports_sampling_params": false, "supports_tool_choice": true, "supports_vision": true, "supports_xhigh_reasoning_effort": true, @@ -1359,6 +1497,7 @@ "supports_prompt_caching": true, "supports_reasoning": true, "supports_response_schema": true, + "supports_sampling_params": false, "supports_tool_choice": true, "supports_vision": true, "supports_xhigh_reasoning_effort": true, @@ -1391,6 +1530,7 @@ "supports_prompt_caching": true, "supports_reasoning": true, "supports_response_schema": true, + "supports_sampling_params": false, "supports_tool_choice": true, "supports_vision": true, "supports_xhigh_reasoning_effort": true, @@ -1423,6 +1563,7 @@ "supports_prompt_caching": true, "supports_reasoning": true, "supports_response_schema": true, + "supports_sampling_params": false, "supports_tool_choice": true, "supports_vision": true, "supports_xhigh_reasoning_effort": true, @@ -1455,6 +1596,7 @@ "supports_prompt_caching": true, "supports_reasoning": true, "supports_response_schema": true, + "supports_sampling_params": false, "supports_tool_choice": true, "supports_vision": true, "supports_xhigh_reasoning_effort": true, @@ -1485,6 +1627,7 @@ "supports_prompt_caching": true, "supports_reasoning": true, "supports_response_schema": true, + "supports_sampling_params": false, "supports_tool_choice": true, "supports_vision": true, "supports_xhigh_reasoning_effort": true, @@ -2208,6 +2351,37 @@ "supports_prompt_caching": true, "supports_reasoning": true, "supports_response_schema": true, + "supports_sampling_params": false, + "supports_tool_choice": true, + "supports_vision": true, + "supports_xhigh_reasoning_effort": true, + "supports_max_reasoning_effort": true + }, + "azure_ai/claude-fable-5": { + "input_cost_per_token": 1e-05, + "output_cost_per_token": 5e-05, + "litellm_provider": "azure_ai", + "max_input_tokens": 1000000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "search_context_cost_per_query": { + "search_context_size_high": 0.01, + "search_context_size_low": 0.01, + "search_context_size_medium": 0.01 + }, + "cache_creation_input_token_cost": 1.25e-05, + "cache_creation_input_token_cost_above_1hr": 2e-05, + "cache_read_input_token_cost": 1e-06, + "supports_adaptive_thinking": true, + "supports_assistant_prefill": false, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_sampling_params": false, "supports_tool_choice": true, "supports_vision": true, "supports_xhigh_reasoning_effort": true, @@ -2237,6 +2411,7 @@ "supports_prompt_caching": true, "supports_reasoning": true, "supports_response_schema": true, + "supports_sampling_params": false, "supports_tool_choice": true, "supports_vision": true, "supports_xhigh_reasoning_effort": true, @@ -10170,6 +10345,7 @@ "supports_prompt_caching": true, "supports_reasoning": true, "supports_response_schema": true, + "supports_sampling_params": false, "supports_tool_choice": true, "supports_vision": true, "supports_xhigh_reasoning_effort": true, @@ -10204,6 +10380,7 @@ "supports_prompt_caching": true, "supports_reasoning": true, "supports_response_schema": true, + "supports_sampling_params": false, "supports_tool_choice": true, "supports_vision": true, "supports_xhigh_reasoning_effort": true, @@ -10214,6 +10391,40 @@ }, "supports_output_config": true }, + "claude-fable-5": { + "cache_creation_input_token_cost": 1.25e-05, + "cache_creation_input_token_cost_above_1hr": 2e-05, + "cache_read_input_token_cost": 1e-06, + "input_cost_per_token": 1e-05, + "litellm_provider": "anthropic", + "max_input_tokens": 1000000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 5e-05, + "search_context_cost_per_query": { + "search_context_size_high": 0.01, + "search_context_size_low": 0.01, + "search_context_size_medium": 0.01 + }, + "supports_adaptive_thinking": true, + "supports_assistant_prefill": false, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_sampling_params": false, + "supports_tool_choice": true, + "supports_vision": true, + "supports_xhigh_reasoning_effort": true, + "supports_max_reasoning_effort": true, + "provider_specific_entry": { + "us": 1.1 + }, + "supports_output_config": true + }, "claude-opus-4-8": { "cache_creation_input_token_cost": 6.25e-06, "cache_creation_input_token_cost_above_1hr": 1e-05, @@ -10238,6 +10449,7 @@ "supports_prompt_caching": true, "supports_reasoning": true, "supports_response_schema": true, + "supports_sampling_params": false, "supports_tool_choice": true, "supports_vision": true, "supports_xhigh_reasoning_effort": true, @@ -34004,6 +34216,7 @@ "supports_prompt_caching": true, "supports_reasoning": true, "supports_response_schema": true, + "supports_sampling_params": false, "supports_tool_choice": true, "supports_vision": true, "supports_xhigh_reasoning_effort": true, @@ -34032,6 +34245,67 @@ "supports_prompt_caching": true, "supports_reasoning": true, "supports_response_schema": true, + "supports_sampling_params": false, + "supports_tool_choice": true, + "supports_vision": true, + "supports_xhigh_reasoning_effort": true, + "supports_max_reasoning_effort": true + }, + "vertex_ai/claude-fable-5": { + "cache_creation_input_token_cost": 1.25e-05, + "cache_creation_input_token_cost_above_1hr": 2e-05, + "cache_read_input_token_cost": 1e-06, + "input_cost_per_token": 1e-05, + "litellm_provider": "vertex_ai-anthropic_models", + "max_input_tokens": 1000000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 5e-05, + "search_context_cost_per_query": { + "search_context_size_high": 0.01, + "search_context_size_low": 0.01, + "search_context_size_medium": 0.01 + }, + "supports_adaptive_thinking": true, + "supports_assistant_prefill": false, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_sampling_params": false, + "supports_tool_choice": true, + "supports_vision": true, + "supports_xhigh_reasoning_effort": true, + "supports_max_reasoning_effort": true + }, + "vertex_ai/claude-fable-5@default": { + "cache_creation_input_token_cost": 1.25e-05, + "cache_creation_input_token_cost_above_1hr": 2e-05, + "cache_read_input_token_cost": 1e-06, + "input_cost_per_token": 1e-05, + "litellm_provider": "vertex_ai-anthropic_models", + "max_input_tokens": 1000000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 5e-05, + "search_context_cost_per_query": { + "search_context_size_high": 0.01, + "search_context_size_low": 0.01, + "search_context_size_medium": 0.01 + }, + "supports_adaptive_thinking": true, + "supports_assistant_prefill": false, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_sampling_params": false, "supports_tool_choice": true, "supports_vision": true, "supports_xhigh_reasoning_effort": true, @@ -34061,6 +34335,7 @@ "supports_prompt_caching": true, "supports_reasoning": true, "supports_response_schema": true, + "supports_sampling_params": false, "supports_tool_choice": true, "supports_vision": true, "supports_xhigh_reasoning_effort": true, @@ -34090,6 +34365,7 @@ "supports_prompt_caching": true, "supports_reasoning": true, "supports_response_schema": true, + "supports_sampling_params": false, "supports_tool_choice": true, "supports_vision": true, "supports_xhigh_reasoning_effort": true, diff --git a/litellm/setup_wizard.py b/litellm/setup_wizard.py index 862ca13e7b..2f0cb1233a 100644 --- a/litellm/setup_wizard.py +++ b/litellm/setup_wizard.py @@ -52,11 +52,12 @@ PROVIDERS: List[Dict] = [ { "id": "anthropic", "name": "Anthropic", - "description": "Claude Opus 4.8, Opus 4.7, Opus 4.6, Sonnet 4.6, Haiku 4.5", + "description": "Claude Fable 5, Opus 4.8, Opus 4.7, Opus 4.6, Sonnet 4.6, Haiku 4.5", "env_key": "ANTHROPIC_API_KEY", "key_hint": "sk-ant-...", "test_model": "claude-haiku-4-5-20251001", "models": [ + "claude-fable-5", "claude-opus-4-8", "claude-opus-4-7", "claude-opus-4-6", diff --git a/model_prices_and_context_window.json b/model_prices_and_context_window.json index b0ffc66d03..85cb06b7f1 100644 --- a/model_prices_and_context_window.json +++ b/model_prices_and_context_window.json @@ -1156,6 +1156,7 @@ "supports_prompt_caching": true, "supports_reasoning": true, "supports_response_schema": true, + "supports_sampling_params": false, "supports_tool_choice": true, "supports_vision": true, "supports_xhigh_reasoning_effort": true, @@ -1202,6 +1203,7 @@ "supports_prompt_caching": true, "supports_reasoning": true, "supports_response_schema": true, + "supports_sampling_params": false, "supports_tool_choice": true, "supports_vision": true, "supports_xhigh_reasoning_effort": true, @@ -1233,6 +1235,7 @@ "supports_prompt_caching": true, "supports_reasoning": true, "supports_response_schema": true, + "supports_sampling_params": false, "supports_tool_choice": true, "supports_vision": true, "supports_xhigh_reasoning_effort": true, @@ -1264,6 +1267,7 @@ "supports_prompt_caching": true, "supports_reasoning": true, "supports_response_schema": true, + "supports_sampling_params": false, "supports_tool_choice": true, "supports_vision": true, "supports_xhigh_reasoning_effort": true, @@ -1295,6 +1299,139 @@ "supports_prompt_caching": true, "supports_reasoning": true, "supports_response_schema": true, + "supports_sampling_params": false, + "supports_tool_choice": true, + "supports_vision": true, + "supports_xhigh_reasoning_effort": true, + "supports_native_structured_output": true, + "supports_max_reasoning_effort": true, + "supports_output_config": true, + "bedrock_output_config_effort_ceiling": "xhigh" + }, + "anthropic.claude-fable-5": { + "cache_creation_input_token_cost": 1.25e-05, + "cache_creation_input_token_cost_above_1hr": 2e-05, + "cache_read_input_token_cost": 1e-06, + "input_cost_per_token": 1e-05, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 1000000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 5e-05, + "search_context_cost_per_query": { + "search_context_size_high": 0.01, + "search_context_size_low": 0.01, + "search_context_size_medium": 0.01 + }, + "supports_adaptive_thinking": true, + "supports_assistant_prefill": false, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_sampling_params": false, + "supports_tool_choice": true, + "supports_vision": true, + "supports_xhigh_reasoning_effort": true, + "supports_native_structured_output": true, + "supports_max_reasoning_effort": true, + "supports_output_config": true, + "bedrock_output_config_effort_ceiling": "xhigh" + }, + "global.anthropic.claude-fable-5": { + "cache_creation_input_token_cost": 1.25e-05, + "cache_creation_input_token_cost_above_1hr": 2e-05, + "cache_read_input_token_cost": 1e-06, + "input_cost_per_token": 1e-05, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 1000000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 5e-05, + "search_context_cost_per_query": { + "search_context_size_high": 0.01, + "search_context_size_low": 0.01, + "search_context_size_medium": 0.01 + }, + "supports_adaptive_thinking": true, + "supports_assistant_prefill": false, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_sampling_params": false, + "supports_tool_choice": true, + "supports_vision": true, + "supports_xhigh_reasoning_effort": true, + "supports_native_structured_output": true, + "supports_max_reasoning_effort": true, + "supports_output_config": true, + "bedrock_output_config_effort_ceiling": "xhigh" + }, + "us.anthropic.claude-fable-5": { + "cache_creation_input_token_cost": 1.375e-05, + "cache_creation_input_token_cost_above_1hr": 2.2e-05, + "cache_read_input_token_cost": 1.1e-06, + "input_cost_per_token": 1.1e-05, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 1000000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 5.5e-05, + "search_context_cost_per_query": { + "search_context_size_high": 0.01, + "search_context_size_low": 0.01, + "search_context_size_medium": 0.01 + }, + "supports_adaptive_thinking": true, + "supports_assistant_prefill": false, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_sampling_params": false, + "supports_tool_choice": true, + "supports_vision": true, + "supports_xhigh_reasoning_effort": true, + "supports_native_structured_output": true, + "supports_max_reasoning_effort": true, + "supports_output_config": true, + "bedrock_output_config_effort_ceiling": "xhigh" + }, + "eu.anthropic.claude-fable-5": { + "cache_creation_input_token_cost": 1.375e-05, + "cache_creation_input_token_cost_above_1hr": 2.2e-05, + "cache_read_input_token_cost": 1.1e-06, + "input_cost_per_token": 1.1e-05, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 1000000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 5.5e-05, + "search_context_cost_per_query": { + "search_context_size_high": 0.01, + "search_context_size_low": 0.01, + "search_context_size_medium": 0.01 + }, + "supports_adaptive_thinking": true, + "supports_assistant_prefill": false, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_sampling_params": false, "supports_tool_choice": true, "supports_vision": true, "supports_xhigh_reasoning_effort": true, @@ -1327,6 +1464,7 @@ "supports_prompt_caching": true, "supports_reasoning": true, "supports_response_schema": true, + "supports_sampling_params": false, "supports_tool_choice": true, "supports_vision": true, "supports_xhigh_reasoning_effort": true, @@ -1359,6 +1497,7 @@ "supports_prompt_caching": true, "supports_reasoning": true, "supports_response_schema": true, + "supports_sampling_params": false, "supports_tool_choice": true, "supports_vision": true, "supports_xhigh_reasoning_effort": true, @@ -1391,6 +1530,7 @@ "supports_prompt_caching": true, "supports_reasoning": true, "supports_response_schema": true, + "supports_sampling_params": false, "supports_tool_choice": true, "supports_vision": true, "supports_xhigh_reasoning_effort": true, @@ -1423,6 +1563,7 @@ "supports_prompt_caching": true, "supports_reasoning": true, "supports_response_schema": true, + "supports_sampling_params": false, "supports_tool_choice": true, "supports_vision": true, "supports_xhigh_reasoning_effort": true, @@ -1455,6 +1596,7 @@ "supports_prompt_caching": true, "supports_reasoning": true, "supports_response_schema": true, + "supports_sampling_params": false, "supports_tool_choice": true, "supports_vision": true, "supports_xhigh_reasoning_effort": true, @@ -1485,6 +1627,7 @@ "supports_prompt_caching": true, "supports_reasoning": true, "supports_response_schema": true, + "supports_sampling_params": false, "supports_tool_choice": true, "supports_vision": true, "supports_xhigh_reasoning_effort": true, @@ -2208,6 +2351,37 @@ "supports_prompt_caching": true, "supports_reasoning": true, "supports_response_schema": true, + "supports_sampling_params": false, + "supports_tool_choice": true, + "supports_vision": true, + "supports_xhigh_reasoning_effort": true, + "supports_max_reasoning_effort": true + }, + "azure_ai/claude-fable-5": { + "input_cost_per_token": 1e-05, + "output_cost_per_token": 5e-05, + "litellm_provider": "azure_ai", + "max_input_tokens": 1000000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "search_context_cost_per_query": { + "search_context_size_high": 0.01, + "search_context_size_low": 0.01, + "search_context_size_medium": 0.01 + }, + "cache_creation_input_token_cost": 1.25e-05, + "cache_creation_input_token_cost_above_1hr": 2e-05, + "cache_read_input_token_cost": 1e-06, + "supports_adaptive_thinking": true, + "supports_assistant_prefill": false, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_sampling_params": false, "supports_tool_choice": true, "supports_vision": true, "supports_xhigh_reasoning_effort": true, @@ -2237,6 +2411,7 @@ "supports_prompt_caching": true, "supports_reasoning": true, "supports_response_schema": true, + "supports_sampling_params": false, "supports_tool_choice": true, "supports_vision": true, "supports_xhigh_reasoning_effort": true, @@ -10170,6 +10345,7 @@ "supports_prompt_caching": true, "supports_reasoning": true, "supports_response_schema": true, + "supports_sampling_params": false, "supports_tool_choice": true, "supports_vision": true, "supports_xhigh_reasoning_effort": true, @@ -10204,6 +10380,7 @@ "supports_prompt_caching": true, "supports_reasoning": true, "supports_response_schema": true, + "supports_sampling_params": false, "supports_tool_choice": true, "supports_vision": true, "supports_xhigh_reasoning_effort": true, @@ -10214,6 +10391,40 @@ }, "supports_output_config": true }, + "claude-fable-5": { + "cache_creation_input_token_cost": 1.25e-05, + "cache_creation_input_token_cost_above_1hr": 2e-05, + "cache_read_input_token_cost": 1e-06, + "input_cost_per_token": 1e-05, + "litellm_provider": "anthropic", + "max_input_tokens": 1000000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 5e-05, + "search_context_cost_per_query": { + "search_context_size_high": 0.01, + "search_context_size_low": 0.01, + "search_context_size_medium": 0.01 + }, + "supports_adaptive_thinking": true, + "supports_assistant_prefill": false, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_sampling_params": false, + "supports_tool_choice": true, + "supports_vision": true, + "supports_xhigh_reasoning_effort": true, + "supports_max_reasoning_effort": true, + "provider_specific_entry": { + "us": 1.1 + }, + "supports_output_config": true + }, "claude-opus-4-8": { "cache_creation_input_token_cost": 6.25e-06, "cache_creation_input_token_cost_above_1hr": 1e-05, @@ -10238,6 +10449,7 @@ "supports_prompt_caching": true, "supports_reasoning": true, "supports_response_schema": true, + "supports_sampling_params": false, "supports_tool_choice": true, "supports_vision": true, "supports_xhigh_reasoning_effort": true, @@ -34044,6 +34256,7 @@ "supports_prompt_caching": true, "supports_reasoning": true, "supports_response_schema": true, + "supports_sampling_params": false, "supports_tool_choice": true, "supports_vision": true, "supports_xhigh_reasoning_effort": true, @@ -34072,6 +34285,67 @@ "supports_prompt_caching": true, "supports_reasoning": true, "supports_response_schema": true, + "supports_sampling_params": false, + "supports_tool_choice": true, + "supports_vision": true, + "supports_xhigh_reasoning_effort": true, + "supports_max_reasoning_effort": true + }, + "vertex_ai/claude-fable-5": { + "cache_creation_input_token_cost": 1.25e-05, + "cache_creation_input_token_cost_above_1hr": 2e-05, + "cache_read_input_token_cost": 1e-06, + "input_cost_per_token": 1e-05, + "litellm_provider": "vertex_ai-anthropic_models", + "max_input_tokens": 1000000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 5e-05, + "search_context_cost_per_query": { + "search_context_size_high": 0.01, + "search_context_size_low": 0.01, + "search_context_size_medium": 0.01 + }, + "supports_adaptive_thinking": true, + "supports_assistant_prefill": false, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_sampling_params": false, + "supports_tool_choice": true, + "supports_vision": true, + "supports_xhigh_reasoning_effort": true, + "supports_max_reasoning_effort": true + }, + "vertex_ai/claude-fable-5@default": { + "cache_creation_input_token_cost": 1.25e-05, + "cache_creation_input_token_cost_above_1hr": 2e-05, + "cache_read_input_token_cost": 1e-06, + "input_cost_per_token": 1e-05, + "litellm_provider": "vertex_ai-anthropic_models", + "max_input_tokens": 1000000, + "max_output_tokens": 128000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 5e-05, + "search_context_cost_per_query": { + "search_context_size_high": 0.01, + "search_context_size_low": 0.01, + "search_context_size_medium": 0.01 + }, + "supports_adaptive_thinking": true, + "supports_assistant_prefill": false, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_sampling_params": false, "supports_tool_choice": true, "supports_vision": true, "supports_xhigh_reasoning_effort": true, @@ -34101,6 +34375,7 @@ "supports_prompt_caching": true, "supports_reasoning": true, "supports_response_schema": true, + "supports_sampling_params": false, "supports_tool_choice": true, "supports_vision": true, "supports_xhigh_reasoning_effort": true, @@ -34130,6 +34405,7 @@ "supports_prompt_caching": true, "supports_reasoning": true, "supports_response_schema": true, + "supports_sampling_params": false, "supports_tool_choice": true, "supports_vision": true, "supports_xhigh_reasoning_effort": true, diff --git a/tests/llm_translation/reasoning_effort_grid/grid_spec.py b/tests/llm_translation/reasoning_effort_grid/grid_spec.py index a08013cd43..83a2c286d6 100644 --- a/tests/llm_translation/reasoning_effort_grid/grid_spec.py +++ b/tests/llm_translation/reasoning_effort_grid/grid_spec.py @@ -1,7 +1,6 @@ from dataclasses import dataclass, field from typing import Dict, FrozenSet, List, Optional, Tuple - OMIT = object() @@ -136,6 +135,13 @@ _CAPS_NONE: FrozenSet[str] = frozenset() ANTHROPIC_DIRECT_MODELS: Tuple[ModelEntry, ...] = ( + ModelEntry( + alias="claude-fable-5", + model="anthropic/claude-fable-5", + mode="adaptive", + required_env=_ANTHROPIC_REQ, + caps=_CAPS_XHIGH_MAX, + ), ModelEntry( alias="claude-opus-4-8", model="anthropic/claude-opus-4-8", @@ -168,6 +174,19 @@ ANTHROPIC_DIRECT_MODELS: Tuple[ModelEntry, ...] = ( AZURE_AI_MODELS: Tuple[ModelEntry, ...] = ( + ModelEntry( + alias="azure-claude-fable-5", + model="azure_ai/claude-fable-5", + mode="adaptive", + required_env=_AZURE_FOUNDRY_REQ, + caps=_CAPS_XHIGH_MAX, + fail_reason=( + "claude-fable-5 has no deployment on the CI Microsoft Foundry " + "resource yet; Foundry returns DeploymentNotFound until someone " + "creates the fable-5 deployment, so this cell stays loud in CI. " + "Remove this fail_reason once the deployment exists." + ), + ), ModelEntry( alias="azure-claude-opus-4-8", model="azure_ai/claude-opus-4-8", @@ -213,6 +232,20 @@ AZURE_AI_MODELS: Tuple[ModelEntry, ...] = ( VERTEX_AI_MODELS: Tuple[ModelEntry, ...] = ( + ModelEntry( + alias="vertex-claude-fable-5", + model="vertex_ai/claude-fable-5", + mode="adaptive", + extra_params=(("vertex_location", "global"),), + required_env=_VERTEX_REQ, + caps=_CAPS_XHIGH_MAX, + fail_reason=( + "claude-fable-5 availability on the CI Vertex project is not yet " + "confirmed for this brand-new release, so this cell stays loud in " + "CI until verified. Remove this fail_reason once the model is " + "confirmed available on the global Vertex endpoint." + ), + ), ModelEntry( alias="vertex-claude-opus-4-8", model="vertex_ai/claude-opus-4-8", @@ -263,6 +296,23 @@ VERTEX_AI_MODELS: Tuple[ModelEntry, ...] = ( BEDROCK_CONVERSE_MODELS: Tuple[ModelEntry, ...] = ( + ModelEntry( + alias="bedrock-claude-fable-5", + model="bedrock/converse/us.anthropic.claude-fable-5", + mode="adaptive", + extra_params=(("aws_region_name", "us-east-1"),), + required_env=_BEDROCK_REQ, + caps=_CAPS_XHIGH_MAX, + bedrock_effort_ceiling="xhigh", + unavailable_error="is not available for this account", + fail_reason=( + "claude-fable-5 on Bedrock requires the account to opt in to " + "provider data sharing (data retention mode " + "'provider_data_sharing' via the Data Retention API); the CI " + "account has not opted in yet, so this cell stays loud in CI. " + "Remove this fail_reason once the opt-in is done." + ), + ), ModelEntry( alias="bedrock-claude-opus-4-8", model="bedrock/converse/us.anthropic.claude-opus-4-8", diff --git a/tests/llm_translation/reasoning_effort_grid/test_reasoning_effort_grid.py b/tests/llm_translation/reasoning_effort_grid/test_reasoning_effort_grid.py index 551ab8459d..a5f16f928e 100644 --- a/tests/llm_translation/reasoning_effort_grid/test_reasoning_effort_grid.py +++ b/tests/llm_translation/reasoning_effort_grid/test_reasoning_effort_grid.py @@ -15,7 +15,6 @@ from .grid_spec import ( all_cells, ) - _PROMPT_MESSAGES: List[Dict[str, str]] = [ {"role": "user", "content": "Step by step, calculate 47 * 53. Show your work."} ] @@ -201,8 +200,8 @@ async def test_reasoning_effort_grid( def test_grid_cell_count() -> None: - assert len(_PARAMS) == 25 * 11, ( - f"expected 275 cells (25 provider x model combos x 11 efforts), " + assert len(_PARAMS) == 29 * 11, ( + f"expected 319 cells (29 provider x model combos x 11 efforts), " f"got {len(_PARAMS)}" ) diff --git a/tests/test_litellm/llms/anthropic/chat/test_anthropic_chat_transformation.py b/tests/test_litellm/llms/anthropic/chat/test_anthropic_chat_transformation.py index 75038574c6..abb162e9dd 100644 --- a/tests/test_litellm/llms/anthropic/chat/test_anthropic_chat_transformation.py +++ b/tests/test_litellm/llms/anthropic/chat/test_anthropic_chat_transformation.py @@ -5261,6 +5261,8 @@ def test_should_strip_billing_metadata_by_provider( config_cls = getattr(importlib.import_module(module_path), class_name) assert config_cls().should_strip_billing_metadata() is expected_strip + + def test_namespace_tool_flat_nested_tools_are_extracted(): """Codex sends nested tools in flat format {type, name, description, parameters} with no 'function' wrapper. These must be normalized and mapped without raising KeyError: 'function'.""" @@ -5357,3 +5359,140 @@ def test_client_metadata_stripped_from_anthropic_request(): headers={}, ) assert "client_metadata" not in result + + +@pytest.mark.parametrize( + "model", + ["claude-fable-5", "claude-opus-4-7", "claude-opus-4-8-20260120"], +) +def test_sampling_params_dropped_for_models_that_removed_them(model): + """Fable 5 / Opus 4.7 / 4.8 reject temperature != 1 and any top_p with a + 400; with drop_params set they must be dropped, not forwarded (#30064).""" + config = AnthropicConfig() + + result = config.map_openai_params( + non_default_params={"temperature": 0.5, "top_p": 0.9}, + optional_params={}, + model=model, + drop_params=True, + ) + + assert "temperature" not in result + assert "top_p" not in result + + +@pytest.mark.parametrize("params", [{"temperature": 0.5}, {"top_p": 0.9}, {"top_p": 1}]) +def test_sampling_params_raise_clean_error_without_drop_params(params, monkeypatch): + monkeypatch.setattr(litellm, "drop_params", False) + config = AnthropicConfig() + + with pytest.raises(litellm.utils.UnsupportedParamsError, match="drop_params"): + config.map_openai_params( + non_default_params=params, + optional_params={}, + model="claude-fable-5", + drop_params=False, + ) + + +def test_temperature_1_forwarded_on_models_that_removed_sampling_params(): + """temperature=1 (the API default) is still accepted and must pass through.""" + config = AnthropicConfig() + + result = config.map_openai_params( + non_default_params={"temperature": 1}, + optional_params={}, + model="claude-fable-5", + drop_params=False, + ) + + assert result["temperature"] == 1 + + +@pytest.mark.parametrize("model", ["claude-opus-4-6", "claude-sonnet-4-6"]) +def test_sampling_params_forwarded_on_models_that_accept_them(model): + config = AnthropicConfig() + + result = config.map_openai_params( + non_default_params={"temperature": 0.5, "top_p": 0.9}, + optional_params={}, + model=model, + drop_params=True, + ) + + assert result["temperature"] == 0.5 + assert result["top_p"] == 0.9 + + +def test_sampling_param_gating_driven_by_model_map_flag(monkeypatch): + """The drop/raise decision must come from ``supports_sampling_params`` in + the model map, not just name matching: a flagged entry gates a model whose + name says nothing, and an explicit ``true`` overrides the name fallback.""" + monkeypatch.setitem( + litellm.model_cost, "claude-zeta-9", {"supports_sampling_params": False} + ) + monkeypatch.setitem( + litellm.model_cost, "claude-fable-5-test", {"supports_sampling_params": True} + ) + config = AnthropicConfig() + + flagged_off = config.map_openai_params( + non_default_params={"top_p": 0.9}, + optional_params={}, + model="claude-zeta-9", + drop_params=True, + ) + assert "top_p" not in flagged_off + + flagged_on = config.map_openai_params( + non_default_params={"top_p": 0.9}, + optional_params={}, + model="claude-fable-5-test", + drop_params=True, + ) + assert flagged_on["top_p"] == 0.9 + + +def test_top_k_dropped_at_transform_for_models_that_removed_it(): + """``top_k`` is a provider-specific kwarg that bypasses + ``map_openai_params``, so it must be stripped at the transform_request + boundary shared by the direct, invoke, Vertex, and Azure paths (#30064).""" + config = AnthropicConfig() + + result = config.transform_request( + model="claude-fable-5", + messages=[{"role": "user", "content": "hello"}], + optional_params={"max_tokens": 10, "top_k": 40}, + litellm_params={"drop_params": True}, + headers={}, + ) + + assert "top_k" not in result + + +def test_top_k_raises_at_transform_without_drop_params(monkeypatch): + monkeypatch.setattr(litellm, "drop_params", False) + config = AnthropicConfig() + + with pytest.raises(litellm.utils.UnsupportedParamsError, match="drop_params"): + config.transform_request( + model="claude-fable-5", + messages=[{"role": "user", "content": "hello"}], + optional_params={"max_tokens": 10, "top_k": 40}, + litellm_params={}, + headers={}, + ) + + +def test_top_k_forwarded_at_transform_on_models_that_accept_it(): + config = AnthropicConfig() + + result = config.transform_request( + model="claude-sonnet-4-6", + messages=[{"role": "user", "content": "hello"}], + optional_params={"max_tokens": 10, "top_k": 40}, + litellm_params={"drop_params": True}, + headers={}, + ) + + assert result["top_k"] == 40 diff --git a/tests/test_litellm/llms/bedrock/chat/test_converse_transformation.py b/tests/test_litellm/llms/bedrock/chat/test_converse_transformation.py index ed978113b8..5c83f8b34f 100644 --- a/tests/test_litellm/llms/bedrock/chat/test_converse_transformation.py +++ b/tests/test_litellm/llms/bedrock/chat/test_converse_transformation.py @@ -5267,3 +5267,122 @@ def test_transform_response_does_not_leak_body_on_parse_failure(): msg = str(exc_info.value) assert "secret content" not in msg assert "Error converting to valid response block" in msg + + +def test_converse_drops_sampling_params_for_models_that_removed_them(): + """Fable 5 / Opus 4.7 / 4.8 reject temperature != 1 and any top_p; with + drop_params set, converse must drop them instead of forwarding (#30064).""" + config = AmazonConverseConfig() + + result = config.map_openai_params( + non_default_params={"temperature": 0.5, "top_p": 0.9}, + optional_params={}, + model="us.anthropic.claude-fable-5", + drop_params=True, + ) + + assert "temperature" not in result + assert "topP" not in result + + +def test_converse_sampling_params_raise_without_drop_params(monkeypatch): + monkeypatch.setattr(litellm, "drop_params", False) + config = AmazonConverseConfig() + + with pytest.raises(litellm.utils.UnsupportedParamsError, match="drop_params"): + config.map_openai_params( + non_default_params={"temperature": 0.5}, + optional_params={}, + model="global.anthropic.claude-opus-4-8-v1:0", + drop_params=False, + ) + + +def test_converse_sampling_params_forwarded_on_models_that_accept_them(): + config = AmazonConverseConfig() + + result = config.map_openai_params( + non_default_params={"temperature": 0.5, "top_p": 0.9}, + optional_params={}, + model="us.anthropic.claude-sonnet-4-6", + drop_params=True, + ) + + assert result["temperature"] == 0.5 + assert result["topP"] == 0.9 + + +def test_converse_top_k_dropped_for_models_that_removed_it(): + """``top_k`` reaches converse as a provider-specific kwarg destined for + ``additionalModelRequestFields``, bypassing ``map_openai_params``; the + transform must strip it for models that removed sampling params (#30064).""" + config = AmazonConverseConfig() + + result = config.transform_request( + model="us.anthropic.claude-fable-5", + messages=[{"role": "user", "content": "hello"}], + optional_params={"top_k": 40}, + litellm_params={"drop_params": True}, + headers={}, + ) + + assert "top_k" not in result.get("additionalModelRequestFields", {}) + + +def test_converse_top_k_raises_without_drop_params(monkeypatch): + monkeypatch.setattr(litellm, "drop_params", False) + config = AmazonConverseConfig() + + with pytest.raises(litellm.utils.UnsupportedParamsError, match="drop_params"): + config.transform_request( + model="us.anthropic.claude-fable-5", + messages=[{"role": "user", "content": "hello"}], + optional_params={"top_k": 40}, + litellm_params={}, + headers={}, + ) + + +def test_converse_top_k_forwarded_on_models_that_accept_it(): + config = AmazonConverseConfig() + + result = config.transform_request( + model="us.anthropic.claude-sonnet-4-6", + messages=[{"role": "user", "content": "hello"}], + optional_params={"top_k": 40}, + litellm_params={"drop_params": True}, + headers={}, + ) + + assert result["additionalModelRequestFields"]["top_k"] == 40 + + +def test_converse_top_k_zero_raises_without_drop_params(monkeypatch): + """``top_k=0`` must hit the same gating as any other value; previously the + truthiness check let it silently disappear on models that removed sampling + params, diverging from the Anthropic boundary that treats ``0`` as present.""" + monkeypatch.setattr(litellm, "drop_params", False) + config = AmazonConverseConfig() + + with pytest.raises(litellm.utils.UnsupportedParamsError, match="drop_params"): + config.transform_request( + model="us.anthropic.claude-fable-5", + messages=[{"role": "user", "content": "hello"}], + optional_params={"top_k": 0}, + litellm_params={}, + headers={}, + ) + + +def test_converse_top_k_zero_forwarded_on_models_that_accept_it(): + config = AmazonConverseConfig() + + result = config.transform_request( + model="us.anthropic.claude-sonnet-4-6", + messages=[{"role": "user", "content": "hello"}], + optional_params={"top_k": 0}, + litellm_params={"drop_params": True}, + headers={}, + ) + + assert result["additionalModelRequestFields"]["top_k"] == 0 diff --git a/tests/test_litellm/test_claude_fable_5_config.py b/tests/test_litellm/test_claude_fable_5_config.py new file mode 100644 index 0000000000..d8d95fba0d --- /dev/null +++ b/tests/test_litellm/test_claude_fable_5_config.py @@ -0,0 +1,230 @@ +""" +Validate Claude Fable 5 model configuration entries. + +Fable 5 is a new tier above Opus ($10/$50 per MTok) with the same adaptive-only +API surface as Opus 4.7/4.8. The cost-map entries below are what make the model +resolvable across Anthropic, Bedrock, Vertex AI, and Azure AI (Microsoft +Foundry), and the ``supports_adaptive_thinking`` flag is what makes LiteLLM send +``thinking.type='adaptive'`` instead of the legacy ``enabled``/``budget_tokens`` +shape, which Fable 5 rejects with a 400. +""" + +import json +import os + +import pytest + +import litellm +from litellm.constants import BEDROCK_CONVERSE_MODELS +from litellm.litellm_core_utils.get_model_cost_map import GetModelCostMap + +REPO_ROOT = os.path.join(os.path.dirname(__file__), "../..") + + +def _load_root_cost_map() -> dict: + json_path = os.path.join(REPO_ROOT, "model_prices_and_context_window.json") + with open(json_path) as f: + return json.load(f) + + +@pytest.fixture +def local_model_cost_map(monkeypatch): + """Force the bundled backup cost map so assertions don't depend on the + network-fetched ``main`` copy (which lags this branch until merge).""" + original_model_cost = litellm.model_cost + monkeypatch.setenv("LITELLM_LOCAL_MODEL_COST_MAP", "True") + litellm.model_cost = litellm.get_model_cost_map(url="") + litellm.get_model_info.cache_clear() + try: + yield + finally: + litellm.model_cost = original_model_cost + litellm.get_model_info.cache_clear() + + +def test_fable_5_model_pricing_and_capabilities(): + model_data = _load_root_cost_map() + + expected_models = [ + ("claude-fable-5", "anthropic"), + ("anthropic.claude-fable-5", "bedrock_converse"), + ("vertex_ai/claude-fable-5", "vertex_ai-anthropic_models"), + # Unlike Opus 4.8 (200k on Foundry), Fable 5 has the full 1M context + # window on Microsoft Foundry. + ("azure_ai/claude-fable-5", "azure_ai"), + ] + + for model_name, provider in expected_models: + assert model_name in model_data, f"Missing model entry: {model_name}" + info = model_data[model_name] + + assert info["litellm_provider"] == provider + assert info["mode"] == "chat" + assert info["max_input_tokens"] == 1000000 + assert info["max_output_tokens"] == 128000 + assert info["max_tokens"] == 128000 + + # $10 / $50 per MTok (2x Opus 4.8), with the standard 1.25x 5m + # cache-write, 2x 1h cache-write, and 0.1x cache-read multipliers. + assert info["input_cost_per_token"] == 1e-05 + assert info["output_cost_per_token"] == 5e-05 + assert info["cache_creation_input_token_cost"] == 1.25e-05 + assert info["cache_creation_input_token_cost_above_1hr"] == 2e-05 + assert info["cache_read_input_token_cost"] == 1e-06 + + # Flat-rate across the full 1M context window. + assert "input_cost_per_token_above_200k_tokens" not in info + assert "output_cost_per_token_above_200k_tokens" not in info + + assert info["supports_assistant_prefill"] is False + assert info["supports_function_calling"] is True + assert info["supports_prompt_caching"] is True + assert info["supports_reasoning"] is True + assert info["supports_tool_choice"] is True + assert info["supports_vision"] is True + assert info["supports_xhigh_reasoning_effort"] is True + assert info["supports_max_reasoning_effort"] is True + + +def test_fable_5_bedrock_regional_model_pricing(): + model_data = _load_root_cost_map() + + # Fable 5 launched with us/eu geo inference profiles plus a global profile + # (no au/apac/jp). Global uses base pricing; geo profiles carry the + # standard 10% regional premium. + expected_models = { + "global.anthropic.claude-fable-5": { + "input_cost_per_token": 1e-05, + "output_cost_per_token": 5e-05, + "cache_creation_input_token_cost": 1.25e-05, + "cache_read_input_token_cost": 1e-06, + }, + "us.anthropic.claude-fable-5": { + "input_cost_per_token": 1.1e-05, + "output_cost_per_token": 5.5e-05, + "cache_creation_input_token_cost": 1.375e-05, + "cache_read_input_token_cost": 1.1e-06, + }, + "eu.anthropic.claude-fable-5": { + "input_cost_per_token": 1.1e-05, + "output_cost_per_token": 5.5e-05, + "cache_creation_input_token_cost": 1.375e-05, + "cache_read_input_token_cost": 1.1e-06, + }, + } + + for model_name, expected in expected_models.items(): + assert model_name in model_data, f"Missing model entry: {model_name}" + info = model_data[model_name] + assert info["litellm_provider"] == "bedrock_converse" + assert info["max_input_tokens"] == 1000000 + assert info["max_output_tokens"] == 128000 + assert info["bedrock_output_config_effort_ceiling"] == "xhigh" + for key, value in expected.items(): + assert info[key] == value + + +def test_fable_5_geo_multiplier_without_fast_mode(): + """First-party ``inference_geo='us'`` carries the 1.1x premium, but unlike + the Opus line there is no fast-mode variant for Fable 5; a ``fast`` key + here would silently misprice ``speed='fast'`` requests.""" + model_data = _load_root_cost_map() + entry = model_data["claude-fable-5"]["provider_specific_entry"] + assert entry == {"us": 1.1} + + +def test_fable_5_present_in_bundled_backup(): + """The bundled backup is the runtime fallback (and what tests load with + ``LITELLM_LOCAL_MODEL_COST_MAP=True``) — it must carry the same entries as + the root cost map, otherwise the model resolves on one path but not the + other.""" + backup = GetModelCostMap.load_local_model_cost_map() + root = _load_root_cost_map() + for model_name in ( + "claude-fable-5", + "anthropic.claude-fable-5", + "global.anthropic.claude-fable-5", + "us.anthropic.claude-fable-5", + "eu.anthropic.claude-fable-5", + "vertex_ai/claude-fable-5", + "vertex_ai/claude-fable-5@default", + "azure_ai/claude-fable-5", + ): + assert model_name in backup, f"Missing from backup cost map: {model_name}" + assert backup[model_name] == root[model_name], model_name + + +def test_fable_5_registered_for_bedrock_converse(): + assert "anthropic.claude-fable-5" in BEDROCK_CONVERSE_MODELS + + +def test_fable_5_provider_resolves_via_model_info(local_model_cost_map): + info = litellm.get_model_info(model="claude-fable-5") + assert info["litellm_provider"] == "anthropic" + assert info["max_input_tokens"] == 1000000 + assert info["max_output_tokens"] == 128000 + + +@pytest.mark.parametrize( + "cost_map", + [_load_root_cost_map(), GetModelCostMap.load_local_model_cost_map()], + ids=["root", "bundled_backup"], +) +def test_fable_5_all_variants_carry_adaptive_thinking_flag(cost_map): + """Every Fable 5 entry must advertise ``supports_adaptive_thinking``. + + Adaptive-thinking detection is cost-map driven, so a single variant missing + the flag silently sends the legacy ``thinking.type='enabled'`` shape and the + provider 400s (issue #29188 for the Opus 4.8 equivalent). Fable 5 is even + stricter than Opus 4.8: an explicit ``thinking.type='disabled'`` also 400s, + so adaptive is the only valid thinking shape LiteLLM can emit for it.""" + variants = [k for k in cost_map if "claude-fable-5" in k] + assert variants, "no claude-fable-5 entries found in cost map" + missing = [ + k for k in variants if cost_map[k].get("supports_adaptive_thinking") is not True + ] + assert not missing, f"missing supports_adaptive_thinking: {missing}" + + +@pytest.mark.parametrize( + "model", + [ + "claude-fable-5", + "anthropic/claude-fable-5", + "anthropic.claude-fable-5", + "bedrock/us.anthropic.claude-fable-5", + "bedrock/invoke/eu.anthropic.claude-fable-5", + "bedrock/global.anthropic.claude-fable-5", + "vertex_ai/claude-fable-5", + "azure_ai/claude-fable-5", + ], +) +def test_adaptive_thinking_detected_for_fable_5(local_model_cost_map, model): + """Provider-routed ids must resolve to a flagged entry so ``reasoning_effort`` + maps to ``thinking.type='adaptive'`` + ``output_config.effort``.""" + from litellm.llms.anthropic.common_utils import AnthropicModelInfo + + assert AnthropicModelInfo._is_adaptive_thinking_model(model) is True + + +@pytest.mark.parametrize( + "cost_map", + [_load_root_cost_map(), GetModelCostMap.load_local_model_cost_map()], + ids=["root", "bundled_backup"], +) +def test_sampling_params_flag_on_all_models_that_removed_them(cost_map): + """Fable 5 and Opus 4.7/4.8 reject ``top_p``/``top_k``/``temperature != 1``; + the drop/raise gating is cost-map driven, so every variant must carry an + explicit ``supports_sampling_params: false``. The perplexity route is + exempt: it is OpenAI-compatible and maps sampling params upstream.""" + variants = [ + k + for k in cost_map + if any(v in k for v in ("claude-fable-5", "claude-opus-4-7", "claude-opus-4-8")) + and not k.startswith("perplexity/") + ] + assert variants, "no matching entries found in cost map" + missing = [ + k for k in variants if cost_map[k].get("supports_sampling_params") is not False + ] + assert not missing, f"missing supports_sampling_params=false: {missing}" diff --git a/tests/test_litellm/test_utils.py b/tests/test_litellm/test_utils.py index f179e9c8f9..4c4d9e1133 100644 --- a/tests/test_litellm/test_utils.py +++ b/tests/test_litellm/test_utils.py @@ -858,6 +858,7 @@ def test_aaamodel_prices_and_context_window_json_is_valid(): "supports_xhigh_reasoning_effort": {"type": "boolean"}, "supports_max_reasoning_effort": {"type": "boolean"}, "supports_adaptive_thinking": {"type": "boolean"}, + "supports_sampling_params": {"type": "boolean"}, "supports_service_tier": {"type": "boolean"}, "supports_preset": {"type": "boolean"}, "supports_output_config": {"type": "boolean"},