diff --git a/litellm/cost_calculator.py b/litellm/cost_calculator.py index 74c1afb0cc..ad6eb6b4f3 100644 --- a/litellm/cost_calculator.py +++ b/litellm/cost_calculator.py @@ -480,6 +480,7 @@ def cost_per_token( # noqa: PLR0915 model=model_without_prefix, custom_llm_provider=custom_llm_provider, usage=usage_block, + service_tier=service_tier, ) elif custom_llm_provider == "anthropic": return anthropic_cost_per_token(model=model, usage=usage_block) @@ -500,7 +501,9 @@ def cost_per_token( # noqa: PLR0915 model=model, usage=usage_block, response_time_ms=response_time_ms ) elif custom_llm_provider == "gemini": - return gemini_cost_per_token(model=model, usage=usage_block) + return gemini_cost_per_token( + model=model, usage=usage_block, service_tier=service_tier + ) elif custom_llm_provider == "deepseek": return deepseek_cost_per_token(model=model, usage=usage_block) elif custom_llm_provider == "perplexity": @@ -704,6 +707,36 @@ def _get_response_model(completion_response: Any) -> Optional[str]: return None +_GEMINI_TRAFFIC_TYPE_TO_SERVICE_TIER: dict = { + # ON_DEMAND_PRIORITY maps to "priority" — selects input_cost_per_token_priority, etc. + "ON_DEMAND_PRIORITY": "priority", + # FLEX / BATCH maps to "flex" — selects input_cost_per_token_flex, etc. + "FLEX": "flex", + "BATCH": "flex", + # ON_DEMAND is standard pricing — no service_tier suffix applied + "ON_DEMAND": None, +} + + +def _map_traffic_type_to_service_tier(traffic_type: Optional[str]) -> Optional[str]: + """ + Map a Gemini usageMetadata.trafficType value to a LiteLLM service_tier string. + + This allows the same `_priority` / `_flex` cost-key suffix logic used for + OpenAI/Azure to work for Gemini and Vertex AI models. 
+ + trafficType values seen in practice + ------------------------------------ + ON_DEMAND -> standard pricing (service_tier = None) + ON_DEMAND_PRIORITY -> priority pricing (service_tier = "priority") + FLEX / BATCH -> batch/flex pricing (service_tier = "flex") + """ + if traffic_type is None: + return None + service_tier = _GEMINI_TRAFFIC_TYPE_TO_SERVICE_TIER.get(traffic_type.upper()) + return service_tier + + def _get_usage_object( completion_response: Any, ) -> Optional[Usage]: @@ -1145,6 +1178,20 @@ def completion_cost( # noqa: PLR0915 "custom_llm_provider", custom_llm_provider or None ) region_name = hidden_params.get("region_name", region_name) + + # For Gemini/Vertex AI responses, trafficType is stored in + # provider_specific_fields. Map it to the service_tier used + # by the cost key lookup (_priority / _flex suffixes) so that + # ON_DEMAND_PRIORITY requests are billed at priority prices. + if service_tier is None: + provider_specific = ( + hidden_params.get("provider_specific_fields") or {} + ) + raw_traffic_type = provider_specific.get("traffic_type") + if raw_traffic_type: + service_tier = _map_traffic_type_to_service_tier( + raw_traffic_type + ) else: if model is None: raise ValueError( diff --git a/litellm/litellm_core_utils/llm_cost_calc/utils.py b/litellm/litellm_core_utils/llm_cost_calc/utils.py index 7c41e1bbe6..a9fd0f4ea8 100644 --- a/litellm/litellm_core_utils/llm_cost_calc/utils.py +++ b/litellm/litellm_core_utils/llm_cost_calc/utils.py @@ -200,8 +200,14 @@ def _get_token_base_cost( ## CHECK IF ABOVE THRESHOLD # Optimization: collect threshold keys first to avoid sorting all model_info keys. # Most models don't have threshold pricing, so we can return early. + # Exclude service_tier-specific variants (e.g. input_cost_per_token_above_200k_tokens_priority) + # so that the threshold detection loop only processes standard keys. The + # service_tier-specific above-threshold key is resolved later via _get_service_tier_cost_key. 
threshold_keys = [ - k for k in model_info if k.startswith("input_cost_per_token_above_") + k + for k in model_info + if k.startswith("input_cost_per_token_above_") + and not any(k.endswith(f"_{st.value}") for st in ServiceTier) ] if not threshold_keys: return ( @@ -224,14 +230,34 @@ def _get_token_base_cost( 1000 if "k" in threshold_str else 1 ) if usage.prompt_tokens > threshold: + # Prefer a service_tier-specific above-threshold key when available, + # e.g. input_cost_per_token_above_200k_tokens_priority for Gemini + # ON_DEMAND_PRIORITY. Falls back to the standard key automatically + # via _get_cost_per_unit's service_tier fallback logic. + tiered_input_key = ( + _get_service_tier_cost_key( + f"input_cost_per_token_above_{threshold_str}_tokens", + service_tier, + ) + if service_tier + else key + ) prompt_base_cost = cast( - float, _get_cost_per_unit(model_info, key, prompt_base_cost) + float, _get_cost_per_unit(model_info, tiered_input_key, prompt_base_cost) + ) + tiered_output_key = ( + _get_service_tier_cost_key( + f"output_cost_per_token_above_{threshold_str}_tokens", + service_tier, + ) + if service_tier + else f"output_cost_per_token_above_{threshold_str}_tokens" ) completion_base_cost = cast( float, _get_cost_per_unit( model_info, - f"output_cost_per_token_above_{threshold_str}_tokens", + tiered_output_key, completion_base_cost, ), ) @@ -517,6 +543,7 @@ def _calculate_input_cost( cache_read_cost: float, cache_creation_cost: float, cache_creation_cost_above_1hr: float, + service_tier: Optional[str] = None, ) -> float: """ Calculates the input cost for a given model, prompt tokens, and completion tokens. 
@@ -528,8 +555,11 @@ def _calculate_input_cost( ### AUDIO COST if prompt_tokens_details["audio_tokens"]: + audio_cost_key = _get_service_tier_cost_key( + "input_cost_per_audio_token", service_tier + ) prompt_cost += calculate_cost_component( - model_info, "input_cost_per_audio_token", prompt_tokens_details["audio_tokens"] + model_info, audio_cost_key, prompt_tokens_details["audio_tokens"] ) ### IMAGE TOKEN COST @@ -659,6 +689,7 @@ def generic_cost_per_token( # noqa: PLR0915 cache_read_cost=cache_read_cost, cache_creation_cost=cache_creation_cost, cache_creation_cost_above_1hr=cache_creation_cost_above_1hr, + service_tier=service_tier, ) ## CALCULATE OUTPUT COST diff --git a/litellm/llms/gemini/cost_calculator.py b/litellm/llms/gemini/cost_calculator.py index 471421b487..79242fe01d 100644 --- a/litellm/llms/gemini/cost_calculator.py +++ b/litellm/llms/gemini/cost_calculator.py @@ -4,13 +4,15 @@ This file is used to calculate the cost of the Gemini API. Handles the context caching for Gemini API. """ -from typing import TYPE_CHECKING, Tuple +from typing import TYPE_CHECKING, Optional, Tuple if TYPE_CHECKING: from litellm.types.utils import ModelInfo, Usage -def cost_per_token(model: str, usage: "Usage") -> Tuple[float, float]: +def cost_per_token( + model: str, usage: "Usage", service_tier: Optional[str] = None +) -> Tuple[float, float]: """ Calculates the cost per token for a given model, prompt tokens, and completion tokens. 
@@ -19,7 +21,7 @@ def cost_per_token(model: str, usage: "Usage") -> Tuple[float, float]: from litellm.litellm_core_utils.llm_cost_calc.utils import generic_cost_per_token return generic_cost_per_token( - model=model, usage=usage, custom_llm_provider="gemini" + model=model, usage=usage, custom_llm_provider="gemini", service_tier=service_tier ) diff --git a/litellm/llms/vertex_ai/cost_calculator.py b/litellm/llms/vertex_ai/cost_calculator.py index e98dc75915..e7ac453e94 100644 --- a/litellm/llms/vertex_ai/cost_calculator.py +++ b/litellm/llms/vertex_ai/cost_calculator.py @@ -224,6 +224,7 @@ def cost_per_token( model: str, custom_llm_provider: str, usage: Usage, + service_tier: Optional[str] = None, ) -> Tuple[float, float]: """ Calculates the cost per token for a given model, prompt tokens, and completion tokens. @@ -233,6 +234,8 @@ def cost_per_token( - custom_llm_provider: str, either "vertex_ai-*" or "gemini" - prompt_tokens: float, the number of input tokens - completion_tokens: float, the number of output tokens + - service_tier: optional tier derived from Gemini trafficType + ("priority" for ON_DEMAND_PRIORITY, "flex" for FLEX/batch). 
Returns: Tuple[float, float] - prompt_cost_in_usd, completion_cost_in_usd @@ -266,4 +269,5 @@ def cost_per_token( model=model, custom_llm_provider=custom_llm_provider, usage=usage, + service_tier=service_tier, ) diff --git a/litellm/model_prices_and_context_window_backup.json b/litellm/model_prices_and_context_window_backup.json index 18cbe6d1d8..4f4e99f099 100644 --- a/litellm/model_prices_and_context_window_backup.json +++ b/litellm/model_prices_and_context_window_backup.json @@ -14590,7 +14590,14 @@ "supports_video_input": true, "supports_vision": true, "supports_web_search": true, - "supports_native_streaming": true + "supports_native_streaming": true, + "input_cost_per_token_priority": 3.6e-06, + "input_cost_per_token_above_200k_tokens_priority": 7.2e-06, + "output_cost_per_token_priority": 2.16e-05, + "output_cost_per_token_above_200k_tokens_priority": 3.24e-05, + "cache_read_input_token_cost_priority": 3.6e-07, + "cache_read_input_token_cost_above_200k_tokens_priority": 7.2e-07, + "supports_service_tier": true }, "gemini-3.1-pro-preview": { "cache_read_input_token_cost": 2e-07, @@ -14641,7 +14648,14 @@ "supports_vision": true, "supports_web_search": true, "supports_url_context": true, - "supports_native_streaming": true + "supports_native_streaming": true, + "input_cost_per_token_priority": 3.6e-06, + "input_cost_per_token_above_200k_tokens_priority": 7.2e-06, + "output_cost_per_token_priority": 2.16e-05, + "output_cost_per_token_above_200k_tokens_priority": 3.24e-05, + "cache_read_input_token_cost_priority": 3.6e-07, + "cache_read_input_token_cost_above_200k_tokens_priority": 7.2e-07, + "supports_service_tier": true }, "gemini-3.1-pro-preview-customtools": { "cache_read_input_token_cost": 2e-07, @@ -14741,7 +14755,14 @@ "supports_video_input": true, "supports_vision": true, "supports_web_search": true, - "supports_native_streaming": true + "supports_native_streaming": true, + "input_cost_per_token_priority": 3.6e-06, + 
"input_cost_per_token_above_200k_tokens_priority": 7.2e-06, + "output_cost_per_token_priority": 2.16e-05, + "output_cost_per_token_above_200k_tokens_priority": 3.24e-05, + "cache_read_input_token_cost_priority": 3.6e-07, + "cache_read_input_token_cost_above_200k_tokens_priority": 7.2e-07, + "supports_service_tier": true }, "vertex_ai/gemini-3-flash-preview": { "cache_read_input_token_cost": 5e-08, @@ -14785,7 +14806,12 @@ "supports_video_input": true, "supports_vision": true, "supports_web_search": true, - "supports_native_streaming": true + "supports_native_streaming": true, + "input_cost_per_token_priority": 9e-07, + "input_cost_per_audio_token_priority": 1.8e-06, + "output_cost_per_token_priority": 5.4e-06, + "cache_read_input_token_cost_priority": 9e-08, + "supports_service_tier": true }, "vertex_ai/gemini-3.1-pro-preview": { "cache_read_input_token_cost": 2e-07, @@ -14836,7 +14862,14 @@ "supports_vision": true, "supports_web_search": true, "supports_url_context": true, - "supports_native_streaming": true + "supports_native_streaming": true, + "input_cost_per_token_priority": 3.6e-06, + "input_cost_per_token_above_200k_tokens_priority": 7.2e-06, + "output_cost_per_token_priority": 2.16e-05, + "output_cost_per_token_above_200k_tokens_priority": 3.24e-05, + "cache_read_input_token_cost_priority": 3.6e-07, + "cache_read_input_token_cost_above_200k_tokens_priority": 7.2e-07, + "supports_service_tier": true }, "vertex_ai/gemini-3.1-pro-preview-customtools": { "cache_read_input_token_cost": 2e-07, @@ -14887,7 +14920,14 @@ "supports_vision": true, "supports_web_search": true, "supports_url_context": true, - "supports_native_streaming": true + "supports_native_streaming": true, + "input_cost_per_token_priority": 3.6e-06, + "input_cost_per_token_above_200k_tokens_priority": 7.2e-06, + "output_cost_per_token_priority": 2.16e-05, + "output_cost_per_token_above_200k_tokens_priority": 3.24e-05, + "cache_read_input_token_cost_priority": 3.6e-07, + 
"cache_read_input_token_cost_above_200k_tokens_priority": 7.2e-07, + "supports_service_tier": true }, "gemini-2.5-pro-exp-03-25": { "cache_read_input_token_cost": 1.25e-07, @@ -16682,6 +16722,8 @@ "cache_read_input_token_cost_above_200k_tokens": 2.5e-07, "input_cost_per_token": 1.25e-06, "input_cost_per_token_above_200k_tokens": 2.5e-06, + "input_cost_per_token_priority": 1.25e-06, + "input_cost_per_token_above_200k_tokens_priority": 2.5e-06, "litellm_provider": "gemini", "max_audio_length_hours": 8.4, "max_audio_per_prompt": 1, @@ -16695,8 +16737,11 @@ "mode": "chat", "output_cost_per_token": 1e-05, "output_cost_per_token_above_200k_tokens": 1.5e-05, + "output_cost_per_token_priority": 1e-05, + "output_cost_per_token_above_200k_tokens_priority": 1.5e-05, "rpm": 2000, "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing", + "supports_service_tier": true, "supported_endpoints": [ "/v1/chat/completions", "/v1/completions" @@ -16801,7 +16846,14 @@ "supports_video_input": true, "supports_vision": true, "supports_web_search": true, - "tpm": 800000 + "tpm": 800000, + "input_cost_per_token_priority": 3.6e-06, + "input_cost_per_token_above_200k_tokens_priority": 7.2e-06, + "output_cost_per_token_priority": 2.16e-05, + "output_cost_per_token_above_200k_tokens_priority": 3.24e-05, + "cache_read_input_token_cost_priority": 3.6e-07, + "cache_read_input_token_cost_above_200k_tokens_priority": 7.2e-07, + "supports_service_tier": true }, "gemini/gemini-3-flash-preview": { "cache_read_input_token_cost": 5e-08, @@ -16849,7 +16901,12 @@ "supports_vision": true, "supports_web_search": true, "supports_native_streaming": true, - "tpm": 800000 + "tpm": 800000, + "input_cost_per_token_priority": 9e-07, + "input_cost_per_audio_token_priority": 1.8e-06, + "output_cost_per_token_priority": 5.4e-06, + "cache_read_input_token_cost_priority": 9e-08, + "supports_service_tier": true }, "gemini/gemini-3.1-pro-preview": { "cache_read_input_token_cost": 2e-07, @@ -16900,7 +16957,14 
@@ "supports_web_search": true, "supports_url_context": true, "supports_native_streaming": true, - "tpm": 800000 + "tpm": 800000, + "input_cost_per_token_priority": 3.6e-06, + "input_cost_per_token_above_200k_tokens_priority": 7.2e-06, + "output_cost_per_token_priority": 2.16e-05, + "output_cost_per_token_above_200k_tokens_priority": 3.24e-05, + "cache_read_input_token_cost_priority": 3.6e-07, + "cache_read_input_token_cost_above_200k_tokens_priority": 7.2e-07, + "supports_service_tier": true }, "gemini/gemini-3.1-pro-preview-customtools": { "cache_read_input_token_cost": 2e-07, @@ -16951,7 +17015,14 @@ "supports_web_search": true, "supports_url_context": true, "supports_native_streaming": true, - "tpm": 800000 + "tpm": 800000, + "input_cost_per_token_priority": 3.6e-06, + "input_cost_per_token_above_200k_tokens_priority": 7.2e-06, + "output_cost_per_token_priority": 2.16e-05, + "output_cost_per_token_above_200k_tokens_priority": 3.24e-05, + "cache_read_input_token_cost_priority": 3.6e-07, + "cache_read_input_token_cost_above_200k_tokens_priority": 7.2e-07, + "supports_service_tier": true }, "gemini-3-flash-preview": { "cache_read_input_token_cost": 5e-08, @@ -16997,7 +17068,12 @@ "supports_url_context": true, "supports_vision": true, "supports_web_search": true, - "supports_native_streaming": true + "supports_native_streaming": true, + "input_cost_per_token_priority": 9e-07, + "input_cost_per_audio_token_priority": 1.8e-06, + "output_cost_per_token_priority": 5.4e-06, + "cache_read_input_token_cost_priority": 9e-08, + "supports_service_tier": true }, "gemini/gemini-2.5-pro-exp-03-25": { "cache_read_input_token_cost": 0.0, @@ -37571,4 +37647,4 @@ "notes": "DuckDuckGo Instant Answer API is free and does not require an API key." 
} } -} +} \ No newline at end of file diff --git a/model_prices_and_context_window.json b/model_prices_and_context_window.json index 18cbe6d1d8..4f4e99f099 100644 --- a/model_prices_and_context_window.json +++ b/model_prices_and_context_window.json @@ -14590,7 +14590,14 @@ "supports_video_input": true, "supports_vision": true, "supports_web_search": true, - "supports_native_streaming": true + "supports_native_streaming": true, + "input_cost_per_token_priority": 3.6e-06, + "input_cost_per_token_above_200k_tokens_priority": 7.2e-06, + "output_cost_per_token_priority": 2.16e-05, + "output_cost_per_token_above_200k_tokens_priority": 3.24e-05, + "cache_read_input_token_cost_priority": 3.6e-07, + "cache_read_input_token_cost_above_200k_tokens_priority": 7.2e-07, + "supports_service_tier": true }, "gemini-3.1-pro-preview": { "cache_read_input_token_cost": 2e-07, @@ -14641,7 +14648,14 @@ "supports_vision": true, "supports_web_search": true, "supports_url_context": true, - "supports_native_streaming": true + "supports_native_streaming": true, + "input_cost_per_token_priority": 3.6e-06, + "input_cost_per_token_above_200k_tokens_priority": 7.2e-06, + "output_cost_per_token_priority": 2.16e-05, + "output_cost_per_token_above_200k_tokens_priority": 3.24e-05, + "cache_read_input_token_cost_priority": 3.6e-07, + "cache_read_input_token_cost_above_200k_tokens_priority": 7.2e-07, + "supports_service_tier": true }, "gemini-3.1-pro-preview-customtools": { "cache_read_input_token_cost": 2e-07, @@ -14741,7 +14755,14 @@ "supports_video_input": true, "supports_vision": true, "supports_web_search": true, - "supports_native_streaming": true + "supports_native_streaming": true, + "input_cost_per_token_priority": 3.6e-06, + "input_cost_per_token_above_200k_tokens_priority": 7.2e-06, + "output_cost_per_token_priority": 2.16e-05, + "output_cost_per_token_above_200k_tokens_priority": 3.24e-05, + "cache_read_input_token_cost_priority": 3.6e-07, + 
"cache_read_input_token_cost_above_200k_tokens_priority": 7.2e-07, + "supports_service_tier": true }, "vertex_ai/gemini-3-flash-preview": { "cache_read_input_token_cost": 5e-08, @@ -14785,7 +14806,12 @@ "supports_video_input": true, "supports_vision": true, "supports_web_search": true, - "supports_native_streaming": true + "supports_native_streaming": true, + "input_cost_per_token_priority": 9e-07, + "input_cost_per_audio_token_priority": 1.8e-06, + "output_cost_per_token_priority": 5.4e-06, + "cache_read_input_token_cost_priority": 9e-08, + "supports_service_tier": true }, "vertex_ai/gemini-3.1-pro-preview": { "cache_read_input_token_cost": 2e-07, @@ -14836,7 +14862,14 @@ "supports_vision": true, "supports_web_search": true, "supports_url_context": true, - "supports_native_streaming": true + "supports_native_streaming": true, + "input_cost_per_token_priority": 3.6e-06, + "input_cost_per_token_above_200k_tokens_priority": 7.2e-06, + "output_cost_per_token_priority": 2.16e-05, + "output_cost_per_token_above_200k_tokens_priority": 3.24e-05, + "cache_read_input_token_cost_priority": 3.6e-07, + "cache_read_input_token_cost_above_200k_tokens_priority": 7.2e-07, + "supports_service_tier": true }, "vertex_ai/gemini-3.1-pro-preview-customtools": { "cache_read_input_token_cost": 2e-07, @@ -14887,7 +14920,14 @@ "supports_vision": true, "supports_web_search": true, "supports_url_context": true, - "supports_native_streaming": true + "supports_native_streaming": true, + "input_cost_per_token_priority": 3.6e-06, + "input_cost_per_token_above_200k_tokens_priority": 7.2e-06, + "output_cost_per_token_priority": 2.16e-05, + "output_cost_per_token_above_200k_tokens_priority": 3.24e-05, + "cache_read_input_token_cost_priority": 3.6e-07, + "cache_read_input_token_cost_above_200k_tokens_priority": 7.2e-07, + "supports_service_tier": true }, "gemini-2.5-pro-exp-03-25": { "cache_read_input_token_cost": 1.25e-07, @@ -16682,6 +16722,8 @@ "cache_read_input_token_cost_above_200k_tokens": 
2.5e-07, "input_cost_per_token": 1.25e-06, "input_cost_per_token_above_200k_tokens": 2.5e-06, + "input_cost_per_token_priority": 1.25e-06, + "input_cost_per_token_above_200k_tokens_priority": 2.5e-06, "litellm_provider": "gemini", "max_audio_length_hours": 8.4, "max_audio_per_prompt": 1, @@ -16695,8 +16737,11 @@ "mode": "chat", "output_cost_per_token": 1e-05, "output_cost_per_token_above_200k_tokens": 1.5e-05, + "output_cost_per_token_priority": 1e-05, + "output_cost_per_token_above_200k_tokens_priority": 1.5e-05, "rpm": 2000, "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing", + "supports_service_tier": true, "supported_endpoints": [ "/v1/chat/completions", "/v1/completions" @@ -16801,7 +16846,14 @@ "supports_video_input": true, "supports_vision": true, "supports_web_search": true, - "tpm": 800000 + "tpm": 800000, + "input_cost_per_token_priority": 3.6e-06, + "input_cost_per_token_above_200k_tokens_priority": 7.2e-06, + "output_cost_per_token_priority": 2.16e-05, + "output_cost_per_token_above_200k_tokens_priority": 3.24e-05, + "cache_read_input_token_cost_priority": 3.6e-07, + "cache_read_input_token_cost_above_200k_tokens_priority": 7.2e-07, + "supports_service_tier": true }, "gemini/gemini-3-flash-preview": { "cache_read_input_token_cost": 5e-08, @@ -16849,7 +16901,12 @@ "supports_vision": true, "supports_web_search": true, "supports_native_streaming": true, - "tpm": 800000 + "tpm": 800000, + "input_cost_per_token_priority": 9e-07, + "input_cost_per_audio_token_priority": 1.8e-06, + "output_cost_per_token_priority": 5.4e-06, + "cache_read_input_token_cost_priority": 9e-08, + "supports_service_tier": true }, "gemini/gemini-3.1-pro-preview": { "cache_read_input_token_cost": 2e-07, @@ -16900,7 +16957,14 @@ "supports_web_search": true, "supports_url_context": true, "supports_native_streaming": true, - "tpm": 800000 + "tpm": 800000, + "input_cost_per_token_priority": 3.6e-06, + "input_cost_per_token_above_200k_tokens_priority": 7.2e-06, + 
"output_cost_per_token_priority": 2.16e-05, + "output_cost_per_token_above_200k_tokens_priority": 3.24e-05, + "cache_read_input_token_cost_priority": 3.6e-07, + "cache_read_input_token_cost_above_200k_tokens_priority": 7.2e-07, + "supports_service_tier": true }, "gemini/gemini-3.1-pro-preview-customtools": { "cache_read_input_token_cost": 2e-07, @@ -16951,7 +17015,14 @@ "supports_web_search": true, "supports_url_context": true, "supports_native_streaming": true, - "tpm": 800000 + "tpm": 800000, + "input_cost_per_token_priority": 3.6e-06, + "input_cost_per_token_above_200k_tokens_priority": 7.2e-06, + "output_cost_per_token_priority": 2.16e-05, + "output_cost_per_token_above_200k_tokens_priority": 3.24e-05, + "cache_read_input_token_cost_priority": 3.6e-07, + "cache_read_input_token_cost_above_200k_tokens_priority": 7.2e-07, + "supports_service_tier": true }, "gemini-3-flash-preview": { "cache_read_input_token_cost": 5e-08, @@ -16997,7 +17068,12 @@ "supports_url_context": true, "supports_vision": true, "supports_web_search": true, - "supports_native_streaming": true + "supports_native_streaming": true, + "input_cost_per_token_priority": 9e-07, + "input_cost_per_audio_token_priority": 1.8e-06, + "output_cost_per_token_priority": 5.4e-06, + "cache_read_input_token_cost_priority": 9e-08, + "supports_service_tier": true }, "gemini/gemini-2.5-pro-exp-03-25": { "cache_read_input_token_cost": 0.0, @@ -37571,4 +37647,4 @@ "notes": "DuckDuckGo Instant Answer API is free and does not require an API key." 
} } -} +} \ No newline at end of file diff --git a/tests/test_litellm/test_utils.py b/tests/test_litellm/test_utils.py index e6607fdf1b..984c6b0117 100644 --- a/tests/test_litellm/test_utils.py +++ b/tests/test_litellm/test_utils.py @@ -606,10 +606,14 @@ def test_aaamodel_prices_and_context_window_json_is_valid(): "input_cost_per_token_above_200k_tokens": {"type": "number"}, "cache_read_input_token_cost_flex": {"type": "number"}, "cache_read_input_token_cost_priority": {"type": "number"}, + "cache_read_input_token_cost_above_200k_tokens_priority": {"type": "number"}, "input_cost_per_token_flex": {"type": "number"}, "input_cost_per_token_priority": {"type": "number"}, + "input_cost_per_token_above_200k_tokens_priority": {"type": "number"}, + "input_cost_per_audio_token_priority": {"type": "number"}, "output_cost_per_token_flex": {"type": "number"}, "output_cost_per_token_priority": {"type": "number"}, + "output_cost_per_token_above_200k_tokens_priority": {"type": "number"}, "input_cost_per_pixel": {"type": "number"}, "input_cost_per_query": {"type": "number"}, "input_cost_per_request": {"type": "number"}, @@ -644,6 +648,7 @@ def test_aaamodel_prices_and_context_window_json_is_valid(): "max_video_length": {"type": "number"}, "max_videos_per_prompt": {"type": "number"}, "metadata": {"type": "object"}, + "provider_specific_entry": {"type": "object"}, "mode": { "type": "string", "enum": [ @@ -803,7 +808,7 @@ def test_aaamodel_prices_and_context_window_json_is_valid(): }, } - prod_json = "./model_prices_and_context_window.json" + prod_json = "litellm/model_prices_and_context_window.json" # prod_json = "../../model_prices_and_context_window.json" with open(prod_json, "r") as model_prices_file: actual_json = json.load(model_prices_file)