Merge pull request #21909 from BerriAI/litellm_cost_tracking_gemini
Add Priority PayGo cost tracking for Gemini / Vertex AI
This commit is contained in:
commit
f97ee62fb0
@ -480,6 +480,7 @@ def cost_per_token( # noqa: PLR0915
|
||||
model=model_without_prefix,
|
||||
custom_llm_provider=custom_llm_provider,
|
||||
usage=usage_block,
|
||||
service_tier=service_tier,
|
||||
)
|
||||
elif custom_llm_provider == "anthropic":
|
||||
return anthropic_cost_per_token(model=model, usage=usage_block)
|
||||
@ -500,7 +501,9 @@ def cost_per_token( # noqa: PLR0915
|
||||
model=model, usage=usage_block, response_time_ms=response_time_ms
|
||||
)
|
||||
elif custom_llm_provider == "gemini":
|
||||
return gemini_cost_per_token(model=model, usage=usage_block)
|
||||
return gemini_cost_per_token(
|
||||
model=model, usage=usage_block, service_tier=service_tier
|
||||
)
|
||||
elif custom_llm_provider == "deepseek":
|
||||
return deepseek_cost_per_token(model=model, usage=usage_block)
|
||||
elif custom_llm_provider == "perplexity":
|
||||
@ -704,6 +707,36 @@ def _get_response_model(completion_response: Any) -> Optional[str]:
|
||||
return None
|
||||
|
||||
|
||||
_GEMINI_TRAFFIC_TYPE_TO_SERVICE_TIER: dict = {
|
||||
# ON_DEMAND_PRIORITY maps to "priority" — selects input_cost_per_token_priority, etc.
|
||||
"ON_DEMAND_PRIORITY": "priority",
|
||||
# FLEX / BATCH maps to "flex" — selects input_cost_per_token_flex, etc.
|
||||
"FLEX": "flex",
|
||||
"BATCH": "flex",
|
||||
# ON_DEMAND is standard pricing — no service_tier suffix applied
|
||||
"ON_DEMAND": None,
|
||||
}
|
||||
|
||||
|
||||
def _map_traffic_type_to_service_tier(traffic_type: Optional[str]) -> Optional[str]:
|
||||
"""
|
||||
Map a Gemini usageMetadata.trafficType value to a LiteLLM service_tier string.
|
||||
|
||||
This allows the same `_priority` / `_flex` cost-key suffix logic used for
|
||||
OpenAI/Azure to work for Gemini and Vertex AI models.
|
||||
|
||||
trafficType values seen in practice
|
||||
------------------------------------
|
||||
ON_DEMAND -> standard pricing (service_tier = None)
|
||||
ON_DEMAND_PRIORITY -> priority pricing (service_tier = "priority")
|
||||
FLEX / BATCH -> batch/flex pricing (service_tier = "flex")
|
||||
"""
|
||||
if traffic_type is None:
|
||||
return None
|
||||
service_tier = _GEMINI_TRAFFIC_TYPE_TO_SERVICE_TIER.get(traffic_type.upper())
|
||||
return service_tier
|
||||
|
||||
|
||||
def _get_usage_object(
|
||||
completion_response: Any,
|
||||
) -> Optional[Usage]:
|
||||
@ -1145,6 +1178,20 @@ def completion_cost( # noqa: PLR0915
|
||||
"custom_llm_provider", custom_llm_provider or None
|
||||
)
|
||||
region_name = hidden_params.get("region_name", region_name)
|
||||
|
||||
# For Gemini/Vertex AI responses, trafficType is stored in
|
||||
# provider_specific_fields. Map it to the service_tier used
|
||||
# by the cost key lookup (_priority / _flex suffixes) so that
|
||||
# ON_DEMAND_PRIORITY requests are billed at priority prices.
|
||||
if service_tier is None:
|
||||
provider_specific = (
|
||||
hidden_params.get("provider_specific_fields") or {}
|
||||
)
|
||||
raw_traffic_type = provider_specific.get("traffic_type")
|
||||
if raw_traffic_type:
|
||||
service_tier = _map_traffic_type_to_service_tier(
|
||||
raw_traffic_type
|
||||
)
|
||||
else:
|
||||
if model is None:
|
||||
raise ValueError(
|
||||
|
||||
@ -200,8 +200,14 @@ def _get_token_base_cost(
|
||||
## CHECK IF ABOVE THRESHOLD
|
||||
# Optimization: collect threshold keys first to avoid sorting all model_info keys.
|
||||
# Most models don't have threshold pricing, so we can return early.
|
||||
# Exclude service_tier-specific variants (e.g. input_cost_per_token_above_200k_tokens_priority)
|
||||
# so that the threshold detection loop only processes standard keys. The
|
||||
# service_tier-specific above-threshold key is resolved later via _get_service_tier_cost_key.
|
||||
threshold_keys = [
|
||||
k for k in model_info if k.startswith("input_cost_per_token_above_")
|
||||
k
|
||||
for k in model_info
|
||||
if k.startswith("input_cost_per_token_above_")
|
||||
and not any(k.endswith(f"_{st.value}") for st in ServiceTier)
|
||||
]
|
||||
if not threshold_keys:
|
||||
return (
|
||||
@ -224,14 +230,34 @@ def _get_token_base_cost(
|
||||
1000 if "k" in threshold_str else 1
|
||||
)
|
||||
if usage.prompt_tokens > threshold:
|
||||
# Prefer a service_tier-specific above-threshold key when available,
|
||||
# e.g. input_cost_per_token_above_200k_tokens_priority for Gemini
|
||||
# ON_DEMAND_PRIORITY. Falls back to the standard key automatically
|
||||
# via _get_cost_per_unit's service_tier fallback logic.
|
||||
tiered_input_key = (
|
||||
_get_service_tier_cost_key(
|
||||
f"input_cost_per_token_above_{threshold_str}_tokens",
|
||||
service_tier,
|
||||
)
|
||||
if service_tier
|
||||
else key
|
||||
)
|
||||
prompt_base_cost = cast(
|
||||
float, _get_cost_per_unit(model_info, key, prompt_base_cost)
|
||||
float, _get_cost_per_unit(model_info, tiered_input_key, prompt_base_cost)
|
||||
)
|
||||
tiered_output_key = (
|
||||
_get_service_tier_cost_key(
|
||||
f"output_cost_per_token_above_{threshold_str}_tokens",
|
||||
service_tier,
|
||||
)
|
||||
if service_tier
|
||||
else f"output_cost_per_token_above_{threshold_str}_tokens"
|
||||
)
|
||||
completion_base_cost = cast(
|
||||
float,
|
||||
_get_cost_per_unit(
|
||||
model_info,
|
||||
f"output_cost_per_token_above_{threshold_str}_tokens",
|
||||
tiered_output_key,
|
||||
completion_base_cost,
|
||||
),
|
||||
)
|
||||
@ -517,6 +543,7 @@ def _calculate_input_cost(
|
||||
cache_read_cost: float,
|
||||
cache_creation_cost: float,
|
||||
cache_creation_cost_above_1hr: float,
|
||||
service_tier: Optional[str] = None,
|
||||
) -> float:
|
||||
"""
|
||||
Calculates the input cost for a given model, prompt tokens, and completion tokens.
|
||||
@ -528,8 +555,11 @@ def _calculate_input_cost(
|
||||
|
||||
### AUDIO COST
|
||||
if prompt_tokens_details["audio_tokens"]:
|
||||
audio_cost_key = _get_service_tier_cost_key(
|
||||
"input_cost_per_audio_token", service_tier
|
||||
)
|
||||
prompt_cost += calculate_cost_component(
|
||||
model_info, "input_cost_per_audio_token", prompt_tokens_details["audio_tokens"]
|
||||
model_info, audio_cost_key, prompt_tokens_details["audio_tokens"]
|
||||
)
|
||||
|
||||
### IMAGE TOKEN COST
|
||||
@ -659,6 +689,7 @@ def generic_cost_per_token( # noqa: PLR0915
|
||||
cache_read_cost=cache_read_cost,
|
||||
cache_creation_cost=cache_creation_cost,
|
||||
cache_creation_cost_above_1hr=cache_creation_cost_above_1hr,
|
||||
service_tier=service_tier,
|
||||
)
|
||||
|
||||
## CALCULATE OUTPUT COST
|
||||
|
||||
@ -4,13 +4,15 @@ This file is used to calculate the cost of the Gemini API.
|
||||
Handles the context caching for Gemini API.
|
||||
"""
|
||||
|
||||
from typing import TYPE_CHECKING, Tuple
|
||||
from typing import TYPE_CHECKING, Optional, Tuple
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from litellm.types.utils import ModelInfo, Usage
|
||||
|
||||
|
||||
def cost_per_token(model: str, usage: "Usage") -> Tuple[float, float]:
|
||||
def cost_per_token(
|
||||
model: str, usage: "Usage", service_tier: Optional[str] = None
|
||||
) -> Tuple[float, float]:
|
||||
"""
|
||||
Calculates the cost per token for a given model, prompt tokens, and completion tokens.
|
||||
|
||||
@ -19,7 +21,7 @@ def cost_per_token(model: str, usage: "Usage") -> Tuple[float, float]:
|
||||
from litellm.litellm_core_utils.llm_cost_calc.utils import generic_cost_per_token
|
||||
|
||||
return generic_cost_per_token(
|
||||
model=model, usage=usage, custom_llm_provider="gemini"
|
||||
model=model, usage=usage, custom_llm_provider="gemini", service_tier=service_tier
|
||||
)
|
||||
|
||||
|
||||
|
||||
@ -224,6 +224,7 @@ def cost_per_token(
|
||||
model: str,
|
||||
custom_llm_provider: str,
|
||||
usage: Usage,
|
||||
service_tier: Optional[str] = None,
|
||||
) -> Tuple[float, float]:
|
||||
"""
|
||||
Calculates the cost per token for a given model, prompt tokens, and completion tokens.
|
||||
@ -233,6 +234,8 @@ def cost_per_token(
|
||||
- custom_llm_provider: str, either "vertex_ai-*" or "gemini"
|
||||
- prompt_tokens: float, the number of input tokens
|
||||
- completion_tokens: float, the number of output tokens
|
||||
- service_tier: optional tier derived from Gemini trafficType
|
||||
("priority" for ON_DEMAND_PRIORITY, "flex" for FLEX/batch).
|
||||
|
||||
Returns:
|
||||
Tuple[float, float] - prompt_cost_in_usd, completion_cost_in_usd
|
||||
@ -266,4 +269,5 @@ def cost_per_token(
|
||||
model=model,
|
||||
custom_llm_provider=custom_llm_provider,
|
||||
usage=usage,
|
||||
service_tier=service_tier,
|
||||
)
|
||||
|
||||
@ -14590,7 +14590,14 @@
|
||||
"supports_video_input": true,
|
||||
"supports_vision": true,
|
||||
"supports_web_search": true,
|
||||
"supports_native_streaming": true
|
||||
"supports_native_streaming": true,
|
||||
"input_cost_per_token_priority": 3.6e-06,
|
||||
"input_cost_per_token_above_200k_tokens_priority": 7.2e-06,
|
||||
"output_cost_per_token_priority": 2.16e-05,
|
||||
"output_cost_per_token_above_200k_tokens_priority": 3.24e-05,
|
||||
"cache_read_input_token_cost_priority": 3.6e-07,
|
||||
"cache_read_input_token_cost_above_200k_tokens_priority": 7.2e-07,
|
||||
"supports_service_tier": true
|
||||
},
|
||||
"gemini-3.1-pro-preview": {
|
||||
"cache_read_input_token_cost": 2e-07,
|
||||
@ -14641,7 +14648,14 @@
|
||||
"supports_vision": true,
|
||||
"supports_web_search": true,
|
||||
"supports_url_context": true,
|
||||
"supports_native_streaming": true
|
||||
"supports_native_streaming": true,
|
||||
"input_cost_per_token_priority": 3.6e-06,
|
||||
"input_cost_per_token_above_200k_tokens_priority": 7.2e-06,
|
||||
"output_cost_per_token_priority": 2.16e-05,
|
||||
"output_cost_per_token_above_200k_tokens_priority": 3.24e-05,
|
||||
"cache_read_input_token_cost_priority": 3.6e-07,
|
||||
"cache_read_input_token_cost_above_200k_tokens_priority": 7.2e-07,
|
||||
"supports_service_tier": true
|
||||
},
|
||||
"gemini-3.1-pro-preview-customtools": {
|
||||
"cache_read_input_token_cost": 2e-07,
|
||||
@ -14741,7 +14755,14 @@
|
||||
"supports_video_input": true,
|
||||
"supports_vision": true,
|
||||
"supports_web_search": true,
|
||||
"supports_native_streaming": true
|
||||
"supports_native_streaming": true,
|
||||
"input_cost_per_token_priority": 3.6e-06,
|
||||
"input_cost_per_token_above_200k_tokens_priority": 7.2e-06,
|
||||
"output_cost_per_token_priority": 2.16e-05,
|
||||
"output_cost_per_token_above_200k_tokens_priority": 3.24e-05,
|
||||
"cache_read_input_token_cost_priority": 3.6e-07,
|
||||
"cache_read_input_token_cost_above_200k_tokens_priority": 7.2e-07,
|
||||
"supports_service_tier": true
|
||||
},
|
||||
"vertex_ai/gemini-3-flash-preview": {
|
||||
"cache_read_input_token_cost": 5e-08,
|
||||
@ -14785,7 +14806,12 @@
|
||||
"supports_video_input": true,
|
||||
"supports_vision": true,
|
||||
"supports_web_search": true,
|
||||
"supports_native_streaming": true
|
||||
"supports_native_streaming": true,
|
||||
"input_cost_per_token_priority": 9e-07,
|
||||
"input_cost_per_audio_token_priority": 1.8e-06,
|
||||
"output_cost_per_token_priority": 5.4e-06,
|
||||
"cache_read_input_token_cost_priority": 9e-08,
|
||||
"supports_service_tier": true
|
||||
},
|
||||
"vertex_ai/gemini-3.1-pro-preview": {
|
||||
"cache_read_input_token_cost": 2e-07,
|
||||
@ -14836,7 +14862,14 @@
|
||||
"supports_vision": true,
|
||||
"supports_web_search": true,
|
||||
"supports_url_context": true,
|
||||
"supports_native_streaming": true
|
||||
"supports_native_streaming": true,
|
||||
"input_cost_per_token_priority": 3.6e-06,
|
||||
"input_cost_per_token_above_200k_tokens_priority": 7.2e-06,
|
||||
"output_cost_per_token_priority": 2.16e-05,
|
||||
"output_cost_per_token_above_200k_tokens_priority": 3.24e-05,
|
||||
"cache_read_input_token_cost_priority": 3.6e-07,
|
||||
"cache_read_input_token_cost_above_200k_tokens_priority": 7.2e-07,
|
||||
"supports_service_tier": true
|
||||
},
|
||||
"vertex_ai/gemini-3.1-pro-preview-customtools": {
|
||||
"cache_read_input_token_cost": 2e-07,
|
||||
@ -14887,7 +14920,14 @@
|
||||
"supports_vision": true,
|
||||
"supports_web_search": true,
|
||||
"supports_url_context": true,
|
||||
"supports_native_streaming": true
|
||||
"supports_native_streaming": true,
|
||||
"input_cost_per_token_priority": 3.6e-06,
|
||||
"input_cost_per_token_above_200k_tokens_priority": 7.2e-06,
|
||||
"output_cost_per_token_priority": 2.16e-05,
|
||||
"output_cost_per_token_above_200k_tokens_priority": 3.24e-05,
|
||||
"cache_read_input_token_cost_priority": 3.6e-07,
|
||||
"cache_read_input_token_cost_above_200k_tokens_priority": 7.2e-07,
|
||||
"supports_service_tier": true
|
||||
},
|
||||
"gemini-2.5-pro-exp-03-25": {
|
||||
"cache_read_input_token_cost": 1.25e-07,
|
||||
@ -16682,6 +16722,8 @@
|
||||
"cache_read_input_token_cost_above_200k_tokens": 2.5e-07,
|
||||
"input_cost_per_token": 1.25e-06,
|
||||
"input_cost_per_token_above_200k_tokens": 2.5e-06,
|
||||
"input_cost_per_token_priority": 1.25e-06,
|
||||
"input_cost_per_token_above_200k_tokens_priority": 2.5e-06,
|
||||
"litellm_provider": "gemini",
|
||||
"max_audio_length_hours": 8.4,
|
||||
"max_audio_per_prompt": 1,
|
||||
@ -16695,8 +16737,11 @@
|
||||
"mode": "chat",
|
||||
"output_cost_per_token": 1e-05,
|
||||
"output_cost_per_token_above_200k_tokens": 1.5e-05,
|
||||
"output_cost_per_token_priority": 1e-05,
|
||||
"output_cost_per_token_above_200k_tokens_priority": 1.5e-05,
|
||||
"rpm": 2000,
|
||||
"source": "https://cloud.google.com/vertex-ai/generative-ai/pricing",
|
||||
"supports_service_tier": true,
|
||||
"supported_endpoints": [
|
||||
"/v1/chat/completions",
|
||||
"/v1/completions"
|
||||
@ -16801,7 +16846,14 @@
|
||||
"supports_video_input": true,
|
||||
"supports_vision": true,
|
||||
"supports_web_search": true,
|
||||
"tpm": 800000
|
||||
"tpm": 800000,
|
||||
"input_cost_per_token_priority": 3.6e-06,
|
||||
"input_cost_per_token_above_200k_tokens_priority": 7.2e-06,
|
||||
"output_cost_per_token_priority": 2.16e-05,
|
||||
"output_cost_per_token_above_200k_tokens_priority": 3.24e-05,
|
||||
"cache_read_input_token_cost_priority": 3.6e-07,
|
||||
"cache_read_input_token_cost_above_200k_tokens_priority": 7.2e-07,
|
||||
"supports_service_tier": true
|
||||
},
|
||||
"gemini/gemini-3-flash-preview": {
|
||||
"cache_read_input_token_cost": 5e-08,
|
||||
@ -16849,7 +16901,12 @@
|
||||
"supports_vision": true,
|
||||
"supports_web_search": true,
|
||||
"supports_native_streaming": true,
|
||||
"tpm": 800000
|
||||
"tpm": 800000,
|
||||
"input_cost_per_token_priority": 9e-07,
|
||||
"input_cost_per_audio_token_priority": 1.8e-06,
|
||||
"output_cost_per_token_priority": 5.4e-06,
|
||||
"cache_read_input_token_cost_priority": 9e-08,
|
||||
"supports_service_tier": true
|
||||
},
|
||||
"gemini/gemini-3.1-pro-preview": {
|
||||
"cache_read_input_token_cost": 2e-07,
|
||||
@ -16900,7 +16957,14 @@
|
||||
"supports_web_search": true,
|
||||
"supports_url_context": true,
|
||||
"supports_native_streaming": true,
|
||||
"tpm": 800000
|
||||
"tpm": 800000,
|
||||
"input_cost_per_token_priority": 3.6e-06,
|
||||
"input_cost_per_token_above_200k_tokens_priority": 7.2e-06,
|
||||
"output_cost_per_token_priority": 2.16e-05,
|
||||
"output_cost_per_token_above_200k_tokens_priority": 3.24e-05,
|
||||
"cache_read_input_token_cost_priority": 3.6e-07,
|
||||
"cache_read_input_token_cost_above_200k_tokens_priority": 7.2e-07,
|
||||
"supports_service_tier": true
|
||||
},
|
||||
"gemini/gemini-3.1-pro-preview-customtools": {
|
||||
"cache_read_input_token_cost": 2e-07,
|
||||
@ -16951,7 +17015,14 @@
|
||||
"supports_web_search": true,
|
||||
"supports_url_context": true,
|
||||
"supports_native_streaming": true,
|
||||
"tpm": 800000
|
||||
"tpm": 800000,
|
||||
"input_cost_per_token_priority": 3.6e-06,
|
||||
"input_cost_per_token_above_200k_tokens_priority": 7.2e-06,
|
||||
"output_cost_per_token_priority": 2.16e-05,
|
||||
"output_cost_per_token_above_200k_tokens_priority": 3.24e-05,
|
||||
"cache_read_input_token_cost_priority": 3.6e-07,
|
||||
"cache_read_input_token_cost_above_200k_tokens_priority": 7.2e-07,
|
||||
"supports_service_tier": true
|
||||
},
|
||||
"gemini-3-flash-preview": {
|
||||
"cache_read_input_token_cost": 5e-08,
|
||||
@ -16997,7 +17068,12 @@
|
||||
"supports_url_context": true,
|
||||
"supports_vision": true,
|
||||
"supports_web_search": true,
|
||||
"supports_native_streaming": true
|
||||
"supports_native_streaming": true,
|
||||
"input_cost_per_token_priority": 9e-07,
|
||||
"input_cost_per_audio_token_priority": 1.8e-06,
|
||||
"output_cost_per_token_priority": 5.4e-06,
|
||||
"cache_read_input_token_cost_priority": 9e-08,
|
||||
"supports_service_tier": true
|
||||
},
|
||||
"gemini/gemini-2.5-pro-exp-03-25": {
|
||||
"cache_read_input_token_cost": 0.0,
|
||||
@ -37571,4 +37647,4 @@
|
||||
"notes": "DuckDuckGo Instant Answer API is free and does not require an API key."
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -14590,7 +14590,14 @@
|
||||
"supports_video_input": true,
|
||||
"supports_vision": true,
|
||||
"supports_web_search": true,
|
||||
"supports_native_streaming": true
|
||||
"supports_native_streaming": true,
|
||||
"input_cost_per_token_priority": 3.6e-06,
|
||||
"input_cost_per_token_above_200k_tokens_priority": 7.2e-06,
|
||||
"output_cost_per_token_priority": 2.16e-05,
|
||||
"output_cost_per_token_above_200k_tokens_priority": 3.24e-05,
|
||||
"cache_read_input_token_cost_priority": 3.6e-07,
|
||||
"cache_read_input_token_cost_above_200k_tokens_priority": 7.2e-07,
|
||||
"supports_service_tier": true
|
||||
},
|
||||
"gemini-3.1-pro-preview": {
|
||||
"cache_read_input_token_cost": 2e-07,
|
||||
@ -14641,7 +14648,14 @@
|
||||
"supports_vision": true,
|
||||
"supports_web_search": true,
|
||||
"supports_url_context": true,
|
||||
"supports_native_streaming": true
|
||||
"supports_native_streaming": true,
|
||||
"input_cost_per_token_priority": 3.6e-06,
|
||||
"input_cost_per_token_above_200k_tokens_priority": 7.2e-06,
|
||||
"output_cost_per_token_priority": 2.16e-05,
|
||||
"output_cost_per_token_above_200k_tokens_priority": 3.24e-05,
|
||||
"cache_read_input_token_cost_priority": 3.6e-07,
|
||||
"cache_read_input_token_cost_above_200k_tokens_priority": 7.2e-07,
|
||||
"supports_service_tier": true
|
||||
},
|
||||
"gemini-3.1-pro-preview-customtools": {
|
||||
"cache_read_input_token_cost": 2e-07,
|
||||
@ -14741,7 +14755,14 @@
|
||||
"supports_video_input": true,
|
||||
"supports_vision": true,
|
||||
"supports_web_search": true,
|
||||
"supports_native_streaming": true
|
||||
"supports_native_streaming": true,
|
||||
"input_cost_per_token_priority": 3.6e-06,
|
||||
"input_cost_per_token_above_200k_tokens_priority": 7.2e-06,
|
||||
"output_cost_per_token_priority": 2.16e-05,
|
||||
"output_cost_per_token_above_200k_tokens_priority": 3.24e-05,
|
||||
"cache_read_input_token_cost_priority": 3.6e-07,
|
||||
"cache_read_input_token_cost_above_200k_tokens_priority": 7.2e-07,
|
||||
"supports_service_tier": true
|
||||
},
|
||||
"vertex_ai/gemini-3-flash-preview": {
|
||||
"cache_read_input_token_cost": 5e-08,
|
||||
@ -14785,7 +14806,12 @@
|
||||
"supports_video_input": true,
|
||||
"supports_vision": true,
|
||||
"supports_web_search": true,
|
||||
"supports_native_streaming": true
|
||||
"supports_native_streaming": true,
|
||||
"input_cost_per_token_priority": 9e-07,
|
||||
"input_cost_per_audio_token_priority": 1.8e-06,
|
||||
"output_cost_per_token_priority": 5.4e-06,
|
||||
"cache_read_input_token_cost_priority": 9e-08,
|
||||
"supports_service_tier": true
|
||||
},
|
||||
"vertex_ai/gemini-3.1-pro-preview": {
|
||||
"cache_read_input_token_cost": 2e-07,
|
||||
@ -14836,7 +14862,14 @@
|
||||
"supports_vision": true,
|
||||
"supports_web_search": true,
|
||||
"supports_url_context": true,
|
||||
"supports_native_streaming": true
|
||||
"supports_native_streaming": true,
|
||||
"input_cost_per_token_priority": 3.6e-06,
|
||||
"input_cost_per_token_above_200k_tokens_priority": 7.2e-06,
|
||||
"output_cost_per_token_priority": 2.16e-05,
|
||||
"output_cost_per_token_above_200k_tokens_priority": 3.24e-05,
|
||||
"cache_read_input_token_cost_priority": 3.6e-07,
|
||||
"cache_read_input_token_cost_above_200k_tokens_priority": 7.2e-07,
|
||||
"supports_service_tier": true
|
||||
},
|
||||
"vertex_ai/gemini-3.1-pro-preview-customtools": {
|
||||
"cache_read_input_token_cost": 2e-07,
|
||||
@ -14887,7 +14920,14 @@
|
||||
"supports_vision": true,
|
||||
"supports_web_search": true,
|
||||
"supports_url_context": true,
|
||||
"supports_native_streaming": true
|
||||
"supports_native_streaming": true,
|
||||
"input_cost_per_token_priority": 3.6e-06,
|
||||
"input_cost_per_token_above_200k_tokens_priority": 7.2e-06,
|
||||
"output_cost_per_token_priority": 2.16e-05,
|
||||
"output_cost_per_token_above_200k_tokens_priority": 3.24e-05,
|
||||
"cache_read_input_token_cost_priority": 3.6e-07,
|
||||
"cache_read_input_token_cost_above_200k_tokens_priority": 7.2e-07,
|
||||
"supports_service_tier": true
|
||||
},
|
||||
"gemini-2.5-pro-exp-03-25": {
|
||||
"cache_read_input_token_cost": 1.25e-07,
|
||||
@ -16682,6 +16722,8 @@
|
||||
"cache_read_input_token_cost_above_200k_tokens": 2.5e-07,
|
||||
"input_cost_per_token": 1.25e-06,
|
||||
"input_cost_per_token_above_200k_tokens": 2.5e-06,
|
||||
"input_cost_per_token_priority": 1.25e-06,
|
||||
"input_cost_per_token_above_200k_tokens_priority": 2.5e-06,
|
||||
"litellm_provider": "gemini",
|
||||
"max_audio_length_hours": 8.4,
|
||||
"max_audio_per_prompt": 1,
|
||||
@ -16695,8 +16737,11 @@
|
||||
"mode": "chat",
|
||||
"output_cost_per_token": 1e-05,
|
||||
"output_cost_per_token_above_200k_tokens": 1.5e-05,
|
||||
"output_cost_per_token_priority": 1e-05,
|
||||
"output_cost_per_token_above_200k_tokens_priority": 1.5e-05,
|
||||
"rpm": 2000,
|
||||
"source": "https://cloud.google.com/vertex-ai/generative-ai/pricing",
|
||||
"supports_service_tier": true,
|
||||
"supported_endpoints": [
|
||||
"/v1/chat/completions",
|
||||
"/v1/completions"
|
||||
@ -16801,7 +16846,14 @@
|
||||
"supports_video_input": true,
|
||||
"supports_vision": true,
|
||||
"supports_web_search": true,
|
||||
"tpm": 800000
|
||||
"tpm": 800000,
|
||||
"input_cost_per_token_priority": 3.6e-06,
|
||||
"input_cost_per_token_above_200k_tokens_priority": 7.2e-06,
|
||||
"output_cost_per_token_priority": 2.16e-05,
|
||||
"output_cost_per_token_above_200k_tokens_priority": 3.24e-05,
|
||||
"cache_read_input_token_cost_priority": 3.6e-07,
|
||||
"cache_read_input_token_cost_above_200k_tokens_priority": 7.2e-07,
|
||||
"supports_service_tier": true
|
||||
},
|
||||
"gemini/gemini-3-flash-preview": {
|
||||
"cache_read_input_token_cost": 5e-08,
|
||||
@ -16849,7 +16901,12 @@
|
||||
"supports_vision": true,
|
||||
"supports_web_search": true,
|
||||
"supports_native_streaming": true,
|
||||
"tpm": 800000
|
||||
"tpm": 800000,
|
||||
"input_cost_per_token_priority": 9e-07,
|
||||
"input_cost_per_audio_token_priority": 1.8e-06,
|
||||
"output_cost_per_token_priority": 5.4e-06,
|
||||
"cache_read_input_token_cost_priority": 9e-08,
|
||||
"supports_service_tier": true
|
||||
},
|
||||
"gemini/gemini-3.1-pro-preview": {
|
||||
"cache_read_input_token_cost": 2e-07,
|
||||
@ -16900,7 +16957,14 @@
|
||||
"supports_web_search": true,
|
||||
"supports_url_context": true,
|
||||
"supports_native_streaming": true,
|
||||
"tpm": 800000
|
||||
"tpm": 800000,
|
||||
"input_cost_per_token_priority": 3.6e-06,
|
||||
"input_cost_per_token_above_200k_tokens_priority": 7.2e-06,
|
||||
"output_cost_per_token_priority": 2.16e-05,
|
||||
"output_cost_per_token_above_200k_tokens_priority": 3.24e-05,
|
||||
"cache_read_input_token_cost_priority": 3.6e-07,
|
||||
"cache_read_input_token_cost_above_200k_tokens_priority": 7.2e-07,
|
||||
"supports_service_tier": true
|
||||
},
|
||||
"gemini/gemini-3.1-pro-preview-customtools": {
|
||||
"cache_read_input_token_cost": 2e-07,
|
||||
@ -16951,7 +17015,14 @@
|
||||
"supports_web_search": true,
|
||||
"supports_url_context": true,
|
||||
"supports_native_streaming": true,
|
||||
"tpm": 800000
|
||||
"tpm": 800000,
|
||||
"input_cost_per_token_priority": 3.6e-06,
|
||||
"input_cost_per_token_above_200k_tokens_priority": 7.2e-06,
|
||||
"output_cost_per_token_priority": 2.16e-05,
|
||||
"output_cost_per_token_above_200k_tokens_priority": 3.24e-05,
|
||||
"cache_read_input_token_cost_priority": 3.6e-07,
|
||||
"cache_read_input_token_cost_above_200k_tokens_priority": 7.2e-07,
|
||||
"supports_service_tier": true
|
||||
},
|
||||
"gemini-3-flash-preview": {
|
||||
"cache_read_input_token_cost": 5e-08,
|
||||
@ -16997,7 +17068,12 @@
|
||||
"supports_url_context": true,
|
||||
"supports_vision": true,
|
||||
"supports_web_search": true,
|
||||
"supports_native_streaming": true
|
||||
"supports_native_streaming": true,
|
||||
"input_cost_per_token_priority": 9e-07,
|
||||
"input_cost_per_audio_token_priority": 1.8e-06,
|
||||
"output_cost_per_token_priority": 5.4e-06,
|
||||
"cache_read_input_token_cost_priority": 9e-08,
|
||||
"supports_service_tier": true
|
||||
},
|
||||
"gemini/gemini-2.5-pro-exp-03-25": {
|
||||
"cache_read_input_token_cost": 0.0,
|
||||
@ -37571,4 +37647,4 @@
|
||||
"notes": "DuckDuckGo Instant Answer API is free and does not require an API key."
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -606,10 +606,14 @@ def test_aaamodel_prices_and_context_window_json_is_valid():
|
||||
"input_cost_per_token_above_200k_tokens": {"type": "number"},
|
||||
"cache_read_input_token_cost_flex": {"type": "number"},
|
||||
"cache_read_input_token_cost_priority": {"type": "number"},
|
||||
"cache_read_input_token_cost_above_200k_tokens_priority": {"type": "number"},
|
||||
"input_cost_per_token_flex": {"type": "number"},
|
||||
"input_cost_per_token_priority": {"type": "number"},
|
||||
"input_cost_per_token_above_200k_tokens_priority": {"type": "number"},
|
||||
"input_cost_per_audio_token_priority": {"type": "number"},
|
||||
"output_cost_per_token_flex": {"type": "number"},
|
||||
"output_cost_per_token_priority": {"type": "number"},
|
||||
"output_cost_per_token_above_200k_tokens_priority": {"type": "number"},
|
||||
"input_cost_per_pixel": {"type": "number"},
|
||||
"input_cost_per_query": {"type": "number"},
|
||||
"input_cost_per_request": {"type": "number"},
|
||||
@ -644,6 +648,7 @@ def test_aaamodel_prices_and_context_window_json_is_valid():
|
||||
"max_video_length": {"type": "number"},
|
||||
"max_videos_per_prompt": {"type": "number"},
|
||||
"metadata": {"type": "object"},
|
||||
"provider_specific_entry": {"type": "object"},
|
||||
"mode": {
|
||||
"type": "string",
|
||||
"enum": [
|
||||
@ -803,7 +808,7 @@ def test_aaamodel_prices_and_context_window_json_is_valid():
|
||||
},
|
||||
}
|
||||
|
||||
prod_json = "./model_prices_and_context_window.json"
|
||||
prod_json = "litellm/model_prices_and_context_window.json"
|
||||
# prod_json = "../../model_prices_and_context_window.json"
|
||||
with open(prod_json, "r") as model_prices_file:
|
||||
actual_json = json.load(model_prices_file)
|
||||
|
||||
Loading…
Reference in New Issue
Block a user