feat(openai): apply regional-processing cost uplift for EU/US data residency (#28626)
* feat(openai): apply regional-processing cost uplift for EU/US data residency OpenAI charges a 10% uplift on the latest GPT models when requests are served from a regionalized hostname (eu./us.api.openai.com). Infer the region from `api_base`, expose it on `kwargs["litellm_params"]["data_residency"]`, and multiply the computed cost by a per-model `regional_processing_uplift_multiplier_<region>` field. https://claude.ai/code/session_012ebH44s7ohYxjoix5CXzTW * test: allow regional_processing_uplift_multiplier_{eu,us} in model_prices schema * fix(cost): tighten data_residency inference and restore model_cost in tests - Only infer OpenAI data_residency when custom_llm_provider == "openai"; drop the implicit None fallback so non-OpenAI callers can't accidentally pick up a regional tag from a stray OpenAI hostname. - _local_model_cost_map fixture now snapshots and restores litellm.model_cost and LITELLM_LOCAL_MODEL_COST_MAP so tests don't leak state across the session. * refactor(openai): move data_residency helper under llms/openai * fix: thread data_residency through realtime stream cost calculation Co-authored-by: Yassin Kortam <yassin@berri.ai> * fix(cost): thread data_residency through batch_cost_calculator Apply the OpenAI regional-processing uplift multiplier to retrieve_batch cost paths so Batch API requests served via eu./us.api.openai.com are priced at the same uplifted token rates as completions/transcriptions. * refactor(openai): encapsulate provider check inside infer_openai_data_residency Move the custom_llm_provider == "openai" guard from get_litellm_params into the helper itself so the core utility no longer carries provider-specific dispatch logic. Callers pass through the provider unconditionally; the helper returns None for any non-OpenAI provider. 
* fix(responses): thread data_residency through Responses logging params The Responses API paths build their logging litellm_params dict after provider resolution but did not include data_residency, so cost calc saw None even when the effective api_base was a regional OpenAI host. --------- Co-authored-by: Claude <noreply@anthropic.com> Co-authored-by: Cursor Agent <cursoragent@cursor.com> Co-authored-by: Yassin Kortam <yassin@berri.ai>
This commit is contained in:
parent
f38c16c71e
commit
c23b19f09c
@ -24,6 +24,7 @@ from litellm.litellm_core_utils.llm_cost_calc.usage_object_transformation import
|
||||
from litellm.litellm_core_utils.llm_cost_calc.utils import (
|
||||
CostCalculatorUtils,
|
||||
_generic_cost_per_character,
|
||||
_get_regional_uplift_multiplier,
|
||||
_get_service_tier_cost_key,
|
||||
_parse_prompt_tokens_details,
|
||||
calculate_cost_component,
|
||||
@ -312,6 +313,10 @@ def cost_per_token( # noqa: PLR0915
|
||||
audio_transcription_file_duration: float = 0.0, # for audio transcription calls - the file time in seconds
|
||||
### SERVICE TIER ###
|
||||
service_tier: Optional[str] = None, # for OpenAI service tier pricing
|
||||
### DATA RESIDENCY ###
|
||||
data_residency: Optional[
|
||||
str
|
||||
] = None, # for OpenAI regional-processing uplift (e.g. "eu", "us")
|
||||
response: Optional[Any] = None,
|
||||
### REQUEST MODEL ###
|
||||
request_model: Optional[str] = None, # original request model for router detection
|
||||
@ -493,6 +498,7 @@ def cost_per_token( # noqa: PLR0915
|
||||
usage=usage_block,
|
||||
custom_llm_provider=custom_llm_provider,
|
||||
service_tier=service_tier,
|
||||
data_residency=data_residency,
|
||||
)
|
||||
|
||||
return prompt_cost, completion_cost
|
||||
@ -521,7 +527,10 @@ def cost_per_token( # noqa: PLR0915
|
||||
or call_type == CallTypes.retrieve_batch
|
||||
):
|
||||
return batch_cost_calculator(
|
||||
usage=usage_block, model=model, custom_llm_provider=custom_llm_provider
|
||||
usage=usage_block,
|
||||
model=model,
|
||||
custom_llm_provider=custom_llm_provider,
|
||||
data_residency=data_residency,
|
||||
)
|
||||
elif call_type == "atranscription" or call_type == "transcription":
|
||||
if _transcription_usage_has_token_details(usage_block):
|
||||
@ -529,6 +538,7 @@ def cost_per_token( # noqa: PLR0915
|
||||
model=model_without_prefix,
|
||||
usage=usage_block,
|
||||
service_tier=service_tier,
|
||||
data_residency=data_residency,
|
||||
)
|
||||
|
||||
return openai_cost_per_second(
|
||||
@ -579,7 +589,10 @@ def cost_per_token( # noqa: PLR0915
|
||||
)
|
||||
elif custom_llm_provider == "openai":
|
||||
return openai_cost_per_token(
|
||||
model=model, usage=usage_block, service_tier=service_tier
|
||||
model=model,
|
||||
usage=usage_block,
|
||||
service_tier=service_tier,
|
||||
data_residency=data_residency,
|
||||
)
|
||||
elif custom_llm_provider == "databricks":
|
||||
return databricks_cost_per_token(model=model, usage=usage_block)
|
||||
@ -631,6 +644,7 @@ def cost_per_token( # noqa: PLR0915
|
||||
usage=usage_block,
|
||||
custom_llm_provider=custom_llm_provider,
|
||||
service_tier=service_tier,
|
||||
data_residency=data_residency,
|
||||
)
|
||||
|
||||
if (
|
||||
@ -1117,6 +1131,10 @@ def completion_cost( # noqa: PLR0915
|
||||
litellm_logging_obj: Optional[LitellmLoggingObject] = None,
|
||||
### SERVICE TIER ###
|
||||
service_tier: Optional[str] = None, # for OpenAI service tier pricing
|
||||
### DATA RESIDENCY ###
|
||||
data_residency: Optional[
|
||||
str
|
||||
] = None, # for OpenAI regional-processing uplift (e.g. "eu", "us")
|
||||
) -> float:
|
||||
"""
|
||||
Calculate the cost of a given completion call for GPT-3.5-turbo, llama2, any litellm supported llm.
|
||||
@ -1516,6 +1534,7 @@ def completion_cost( # noqa: PLR0915
|
||||
combined_usage_object=cost_per_token_usage_object,
|
||||
custom_llm_provider=custom_llm_provider,
|
||||
litellm_model_name=model,
|
||||
data_residency=data_residency,
|
||||
)
|
||||
elif call_type == _MCP_CALL_TYPE:
|
||||
from litellm.proxy._experimental.mcp_server.cost_calculator import (
|
||||
@ -1600,6 +1619,7 @@ def completion_cost( # noqa: PLR0915
|
||||
audio_transcription_file_duration=audio_transcription_file_duration,
|
||||
rerank_billed_units=rerank_billed_units,
|
||||
service_tier=service_tier,
|
||||
data_residency=data_residency,
|
||||
response=completion_response,
|
||||
request_model=request_model_for_cost,
|
||||
)
|
||||
@ -1811,6 +1831,10 @@ def response_cost_calculator(
|
||||
litellm_logging_obj: Optional[LitellmLoggingObject] = None,
|
||||
### SERVICE TIER ###
|
||||
service_tier: Optional[str] = None, # for OpenAI service tier pricing
|
||||
### DATA RESIDENCY ###
|
||||
data_residency: Optional[
|
||||
str
|
||||
] = None, # for OpenAI regional-processing uplift (e.g. "eu", "us")
|
||||
) -> float:
|
||||
"""
|
||||
Returns
|
||||
@ -1844,6 +1868,7 @@ def response_cost_calculator(
|
||||
router_model_id=router_model_id,
|
||||
litellm_logging_obj=litellm_logging_obj,
|
||||
service_tier=service_tier,
|
||||
data_residency=data_residency,
|
||||
)
|
||||
return response_cost
|
||||
except Exception as e:
|
||||
@ -2202,6 +2227,7 @@ def batch_cost_calculator(
|
||||
model: str,
|
||||
custom_llm_provider: Optional[str] = None,
|
||||
model_info: Optional[ModelInfo] = None,
|
||||
data_residency: Optional[str] = None,
|
||||
) -> Tuple[float, float]:
|
||||
"""
|
||||
Calculate the cost of a batch job.
|
||||
@ -2286,6 +2312,11 @@ def batch_cost_calculator(
|
||||
usage.completion_tokens * (output_cost_per_token) / 2
|
||||
) # batch cost is usually half of the regular token cost
|
||||
|
||||
uplift = _get_regional_uplift_multiplier(model_info, data_residency)
|
||||
if uplift != 1.0:
|
||||
total_prompt_cost *= uplift
|
||||
total_completion_cost *= uplift
|
||||
|
||||
return total_prompt_cost, total_completion_cost
|
||||
|
||||
|
||||
@ -2431,6 +2462,7 @@ def handle_realtime_stream_cost_calculation(
|
||||
combined_usage_object: Usage,
|
||||
custom_llm_provider: str,
|
||||
litellm_model_name: str,
|
||||
data_residency: Optional[str] = None,
|
||||
) -> float:
|
||||
"""
|
||||
Handles the cost calculation for realtime stream responses.
|
||||
@ -2461,6 +2493,7 @@ def handle_realtime_stream_cost_calculation(
|
||||
model=model_name,
|
||||
usage=combined_usage_object,
|
||||
custom_llm_provider=custom_llm_provider,
|
||||
data_residency=data_residency,
|
||||
)
|
||||
except Exception:
|
||||
continue
|
||||
|
||||
@ -1,5 +1,7 @@
|
||||
from typing import Optional
|
||||
|
||||
from litellm.llms.openai.data_residency import infer_openai_data_residency
|
||||
|
||||
# Pre-define optional kwargs keys as frozenset for O(1) lookups
|
||||
# These are extracted from kwargs only if present, avoiding unnecessary .get() calls
|
||||
_OPTIONAL_KWARGS_KEYS = frozenset(
|
||||
@ -103,6 +105,10 @@ def get_litellm_params(
|
||||
if litellm_trace_id is None:
|
||||
litellm_trace_id = _meta.get("trace_id") or _meta.get("session_id")
|
||||
|
||||
data_residency: Optional[str] = infer_openai_data_residency(
|
||||
custom_llm_provider, api_base
|
||||
)
|
||||
|
||||
# Build base dict with explicit parameters (always included)
|
||||
litellm_params = {
|
||||
"acompletion": acompletion,
|
||||
@ -112,6 +118,7 @@ def get_litellm_params(
|
||||
"verbose": verbose,
|
||||
"custom_llm_provider": custom_llm_provider,
|
||||
"api_base": api_base,
|
||||
"data_residency": data_residency,
|
||||
"litellm_call_id": litellm_call_id,
|
||||
"model_alias_map": model_alias_map,
|
||||
"completion_call_id": completion_call_id,
|
||||
|
||||
@ -1546,6 +1546,11 @@ class Logging(LiteLLMLoggingBaseClass):
|
||||
if self.optional_params
|
||||
else None
|
||||
),
|
||||
"data_residency": (
|
||||
self.litellm_params.get("data_residency")
|
||||
if hasattr(self, "litellm_params") and self.litellm_params
|
||||
else None
|
||||
),
|
||||
}
|
||||
except Exception as e: # error creating kwargs for cost calculation
|
||||
debug_info = StandardLoggingModelCostFailureDebugInformation(
|
||||
|
||||
@ -9,6 +9,7 @@ from litellm.types.utils import (
|
||||
CacheCreationTokenDetails,
|
||||
CallTypes,
|
||||
CompletionTokensDetailsWrapper,
|
||||
DataResidency,
|
||||
ImageResponse,
|
||||
ModelInfo,
|
||||
PassthroughCallTypes,
|
||||
@ -617,11 +618,46 @@ def _calculate_input_cost(
|
||||
return prompt_cost
|
||||
|
||||
|
||||
def _get_regional_uplift_multiplier(
    model_info: ModelInfo, data_residency: Optional[str]
) -> float:
    """
    Resolve the per-model regional-processing uplift multiplier for a given
    data-residency region.

    OpenAI applies a flat percentage uplift (e.g. +10%) on all token costs for
    requests served from a regionalized hostname (eu./us.api.openai.com). The
    multiplier is stored on the model entry as
    ``regional_processing_uplift_multiplier_<region>`` (e.g. 1.10).

    Args:
        model_info: model entry that may carry the per-region multiplier field.
        data_residency: region tag (matched case-insensitively against the
            ``DataResidency`` values), or ``None`` when no region applies.

    Returns:
        The configured multiplier as a float. Returns 1.0 (no uplift) when
        ``data_residency`` is ``None`` or not a known region, when the model
        has no multiplier configured for the region, or when the configured
        value is unusable (not numeric, NaN, infinite, or negative).
    """
    import math  # function-scope import so the helper does not rely on a file-level one

    if data_residency is None:
        return 1.0
    residency = data_residency.lower()
    if residency not in {r.value for r in DataResidency}:
        return 1.0
    multiplier = model_info.get(f"regional_processing_uplift_multiplier_{residency}")
    if multiplier is None:
        return 1.0
    try:
        value = float(cast(float, multiplier))
        # float() happily parses "nan" / "inf" and negative numbers; applying
        # any of those would corrupt both cost components, so treat them like
        # any other malformed value and fall back to "no uplift".
        if not math.isfinite(value) or value < 0:
            raise ValueError(f"unusable uplift multiplier: {multiplier!r}")
    except (TypeError, ValueError, OverflowError):
        # OverflowError covers integers too large to represent as a float.
        verbose_logger.exception(
            "Invalid regional_processing_uplift_multiplier_%s for model; "
            "defaulting to 1.0",
            residency,
        )
        return 1.0
    return value
|
||||
|
||||
|
||||
def generic_cost_per_token( # noqa: PLR0915
|
||||
model: str,
|
||||
usage: Usage,
|
||||
custom_llm_provider: str,
|
||||
service_tier: Optional[str] = None,
|
||||
data_residency: Optional[str] = None,
|
||||
) -> Tuple[float, float]:
|
||||
"""
|
||||
Calculates the cost per token for a given model, prompt tokens, and completion tokens.
|
||||
@ -631,6 +667,8 @@ def generic_cost_per_token( # noqa: PLR0915
|
||||
Input:
|
||||
- model: str, the model name without provider prefix
|
||||
- usage: LiteLLM Usage block, containing anthropic caching information
|
||||
- data_residency: optional OpenAI data-residency region (e.g. "eu", "us"),
|
||||
used to apply the per-model regional-processing uplift multiplier.
|
||||
|
||||
Returns:
|
||||
Tuple[float, float] - prompt_cost_in_usd, completion_cost_in_usd
|
||||
@ -781,6 +819,14 @@ def generic_cost_per_token( # noqa: PLR0915
|
||||
)
|
||||
completion_cost += float(image_tokens) * _output_cost_per_image_token
|
||||
|
||||
## REGIONAL DATA-RESIDENCY UPLIFT
|
||||
# Applied as a flat multiplier across all token costs for the request
|
||||
# when the upstream is a regionalized OpenAI host (eu./us.api.openai.com).
|
||||
uplift = _get_regional_uplift_multiplier(model_info, data_residency)
|
||||
if uplift != 1.0:
|
||||
prompt_cost *= uplift
|
||||
completion_cost *= uplift
|
||||
|
||||
return prompt_cost, completion_cost
|
||||
|
||||
|
||||
|
||||
@ -19,7 +19,10 @@ def cost_router(call_type: CallTypes) -> Literal["cost_per_token", "cost_per_sec
|
||||
|
||||
|
||||
def cost_per_token(
|
||||
model: str, usage: Usage, service_tier: Optional[str] = None
|
||||
model: str,
|
||||
usage: Usage,
|
||||
service_tier: Optional[str] = None,
|
||||
data_residency: Optional[str] = None,
|
||||
) -> Tuple[float, float]:
|
||||
"""
|
||||
Calculates the cost per token for a given model, prompt tokens, and completion tokens.
|
||||
@ -27,6 +30,9 @@ def cost_per_token(
|
||||
Input:
|
||||
- model: str, the model name without provider prefix
|
||||
- usage: LiteLLM Usage block, containing anthropic caching information
|
||||
- data_residency: optional OpenAI data-residency region (e.g. "eu", "us"),
|
||||
inferred from api_base. Applies the model's regional-processing
|
||||
uplift multiplier when set.
|
||||
|
||||
Returns:
|
||||
Tuple[float, float] - prompt_cost_in_usd, completion_cost_in_usd
|
||||
@ -37,6 +43,7 @@ def cost_per_token(
|
||||
usage=usage,
|
||||
custom_llm_provider="openai",
|
||||
service_tier=service_tier,
|
||||
data_residency=data_residency,
|
||||
)
|
||||
# ### Non-cached text tokens
|
||||
# non_cached_text_tokens = usage.prompt_tokens
|
||||
|
||||
41
litellm/llms/openai/data_residency.py
Normal file
41
litellm/llms/openai/data_residency.py
Normal file
@ -0,0 +1,41 @@
|
||||
"""
|
||||
Helpers for resolving OpenAI data-residency (regional processing) from an
|
||||
api_base URL.
|
||||
|
||||
OpenAI enforces hostname-per-region for projects with geography restrictions
|
||||
enabled and rejects requests sent to the wrong host, so the api_base hostname
|
||||
is the authoritative signal of which region a request was processed in.
|
||||
"""
|
||||
|
||||
from typing import Dict, Optional
|
||||
from urllib.parse import urlparse
|
||||
|
||||
# Mapping of OpenAI regional hostnames to the corresponding data-residency
# value used by the cost calculator. See
# https://developers.openai.com/api/docs/pricing for the regional-processing
# uplift these hostnames trigger.
_OPENAI_REGIONAL_HOSTS: Dict[str, str] = {
    "eu.api.openai.com": "eu",
    "us.api.openai.com": "us",
}


def infer_openai_data_residency(
    custom_llm_provider: Optional[str], api_base: Optional[str]
) -> Optional[str]:
    """
    Derive the OpenAI data-residency region from an api_base URL.

    Args:
        custom_llm_provider: resolved provider name; anything other than
            ``"openai"`` short-circuits to ``None``.
        api_base: base URL (or bare ``host[:port][/path]``) the request is
            sent to.

    Returns:
        ``"eu"`` for the EU regional host, ``"us"`` for the US regional host,
        and ``None`` for the default global host, any non-OpenAI provider,
        any non-OpenAI URL, or an api_base that is missing, not a string, or
        cannot be parsed.
    """
    # Non-string values (None, numbers, bytes, ...) can never name a regional
    # host; rejecting them here also keeps urlparse from raising on them.
    if custom_llm_provider != "openai" or not isinstance(api_base, str):
        return None

    candidate = api_base.strip()
    if not candidate:
        return None

    # urlparse only fills in ``hostname`` when the string carries a netloc
    # marker ("scheme://" or a leading "//"). Add one for bare
    # "host[:port]/path" values so they resolve the same as the full URL.
    if "://" not in candidate and not candidate.startswith("//"):
        candidate = "//" + candidate

    try:
        host = urlparse(candidate).hostname
    except (TypeError, ValueError):
        # e.g. malformed IPv6 literal in the netloc
        return None
    if not host:
        return None

    # A trailing dot is the fully-qualified spelling of the same DNS name.
    return _OPENAI_REGIONAL_HOSTS.get(host.rstrip(".").lower())
|
||||
@ -1011,6 +1011,7 @@
|
||||
"supports_vision": true,
|
||||
"tool_use_system_prompt_tokens": 346,
|
||||
"supports_native_structured_output": true,
|
||||
"supports_output_config": true,
|
||||
"supports_max_reasoning_effort": true,
|
||||
"supports_minimal_reasoning_effort": true
|
||||
},
|
||||
@ -1041,6 +1042,7 @@
|
||||
"supports_vision": true,
|
||||
"tool_use_system_prompt_tokens": 346,
|
||||
"supports_native_structured_output": true,
|
||||
"supports_output_config": true,
|
||||
"supports_max_reasoning_effort": true,
|
||||
"supports_minimal_reasoning_effort": true
|
||||
},
|
||||
@ -1071,6 +1073,7 @@
|
||||
"supports_vision": true,
|
||||
"tool_use_system_prompt_tokens": 346,
|
||||
"supports_native_structured_output": true,
|
||||
"supports_output_config": true,
|
||||
"supports_max_reasoning_effort": true,
|
||||
"supports_minimal_reasoning_effort": true
|
||||
},
|
||||
@ -1100,6 +1103,7 @@
|
||||
"supports_vision": true,
|
||||
"tool_use_system_prompt_tokens": 346,
|
||||
"supports_native_structured_output": true,
|
||||
"supports_output_config": true,
|
||||
"supports_max_reasoning_effort": true,
|
||||
"supports_minimal_reasoning_effort": true
|
||||
},
|
||||
@ -1129,6 +1133,7 @@
|
||||
"supports_vision": true,
|
||||
"tool_use_system_prompt_tokens": 346,
|
||||
"supports_native_structured_output": true,
|
||||
"supports_output_config": true,
|
||||
"supports_max_reasoning_effort": true,
|
||||
"supports_minimal_reasoning_effort": true
|
||||
},
|
||||
@ -1328,6 +1333,7 @@
|
||||
"supports_vision": true,
|
||||
"tool_use_system_prompt_tokens": 346,
|
||||
"supports_native_structured_output": true,
|
||||
"supports_output_config": true,
|
||||
"supports_minimal_reasoning_effort": true
|
||||
},
|
||||
"global.anthropic.claude-sonnet-4-6": {
|
||||
@ -1358,6 +1364,7 @@
|
||||
"supports_vision": true,
|
||||
"tool_use_system_prompt_tokens": 346,
|
||||
"supports_native_structured_output": true,
|
||||
"supports_output_config": true,
|
||||
"supports_minimal_reasoning_effort": true
|
||||
},
|
||||
"us.anthropic.claude-sonnet-4-6": {
|
||||
@ -1388,6 +1395,7 @@
|
||||
"supports_vision": true,
|
||||
"tool_use_system_prompt_tokens": 346,
|
||||
"supports_native_structured_output": true,
|
||||
"supports_output_config": true,
|
||||
"supports_minimal_reasoning_effort": true
|
||||
},
|
||||
"eu.anthropic.claude-sonnet-4-6": {
|
||||
@ -1417,6 +1425,7 @@
|
||||
"supports_vision": true,
|
||||
"tool_use_system_prompt_tokens": 346,
|
||||
"supports_native_structured_output": true,
|
||||
"supports_output_config": true,
|
||||
"supports_minimal_reasoning_effort": true
|
||||
},
|
||||
"au.anthropic.claude-sonnet-4-6": {
|
||||
@ -1446,6 +1455,7 @@
|
||||
"supports_vision": true,
|
||||
"tool_use_system_prompt_tokens": 346,
|
||||
"supports_native_structured_output": true,
|
||||
"supports_output_config": true,
|
||||
"supports_minimal_reasoning_effort": true
|
||||
},
|
||||
"jp.anthropic.claude-sonnet-4-6": {
|
||||
@ -1475,6 +1485,7 @@
|
||||
"supports_vision": true,
|
||||
"tool_use_system_prompt_tokens": 346,
|
||||
"supports_native_structured_output": true,
|
||||
"supports_output_config": true,
|
||||
"supports_minimal_reasoning_effort": true
|
||||
},
|
||||
"anthropic.claude-sonnet-4-20250514-v1:0": {
|
||||
@ -1996,6 +2007,7 @@
|
||||
"supports_tool_choice": true,
|
||||
"supports_vision": true,
|
||||
"tool_use_system_prompt_tokens": 159,
|
||||
"supports_output_config": true,
|
||||
"supports_max_reasoning_effort": true,
|
||||
"supports_minimal_reasoning_effort": true
|
||||
},
|
||||
@ -2093,6 +2105,7 @@
|
||||
"supports_tool_choice": true,
|
||||
"supports_vision": true,
|
||||
"tool_use_system_prompt_tokens": 346,
|
||||
"supports_output_config": true,
|
||||
"supports_minimal_reasoning_effort": true
|
||||
},
|
||||
"azure/computer-use-preview": {
|
||||
@ -9654,6 +9667,7 @@
|
||||
"supports_tool_choice": true,
|
||||
"supports_vision": true,
|
||||
"tool_use_system_prompt_tokens": 346,
|
||||
"supports_output_config": true,
|
||||
"supports_minimal_reasoning_effort": true
|
||||
},
|
||||
"claude-sonnet-4-5-20250929-v1:0": {
|
||||
@ -9851,6 +9865,7 @@
|
||||
"us": 1.1,
|
||||
"fast": 6.0
|
||||
},
|
||||
"supports_output_config": true,
|
||||
"supports_max_reasoning_effort": true,
|
||||
"supports_minimal_reasoning_effort": true
|
||||
},
|
||||
@ -9886,7 +9901,8 @@
|
||||
"fast": 6.0
|
||||
},
|
||||
"supports_max_reasoning_effort": true,
|
||||
"supports_minimal_reasoning_effort": true
|
||||
"supports_minimal_reasoning_effort": true,
|
||||
"supports_output_config": true
|
||||
},
|
||||
"claude-opus-4-7": {
|
||||
"cache_creation_input_token_cost": 6.25e-06,
|
||||
@ -9921,7 +9937,8 @@
|
||||
"us": 1.1,
|
||||
"fast": 6.0
|
||||
},
|
||||
"supports_minimal_reasoning_effort": true
|
||||
"supports_minimal_reasoning_effort": true,
|
||||
"supports_output_config": true
|
||||
},
|
||||
"claude-opus-4-7-20260416": {
|
||||
"cache_creation_input_token_cost": 6.25e-06,
|
||||
@ -9956,7 +9973,8 @@
|
||||
"us": 1.1,
|
||||
"fast": 6.0
|
||||
},
|
||||
"supports_minimal_reasoning_effort": true
|
||||
"supports_minimal_reasoning_effort": true,
|
||||
"supports_output_config": true
|
||||
},
|
||||
"claude-sonnet-4-20250514": {
|
||||
"deprecation_date": "2026-05-14",
|
||||
@ -14958,7 +14976,7 @@
|
||||
"mode": "chat",
|
||||
"output_cost_per_reasoning_token": 1.5e-06,
|
||||
"output_cost_per_token": 1.5e-06,
|
||||
"source": "https://cloud.google.com/vertex-ai/generative-ai/pricing#gemini-models",
|
||||
"source": "https://ai.google.dev/gemini-api/docs/models",
|
||||
"supported_endpoints": [
|
||||
"/v1/chat/completions",
|
||||
"/v1/completions",
|
||||
@ -19014,6 +19032,8 @@
|
||||
"output_cost_per_token": 8e-06,
|
||||
"output_cost_per_token_batches": 4e-06,
|
||||
"output_cost_per_token_priority": 1.4e-05,
|
||||
"regional_processing_uplift_multiplier_eu": 1.10,
|
||||
"regional_processing_uplift_multiplier_us": 1.10,
|
||||
"supported_endpoints": [
|
||||
"/v1/chat/completions",
|
||||
"/v1/batch",
|
||||
@ -19087,6 +19107,8 @@
|
||||
"output_cost_per_token": 1.6e-06,
|
||||
"output_cost_per_token_batches": 8e-07,
|
||||
"output_cost_per_token_priority": 2.8e-06,
|
||||
"regional_processing_uplift_multiplier_eu": 1.10,
|
||||
"regional_processing_uplift_multiplier_us": 1.10,
|
||||
"supported_endpoints": [
|
||||
"/v1/chat/completions",
|
||||
"/v1/batch",
|
||||
@ -19160,6 +19182,8 @@
|
||||
"output_cost_per_token": 4e-07,
|
||||
"output_cost_per_token_batches": 2e-07,
|
||||
"output_cost_per_token_priority": 8e-07,
|
||||
"regional_processing_uplift_multiplier_eu": 1.10,
|
||||
"regional_processing_uplift_multiplier_us": 1.10,
|
||||
"supported_endpoints": [
|
||||
"/v1/chat/completions",
|
||||
"/v1/batch",
|
||||
@ -19231,6 +19255,8 @@
|
||||
"output_cost_per_token": 1e-05,
|
||||
"output_cost_per_token_batches": 5e-06,
|
||||
"output_cost_per_token_priority": 1.7e-05,
|
||||
"regional_processing_uplift_multiplier_eu": 1.10,
|
||||
"regional_processing_uplift_multiplier_us": 1.10,
|
||||
"supports_function_calling": true,
|
||||
"supports_parallel_function_calling": true,
|
||||
"supports_pdf_input": true,
|
||||
@ -19272,6 +19298,8 @@
|
||||
"mode": "chat",
|
||||
"output_cost_per_token": 1e-05,
|
||||
"output_cost_per_token_batches": 5e-06,
|
||||
"regional_processing_uplift_multiplier_eu": 1.10,
|
||||
"regional_processing_uplift_multiplier_us": 1.10,
|
||||
"supports_function_calling": true,
|
||||
"supports_parallel_function_calling": true,
|
||||
"supports_pdf_input": true,
|
||||
@ -19293,6 +19321,8 @@
|
||||
"mode": "chat",
|
||||
"output_cost_per_token": 1e-05,
|
||||
"output_cost_per_token_batches": 5e-06,
|
||||
"regional_processing_uplift_multiplier_eu": 1.10,
|
||||
"regional_processing_uplift_multiplier_us": 1.10,
|
||||
"supports_function_calling": true,
|
||||
"supports_parallel_function_calling": true,
|
||||
"supports_pdf_input": true,
|
||||
@ -19581,6 +19611,8 @@
|
||||
"output_cost_per_token": 6e-07,
|
||||
"output_cost_per_token_batches": 3e-07,
|
||||
"output_cost_per_token_priority": 1e-06,
|
||||
"regional_processing_uplift_multiplier_eu": 1.10,
|
||||
"regional_processing_uplift_multiplier_us": 1.10,
|
||||
"supports_function_calling": true,
|
||||
"supports_parallel_function_calling": true,
|
||||
"supports_pdf_input": true,
|
||||
@ -20284,6 +20316,8 @@
|
||||
"output_cost_per_token": 1e-05,
|
||||
"output_cost_per_token_flex": 5e-06,
|
||||
"output_cost_per_token_priority": 2e-05,
|
||||
"regional_processing_uplift_multiplier_eu": 1.10,
|
||||
"regional_processing_uplift_multiplier_us": 1.10,
|
||||
"supported_endpoints": [
|
||||
"/v1/chat/completions",
|
||||
"/v1/batch",
|
||||
@ -21206,6 +21240,8 @@
|
||||
"mode": "responses",
|
||||
"output_cost_per_token": 0.00012,
|
||||
"output_cost_per_token_batches": 6e-05,
|
||||
"regional_processing_uplift_multiplier_eu": 1.10,
|
||||
"regional_processing_uplift_multiplier_us": 1.10,
|
||||
"supported_endpoints": [
|
||||
"/v1/batch",
|
||||
"/v1/responses"
|
||||
@ -21612,6 +21648,8 @@
|
||||
"output_cost_per_token": 2e-06,
|
||||
"output_cost_per_token_flex": 1e-06,
|
||||
"output_cost_per_token_priority": 3.6e-06,
|
||||
"regional_processing_uplift_multiplier_eu": 1.10,
|
||||
"regional_processing_uplift_multiplier_us": 1.10,
|
||||
"supported_endpoints": [
|
||||
"/v1/chat/completions",
|
||||
"/v1/batch",
|
||||
@ -21693,6 +21731,8 @@
|
||||
"max_input_tokens": 272000,
|
||||
"max_output_tokens": 128000,
|
||||
"max_tokens": 128000,
|
||||
"regional_processing_uplift_multiplier_eu": 1.10,
|
||||
"regional_processing_uplift_multiplier_us": 1.10,
|
||||
"mode": "chat",
|
||||
"output_cost_per_token": 4e-07,
|
||||
"output_cost_per_token_flex": 2e-07,
|
||||
@ -28243,10 +28283,10 @@
|
||||
"supports_tool_choice": true
|
||||
},
|
||||
"openrouter/xiaomi/mimo-v2-flash": {
|
||||
"input_cost_per_token": 9e-08,
|
||||
"output_cost_per_token": 2.9e-07,
|
||||
"input_cost_per_token": 1e-07,
|
||||
"output_cost_per_token": 3e-07,
|
||||
"cache_creation_input_token_cost": 0.0,
|
||||
"cache_read_input_token_cost": 0.0,
|
||||
"cache_read_input_token_cost": 1e-08,
|
||||
"litellm_provider": "openrouter",
|
||||
"max_input_tokens": 262144,
|
||||
"max_output_tokens": 16384,
|
||||
@ -28256,7 +28296,43 @@
|
||||
"supports_tool_choice": true,
|
||||
"supports_reasoning": true,
|
||||
"supports_vision": false,
|
||||
"supports_prompt_caching": false
|
||||
"supports_prompt_caching": true
|
||||
},
|
||||
"openrouter/xiaomi/mimo-v2.5-pro": {
|
||||
"input_cost_per_token": 1e-06,
|
||||
"output_cost_per_token": 3e-06,
|
||||
"cache_creation_input_token_cost": 0.0,
|
||||
"cache_read_input_token_cost": 2e-07,
|
||||
"litellm_provider": "openrouter",
|
||||
"max_input_tokens": 1048576,
|
||||
"max_output_tokens": 16384,
|
||||
"max_tokens": 16384,
|
||||
"mode": "chat",
|
||||
"supports_function_calling": true,
|
||||
"supports_tool_choice": true,
|
||||
"supports_reasoning": true,
|
||||
"supports_vision": false,
|
||||
"supports_response_schema": true,
|
||||
"supports_prompt_caching": true
|
||||
},
|
||||
"openrouter/xiaomi/mimo-v2.5": {
|
||||
"input_cost_per_token": 4e-07,
|
||||
"output_cost_per_token": 2e-06,
|
||||
"cache_creation_input_token_cost": 0.0,
|
||||
"cache_read_input_token_cost": 8e-08,
|
||||
"litellm_provider": "openrouter",
|
||||
"max_input_tokens": 1048576,
|
||||
"max_output_tokens": 131072,
|
||||
"max_tokens": 131072,
|
||||
"mode": "chat",
|
||||
"supports_function_calling": true,
|
||||
"supports_tool_choice": true,
|
||||
"supports_reasoning": true,
|
||||
"supports_vision": true,
|
||||
"supports_audio_input": true,
|
||||
"supports_video_input": true,
|
||||
"supports_response_schema": true,
|
||||
"supports_prompt_caching": true
|
||||
},
|
||||
"openrouter/z-ai/glm-4.7": {
|
||||
"input_cost_per_token": 4e-07,
|
||||
@ -28987,14 +29063,16 @@
|
||||
"mode": "responses",
|
||||
"supports_web_search": true,
|
||||
"supports_reasoning": false,
|
||||
"supports_function_calling": true
|
||||
"supports_function_calling": true,
|
||||
"supports_output_config": true
|
||||
},
|
||||
"perplexity/anthropic/claude-opus-4-7": {
|
||||
"litellm_provider": "perplexity",
|
||||
"mode": "responses",
|
||||
"supports_web_search": true,
|
||||
"supports_reasoning": false,
|
||||
"supports_function_calling": true
|
||||
"supports_function_calling": true,
|
||||
"supports_output_config": true
|
||||
},
|
||||
"perplexity/anthropic/claude-opus-4-5": {
|
||||
"litellm_provider": "perplexity",
|
||||
@ -33405,6 +33483,7 @@
|
||||
"supports_tool_choice": true,
|
||||
"supports_vision": true,
|
||||
"tool_use_system_prompt_tokens": 346,
|
||||
"supports_output_config": true,
|
||||
"supports_max_reasoning_effort": true,
|
||||
"supports_minimal_reasoning_effort": true
|
||||
},
|
||||
@ -33433,6 +33512,7 @@
|
||||
"supports_tool_choice": true,
|
||||
"supports_vision": true,
|
||||
"tool_use_system_prompt_tokens": 346,
|
||||
"supports_output_config": true,
|
||||
"supports_max_reasoning_effort": true,
|
||||
"supports_minimal_reasoning_effort": true
|
||||
},
|
||||
@ -33546,6 +33626,7 @@
|
||||
"search_context_size_low": 0.01,
|
||||
"search_context_size_medium": 0.01
|
||||
},
|
||||
"supports_output_config": true,
|
||||
"supports_minimal_reasoning_effort": true
|
||||
},
|
||||
"vertex_ai/claude-sonnet-4-5@20250929": {
|
||||
@ -40658,6 +40739,7 @@
|
||||
"search_context_size_low": 0.01,
|
||||
"search_context_size_medium": 0.01
|
||||
},
|
||||
"supports_output_config": true,
|
||||
"supports_minimal_reasoning_effort": true
|
||||
},
|
||||
"duckduckgo/search": {
|
||||
|
||||
@ -54,6 +54,7 @@ if TYPE_CHECKING:
|
||||
else:
|
||||
ResponseText = str # Fallback for ResponseText import
|
||||
from litellm.litellm_core_utils.get_litellm_params import get_litellm_params
|
||||
from litellm.llms.openai.data_residency import infer_openai_data_residency
|
||||
from litellm.secret_managers.main import get_secret_str
|
||||
from litellm.types.responses.main import *
|
||||
from litellm.types.router import GenericLiteLLMParams
|
||||
@ -1139,6 +1140,9 @@ def responses(
|
||||
"aresponses": _is_async,
|
||||
"litellm_call_id": litellm_call_id,
|
||||
"model_info": kwargs.get("model_info"),
|
||||
"data_residency": infer_openai_data_residency(
|
||||
custom_llm_provider, litellm_params.api_base
|
||||
),
|
||||
"metadata": (
|
||||
kwargs["litellm_metadata"]
|
||||
if "litellm_metadata" in kwargs
|
||||
@ -2032,6 +2036,9 @@ def compact_responses(
|
||||
litellm_params={
|
||||
**responses_api_request_params,
|
||||
"litellm_call_id": litellm_call_id,
|
||||
"data_residency": infer_openai_data_residency(
|
||||
custom_llm_provider, litellm_params.api_base
|
||||
),
|
||||
},
|
||||
custom_llm_provider=custom_llm_provider,
|
||||
)
|
||||
@ -2129,6 +2136,11 @@ async def _aresponses_websocket(
|
||||
api_key=api_key,
|
||||
)
|
||||
|
||||
litellm_params_dict["data_residency"] = infer_openai_data_residency(
|
||||
_custom_llm_provider,
|
||||
dynamic_api_base or litellm_params.api_base or litellm.api_base,
|
||||
)
|
||||
|
||||
litellm_logging_obj.update_from_kwargs(
|
||||
kwargs=kwargs,
|
||||
model=model,
|
||||
|
||||
@ -219,6 +219,12 @@ class ModelInfoBase(ProviderSpecificModelInfo, total=False):
|
||||
output_cost_per_token_priority: Optional[
|
||||
float
|
||||
] # OpenAI priority service tier pricing
|
||||
regional_processing_uplift_multiplier_eu: Optional[
|
||||
float
|
||||
] # OpenAI EU data-residency uplift multiplier applied to all token costs (e.g. 1.10 = +10%)
|
||||
regional_processing_uplift_multiplier_us: Optional[
|
||||
float
|
||||
] # OpenAI US data-residency uplift multiplier applied to all token costs (e.g. 1.10 = +10%)
|
||||
output_cost_per_character: Optional[float] # only for vertex ai models
|
||||
output_cost_per_audio_token: Optional[float]
|
||||
output_cost_per_token_above_128k_tokens: Optional[
|
||||
@ -3601,6 +3607,20 @@ class ServiceTier(Enum):
|
||||
PRIORITY = "priority"
|
||||
|
||||
|
||||
class DataResidency(Enum):
    """
    Region tags for OpenAI data residency / regional processing.

    The region is inferred from the host of the OpenAI ``api_base``:
    ``eu.api.openai.com`` maps to ``EU`` and ``us.api.openai.com`` maps to
    ``US``. The tag is used to apply the regional-processing cost uplift
    (see ``regional_processing_uplift_multiplier_<region>`` on ModelInfo).
    """

    # Member values are the lowercase region codes.
    US = "us"
    EU = "eu"
|
||||
|
||||
|
||||
LLMResponseTypes = Union[
|
||||
ModelResponse,
|
||||
EmbeddingResponse,
|
||||
|
||||
@ -5942,6 +5942,12 @@ def _get_model_info_helper( # noqa: PLR0915
|
||||
output_cost_per_token_priority=_model_info.get(
|
||||
"output_cost_per_token_priority", None
|
||||
),
|
||||
regional_processing_uplift_multiplier_eu=_model_info.get(
|
||||
"regional_processing_uplift_multiplier_eu", None
|
||||
),
|
||||
regional_processing_uplift_multiplier_us=_model_info.get(
|
||||
"regional_processing_uplift_multiplier_us", None
|
||||
),
|
||||
output_cost_per_audio_token=_model_info.get(
|
||||
"output_cost_per_audio_token", None
|
||||
),
|
||||
|
||||
@ -19050,6 +19050,8 @@
|
||||
"output_cost_per_token": 8e-06,
|
||||
"output_cost_per_token_batches": 4e-06,
|
||||
"output_cost_per_token_priority": 1.4e-05,
|
||||
"regional_processing_uplift_multiplier_eu": 1.10,
|
||||
"regional_processing_uplift_multiplier_us": 1.10,
|
||||
"supported_endpoints": [
|
||||
"/v1/chat/completions",
|
||||
"/v1/batch",
|
||||
@ -19123,6 +19125,8 @@
|
||||
"output_cost_per_token": 1.6e-06,
|
||||
"output_cost_per_token_batches": 8e-07,
|
||||
"output_cost_per_token_priority": 2.8e-06,
|
||||
"regional_processing_uplift_multiplier_eu": 1.10,
|
||||
"regional_processing_uplift_multiplier_us": 1.10,
|
||||
"supported_endpoints": [
|
||||
"/v1/chat/completions",
|
||||
"/v1/batch",
|
||||
@ -19196,6 +19200,8 @@
|
||||
"output_cost_per_token": 4e-07,
|
||||
"output_cost_per_token_batches": 2e-07,
|
||||
"output_cost_per_token_priority": 8e-07,
|
||||
"regional_processing_uplift_multiplier_eu": 1.10,
|
||||
"regional_processing_uplift_multiplier_us": 1.10,
|
||||
"supported_endpoints": [
|
||||
"/v1/chat/completions",
|
||||
"/v1/batch",
|
||||
@ -19267,6 +19273,8 @@
|
||||
"output_cost_per_token": 1e-05,
|
||||
"output_cost_per_token_batches": 5e-06,
|
||||
"output_cost_per_token_priority": 1.7e-05,
|
||||
"regional_processing_uplift_multiplier_eu": 1.10,
|
||||
"regional_processing_uplift_multiplier_us": 1.10,
|
||||
"supports_function_calling": true,
|
||||
"supports_parallel_function_calling": true,
|
||||
"supports_pdf_input": true,
|
||||
@ -19308,6 +19316,8 @@
|
||||
"mode": "chat",
|
||||
"output_cost_per_token": 1e-05,
|
||||
"output_cost_per_token_batches": 5e-06,
|
||||
"regional_processing_uplift_multiplier_eu": 1.10,
|
||||
"regional_processing_uplift_multiplier_us": 1.10,
|
||||
"supports_function_calling": true,
|
||||
"supports_parallel_function_calling": true,
|
||||
"supports_pdf_input": true,
|
||||
@ -19329,6 +19339,8 @@
|
||||
"mode": "chat",
|
||||
"output_cost_per_token": 1e-05,
|
||||
"output_cost_per_token_batches": 5e-06,
|
||||
"regional_processing_uplift_multiplier_eu": 1.10,
|
||||
"regional_processing_uplift_multiplier_us": 1.10,
|
||||
"supports_function_calling": true,
|
||||
"supports_parallel_function_calling": true,
|
||||
"supports_pdf_input": true,
|
||||
@ -19617,6 +19629,8 @@
|
||||
"output_cost_per_token": 6e-07,
|
||||
"output_cost_per_token_batches": 3e-07,
|
||||
"output_cost_per_token_priority": 1e-06,
|
||||
"regional_processing_uplift_multiplier_eu": 1.10,
|
||||
"regional_processing_uplift_multiplier_us": 1.10,
|
||||
"supports_function_calling": true,
|
||||
"supports_parallel_function_calling": true,
|
||||
"supports_pdf_input": true,
|
||||
@ -20320,6 +20334,8 @@
|
||||
"output_cost_per_token": 1e-05,
|
||||
"output_cost_per_token_flex": 5e-06,
|
||||
"output_cost_per_token_priority": 2e-05,
|
||||
"regional_processing_uplift_multiplier_eu": 1.10,
|
||||
"regional_processing_uplift_multiplier_us": 1.10,
|
||||
"supported_endpoints": [
|
||||
"/v1/chat/completions",
|
||||
"/v1/batch",
|
||||
@ -21242,6 +21258,8 @@
|
||||
"mode": "responses",
|
||||
"output_cost_per_token": 0.00012,
|
||||
"output_cost_per_token_batches": 6e-05,
|
||||
"regional_processing_uplift_multiplier_eu": 1.10,
|
||||
"regional_processing_uplift_multiplier_us": 1.10,
|
||||
"supported_endpoints": [
|
||||
"/v1/batch",
|
||||
"/v1/responses"
|
||||
@ -21648,6 +21666,8 @@
|
||||
"output_cost_per_token": 2e-06,
|
||||
"output_cost_per_token_flex": 1e-06,
|
||||
"output_cost_per_token_priority": 3.6e-06,
|
||||
"regional_processing_uplift_multiplier_eu": 1.10,
|
||||
"regional_processing_uplift_multiplier_us": 1.10,
|
||||
"supported_endpoints": [
|
||||
"/v1/chat/completions",
|
||||
"/v1/batch",
|
||||
@ -21729,6 +21749,8 @@
|
||||
"max_input_tokens": 272000,
|
||||
"max_output_tokens": 128000,
|
||||
"max_tokens": 128000,
|
||||
"regional_processing_uplift_multiplier_eu": 1.10,
|
||||
"regional_processing_uplift_multiplier_us": 1.10,
|
||||
"mode": "chat",
|
||||
"output_cost_per_token": 4e-07,
|
||||
"output_cost_per_token_flex": 2e-07,
|
||||
|
||||
@ -145,6 +145,37 @@ def test_batch_cost_calculator_func_uses_custom_model_info():
|
||||
), f"Expected total cost {expected}, got {cost}"
|
||||
|
||||
|
||||
@pytest.mark.parametrize("data_residency", ["eu", "us"])
def test_batch_cost_calculator_applies_data_residency_uplift(
    data_residency, monkeypatch
):
    """batch_cost_calculator should apply the regional uplift multiplier when
    data_residency is set and the model carries a configured multiplier.

    Both the environment variable and ``litellm.model_cost`` are swapped
    through ``monkeypatch``, so pytest restores them on teardown whether the
    test passes or fails; no manual try/finally is needed.
    """
    monkeypatch.setenv("LITELLM_LOCAL_MODEL_COST_MAP", "True")
    # The env var must be set before the cost map is loaded.
    monkeypatch.setattr(litellm, "model_cost", litellm.get_model_cost_map(url=""))

    usage = Usage(prompt_tokens=1000, completion_tokens=500, total_tokens=1500)

    base_prompt, base_completion = batch_cost_calculator(
        usage=usage,
        model="gpt-5",
        custom_llm_provider="openai",
    )
    regional_prompt, regional_completion = batch_cost_calculator(
        usage=usage,
        model="gpt-5",
        custom_llm_provider="openai",
        data_residency=data_residency,
    )

    # Guard against a vacuous pass: 0 * 1.10 == 0 would satisfy the checks below.
    assert base_prompt > 0 and base_completion > 0
    assert regional_prompt == pytest.approx(base_prompt * 1.10, rel=1e-9)
    assert regional_completion == pytest.approx(base_completion * 1.10, rel=1e-9)
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_calculate_batch_cost_and_usage_uses_custom_model_info():
|
||||
"""calculate_batch_cost_and_usage should thread model_info."""
|
||||
|
||||
@ -1418,3 +1418,123 @@ def test_image_count_prevents_text_tokens_fallback():
|
||||
f"got {prompt_cost}. text_tokens fallback may be double-charging."
|
||||
)
|
||||
assert completion_cost == 0.0
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Data-residency (OpenAI regional processing) tests
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@pytest.fixture
def _local_model_cost_map():
    """Swap in a freshly loaded model-cost map for the duration of one test.

    Sets ``LITELLM_LOCAL_MODEL_COST_MAP`` to ``"True"`` and replaces
    ``litellm.model_cost`` with ``litellm.get_model_cost_map(url="")``. On
    teardown both the previous ``litellm.model_cost`` object and the previous
    environment value (or its absence) are restored so state does not leak
    into other tests.
    """
    env_key = "LITELLM_LOCAL_MODEL_COST_MAP"
    saved_env = os.environ.get(env_key)
    saved_model_cost = litellm.model_cost

    os.environ[env_key] = "True"
    litellm.model_cost = litellm.get_model_cost_map(url="")
    try:
        yield
    finally:
        litellm.model_cost = saved_model_cost
        if saved_env is not None:
            os.environ[env_key] = saved_env
        else:
            # The variable was unset before the test; leave it unset again.
            os.environ.pop(env_key, None)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("data_residency", ["eu", "us"])
def test_data_residency_applies_uplift(data_residency, _local_model_cost_map):
    """Setting data_residency scales gpt-5 prompt and completion cost by 1.10.

    The same call is priced with and without ``data_residency``; the regional
    result must equal the base result times the uplift, both per component
    and in total.
    """
    from litellm.types.utils import Usage

    token_usage = Usage(prompt_tokens=1000, completion_tokens=500, total_tokens=1500)
    call_kwargs = {
        "model": "gpt-5",
        "usage": token_usage,
        "custom_llm_provider": "openai",
    }

    base = generic_cost_per_token(**call_kwargs)
    regional = generic_cost_per_token(**call_kwargs, data_residency=data_residency)

    base_prompt_cost, base_completion_cost = base[0], base[1]
    regional_prompt_cost, regional_completion_cost = regional[0], regional[1]
    base_total = base_prompt_cost + base_completion_cost
    regional_total = regional_prompt_cost + regional_completion_cost

    # A zero base cost would make the proportional checks pass trivially.
    assert base_total > 0
    assert regional_total == pytest.approx(base_total * 1.10, rel=1e-9)
    assert regional_prompt_cost == pytest.approx(base_prompt_cost * 1.10, rel=1e-9)
    assert regional_completion_cost == pytest.approx(
        base_completion_cost * 1.10, rel=1e-9
    )
|
||||
|
||||
|
||||
def test_data_residency_no_uplift_for_unmarked_model(_local_model_cost_map):
    """Without a regional_processing_uplift_multiplier_* entry, pricing is unchanged.

    Uses gpt-3.5-turbo as the model expected to carry no multiplier: passing
    ``data_residency="eu"`` must neither raise nor alter the computed cost.
    """
    from litellm.types.utils import Usage

    token_usage = Usage(prompt_tokens=1000, completion_tokens=500, total_tokens=1500)
    call_kwargs = {
        "model": "gpt-3.5-turbo",
        "usage": token_usage,
        "custom_llm_provider": "openai",
    }

    base = generic_cost_per_token(**call_kwargs)
    with_residency = generic_cost_per_token(**call_kwargs, data_residency="eu")

    assert base == with_residency
|
||||
|
||||
|
||||
def test_data_residency_none_no_uplift(_local_model_cost_map):
    """Passing data_residency=None explicitly equals omitting it, even for gpt-5."""
    from litellm.types.utils import Usage

    token_usage = Usage(prompt_tokens=1000, completion_tokens=500, total_tokens=1500)
    call_kwargs = {
        "model": "gpt-5",
        "usage": token_usage,
        "custom_llm_provider": "openai",
    }

    base = generic_cost_per_token(**call_kwargs)
    explicit_none = generic_cost_per_token(**call_kwargs, data_residency=None)

    assert base == explicit_none
|
||||
|
||||
|
||||
def test_data_residency_composes_with_service_tier(_local_model_cost_map):
    """The regional uplift stacks on top of priority-tier pricing.

    Both calls use ``service_tier="priority"``; adding ``data_residency="eu"``
    must scale the priority total by 1.10.
    """
    from litellm.types.utils import Usage

    token_usage = Usage(prompt_tokens=1000, completion_tokens=500, total_tokens=1500)
    priority_kwargs = {
        "model": "gpt-5",
        "usage": token_usage,
        "custom_llm_provider": "openai",
        "service_tier": "priority",
    }

    priority_base = generic_cost_per_token(**priority_kwargs)
    priority_eu = generic_cost_per_token(**priority_kwargs, data_residency="eu")

    priority_base_total = priority_base[0] + priority_base[1]
    priority_eu_total = priority_eu[0] + priority_eu[1]

    # A zero base cost would make the proportional check pass trivially.
    assert priority_base_total > 0
    assert priority_eu_total == pytest.approx(priority_base_total * 1.10, rel=1e-9)
|
||||
|
||||
@ -125,3 +125,40 @@ class TestGetLitellmParamsExplicitFields:
|
||||
def test_no_log_from_explicit_param(self):
|
||||
result = get_litellm_params(no_log=True)
|
||||
assert result["no-log"] is True
|
||||
|
||||
|
||||
class TestGetLitellmParamsDataResidency:
    """Check the ``data_residency`` entry that get_litellm_params produces
    for regional, global, missing and non-OpenAI ``api_base`` inputs."""

    @staticmethod
    def _data_residency(**params):
        # Every case reads the same key from the returned params.
        return get_litellm_params(**params)["data_residency"]

    def test_eu_host_resolves_to_eu(self):
        resolved = self._data_residency(
            custom_llm_provider="openai",
            api_base="https://eu.api.openai.com/v1",
        )
        assert resolved == "eu"

    def test_us_host_resolves_to_us(self):
        resolved = self._data_residency(
            custom_llm_provider="openai",
            api_base="https://us.api.openai.com/v1",
        )
        assert resolved == "us"

    def test_global_host_resolves_to_none(self):
        resolved = self._data_residency(
            custom_llm_provider="openai",
            api_base="https://api.openai.com/v1",
        )
        assert resolved is None

    def test_no_api_base_is_none(self):
        resolved = self._data_residency(custom_llm_provider="openai")
        assert resolved is None

    def test_non_openai_provider_does_not_resolve(self):
        """A regional OpenAI host is ignored when the provider is not OpenAI."""
        resolved = self._data_residency(
            custom_llm_provider="anthropic",
            api_base="https://eu.api.openai.com/v1",
        )
        assert resolved is None
|
||||
|
||||
@ -0,0 +1,134 @@
|
||||
"""
|
||||
Tests that data_residency is correctly populated on the litellm logging
|
||||
object's litellm_params for OpenAI Responses paths, even when
|
||||
custom_llm_provider is resolved from the model string inside responses()
|
||||
rather than passed explicitly.
|
||||
"""
|
||||
|
||||
import json
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
import litellm
|
||||
|
||||
|
||||
def _make_responses_api_response_body() -> dict:
|
||||
return {
|
||||
"id": "resp-test",
|
||||
"object": "response",
|
||||
"created_at": 1234567890,
|
||||
"model": "gpt-4.1",
|
||||
"output": [
|
||||
{
|
||||
"type": "message",
|
||||
"id": "msg-test",
|
||||
"status": "completed",
|
||||
"role": "assistant",
|
||||
"content": [
|
||||
{
|
||||
"type": "output_text",
|
||||
"text": "ok",
|
||||
"annotations": [],
|
||||
}
|
||||
],
|
||||
}
|
||||
],
|
||||
"status": "completed",
|
||||
"usage": {
|
||||
"input_tokens": 1,
|
||||
"output_tokens": 1,
|
||||
"total_tokens": 2,
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
def _make_mock_http_client(response_body: dict) -> MagicMock:
|
||||
mock_client = MagicMock()
|
||||
mock_response = MagicMock()
|
||||
mock_response.status_code = 200
|
||||
mock_response.headers = {"content-type": "application/json"}
|
||||
mock_response.json.return_value = response_body
|
||||
mock_response.text = json.dumps(response_body)
|
||||
mock_client.post.return_value = mock_response
|
||||
return mock_client
|
||||
|
||||
|
||||
def _capture_logging_obj():
    """Return ``(captured, init_spy)`` for spying on ``litellm.Logging``.

    ``init_spy`` is meant to be patched in as ``litellm.Logging.__init__``.
    It runs the real initializer, then stores the instance under
    ``captured["logging_obj"]``; a later construction overwrites an earlier
    one.
    """
    sink = {}
    # Bind the real initializer now, before the caller patches it.
    original_init = litellm.Logging.__init__

    def init_spy(self, *args, **kwargs):
        original_init(self, *args, **kwargs)
        sink["logging_obj"] = self

    return sink, init_spy
|
||||
|
||||
|
||||
def test_responses_eu_api_base_sets_data_residency():
    """An EU regional api_base must yield data_residency == "eu" on the
    logging object's litellm_params.

    custom_llm_provider is not passed, so it has to be inferred from the
    model. The cost calculator relies on this value to apply the regional
    uplift.
    """
    captured, init_spy = _capture_logging_obj()
    http_client = _make_mock_http_client(_make_responses_api_response_body())

    client_patch = patch(
        "litellm.llms.custom_httpx.llm_http_handler._get_httpx_client",
        return_value=http_client,
    )
    logging_init_patch = patch.object(litellm.Logging, "__init__", init_spy)

    with client_patch, logging_init_patch:
        litellm.responses(
            model="gpt-4.1",
            input="hi",
            api_base="https://eu.api.openai.com/v1",
            api_key="test-key",
        )

    recorded_params = captured["logging_obj"].litellm_params
    assert recorded_params.get("data_residency") == "eu"
|
||||
|
||||
|
||||
def test_responses_us_api_base_sets_data_residency():
    """A US regional api_base must yield data_residency == "us" on the
    logging object's litellm_params."""
    captured, init_spy = _capture_logging_obj()
    http_client = _make_mock_http_client(_make_responses_api_response_body())

    client_patch = patch(
        "litellm.llms.custom_httpx.llm_http_handler._get_httpx_client",
        return_value=http_client,
    )
    logging_init_patch = patch.object(litellm.Logging, "__init__", init_spy)

    with client_patch, logging_init_patch:
        litellm.responses(
            model="gpt-4.1",
            input="hi",
            api_base="https://us.api.openai.com/v1",
            api_key="test-key",
        )

    recorded_params = captured["logging_obj"].litellm_params
    assert recorded_params.get("data_residency") == "us"
|
||||
|
||||
|
||||
def test_responses_global_api_base_leaves_data_residency_none():
    """The global api.openai.com host must leave data_residency as None on
    the logging object's litellm_params."""
    captured, init_spy = _capture_logging_obj()
    http_client = _make_mock_http_client(_make_responses_api_response_body())

    client_patch = patch(
        "litellm.llms.custom_httpx.llm_http_handler._get_httpx_client",
        return_value=http_client,
    )
    logging_init_patch = patch.object(litellm.Logging, "__init__", init_spy)

    with client_patch, logging_init_patch:
        litellm.responses(
            model="gpt-4.1",
            input="hi",
            api_base="https://api.openai.com/v1",
            api_key="test-key",
        )

    recorded_params = captured["logging_obj"].litellm_params
    assert recorded_params.get("data_residency") is None
|
||||
34
tests/test_litellm/llms/openai/test_data_residency.py
Normal file
34
tests/test_litellm/llms/openai/test_data_residency.py
Normal file
@ -0,0 +1,34 @@
|
||||
"""Tests for the OpenAI data-residency inference helper."""
|
||||
|
||||
import pytest
|
||||
|
||||
from litellm.llms.openai.data_residency import infer_openai_data_residency
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "api_base, expected",
    [
        # Regional hosts, with and without a path; the host match ignores case.
        ("https://eu.api.openai.com/v1", "eu"),
        ("https://eu.api.openai.com", "eu"),
        ("https://us.api.openai.com/v1", "us"),
        ("https://us.api.openai.com", "us"),
        ("https://EU.api.openai.com/v1", "eu"),
        # Global OpenAI host and unrelated hosts carry no region.
        ("https://api.openai.com/v1", None),
        ("https://api.openai.com", None),
        ("https://example.com/v1", None),
        ("https://my-azure-endpoint.openai.azure.com/openai/deployments/foo", None),
        # Missing or malformed input.
        ("", None),
        (None, None),
        ("not a url", None),
    ],
)
def test_infer_openai_data_residency(api_base, expected):
    """With provider "openai", each api_base maps to the expected region or None."""
    inferred = infer_openai_data_residency("openai", api_base)
    assert inferred == expected
|
||||
|
||||
|
||||
@pytest.mark.parametrize("custom_llm_provider", [None, "anthropic", "azure", "bedrock"])
def test_infer_openai_data_residency_non_openai_provider(custom_llm_provider):
    """An EU OpenAI host yields no region when the provider is not "openai"."""
    regional_base = "https://eu.api.openai.com/v1"
    inferred = infer_openai_data_residency(custom_llm_provider, regional_base)
    assert inferred is None
|
||||
@ -737,6 +737,8 @@ def test_aaamodel_prices_and_context_window_json_is_valid():
|
||||
"output_cost_per_token_priority": {"type": "number"},
|
||||
"output_cost_per_token_above_200k_tokens_priority": {"type": "number"},
|
||||
"output_cost_per_token_above_272k_tokens_priority": {"type": "number"},
|
||||
"regional_processing_uplift_multiplier_eu": {"type": "number"},
|
||||
"regional_processing_uplift_multiplier_us": {"type": "number"},
|
||||
"input_cost_per_pixel": {"type": "number"},
|
||||
"input_cost_per_query": {"type": "number"},
|
||||
"input_cost_per_request": {"type": "number"},
|
||||
|
||||
Loading…
Reference in New Issue
Block a user