feat(openai): apply regional-processing cost uplift for EU/US data residency (#28626)

* feat(openai): apply regional-processing cost uplift for EU/US data residency

OpenAI charges a 10% uplift on the latest GPT models when requests are
served from a regionalized hostname (eu./us.api.openai.com).  Infer the
region from `api_base`, expose it on `kwargs["litellm_params"]["data_residency"]`,
and multiply the computed cost by a per-model
`regional_processing_uplift_multiplier_<region>` field.

https://claude.ai/code/session_012ebH44s7ohYxjoix5CXzTW

* test: allow regional_processing_uplift_multiplier_{eu,us} in model_prices schema

* fix(cost): tighten data_residency inference and restore model_cost in tests

- Only infer OpenAI data_residency when custom_llm_provider == "openai";
  drop the implicit None fallback so non-OpenAI callers can't accidentally
  pick up a regional tag from a stray OpenAI hostname.
- _local_model_cost_map fixture now snapshots and restores
  litellm.model_cost and LITELLM_LOCAL_MODEL_COST_MAP so tests don't leak
  state across the session.

* refactor(openai): move data_residency helper under llms/openai

* fix: thread data_residency through realtime stream cost calculation

Co-authored-by: Yassin Kortam <yassin@berri.ai>

* fix(cost): thread data_residency through batch_cost_calculator

Apply the OpenAI regional-processing uplift multiplier to retrieve_batch
cost paths so Batch API requests served via eu./us.api.openai.com are
priced at the same uplifted token rates as completions/transcriptions.

* refactor(openai): encapsulate provider check inside infer_openai_data_residency

Move the custom_llm_provider == "openai" guard from get_litellm_params
into the helper itself so the core utility no longer carries
provider-specific dispatch logic. Callers pass through the provider
unconditionally; the helper returns None for any non-OpenAI provider.

* fix(responses): thread data_residency through Responses logging params

The Responses API paths build their logging litellm_params dict after
provider resolution but did not include data_residency, so cost calc
saw None even when the effective api_base was a regional OpenAI host.

---------

Co-authored-by: Claude <noreply@anthropic.com>
Co-authored-by: Cursor Agent <cursoragent@cursor.com>
Co-authored-by: Yassin Kortam <yassin@berri.ai>
This commit is contained in:
Mateo Wang 2026-05-25 20:36:14 -07:00 committed by GitHub
parent f38c16c71e
commit c23b19f09c
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
17 changed files with 652 additions and 13 deletions

View File

@ -24,6 +24,7 @@ from litellm.litellm_core_utils.llm_cost_calc.usage_object_transformation import
from litellm.litellm_core_utils.llm_cost_calc.utils import (
CostCalculatorUtils,
_generic_cost_per_character,
_get_regional_uplift_multiplier,
_get_service_tier_cost_key,
_parse_prompt_tokens_details,
calculate_cost_component,
@ -312,6 +313,10 @@ def cost_per_token( # noqa: PLR0915
audio_transcription_file_duration: float = 0.0, # for audio transcription calls - the file time in seconds
### SERVICE TIER ###
service_tier: Optional[str] = None, # for OpenAI service tier pricing
### DATA RESIDENCY ###
data_residency: Optional[
str
] = None, # for OpenAI regional-processing uplift (e.g. "eu", "us")
response: Optional[Any] = None,
### REQUEST MODEL ###
request_model: Optional[str] = None, # original request model for router detection
@ -493,6 +498,7 @@ def cost_per_token( # noqa: PLR0915
usage=usage_block,
custom_llm_provider=custom_llm_provider,
service_tier=service_tier,
data_residency=data_residency,
)
return prompt_cost, completion_cost
@ -521,7 +527,10 @@ def cost_per_token( # noqa: PLR0915
or call_type == CallTypes.retrieve_batch
):
return batch_cost_calculator(
usage=usage_block, model=model, custom_llm_provider=custom_llm_provider
usage=usage_block,
model=model,
custom_llm_provider=custom_llm_provider,
data_residency=data_residency,
)
elif call_type == "atranscription" or call_type == "transcription":
if _transcription_usage_has_token_details(usage_block):
@ -529,6 +538,7 @@ def cost_per_token( # noqa: PLR0915
model=model_without_prefix,
usage=usage_block,
service_tier=service_tier,
data_residency=data_residency,
)
return openai_cost_per_second(
@ -579,7 +589,10 @@ def cost_per_token( # noqa: PLR0915
)
elif custom_llm_provider == "openai":
return openai_cost_per_token(
model=model, usage=usage_block, service_tier=service_tier
model=model,
usage=usage_block,
service_tier=service_tier,
data_residency=data_residency,
)
elif custom_llm_provider == "databricks":
return databricks_cost_per_token(model=model, usage=usage_block)
@ -631,6 +644,7 @@ def cost_per_token( # noqa: PLR0915
usage=usage_block,
custom_llm_provider=custom_llm_provider,
service_tier=service_tier,
data_residency=data_residency,
)
if (
@ -1117,6 +1131,10 @@ def completion_cost( # noqa: PLR0915
litellm_logging_obj: Optional[LitellmLoggingObject] = None,
### SERVICE TIER ###
service_tier: Optional[str] = None, # for OpenAI service tier pricing
### DATA RESIDENCY ###
data_residency: Optional[
str
] = None, # for OpenAI regional-processing uplift (e.g. "eu", "us")
) -> float:
"""
Calculate the cost of a given completion call for GPT-3.5-turbo, llama2, any litellm supported llm.
@ -1516,6 +1534,7 @@ def completion_cost( # noqa: PLR0915
combined_usage_object=cost_per_token_usage_object,
custom_llm_provider=custom_llm_provider,
litellm_model_name=model,
data_residency=data_residency,
)
elif call_type == _MCP_CALL_TYPE:
from litellm.proxy._experimental.mcp_server.cost_calculator import (
@ -1600,6 +1619,7 @@ def completion_cost( # noqa: PLR0915
audio_transcription_file_duration=audio_transcription_file_duration,
rerank_billed_units=rerank_billed_units,
service_tier=service_tier,
data_residency=data_residency,
response=completion_response,
request_model=request_model_for_cost,
)
@ -1811,6 +1831,10 @@ def response_cost_calculator(
litellm_logging_obj: Optional[LitellmLoggingObject] = None,
### SERVICE TIER ###
service_tier: Optional[str] = None, # for OpenAI service tier pricing
### DATA RESIDENCY ###
data_residency: Optional[
str
] = None, # for OpenAI regional-processing uplift (e.g. "eu", "us")
) -> float:
"""
Returns
@ -1844,6 +1868,7 @@ def response_cost_calculator(
router_model_id=router_model_id,
litellm_logging_obj=litellm_logging_obj,
service_tier=service_tier,
data_residency=data_residency,
)
return response_cost
except Exception as e:
@ -2202,6 +2227,7 @@ def batch_cost_calculator(
model: str,
custom_llm_provider: Optional[str] = None,
model_info: Optional[ModelInfo] = None,
data_residency: Optional[str] = None,
) -> Tuple[float, float]:
"""
Calculate the cost of a batch job.
@ -2286,6 +2312,11 @@ def batch_cost_calculator(
usage.completion_tokens * (output_cost_per_token) / 2
) # batch cost is usually half of the regular token cost
uplift = _get_regional_uplift_multiplier(model_info, data_residency)
if uplift != 1.0:
total_prompt_cost *= uplift
total_completion_cost *= uplift
return total_prompt_cost, total_completion_cost
@ -2431,6 +2462,7 @@ def handle_realtime_stream_cost_calculation(
combined_usage_object: Usage,
custom_llm_provider: str,
litellm_model_name: str,
data_residency: Optional[str] = None,
) -> float:
"""
Handles the cost calculation for realtime stream responses.
@ -2461,6 +2493,7 @@ def handle_realtime_stream_cost_calculation(
model=model_name,
usage=combined_usage_object,
custom_llm_provider=custom_llm_provider,
data_residency=data_residency,
)
except Exception:
continue

View File

@ -1,5 +1,7 @@
from typing import Optional
from litellm.llms.openai.data_residency import infer_openai_data_residency
# Pre-define optional kwargs keys as frozenset for O(1) lookups
# These are extracted from kwargs only if present, avoiding unnecessary .get() calls
_OPTIONAL_KWARGS_KEYS = frozenset(
@ -103,6 +105,10 @@ def get_litellm_params(
if litellm_trace_id is None:
litellm_trace_id = _meta.get("trace_id") or _meta.get("session_id")
data_residency: Optional[str] = infer_openai_data_residency(
custom_llm_provider, api_base
)
# Build base dict with explicit parameters (always included)
litellm_params = {
"acompletion": acompletion,
@ -112,6 +118,7 @@ def get_litellm_params(
"verbose": verbose,
"custom_llm_provider": custom_llm_provider,
"api_base": api_base,
"data_residency": data_residency,
"litellm_call_id": litellm_call_id,
"model_alias_map": model_alias_map,
"completion_call_id": completion_call_id,

View File

@ -1546,6 +1546,11 @@ class Logging(LiteLLMLoggingBaseClass):
if self.optional_params
else None
),
"data_residency": (
self.litellm_params.get("data_residency")
if hasattr(self, "litellm_params") and self.litellm_params
else None
),
}
except Exception as e: # error creating kwargs for cost calculation
debug_info = StandardLoggingModelCostFailureDebugInformation(

View File

@ -9,6 +9,7 @@ from litellm.types.utils import (
CacheCreationTokenDetails,
CallTypes,
CompletionTokensDetailsWrapper,
DataResidency,
ImageResponse,
ModelInfo,
PassthroughCallTypes,
@ -617,11 +618,46 @@ def _calculate_input_cost(
return prompt_cost
def _get_regional_uplift_multiplier(
    model_info: ModelInfo, data_residency: Optional[str]
) -> float:
    """
    Resolve the per-model regional-processing uplift multiplier for a given
    data-residency region.

    OpenAI applies a flat percentage uplift (e.g. +10%) on all token costs for
    requests served from a regionalized hostname (eu./us.api.openai.com). The
    multiplier is stored on the model entry as
    ``regional_processing_uplift_multiplier_<region>`` (e.g. 1.10).

    Args:
        model_info: Model entry to read the multiplier field from.
        data_residency: Region tag (matched case-insensitively against the
            ``DataResidency`` values), or ``None`` when no region applies.

    Returns:
        The configured multiplier as a float. Returns 1.0 (no uplift) when
        ``data_residency`` is ``None`` or not a known region, when the model
        has no multiplier configured for the region, or when the configured
        value is not a usable number (non-numeric, NaN, infinite or negative).
    """
    # Function-scope import keeps this helper self-contained; only part of the
    # module's import block is visible here.
    import math

    if data_residency is None:
        return 1.0

    residency = data_residency.lower()
    if residency not in {r.value for r in DataResidency}:
        return 1.0

    multiplier = model_info.get(f"regional_processing_uplift_multiplier_{residency}")
    if multiplier is None:
        return 1.0

    try:
        uplift = float(cast(float, multiplier))
    except (TypeError, ValueError):
        verbose_logger.exception(
            "Invalid regional_processing_uplift_multiplier_%s for model; "
            "defaulting to 1.0",
            residency,
        )
        return 1.0

    # float() happily accepts NaN / inf (and JSON loaders may produce them);
    # multiplying costs by such a value, or by a negative one, would silently
    # corrupt spend tracking, so treat it like any other invalid config value.
    if not math.isfinite(uplift) or uplift < 0:
        verbose_logger.warning(
            "Invalid regional_processing_uplift_multiplier_%s value %r for model; "
            "defaulting to 1.0",
            residency,
            multiplier,
        )
        return 1.0

    return uplift
def generic_cost_per_token( # noqa: PLR0915
model: str,
usage: Usage,
custom_llm_provider: str,
service_tier: Optional[str] = None,
data_residency: Optional[str] = None,
) -> Tuple[float, float]:
"""
Calculates the cost per token for a given model, prompt tokens, and completion tokens.
@ -631,6 +667,8 @@ def generic_cost_per_token( # noqa: PLR0915
Input:
- model: str, the model name without provider prefix
- usage: LiteLLM Usage block, containing anthropic caching information
- data_residency: optional OpenAI data-residency region (e.g. "eu", "us"),
used to apply the per-model regional-processing uplift multiplier.
Returns:
Tuple[float, float] - prompt_cost_in_usd, completion_cost_in_usd
@ -781,6 +819,14 @@ def generic_cost_per_token( # noqa: PLR0915
)
completion_cost += float(image_tokens) * _output_cost_per_image_token
## REGIONAL DATA-RESIDENCY UPLIFT
# Applied as a flat multiplier across all token costs for the request
# when the upstream is a regionalized OpenAI host (eu./us.api.openai.com).
uplift = _get_regional_uplift_multiplier(model_info, data_residency)
if uplift != 1.0:
prompt_cost *= uplift
completion_cost *= uplift
return prompt_cost, completion_cost

View File

@ -19,7 +19,10 @@ def cost_router(call_type: CallTypes) -> Literal["cost_per_token", "cost_per_sec
def cost_per_token(
model: str, usage: Usage, service_tier: Optional[str] = None
model: str,
usage: Usage,
service_tier: Optional[str] = None,
data_residency: Optional[str] = None,
) -> Tuple[float, float]:
"""
Calculates the cost per token for a given model, prompt tokens, and completion tokens.
@ -27,6 +30,9 @@ def cost_per_token(
Input:
- model: str, the model name without provider prefix
- usage: LiteLLM Usage block, containing anthropic caching information
- data_residency: optional OpenAI data-residency region (e.g. "eu", "us"),
inferred from api_base. Applies the model's regional-processing
uplift multiplier when set.
Returns:
Tuple[float, float] - prompt_cost_in_usd, completion_cost_in_usd
@ -37,6 +43,7 @@ def cost_per_token(
usage=usage,
custom_llm_provider="openai",
service_tier=service_tier,
data_residency=data_residency,
)
# ### Non-cached text tokens
# non_cached_text_tokens = usage.prompt_tokens

View File

@ -0,0 +1,41 @@
"""
Helpers for resolving OpenAI data-residency (regional processing) from an
api_base URL.
OpenAI enforces hostname-per-region for projects with geography restrictions
enabled and rejects requests sent to the wrong host, so the api_base hostname
is the authoritative signal of which region a request was processed in.
"""
from typing import Dict, Optional
from urllib.parse import urlparse
# Mapping of OpenAI regional hostnames to the corresponding data-residency
# value used by the cost calculator. See
# https://developers.openai.com/api/docs/pricing for the regional-processing
# uplift these hostnames trigger.
_OPENAI_REGIONAL_HOSTS: Dict[str, str] = {
    "eu.api.openai.com": "eu",
    "us.api.openai.com": "us",
}


def infer_openai_data_residency(
    custom_llm_provider: Optional[str], api_base: Optional[str]
) -> Optional[str]:
    """
    Derive the OpenAI data-residency region from an api_base URL.

    Args:
        custom_llm_provider: Resolved provider name; only ``"openai"`` is
            ever mapped to a region.
        api_base: Base URL the request is sent to.

    Returns:
        ``"eu"`` for the EU regional host, ``"us"`` for the US regional
        host, and ``None`` for the default global host, any non-OpenAI
        provider, any non-OpenAI URL, or an api_base that is empty, not a
        string, or cannot be parsed.
    """
    if custom_llm_provider != "openai":
        return None
    # urlparse() raises AttributeError (not TypeError/ValueError) when handed
    # a non-str, non-bytes object, so reject anything that is not a non-empty
    # string up front; this helper must never break the request path.
    if not isinstance(api_base, str) or not api_base:
        return None

    try:
        host = urlparse(api_base).hostname
    except (TypeError, ValueError):
        return None
    if not host:
        return None

    # A fully-qualified hostname may carry a trailing dot
    # ("eu.api.openai.com."); strip it so it still matches the table.
    return _OPENAI_REGIONAL_HOSTS.get(host.lower().rstrip("."))

View File

@ -1011,6 +1011,7 @@
"supports_vision": true,
"tool_use_system_prompt_tokens": 346,
"supports_native_structured_output": true,
"supports_output_config": true,
"supports_max_reasoning_effort": true,
"supports_minimal_reasoning_effort": true
},
@ -1041,6 +1042,7 @@
"supports_vision": true,
"tool_use_system_prompt_tokens": 346,
"supports_native_structured_output": true,
"supports_output_config": true,
"supports_max_reasoning_effort": true,
"supports_minimal_reasoning_effort": true
},
@ -1071,6 +1073,7 @@
"supports_vision": true,
"tool_use_system_prompt_tokens": 346,
"supports_native_structured_output": true,
"supports_output_config": true,
"supports_max_reasoning_effort": true,
"supports_minimal_reasoning_effort": true
},
@ -1100,6 +1103,7 @@
"supports_vision": true,
"tool_use_system_prompt_tokens": 346,
"supports_native_structured_output": true,
"supports_output_config": true,
"supports_max_reasoning_effort": true,
"supports_minimal_reasoning_effort": true
},
@ -1129,6 +1133,7 @@
"supports_vision": true,
"tool_use_system_prompt_tokens": 346,
"supports_native_structured_output": true,
"supports_output_config": true,
"supports_max_reasoning_effort": true,
"supports_minimal_reasoning_effort": true
},
@ -1328,6 +1333,7 @@
"supports_vision": true,
"tool_use_system_prompt_tokens": 346,
"supports_native_structured_output": true,
"supports_output_config": true,
"supports_minimal_reasoning_effort": true
},
"global.anthropic.claude-sonnet-4-6": {
@ -1358,6 +1364,7 @@
"supports_vision": true,
"tool_use_system_prompt_tokens": 346,
"supports_native_structured_output": true,
"supports_output_config": true,
"supports_minimal_reasoning_effort": true
},
"us.anthropic.claude-sonnet-4-6": {
@ -1388,6 +1395,7 @@
"supports_vision": true,
"tool_use_system_prompt_tokens": 346,
"supports_native_structured_output": true,
"supports_output_config": true,
"supports_minimal_reasoning_effort": true
},
"eu.anthropic.claude-sonnet-4-6": {
@ -1417,6 +1425,7 @@
"supports_vision": true,
"tool_use_system_prompt_tokens": 346,
"supports_native_structured_output": true,
"supports_output_config": true,
"supports_minimal_reasoning_effort": true
},
"au.anthropic.claude-sonnet-4-6": {
@ -1446,6 +1455,7 @@
"supports_vision": true,
"tool_use_system_prompt_tokens": 346,
"supports_native_structured_output": true,
"supports_output_config": true,
"supports_minimal_reasoning_effort": true
},
"jp.anthropic.claude-sonnet-4-6": {
@ -1475,6 +1485,7 @@
"supports_vision": true,
"tool_use_system_prompt_tokens": 346,
"supports_native_structured_output": true,
"supports_output_config": true,
"supports_minimal_reasoning_effort": true
},
"anthropic.claude-sonnet-4-20250514-v1:0": {
@ -1996,6 +2007,7 @@
"supports_tool_choice": true,
"supports_vision": true,
"tool_use_system_prompt_tokens": 159,
"supports_output_config": true,
"supports_max_reasoning_effort": true,
"supports_minimal_reasoning_effort": true
},
@ -2093,6 +2105,7 @@
"supports_tool_choice": true,
"supports_vision": true,
"tool_use_system_prompt_tokens": 346,
"supports_output_config": true,
"supports_minimal_reasoning_effort": true
},
"azure/computer-use-preview": {
@ -9654,6 +9667,7 @@
"supports_tool_choice": true,
"supports_vision": true,
"tool_use_system_prompt_tokens": 346,
"supports_output_config": true,
"supports_minimal_reasoning_effort": true
},
"claude-sonnet-4-5-20250929-v1:0": {
@ -9851,6 +9865,7 @@
"us": 1.1,
"fast": 6.0
},
"supports_output_config": true,
"supports_max_reasoning_effort": true,
"supports_minimal_reasoning_effort": true
},
@ -9886,7 +9901,8 @@
"fast": 6.0
},
"supports_max_reasoning_effort": true,
"supports_minimal_reasoning_effort": true
"supports_minimal_reasoning_effort": true,
"supports_output_config": true
},
"claude-opus-4-7": {
"cache_creation_input_token_cost": 6.25e-06,
@ -9921,7 +9937,8 @@
"us": 1.1,
"fast": 6.0
},
"supports_minimal_reasoning_effort": true
"supports_minimal_reasoning_effort": true,
"supports_output_config": true
},
"claude-opus-4-7-20260416": {
"cache_creation_input_token_cost": 6.25e-06,
@ -9956,7 +9973,8 @@
"us": 1.1,
"fast": 6.0
},
"supports_minimal_reasoning_effort": true
"supports_minimal_reasoning_effort": true,
"supports_output_config": true
},
"claude-sonnet-4-20250514": {
"deprecation_date": "2026-05-14",
@ -14958,7 +14976,7 @@
"mode": "chat",
"output_cost_per_reasoning_token": 1.5e-06,
"output_cost_per_token": 1.5e-06,
"source": "https://cloud.google.com/vertex-ai/generative-ai/pricing#gemini-models",
"source": "https://ai.google.dev/gemini-api/docs/models",
"supported_endpoints": [
"/v1/chat/completions",
"/v1/completions",
@ -19014,6 +19032,8 @@
"output_cost_per_token": 8e-06,
"output_cost_per_token_batches": 4e-06,
"output_cost_per_token_priority": 1.4e-05,
"regional_processing_uplift_multiplier_eu": 1.10,
"regional_processing_uplift_multiplier_us": 1.10,
"supported_endpoints": [
"/v1/chat/completions",
"/v1/batch",
@ -19087,6 +19107,8 @@
"output_cost_per_token": 1.6e-06,
"output_cost_per_token_batches": 8e-07,
"output_cost_per_token_priority": 2.8e-06,
"regional_processing_uplift_multiplier_eu": 1.10,
"regional_processing_uplift_multiplier_us": 1.10,
"supported_endpoints": [
"/v1/chat/completions",
"/v1/batch",
@ -19160,6 +19182,8 @@
"output_cost_per_token": 4e-07,
"output_cost_per_token_batches": 2e-07,
"output_cost_per_token_priority": 8e-07,
"regional_processing_uplift_multiplier_eu": 1.10,
"regional_processing_uplift_multiplier_us": 1.10,
"supported_endpoints": [
"/v1/chat/completions",
"/v1/batch",
@ -19231,6 +19255,8 @@
"output_cost_per_token": 1e-05,
"output_cost_per_token_batches": 5e-06,
"output_cost_per_token_priority": 1.7e-05,
"regional_processing_uplift_multiplier_eu": 1.10,
"regional_processing_uplift_multiplier_us": 1.10,
"supports_function_calling": true,
"supports_parallel_function_calling": true,
"supports_pdf_input": true,
@ -19272,6 +19298,8 @@
"mode": "chat",
"output_cost_per_token": 1e-05,
"output_cost_per_token_batches": 5e-06,
"regional_processing_uplift_multiplier_eu": 1.10,
"regional_processing_uplift_multiplier_us": 1.10,
"supports_function_calling": true,
"supports_parallel_function_calling": true,
"supports_pdf_input": true,
@ -19293,6 +19321,8 @@
"mode": "chat",
"output_cost_per_token": 1e-05,
"output_cost_per_token_batches": 5e-06,
"regional_processing_uplift_multiplier_eu": 1.10,
"regional_processing_uplift_multiplier_us": 1.10,
"supports_function_calling": true,
"supports_parallel_function_calling": true,
"supports_pdf_input": true,
@ -19581,6 +19611,8 @@
"output_cost_per_token": 6e-07,
"output_cost_per_token_batches": 3e-07,
"output_cost_per_token_priority": 1e-06,
"regional_processing_uplift_multiplier_eu": 1.10,
"regional_processing_uplift_multiplier_us": 1.10,
"supports_function_calling": true,
"supports_parallel_function_calling": true,
"supports_pdf_input": true,
@ -20284,6 +20316,8 @@
"output_cost_per_token": 1e-05,
"output_cost_per_token_flex": 5e-06,
"output_cost_per_token_priority": 2e-05,
"regional_processing_uplift_multiplier_eu": 1.10,
"regional_processing_uplift_multiplier_us": 1.10,
"supported_endpoints": [
"/v1/chat/completions",
"/v1/batch",
@ -21206,6 +21240,8 @@
"mode": "responses",
"output_cost_per_token": 0.00012,
"output_cost_per_token_batches": 6e-05,
"regional_processing_uplift_multiplier_eu": 1.10,
"regional_processing_uplift_multiplier_us": 1.10,
"supported_endpoints": [
"/v1/batch",
"/v1/responses"
@ -21612,6 +21648,8 @@
"output_cost_per_token": 2e-06,
"output_cost_per_token_flex": 1e-06,
"output_cost_per_token_priority": 3.6e-06,
"regional_processing_uplift_multiplier_eu": 1.10,
"regional_processing_uplift_multiplier_us": 1.10,
"supported_endpoints": [
"/v1/chat/completions",
"/v1/batch",
@ -21693,6 +21731,8 @@
"max_input_tokens": 272000,
"max_output_tokens": 128000,
"max_tokens": 128000,
"regional_processing_uplift_multiplier_eu": 1.10,
"regional_processing_uplift_multiplier_us": 1.10,
"mode": "chat",
"output_cost_per_token": 4e-07,
"output_cost_per_token_flex": 2e-07,
@ -28243,10 +28283,10 @@
"supports_tool_choice": true
},
"openrouter/xiaomi/mimo-v2-flash": {
"input_cost_per_token": 9e-08,
"output_cost_per_token": 2.9e-07,
"input_cost_per_token": 1e-07,
"output_cost_per_token": 3e-07,
"cache_creation_input_token_cost": 0.0,
"cache_read_input_token_cost": 0.0,
"cache_read_input_token_cost": 1e-08,
"litellm_provider": "openrouter",
"max_input_tokens": 262144,
"max_output_tokens": 16384,
@ -28256,7 +28296,43 @@
"supports_tool_choice": true,
"supports_reasoning": true,
"supports_vision": false,
"supports_prompt_caching": false
"supports_prompt_caching": true
},
"openrouter/xiaomi/mimo-v2.5-pro": {
"input_cost_per_token": 1e-06,
"output_cost_per_token": 3e-06,
"cache_creation_input_token_cost": 0.0,
"cache_read_input_token_cost": 2e-07,
"litellm_provider": "openrouter",
"max_input_tokens": 1048576,
"max_output_tokens": 16384,
"max_tokens": 16384,
"mode": "chat",
"supports_function_calling": true,
"supports_tool_choice": true,
"supports_reasoning": true,
"supports_vision": false,
"supports_response_schema": true,
"supports_prompt_caching": true
},
"openrouter/xiaomi/mimo-v2.5": {
"input_cost_per_token": 4e-07,
"output_cost_per_token": 2e-06,
"cache_creation_input_token_cost": 0.0,
"cache_read_input_token_cost": 8e-08,
"litellm_provider": "openrouter",
"max_input_tokens": 1048576,
"max_output_tokens": 131072,
"max_tokens": 131072,
"mode": "chat",
"supports_function_calling": true,
"supports_tool_choice": true,
"supports_reasoning": true,
"supports_vision": true,
"supports_audio_input": true,
"supports_video_input": true,
"supports_response_schema": true,
"supports_prompt_caching": true
},
"openrouter/z-ai/glm-4.7": {
"input_cost_per_token": 4e-07,
@ -28987,14 +29063,16 @@
"mode": "responses",
"supports_web_search": true,
"supports_reasoning": false,
"supports_function_calling": true
"supports_function_calling": true,
"supports_output_config": true
},
"perplexity/anthropic/claude-opus-4-7": {
"litellm_provider": "perplexity",
"mode": "responses",
"supports_web_search": true,
"supports_reasoning": false,
"supports_function_calling": true
"supports_function_calling": true,
"supports_output_config": true
},
"perplexity/anthropic/claude-opus-4-5": {
"litellm_provider": "perplexity",
@ -33405,6 +33483,7 @@
"supports_tool_choice": true,
"supports_vision": true,
"tool_use_system_prompt_tokens": 346,
"supports_output_config": true,
"supports_max_reasoning_effort": true,
"supports_minimal_reasoning_effort": true
},
@ -33433,6 +33512,7 @@
"supports_tool_choice": true,
"supports_vision": true,
"tool_use_system_prompt_tokens": 346,
"supports_output_config": true,
"supports_max_reasoning_effort": true,
"supports_minimal_reasoning_effort": true
},
@ -33546,6 +33626,7 @@
"search_context_size_low": 0.01,
"search_context_size_medium": 0.01
},
"supports_output_config": true,
"supports_minimal_reasoning_effort": true
},
"vertex_ai/claude-sonnet-4-5@20250929": {
@ -40658,6 +40739,7 @@
"search_context_size_low": 0.01,
"search_context_size_medium": 0.01
},
"supports_output_config": true,
"supports_minimal_reasoning_effort": true
},
"duckduckgo/search": {

View File

@ -54,6 +54,7 @@ if TYPE_CHECKING:
else:
ResponseText = str # Fallback for ResponseText import
from litellm.litellm_core_utils.get_litellm_params import get_litellm_params
from litellm.llms.openai.data_residency import infer_openai_data_residency
from litellm.secret_managers.main import get_secret_str
from litellm.types.responses.main import *
from litellm.types.router import GenericLiteLLMParams
@ -1139,6 +1140,9 @@ def responses(
"aresponses": _is_async,
"litellm_call_id": litellm_call_id,
"model_info": kwargs.get("model_info"),
"data_residency": infer_openai_data_residency(
custom_llm_provider, litellm_params.api_base
),
"metadata": (
kwargs["litellm_metadata"]
if "litellm_metadata" in kwargs
@ -2032,6 +2036,9 @@ def compact_responses(
litellm_params={
**responses_api_request_params,
"litellm_call_id": litellm_call_id,
"data_residency": infer_openai_data_residency(
custom_llm_provider, litellm_params.api_base
),
},
custom_llm_provider=custom_llm_provider,
)
@ -2129,6 +2136,11 @@ async def _aresponses_websocket(
api_key=api_key,
)
litellm_params_dict["data_residency"] = infer_openai_data_residency(
_custom_llm_provider,
dynamic_api_base or litellm_params.api_base or litellm.api_base,
)
litellm_logging_obj.update_from_kwargs(
kwargs=kwargs,
model=model,

View File

@ -219,6 +219,12 @@ class ModelInfoBase(ProviderSpecificModelInfo, total=False):
output_cost_per_token_priority: Optional[
float
] # OpenAI priority service tier pricing
regional_processing_uplift_multiplier_eu: Optional[
float
] # OpenAI EU data-residency uplift multiplier applied to all token costs (e.g. 1.10 = +10%)
regional_processing_uplift_multiplier_us: Optional[
float
] # OpenAI US data-residency uplift multiplier applied to all token costs (e.g. 1.10 = +10%)
output_cost_per_character: Optional[float] # only for vertex ai models
output_cost_per_audio_token: Optional[float]
output_cost_per_token_above_128k_tokens: Optional[
@ -3601,6 +3607,20 @@ class ServiceTier(Enum):
PRIORITY = "priority"
class DataResidency(Enum):
    """
    Regions recognised for OpenAI data residency (regional processing).

    The region is inferred from the host of the OpenAI api_base:
    ``eu.api.openai.com`` corresponds to ``EU`` and ``us.api.openai.com``
    corresponds to ``US``. The region selects which
    ``regional_processing_uplift_multiplier_<region>`` field on ModelInfo
    provides the regional-processing cost uplift.
    """

    US = "us"
    EU = "eu"
LLMResponseTypes = Union[
ModelResponse,
EmbeddingResponse,

View File

@ -5942,6 +5942,12 @@ def _get_model_info_helper( # noqa: PLR0915
output_cost_per_token_priority=_model_info.get(
"output_cost_per_token_priority", None
),
regional_processing_uplift_multiplier_eu=_model_info.get(
"regional_processing_uplift_multiplier_eu", None
),
regional_processing_uplift_multiplier_us=_model_info.get(
"regional_processing_uplift_multiplier_us", None
),
output_cost_per_audio_token=_model_info.get(
"output_cost_per_audio_token", None
),

View File

@ -19050,6 +19050,8 @@
"output_cost_per_token": 8e-06,
"output_cost_per_token_batches": 4e-06,
"output_cost_per_token_priority": 1.4e-05,
"regional_processing_uplift_multiplier_eu": 1.10,
"regional_processing_uplift_multiplier_us": 1.10,
"supported_endpoints": [
"/v1/chat/completions",
"/v1/batch",
@ -19123,6 +19125,8 @@
"output_cost_per_token": 1.6e-06,
"output_cost_per_token_batches": 8e-07,
"output_cost_per_token_priority": 2.8e-06,
"regional_processing_uplift_multiplier_eu": 1.10,
"regional_processing_uplift_multiplier_us": 1.10,
"supported_endpoints": [
"/v1/chat/completions",
"/v1/batch",
@ -19196,6 +19200,8 @@
"output_cost_per_token": 4e-07,
"output_cost_per_token_batches": 2e-07,
"output_cost_per_token_priority": 8e-07,
"regional_processing_uplift_multiplier_eu": 1.10,
"regional_processing_uplift_multiplier_us": 1.10,
"supported_endpoints": [
"/v1/chat/completions",
"/v1/batch",
@ -19267,6 +19273,8 @@
"output_cost_per_token": 1e-05,
"output_cost_per_token_batches": 5e-06,
"output_cost_per_token_priority": 1.7e-05,
"regional_processing_uplift_multiplier_eu": 1.10,
"regional_processing_uplift_multiplier_us": 1.10,
"supports_function_calling": true,
"supports_parallel_function_calling": true,
"supports_pdf_input": true,
@ -19308,6 +19316,8 @@
"mode": "chat",
"output_cost_per_token": 1e-05,
"output_cost_per_token_batches": 5e-06,
"regional_processing_uplift_multiplier_eu": 1.10,
"regional_processing_uplift_multiplier_us": 1.10,
"supports_function_calling": true,
"supports_parallel_function_calling": true,
"supports_pdf_input": true,
@ -19329,6 +19339,8 @@
"mode": "chat",
"output_cost_per_token": 1e-05,
"output_cost_per_token_batches": 5e-06,
"regional_processing_uplift_multiplier_eu": 1.10,
"regional_processing_uplift_multiplier_us": 1.10,
"supports_function_calling": true,
"supports_parallel_function_calling": true,
"supports_pdf_input": true,
@ -19617,6 +19629,8 @@
"output_cost_per_token": 6e-07,
"output_cost_per_token_batches": 3e-07,
"output_cost_per_token_priority": 1e-06,
"regional_processing_uplift_multiplier_eu": 1.10,
"regional_processing_uplift_multiplier_us": 1.10,
"supports_function_calling": true,
"supports_parallel_function_calling": true,
"supports_pdf_input": true,
@ -20320,6 +20334,8 @@
"output_cost_per_token": 1e-05,
"output_cost_per_token_flex": 5e-06,
"output_cost_per_token_priority": 2e-05,
"regional_processing_uplift_multiplier_eu": 1.10,
"regional_processing_uplift_multiplier_us": 1.10,
"supported_endpoints": [
"/v1/chat/completions",
"/v1/batch",
@ -21242,6 +21258,8 @@
"mode": "responses",
"output_cost_per_token": 0.00012,
"output_cost_per_token_batches": 6e-05,
"regional_processing_uplift_multiplier_eu": 1.10,
"regional_processing_uplift_multiplier_us": 1.10,
"supported_endpoints": [
"/v1/batch",
"/v1/responses"
@ -21648,6 +21666,8 @@
"output_cost_per_token": 2e-06,
"output_cost_per_token_flex": 1e-06,
"output_cost_per_token_priority": 3.6e-06,
"regional_processing_uplift_multiplier_eu": 1.10,
"regional_processing_uplift_multiplier_us": 1.10,
"supported_endpoints": [
"/v1/chat/completions",
"/v1/batch",
@ -21729,6 +21749,8 @@
"max_input_tokens": 272000,
"max_output_tokens": 128000,
"max_tokens": 128000,
"regional_processing_uplift_multiplier_eu": 1.10,
"regional_processing_uplift_multiplier_us": 1.10,
"mode": "chat",
"output_cost_per_token": 4e-07,
"output_cost_per_token_flex": 2e-07,

View File

@ -145,6 +145,37 @@ def test_batch_cost_calculator_func_uses_custom_model_info():
), f"Expected total cost {expected}, got {cost}"
@pytest.mark.parametrize("data_residency", ["eu", "us"])
def test_batch_cost_calculator_applies_data_residency_uplift(
    data_residency, monkeypatch
):
    """With a data_residency region set, batch_cost_calculator must return
    prompt and completion costs scaled by gpt-5's configured 1.10 regional
    multiplier relative to the same call without a region."""
    monkeypatch.setenv("LITELLM_LOCAL_MODEL_COST_MAP", "True")
    # Snapshot the global cost map so it can be put back once the test ends.
    saved_cost_map = litellm.model_cost
    litellm.model_cost = litellm.get_model_cost_map(url="")
    try:
        token_usage = Usage(
            prompt_tokens=1000, completion_tokens=500, total_tokens=1500
        )
        shared_kwargs = {
            "usage": token_usage,
            "model": "gpt-5",
            "custom_llm_provider": "openai",
        }

        plain_prompt, plain_completion = batch_cost_calculator(**shared_kwargs)
        uplifted_prompt, uplifted_completion = batch_cost_calculator(
            **shared_kwargs,
            data_residency=data_residency,
        )

        assert plain_prompt > 0 and plain_completion > 0
        assert uplifted_prompt == pytest.approx(plain_prompt * 1.10, rel=1e-9)
        assert uplifted_completion == pytest.approx(
            plain_completion * 1.10, rel=1e-9
        )
    finally:
        litellm.model_cost = saved_cost_map
@pytest.mark.asyncio
async def test_calculate_batch_cost_and_usage_uses_custom_model_info():
"""calculate_batch_cost_and_usage should thread model_info."""

View File

@ -1418,3 +1418,123 @@ def test_image_count_prevents_text_tokens_fallback():
f"got {prompt_cost}. text_tokens fallback may be double-charging."
)
assert completion_cost == 0.0
# ---------------------------------------------------------------------------
# Data-residency (OpenAI regional processing) tests
# ---------------------------------------------------------------------------
@pytest.fixture
def _local_model_cost_map(monkeypatch):
    """Run the test with LITELLM_LOCAL_MODEL_COST_MAP="True" and a freshly
    loaded litellm.model_cost, restoring both afterwards.

    Both overrides are registered with monkeypatch, so the previous env value
    (or its absence) and the previous litellm.model_cost object are put back
    at teardown. That also holds when get_model_cost_map itself raises during
    setup, so the env var cannot leak into later tests.
    """
    # The env var must be set before the map is (re)loaded.
    monkeypatch.setenv("LITELLM_LOCAL_MODEL_COST_MAP", "True")
    monkeypatch.setattr(litellm, "model_cost", litellm.get_model_cost_map(url=""))
@pytest.mark.parametrize("data_residency", ["eu", "us"])
def test_data_residency_applies_uplift(data_residency, _local_model_cost_map):
    """With data_residency set, gpt-5 prompt cost, completion cost and their
    sum must each be 1.10x the cost computed without data_residency."""
    from litellm.types.utils import Usage

    # Arguments shared by the baseline and the regional pricing call.
    shared_kwargs = dict(
        model="gpt-5",
        usage=Usage(prompt_tokens=1000, completion_tokens=500, total_tokens=1500),
        custom_llm_provider="openai",
    )
    base = generic_cost_per_token(**shared_kwargs)
    regional = generic_cost_per_token(**shared_kwargs, data_residency=data_residency)

    base_total = base[0] + base[1]
    regional_total = regional[0] + regional[1]

    # A zero baseline would make the ratio checks pass trivially.
    assert base_total > 0
    assert regional_total == pytest.approx(base_total * 1.10, rel=1e-9)
    # Index 0 then index 1 of the returned cost tuple, checked individually.
    for position in (0, 1):
        assert regional[position] == pytest.approx(base[position] * 1.10, rel=1e-9)
def test_data_residency_no_uplift_for_unmarked_model(_local_model_cost_map):
    """Passing data_residency for gpt-3.5-turbo must return exactly the same
    cost as omitting it, and must not raise."""
    from litellm.types.utils import Usage

    shared_kwargs = dict(
        model="gpt-3.5-turbo",
        usage=Usage(prompt_tokens=1000, completion_tokens=500, total_tokens=1500),
        custom_llm_provider="openai",
    )
    base = generic_cost_per_token(**shared_kwargs)
    with_residency = generic_cost_per_token(**shared_kwargs, data_residency="eu")

    # Exact equality on purpose: no multiplier may be applied at all.
    assert base == with_residency
def test_data_residency_none_no_uplift(_local_model_cost_map):
    """Explicitly passing data_residency=None for gpt-5 must price the same
    as not passing the argument at all."""
    from litellm.types.utils import Usage

    shared_kwargs = dict(
        model="gpt-5",
        usage=Usage(prompt_tokens=1000, completion_tokens=500, total_tokens=1500),
        custom_llm_provider="openai",
    )
    base = generic_cost_per_token(**shared_kwargs)
    explicit_none = generic_cost_per_token(**shared_kwargs, data_residency=None)

    assert base == explicit_none
def test_data_residency_composes_with_service_tier(_local_model_cost_map):
    """With service_tier="priority", adding data_residency="eu" must scale the
    priority-tier total by 1.10 (the baseline here is the priority cost)."""
    from litellm.types.utils import Usage

    priority_kwargs = dict(
        model="gpt-5",
        usage=Usage(prompt_tokens=1000, completion_tokens=500, total_tokens=1500),
        custom_llm_provider="openai",
        service_tier="priority",
    )
    priority_base = generic_cost_per_token(**priority_kwargs)
    priority_eu = generic_cost_per_token(**priority_kwargs, data_residency="eu")

    priority_base_total = priority_base[0] + priority_base[1]
    priority_eu_total = priority_eu[0] + priority_eu[1]

    # A zero baseline would make the ratio check pass trivially.
    assert priority_base_total > 0
    assert priority_eu_total == pytest.approx(priority_base_total * 1.10, rel=1e-9)

View File

@ -125,3 +125,40 @@ class TestGetLitellmParamsExplicitFields:
def test_no_log_from_explicit_param(self):
    """no_log=True passed explicitly must surface as the "no-log" entry."""
    params = get_litellm_params(no_log=True)
    assert params["no-log"] is True
class TestGetLitellmParamsDataResidency:
    """Checks the data_residency entry returned by get_litellm_params for
    OpenAI regional, global and missing api_base values."""

    def test_eu_host_resolves_to_eu(self):
        """The eu.api.openai.com host with provider openai yields "eu"."""
        params = get_litellm_params(
            api_base="https://eu.api.openai.com/v1",
            custom_llm_provider="openai",
        )
        assert params["data_residency"] == "eu"

    def test_us_host_resolves_to_us(self):
        """The us.api.openai.com host with provider openai yields "us"."""
        params = get_litellm_params(
            api_base="https://us.api.openai.com/v1",
            custom_llm_provider="openai",
        )
        assert params["data_residency"] == "us"

    def test_global_host_resolves_to_none(self):
        """The non-regional api.openai.com host yields None."""
        params = get_litellm_params(
            api_base="https://api.openai.com/v1",
            custom_llm_provider="openai",
        )
        assert params["data_residency"] is None

    def test_no_api_base_is_none(self):
        """Without any api_base the entry is None."""
        params = get_litellm_params(custom_llm_provider="openai")
        assert params["data_residency"] is None

    def test_non_openai_provider_does_not_resolve(self):
        """Regional OpenAI host doesn't apply to other providers."""
        params = get_litellm_params(
            api_base="https://eu.api.openai.com/v1",
            custom_llm_provider="anthropic",
        )
        assert params["data_residency"] is None

View File

@ -0,0 +1,134 @@
"""
Tests that data_residency is correctly populated on the litellm logging
object's litellm_params for OpenAI Responses paths, even when
custom_llm_provider is resolved from the model string inside responses()
rather than passed explicitly.
"""
import json
from unittest.mock import MagicMock, patch
import litellm
def _make_responses_api_response_body() -> dict:
return {
"id": "resp-test",
"object": "response",
"created_at": 1234567890,
"model": "gpt-4.1",
"output": [
{
"type": "message",
"id": "msg-test",
"status": "completed",
"role": "assistant",
"content": [
{
"type": "output_text",
"text": "ok",
"annotations": [],
}
],
}
],
"status": "completed",
"usage": {
"input_tokens": 1,
"output_tokens": 1,
"total_tokens": 2,
},
}
def _make_mock_http_client(response_body: dict) -> MagicMock:
mock_client = MagicMock()
mock_response = MagicMock()
mock_response.status_code = 200
mock_response.headers = {"content-type": "application/json"}
mock_response.json.return_value = response_body
mock_response.text = json.dumps(response_body)
mock_client.post.return_value = mock_response
return mock_client
def _capture_logging_obj():
    """Return (store, init_spy) for observing litellm.Logging construction.

    init_spy is meant to replace litellm.Logging.__init__: it delegates to the
    __init__ that was in place when this helper was called, then records the
    instance under store["logging_obj"]. Each new instance overwrites the
    previous entry.
    """
    store = {}
    original_init = litellm.Logging.__init__

    def init_spy(self, *args, **kwargs):
        original_init(self, *args, **kwargs)
        store["logging_obj"] = self

    return store, init_spy
def _call_responses_and_capture_logging(api_base: str):
    """Call litellm.responses for gpt-4.1 against a mocked HTTP client and
    return the Logging object recorded during the call.

    custom_llm_provider is deliberately not passed, so the provider has to be
    resolved inside responses() from the model string.
    """
    mock_client = _make_mock_http_client(_make_responses_api_response_body())
    captured, init_spy = _capture_logging_obj()
    with (
        patch(
            "litellm.llms.custom_httpx.llm_http_handler._get_httpx_client",
            return_value=mock_client,
        ),
        patch.object(litellm.Logging, "__init__", init_spy),
    ):
        litellm.responses(
            model="gpt-4.1",
            input="hi",
            api_base=api_base,
            api_key="test-key",
        )
    return captured["logging_obj"]


def test_responses_eu_api_base_sets_data_residency():
    """When api_base is a regional OpenAI host and custom_llm_provider is
    inferred from the model (not passed explicitly), data_residency must end
    up on the logging object's litellm_params so the cost calculator can apply
    the regional uplift."""
    logging_obj = _call_responses_and_capture_logging("https://eu.api.openai.com/v1")
    assert logging_obj.litellm_params.get("data_residency") == "eu"


def test_responses_us_api_base_sets_data_residency():
    """Same as the EU case, for the us.api.openai.com regional host."""
    logging_obj = _call_responses_and_capture_logging("https://us.api.openai.com/v1")
    assert logging_obj.litellm_params.get("data_residency") == "us"


def test_responses_global_api_base_leaves_data_residency_none():
    """The non-regional api.openai.com host must not produce a data_residency
    value on the logging object's litellm_params."""
    logging_obj = _call_responses_and_capture_logging("https://api.openai.com/v1")
    assert logging_obj.litellm_params.get("data_residency") is None

View File

@ -0,0 +1,34 @@
"""Tests for the OpenAI data-residency inference helper."""
import pytest
from litellm.llms.openai.data_residency import infer_openai_data_residency
@pytest.mark.parametrize(
    "api_base, expected",
    [
        # Regional hosts, with and without a path, plus an upper-case host.
        ("https://eu.api.openai.com/v1", "eu"),
        ("https://eu.api.openai.com", "eu"),
        ("https://us.api.openai.com/v1", "us"),
        ("https://us.api.openai.com", "us"),
        ("https://EU.api.openai.com/v1", "eu"),
        # Global OpenAI host and unrelated hosts.
        ("https://api.openai.com/v1", None),
        ("https://api.openai.com", None),
        ("https://example.com/v1", None),
        ("https://my-azure-endpoint.openai.azure.com/openai/deployments/foo", None),
        # Empty, missing and unparseable values.
        ("", None),
        (None, None),
        ("not a url", None),
    ],
)
def test_infer_openai_data_residency(api_base, expected):
    """For provider "openai", the helper must map each api_base to the
    expected region tag, or to None when no region applies."""
    inferred = infer_openai_data_residency("openai", api_base)
    assert inferred == expected
@pytest.mark.parametrize("custom_llm_provider", [None, "anthropic", "azure", "bedrock"])
def test_infer_openai_data_residency_non_openai_provider(custom_llm_provider):
    """A regional OpenAI api_base must yield None when the provider is
    missing or is anything other than openai."""
    regional_base = "https://eu.api.openai.com/v1"
    inferred = infer_openai_data_residency(custom_llm_provider, regional_base)
    assert inferred is None

View File

@ -737,6 +737,8 @@ def test_aaamodel_prices_and_context_window_json_is_valid():
"output_cost_per_token_priority": {"type": "number"},
"output_cost_per_token_above_200k_tokens_priority": {"type": "number"},
"output_cost_per_token_above_272k_tokens_priority": {"type": "number"},
"regional_processing_uplift_multiplier_eu": {"type": "number"},
"regional_processing_uplift_multiplier_us": {"type": "number"},
"input_cost_per_pixel": {"type": "number"},
"input_cost_per_query": {"type": "number"},
"input_cost_per_request": {"type": "number"},