From c23b19f09c5565abd3607eab540e7697a7fe6b2e Mon Sep 17 00:00:00 2001 From: Mateo Wang <277851410+mateo-berri@users.noreply.github.com> Date: Mon, 25 May 2026 20:36:14 -0700 Subject: [PATCH] feat(openai): apply regional-processing cost uplift for EU/US data residency (#28626) * feat(openai): apply regional-processing cost uplift for EU/US data residency OpenAI charges a 10% uplift on the latest GPT models when requests are served from a regionalized hostname (eu./us.api.openai.com). Infer the region from `api_base`, expose it on `kwargs["litellm_params"]["data_residency"]`, and multiply the computed cost by a per-model `regional_processing_uplift_multiplier_{eu,us}` field. https://claude.ai/code/session_012ebH44s7ohYxjoix5CXzTW * test: allow regional_processing_uplift_multiplier_{eu,us} in model_prices schema * fix(cost): tighten data_residency inference and restore model_cost in tests - Only infer OpenAI data_residency when custom_llm_provider == "openai"; drop the implicit None fallback so non-OpenAI callers can't accidentally pick up a regional tag from a stray OpenAI hostname. - _local_model_cost_map fixture now snapshots and restores litellm.model_cost and LITELLM_LOCAL_MODEL_COST_MAP so tests don't leak state across the session. * refactor(openai): move data_residency helper under llms/openai * fix: thread data_residency through realtime stream cost calculation Co-authored-by: Yassin Kortam * fix(cost): thread data_residency through batch_cost_calculator Apply the OpenAI regional-processing uplift multiplier to retrieve_batch cost paths so Batch API requests served via eu./us.api.openai.com are priced at the same uplifted token rates as completions/transcriptions. * refactor(openai): encapsulate provider check inside infer_openai_data_residency Move the custom_llm_provider == "openai" guard from get_litellm_params into the helper itself so the core utility no longer carries provider-specific dispatch logic. 
Callers pass through the provider unconditionally; the helper returns None for any non-OpenAI provider. * fix(responses): thread data_residency through Responses logging params The Responses API paths build their logging litellm_params dict after provider resolution but did not include data_residency, so cost calc saw None even when the effective api_base was a regional OpenAI host. --------- Co-authored-by: Claude Co-authored-by: Cursor Agent Co-authored-by: Yassin Kortam --- litellm/cost_calculator.py | 37 ++++- .../litellm_core_utils/get_litellm_params.py | 7 + litellm/litellm_core_utils/litellm_logging.py | 5 + .../litellm_core_utils/llm_cost_calc/utils.py | 46 ++++++ litellm/llms/openai/cost_calculation.py | 9 +- litellm/llms/openai/data_residency.py | 41 ++++++ ...odel_prices_and_context_window_backup.json | 102 +++++++++++-- litellm/responses/main.py | 12 ++ litellm/types/utils.py | 20 +++ litellm/utils.py | 6 + model_prices_and_context_window.json | 22 +++ .../test_batch_custom_pricing.py | 31 ++++ .../llm_cost_calc/test_llm_cost_calc_utils.py | 120 ++++++++++++++++ .../test_get_litellm_params.py | 37 +++++ .../test_openai_responses_data_residency.py | 134 ++++++++++++++++++ .../llms/openai/test_data_residency.py | 34 +++++ tests/test_litellm/test_utils.py | 2 + 17 files changed, 652 insertions(+), 13 deletions(-) create mode 100644 litellm/llms/openai/data_residency.py create mode 100644 tests/test_litellm/llms/openai/responses/test_openai_responses_data_residency.py create mode 100644 tests/test_litellm/llms/openai/test_data_residency.py diff --git a/litellm/cost_calculator.py b/litellm/cost_calculator.py index 98e00cf578..ab882559d3 100644 --- a/litellm/cost_calculator.py +++ b/litellm/cost_calculator.py @@ -24,6 +24,7 @@ from litellm.litellm_core_utils.llm_cost_calc.usage_object_transformation import from litellm.litellm_core_utils.llm_cost_calc.utils import ( CostCalculatorUtils, _generic_cost_per_character, + _get_regional_uplift_multiplier, 
_get_service_tier_cost_key, _parse_prompt_tokens_details, calculate_cost_component, @@ -312,6 +313,10 @@ def cost_per_token( # noqa: PLR0915 audio_transcription_file_duration: float = 0.0, # for audio transcription calls - the file time in seconds ### SERVICE TIER ### service_tier: Optional[str] = None, # for OpenAI service tier pricing + ### DATA RESIDENCY ### + data_residency: Optional[ + str + ] = None, # for OpenAI regional-processing uplift (e.g. "eu", "us") response: Optional[Any] = None, ### REQUEST MODEL ### request_model: Optional[str] = None, # original request model for router detection @@ -493,6 +498,7 @@ def cost_per_token( # noqa: PLR0915 usage=usage_block, custom_llm_provider=custom_llm_provider, service_tier=service_tier, + data_residency=data_residency, ) return prompt_cost, completion_cost @@ -521,7 +527,10 @@ def cost_per_token( # noqa: PLR0915 or call_type == CallTypes.retrieve_batch ): return batch_cost_calculator( - usage=usage_block, model=model, custom_llm_provider=custom_llm_provider + usage=usage_block, + model=model, + custom_llm_provider=custom_llm_provider, + data_residency=data_residency, ) elif call_type == "atranscription" or call_type == "transcription": if _transcription_usage_has_token_details(usage_block): @@ -529,6 +538,7 @@ def cost_per_token( # noqa: PLR0915 model=model_without_prefix, usage=usage_block, service_tier=service_tier, + data_residency=data_residency, ) return openai_cost_per_second( @@ -579,7 +589,10 @@ def cost_per_token( # noqa: PLR0915 ) elif custom_llm_provider == "openai": return openai_cost_per_token( - model=model, usage=usage_block, service_tier=service_tier + model=model, + usage=usage_block, + service_tier=service_tier, + data_residency=data_residency, ) elif custom_llm_provider == "databricks": return databricks_cost_per_token(model=model, usage=usage_block) @@ -631,6 +644,7 @@ def cost_per_token( # noqa: PLR0915 usage=usage_block, custom_llm_provider=custom_llm_provider, service_tier=service_tier, + 
data_residency=data_residency, ) if ( @@ -1117,6 +1131,10 @@ def completion_cost( # noqa: PLR0915 litellm_logging_obj: Optional[LitellmLoggingObject] = None, ### SERVICE TIER ### service_tier: Optional[str] = None, # for OpenAI service tier pricing + ### DATA RESIDENCY ### + data_residency: Optional[ + str + ] = None, # for OpenAI regional-processing uplift (e.g. "eu", "us") ) -> float: """ Calculate the cost of a given completion call fot GPT-3.5-turbo, llama2, any litellm supported llm. @@ -1516,6 +1534,7 @@ def completion_cost( # noqa: PLR0915 combined_usage_object=cost_per_token_usage_object, custom_llm_provider=custom_llm_provider, litellm_model_name=model, + data_residency=data_residency, ) elif call_type == _MCP_CALL_TYPE: from litellm.proxy._experimental.mcp_server.cost_calculator import ( @@ -1600,6 +1619,7 @@ def completion_cost( # noqa: PLR0915 audio_transcription_file_duration=audio_transcription_file_duration, rerank_billed_units=rerank_billed_units, service_tier=service_tier, + data_residency=data_residency, response=completion_response, request_model=request_model_for_cost, ) @@ -1811,6 +1831,10 @@ def response_cost_calculator( litellm_logging_obj: Optional[LitellmLoggingObject] = None, ### SERVICE TIER ### service_tier: Optional[str] = None, # for OpenAI service tier pricing + ### DATA RESIDENCY ### + data_residency: Optional[ + str + ] = None, # for OpenAI regional-processing uplift (e.g. "eu", "us") ) -> float: """ Returns @@ -1844,6 +1868,7 @@ def response_cost_calculator( router_model_id=router_model_id, litellm_logging_obj=litellm_logging_obj, service_tier=service_tier, + data_residency=data_residency, ) return response_cost except Exception as e: @@ -2202,6 +2227,7 @@ def batch_cost_calculator( model: str, custom_llm_provider: Optional[str] = None, model_info: Optional[ModelInfo] = None, + data_residency: Optional[str] = None, ) -> Tuple[float, float]: """ Calculate the cost of a batch job. 
@@ -2286,6 +2312,11 @@ def batch_cost_calculator( usage.completion_tokens * (output_cost_per_token) / 2 ) # batch cost is usually half of the regular token cost + uplift = _get_regional_uplift_multiplier(model_info, data_residency) + if uplift != 1.0: + total_prompt_cost *= uplift + total_completion_cost *= uplift + return total_prompt_cost, total_completion_cost @@ -2431,6 +2462,7 @@ def handle_realtime_stream_cost_calculation( combined_usage_object: Usage, custom_llm_provider: str, litellm_model_name: str, + data_residency: Optional[str] = None, ) -> float: """ Handles the cost calculation for realtime stream responses. @@ -2461,6 +2493,7 @@ def handle_realtime_stream_cost_calculation( model=model_name, usage=combined_usage_object, custom_llm_provider=custom_llm_provider, + data_residency=data_residency, ) except Exception: continue diff --git a/litellm/litellm_core_utils/get_litellm_params.py b/litellm/litellm_core_utils/get_litellm_params.py index ad9538ac17..b32803b5df 100644 --- a/litellm/litellm_core_utils/get_litellm_params.py +++ b/litellm/litellm_core_utils/get_litellm_params.py @@ -1,5 +1,7 @@ from typing import Optional +from litellm.llms.openai.data_residency import infer_openai_data_residency + # Pre-define optional kwargs keys as frozenset for O(1) lookups # These are extracted from kwargs only if present, avoiding unnecessary .get() calls _OPTIONAL_KWARGS_KEYS = frozenset( @@ -103,6 +105,10 @@ def get_litellm_params( if litellm_trace_id is None: litellm_trace_id = _meta.get("trace_id") or _meta.get("session_id") + data_residency: Optional[str] = infer_openai_data_residency( + custom_llm_provider, api_base + ) + # Build base dict with explicit parameters (always included) litellm_params = { "acompletion": acompletion, @@ -112,6 +118,7 @@ def get_litellm_params( "verbose": verbose, "custom_llm_provider": custom_llm_provider, "api_base": api_base, + "data_residency": data_residency, "litellm_call_id": litellm_call_id, "model_alias_map": 
model_alias_map, "completion_call_id": completion_call_id, diff --git a/litellm/litellm_core_utils/litellm_logging.py b/litellm/litellm_core_utils/litellm_logging.py index 63fa0e6469..ef0e674715 100644 --- a/litellm/litellm_core_utils/litellm_logging.py +++ b/litellm/litellm_core_utils/litellm_logging.py @@ -1546,6 +1546,11 @@ class Logging(LiteLLMLoggingBaseClass): if self.optional_params else None ), + "data_residency": ( + self.litellm_params.get("data_residency") + if hasattr(self, "litellm_params") and self.litellm_params + else None + ), } except Exception as e: # error creating kwargs for cost calculation debug_info = StandardLoggingModelCostFailureDebugInformation( diff --git a/litellm/litellm_core_utils/llm_cost_calc/utils.py b/litellm/litellm_core_utils/llm_cost_calc/utils.py index 59d0465e6d..6c999590dd 100644 --- a/litellm/litellm_core_utils/llm_cost_calc/utils.py +++ b/litellm/litellm_core_utils/llm_cost_calc/utils.py @@ -9,6 +9,7 @@ from litellm.types.utils import ( CacheCreationTokenDetails, CallTypes, CompletionTokensDetailsWrapper, + DataResidency, ImageResponse, ModelInfo, PassthroughCallTypes, @@ -617,11 +618,46 @@ def _calculate_input_cost( return prompt_cost +def _get_regional_uplift_multiplier( + model_info: ModelInfo, data_residency: Optional[str] +) -> float: + """ + Resolve the per-model regional-processing uplift multiplier for a given + data-residency region. + + OpenAI applies a flat percentage uplift (e.g. +10%) on all token costs for + requests served from a regionalized hostname (eu./us.api.openai.com). The + multiplier is stored on the model entry as + ``regional_processing_uplift_multiplier_`` (e.g. 1.10). + + Returns 1.0 (no uplift) when ``data_residency`` is ``None`` or when the + model has no multiplier configured for the given region. 
+ """ + if data_residency is None: + return 1.0 + residency = data_residency.lower() + if residency not in {r.value for r in DataResidency}: + return 1.0 + multiplier = model_info.get(f"regional_processing_uplift_multiplier_{residency}") + if multiplier is None: + return 1.0 + try: + return float(cast(float, multiplier)) + except (TypeError, ValueError): + verbose_logger.exception( + "Invalid regional_processing_uplift_multiplier_%s for model; " + "defaulting to 1.0", + residency, + ) + return 1.0 + + def generic_cost_per_token( # noqa: PLR0915 model: str, usage: Usage, custom_llm_provider: str, service_tier: Optional[str] = None, + data_residency: Optional[str] = None, ) -> Tuple[float, float]: """ Calculates the cost per token for a given model, prompt tokens, and completion tokens. @@ -631,6 +667,8 @@ def generic_cost_per_token( # noqa: PLR0915 Input: - model: str, the model name without provider prefix - usage: LiteLLM Usage block, containing anthropic caching information + - data_residency: optional OpenAI data-residency region (e.g. "eu", "us"), + used to apply the per-model regional-processing uplift multiplier. Returns: Tuple[float, float] - prompt_cost_in_usd, completion_cost_in_usd @@ -781,6 +819,14 @@ def generic_cost_per_token( # noqa: PLR0915 ) completion_cost += float(image_tokens) * _output_cost_per_image_token + ## REGIONAL DATA-RESIDENCY UPLIFT + # Applied as a flat multiplier across all token costs for the request + # when the upstream is a regionalized OpenAI host (eu./us.api.openai.com). 
+ uplift = _get_regional_uplift_multiplier(model_info, data_residency) + if uplift != 1.0: + prompt_cost *= uplift + completion_cost *= uplift + return prompt_cost, completion_cost diff --git a/litellm/llms/openai/cost_calculation.py b/litellm/llms/openai/cost_calculation.py index 32b71a43af..6935cafd0d 100644 --- a/litellm/llms/openai/cost_calculation.py +++ b/litellm/llms/openai/cost_calculation.py @@ -19,7 +19,10 @@ def cost_router(call_type: CallTypes) -> Literal["cost_per_token", "cost_per_sec def cost_per_token( - model: str, usage: Usage, service_tier: Optional[str] = None + model: str, + usage: Usage, + service_tier: Optional[str] = None, + data_residency: Optional[str] = None, ) -> Tuple[float, float]: """ Calculates the cost per token for a given model, prompt tokens, and completion tokens. @@ -27,6 +30,9 @@ def cost_per_token( Input: - model: str, the model name without provider prefix - usage: LiteLLM Usage block, containing anthropic caching information + - data_residency: optional OpenAI data-residency region (e.g. "eu", "us"), + inferred from api_base. Applies the model's regional-processing + uplift multiplier when set. Returns: Tuple[float, float] - prompt_cost_in_usd, completion_cost_in_usd @@ -37,6 +43,7 @@ def cost_per_token( usage=usage, custom_llm_provider="openai", service_tier=service_tier, + data_residency=data_residency, ) # ### Non-cached text tokens # non_cached_text_tokens = usage.prompt_tokens diff --git a/litellm/llms/openai/data_residency.py b/litellm/llms/openai/data_residency.py new file mode 100644 index 0000000000..7162f70ca5 --- /dev/null +++ b/litellm/llms/openai/data_residency.py @@ -0,0 +1,41 @@ +""" +Helpers for resolving OpenAI data-residency (regional processing) from an +api_base URL. + +OpenAI enforces hostname-per-region for projects with geography restrictions +enabled and rejects requests sent to the wrong host, so the api_base hostname +is the authoritative signal of which region a request was processed in. 
+""" + +from typing import Dict, Optional +from urllib.parse import urlparse + +# Mapping of OpenAI regional hostnames to the corresponding data-residency +# value used by the cost calculator. See +# https://developers.openai.com/api/docs/pricing for the regional-processing +# uplift these hostnames trigger. +_OPENAI_REGIONAL_HOSTS: Dict[str, str] = { + "eu.api.openai.com": "eu", + "us.api.openai.com": "us", +} + + +def infer_openai_data_residency( + custom_llm_provider: Optional[str], api_base: Optional[str] +) -> Optional[str]: + """ + Derive the OpenAI data-residency region from an api_base URL. + + Returns ``"eu"`` for the EU regional host, ``"us"`` for the US regional + host, and ``None`` for the default global host, any non-OpenAI provider, + or any non-OpenAI URL. + """ + if custom_llm_provider != "openai" or not api_base: + return None + try: + host = urlparse(api_base).hostname + except (TypeError, ValueError): + return None + if not host: + return None + return _OPENAI_REGIONAL_HOSTS.get(host.lower()) diff --git a/litellm/model_prices_and_context_window_backup.json b/litellm/model_prices_and_context_window_backup.json index 62e576ea0f..bed030d284 100644 --- a/litellm/model_prices_and_context_window_backup.json +++ b/litellm/model_prices_and_context_window_backup.json @@ -1011,6 +1011,7 @@ "supports_vision": true, "tool_use_system_prompt_tokens": 346, "supports_native_structured_output": true, + "supports_output_config": true, "supports_max_reasoning_effort": true, "supports_minimal_reasoning_effort": true }, @@ -1041,6 +1042,7 @@ "supports_vision": true, "tool_use_system_prompt_tokens": 346, "supports_native_structured_output": true, + "supports_output_config": true, "supports_max_reasoning_effort": true, "supports_minimal_reasoning_effort": true }, @@ -1071,6 +1073,7 @@ "supports_vision": true, "tool_use_system_prompt_tokens": 346, "supports_native_structured_output": true, + "supports_output_config": true, "supports_max_reasoning_effort": true, 
"supports_minimal_reasoning_effort": true }, @@ -1100,6 +1103,7 @@ "supports_vision": true, "tool_use_system_prompt_tokens": 346, "supports_native_structured_output": true, + "supports_output_config": true, "supports_max_reasoning_effort": true, "supports_minimal_reasoning_effort": true }, @@ -1129,6 +1133,7 @@ "supports_vision": true, "tool_use_system_prompt_tokens": 346, "supports_native_structured_output": true, + "supports_output_config": true, "supports_max_reasoning_effort": true, "supports_minimal_reasoning_effort": true }, @@ -1328,6 +1333,7 @@ "supports_vision": true, "tool_use_system_prompt_tokens": 346, "supports_native_structured_output": true, + "supports_output_config": true, "supports_minimal_reasoning_effort": true }, "global.anthropic.claude-sonnet-4-6": { @@ -1358,6 +1364,7 @@ "supports_vision": true, "tool_use_system_prompt_tokens": 346, "supports_native_structured_output": true, + "supports_output_config": true, "supports_minimal_reasoning_effort": true }, "us.anthropic.claude-sonnet-4-6": { @@ -1388,6 +1395,7 @@ "supports_vision": true, "tool_use_system_prompt_tokens": 346, "supports_native_structured_output": true, + "supports_output_config": true, "supports_minimal_reasoning_effort": true }, "eu.anthropic.claude-sonnet-4-6": { @@ -1417,6 +1425,7 @@ "supports_vision": true, "tool_use_system_prompt_tokens": 346, "supports_native_structured_output": true, + "supports_output_config": true, "supports_minimal_reasoning_effort": true }, "au.anthropic.claude-sonnet-4-6": { @@ -1446,6 +1455,7 @@ "supports_vision": true, "tool_use_system_prompt_tokens": 346, "supports_native_structured_output": true, + "supports_output_config": true, "supports_minimal_reasoning_effort": true }, "jp.anthropic.claude-sonnet-4-6": { @@ -1475,6 +1485,7 @@ "supports_vision": true, "tool_use_system_prompt_tokens": 346, "supports_native_structured_output": true, + "supports_output_config": true, "supports_minimal_reasoning_effort": true }, 
"anthropic.claude-sonnet-4-20250514-v1:0": { @@ -1996,6 +2007,7 @@ "supports_tool_choice": true, "supports_vision": true, "tool_use_system_prompt_tokens": 159, + "supports_output_config": true, "supports_max_reasoning_effort": true, "supports_minimal_reasoning_effort": true }, @@ -2093,6 +2105,7 @@ "supports_tool_choice": true, "supports_vision": true, "tool_use_system_prompt_tokens": 346, + "supports_output_config": true, "supports_minimal_reasoning_effort": true }, "azure/computer-use-preview": { @@ -9654,6 +9667,7 @@ "supports_tool_choice": true, "supports_vision": true, "tool_use_system_prompt_tokens": 346, + "supports_output_config": true, "supports_minimal_reasoning_effort": true }, "claude-sonnet-4-5-20250929-v1:0": { @@ -9851,6 +9865,7 @@ "us": 1.1, "fast": 6.0 }, + "supports_output_config": true, "supports_max_reasoning_effort": true, "supports_minimal_reasoning_effort": true }, @@ -9886,7 +9901,8 @@ "fast": 6.0 }, "supports_max_reasoning_effort": true, - "supports_minimal_reasoning_effort": true + "supports_minimal_reasoning_effort": true, + "supports_output_config": true }, "claude-opus-4-7": { "cache_creation_input_token_cost": 6.25e-06, @@ -9921,7 +9937,8 @@ "us": 1.1, "fast": 6.0 }, - "supports_minimal_reasoning_effort": true + "supports_minimal_reasoning_effort": true, + "supports_output_config": true }, "claude-opus-4-7-20260416": { "cache_creation_input_token_cost": 6.25e-06, @@ -9956,7 +9973,8 @@ "us": 1.1, "fast": 6.0 }, - "supports_minimal_reasoning_effort": true + "supports_minimal_reasoning_effort": true, + "supports_output_config": true }, "claude-sonnet-4-20250514": { "deprecation_date": "2026-05-14", @@ -14958,7 +14976,7 @@ "mode": "chat", "output_cost_per_reasoning_token": 1.5e-06, "output_cost_per_token": 1.5e-06, - "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing#gemini-models", + "source": "https://ai.google.dev/gemini-api/docs/models", "supported_endpoints": [ "/v1/chat/completions", "/v1/completions", @@ -19014,6 
+19032,8 @@ "output_cost_per_token": 8e-06, "output_cost_per_token_batches": 4e-06, "output_cost_per_token_priority": 1.4e-05, + "regional_processing_uplift_multiplier_eu": 1.10, + "regional_processing_uplift_multiplier_us": 1.10, "supported_endpoints": [ "/v1/chat/completions", "/v1/batch", @@ -19087,6 +19107,8 @@ "output_cost_per_token": 1.6e-06, "output_cost_per_token_batches": 8e-07, "output_cost_per_token_priority": 2.8e-06, + "regional_processing_uplift_multiplier_eu": 1.10, + "regional_processing_uplift_multiplier_us": 1.10, "supported_endpoints": [ "/v1/chat/completions", "/v1/batch", @@ -19160,6 +19182,8 @@ "output_cost_per_token": 4e-07, "output_cost_per_token_batches": 2e-07, "output_cost_per_token_priority": 8e-07, + "regional_processing_uplift_multiplier_eu": 1.10, + "regional_processing_uplift_multiplier_us": 1.10, "supported_endpoints": [ "/v1/chat/completions", "/v1/batch", @@ -19231,6 +19255,8 @@ "output_cost_per_token": 1e-05, "output_cost_per_token_batches": 5e-06, "output_cost_per_token_priority": 1.7e-05, + "regional_processing_uplift_multiplier_eu": 1.10, + "regional_processing_uplift_multiplier_us": 1.10, "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_pdf_input": true, @@ -19272,6 +19298,8 @@ "mode": "chat", "output_cost_per_token": 1e-05, "output_cost_per_token_batches": 5e-06, + "regional_processing_uplift_multiplier_eu": 1.10, + "regional_processing_uplift_multiplier_us": 1.10, "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_pdf_input": true, @@ -19293,6 +19321,8 @@ "mode": "chat", "output_cost_per_token": 1e-05, "output_cost_per_token_batches": 5e-06, + "regional_processing_uplift_multiplier_eu": 1.10, + "regional_processing_uplift_multiplier_us": 1.10, "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_pdf_input": true, @@ -19581,6 +19611,8 @@ "output_cost_per_token": 6e-07, "output_cost_per_token_batches": 3e-07, 
"output_cost_per_token_priority": 1e-06, + "regional_processing_uplift_multiplier_eu": 1.10, + "regional_processing_uplift_multiplier_us": 1.10, "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_pdf_input": true, @@ -20284,6 +20316,8 @@ "output_cost_per_token": 1e-05, "output_cost_per_token_flex": 5e-06, "output_cost_per_token_priority": 2e-05, + "regional_processing_uplift_multiplier_eu": 1.10, + "regional_processing_uplift_multiplier_us": 1.10, "supported_endpoints": [ "/v1/chat/completions", "/v1/batch", @@ -21206,6 +21240,8 @@ "mode": "responses", "output_cost_per_token": 0.00012, "output_cost_per_token_batches": 6e-05, + "regional_processing_uplift_multiplier_eu": 1.10, + "regional_processing_uplift_multiplier_us": 1.10, "supported_endpoints": [ "/v1/batch", "/v1/responses" @@ -21612,6 +21648,8 @@ "output_cost_per_token": 2e-06, "output_cost_per_token_flex": 1e-06, "output_cost_per_token_priority": 3.6e-06, + "regional_processing_uplift_multiplier_eu": 1.10, + "regional_processing_uplift_multiplier_us": 1.10, "supported_endpoints": [ "/v1/chat/completions", "/v1/batch", @@ -21693,6 +21731,8 @@ "max_input_tokens": 272000, "max_output_tokens": 128000, "max_tokens": 128000, + "regional_processing_uplift_multiplier_eu": 1.10, + "regional_processing_uplift_multiplier_us": 1.10, "mode": "chat", "output_cost_per_token": 4e-07, "output_cost_per_token_flex": 2e-07, @@ -28243,10 +28283,10 @@ "supports_tool_choice": true }, "openrouter/xiaomi/mimo-v2-flash": { - "input_cost_per_token": 9e-08, - "output_cost_per_token": 2.9e-07, + "input_cost_per_token": 1e-07, + "output_cost_per_token": 3e-07, "cache_creation_input_token_cost": 0.0, - "cache_read_input_token_cost": 0.0, + "cache_read_input_token_cost": 1e-08, "litellm_provider": "openrouter", "max_input_tokens": 262144, "max_output_tokens": 16384, @@ -28256,7 +28296,43 @@ "supports_tool_choice": true, "supports_reasoning": true, "supports_vision": false, - "supports_prompt_caching": 
false + "supports_prompt_caching": true + }, + "openrouter/xiaomi/mimo-v2.5-pro": { + "input_cost_per_token": 1e-06, + "output_cost_per_token": 3e-06, + "cache_creation_input_token_cost": 0.0, + "cache_read_input_token_cost": 2e-07, + "litellm_provider": "openrouter", + "max_input_tokens": 1048576, + "max_output_tokens": 16384, + "max_tokens": 16384, + "mode": "chat", + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_reasoning": true, + "supports_vision": false, + "supports_response_schema": true, + "supports_prompt_caching": true + }, + "openrouter/xiaomi/mimo-v2.5": { + "input_cost_per_token": 4e-07, + "output_cost_per_token": 2e-06, + "cache_creation_input_token_cost": 0.0, + "cache_read_input_token_cost": 8e-08, + "litellm_provider": "openrouter", + "max_input_tokens": 1048576, + "max_output_tokens": 131072, + "max_tokens": 131072, + "mode": "chat", + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_reasoning": true, + "supports_vision": true, + "supports_audio_input": true, + "supports_video_input": true, + "supports_response_schema": true, + "supports_prompt_caching": true }, "openrouter/z-ai/glm-4.7": { "input_cost_per_token": 4e-07, @@ -28987,14 +29063,16 @@ "mode": "responses", "supports_web_search": true, "supports_reasoning": false, - "supports_function_calling": true + "supports_function_calling": true, + "supports_output_config": true }, "perplexity/anthropic/claude-opus-4-7": { "litellm_provider": "perplexity", "mode": "responses", "supports_web_search": true, "supports_reasoning": false, - "supports_function_calling": true + "supports_function_calling": true, + "supports_output_config": true }, "perplexity/anthropic/claude-opus-4-5": { "litellm_provider": "perplexity", @@ -33405,6 +33483,7 @@ "supports_tool_choice": true, "supports_vision": true, "tool_use_system_prompt_tokens": 346, + "supports_output_config": true, "supports_max_reasoning_effort": true, 
"supports_minimal_reasoning_effort": true }, @@ -33433,6 +33512,7 @@ "supports_tool_choice": true, "supports_vision": true, "tool_use_system_prompt_tokens": 346, + "supports_output_config": true, "supports_max_reasoning_effort": true, "supports_minimal_reasoning_effort": true }, @@ -33546,6 +33626,7 @@ "search_context_size_low": 0.01, "search_context_size_medium": 0.01 }, + "supports_output_config": true, "supports_minimal_reasoning_effort": true }, "vertex_ai/claude-sonnet-4-5@20250929": { @@ -40658,6 +40739,7 @@ "search_context_size_low": 0.01, "search_context_size_medium": 0.01 }, + "supports_output_config": true, "supports_minimal_reasoning_effort": true }, "duckduckgo/search": { diff --git a/litellm/responses/main.py b/litellm/responses/main.py index 35680889d8..e4c713f67c 100644 --- a/litellm/responses/main.py +++ b/litellm/responses/main.py @@ -54,6 +54,7 @@ if TYPE_CHECKING: else: ResponseText = str # Fallback for ResponseText import from litellm.litellm_core_utils.get_litellm_params import get_litellm_params +from litellm.llms.openai.data_residency import infer_openai_data_residency from litellm.secret_managers.main import get_secret_str from litellm.types.responses.main import * from litellm.types.router import GenericLiteLLMParams @@ -1139,6 +1140,9 @@ def responses( "aresponses": _is_async, "litellm_call_id": litellm_call_id, "model_info": kwargs.get("model_info"), + "data_residency": infer_openai_data_residency( + custom_llm_provider, litellm_params.api_base + ), "metadata": ( kwargs["litellm_metadata"] if "litellm_metadata" in kwargs @@ -2032,6 +2036,9 @@ def compact_responses( litellm_params={ **responses_api_request_params, "litellm_call_id": litellm_call_id, + "data_residency": infer_openai_data_residency( + custom_llm_provider, litellm_params.api_base + ), }, custom_llm_provider=custom_llm_provider, ) @@ -2129,6 +2136,11 @@ async def _aresponses_websocket( api_key=api_key, ) + litellm_params_dict["data_residency"] = infer_openai_data_residency( + 
_custom_llm_provider, + dynamic_api_base or litellm_params.api_base or litellm.api_base, + ) + litellm_logging_obj.update_from_kwargs( kwargs=kwargs, model=model, diff --git a/litellm/types/utils.py b/litellm/types/utils.py index 282baff07f..e7bce27170 100644 --- a/litellm/types/utils.py +++ b/litellm/types/utils.py @@ -219,6 +219,12 @@ class ModelInfoBase(ProviderSpecificModelInfo, total=False): output_cost_per_token_priority: Optional[ float ] # OpenAI priority service tier pricing + regional_processing_uplift_multiplier_eu: Optional[ + float + ] # OpenAI EU data-residency uplift multiplier applied to all token costs (e.g. 1.10 = +10%) + regional_processing_uplift_multiplier_us: Optional[ + float + ] # OpenAI US data-residency uplift multiplier applied to all token costs (e.g. 1.10 = +10%) output_cost_per_character: Optional[float] # only for vertex ai models output_cost_per_audio_token: Optional[float] output_cost_per_token_above_128k_tokens: Optional[ @@ -3601,6 +3607,20 @@ class ServiceTier(Enum): PRIORITY = "priority" +class DataResidency(Enum): + """ + OpenAI data-residency / regional-processing regions. + + Inferred from the OpenAI api_base host (eu.api.openai.com -> EU, + us.api.openai.com -> US). Used to apply the regional-processing + cost uplift (see ``regional_processing_uplift_multiplier_`` + on ModelInfo). 
+ """ + + US = "us" + EU = "eu" + + LLMResponseTypes = Union[ ModelResponse, EmbeddingResponse, diff --git a/litellm/utils.py b/litellm/utils.py index 2ba6ef9cae..760a615664 100644 --- a/litellm/utils.py +++ b/litellm/utils.py @@ -5942,6 +5942,12 @@ def _get_model_info_helper( # noqa: PLR0915 output_cost_per_token_priority=_model_info.get( "output_cost_per_token_priority", None ), + regional_processing_uplift_multiplier_eu=_model_info.get( + "regional_processing_uplift_multiplier_eu", None + ), + regional_processing_uplift_multiplier_us=_model_info.get( + "regional_processing_uplift_multiplier_us", None + ), output_cost_per_audio_token=_model_info.get( "output_cost_per_audio_token", None ), diff --git a/model_prices_and_context_window.json b/model_prices_and_context_window.json index 62553e46ac..6e1c79c4e3 100644 --- a/model_prices_and_context_window.json +++ b/model_prices_and_context_window.json @@ -19050,6 +19050,8 @@ "output_cost_per_token": 8e-06, "output_cost_per_token_batches": 4e-06, "output_cost_per_token_priority": 1.4e-05, + "regional_processing_uplift_multiplier_eu": 1.10, + "regional_processing_uplift_multiplier_us": 1.10, "supported_endpoints": [ "/v1/chat/completions", "/v1/batch", @@ -19123,6 +19125,8 @@ "output_cost_per_token": 1.6e-06, "output_cost_per_token_batches": 8e-07, "output_cost_per_token_priority": 2.8e-06, + "regional_processing_uplift_multiplier_eu": 1.10, + "regional_processing_uplift_multiplier_us": 1.10, "supported_endpoints": [ "/v1/chat/completions", "/v1/batch", @@ -19196,6 +19200,8 @@ "output_cost_per_token": 4e-07, "output_cost_per_token_batches": 2e-07, "output_cost_per_token_priority": 8e-07, + "regional_processing_uplift_multiplier_eu": 1.10, + "regional_processing_uplift_multiplier_us": 1.10, "supported_endpoints": [ "/v1/chat/completions", "/v1/batch", @@ -19267,6 +19273,8 @@ "output_cost_per_token": 1e-05, "output_cost_per_token_batches": 5e-06, "output_cost_per_token_priority": 1.7e-05, + 
"regional_processing_uplift_multiplier_eu": 1.10, + "regional_processing_uplift_multiplier_us": 1.10, "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_pdf_input": true, @@ -19308,6 +19316,8 @@ "mode": "chat", "output_cost_per_token": 1e-05, "output_cost_per_token_batches": 5e-06, + "regional_processing_uplift_multiplier_eu": 1.10, + "regional_processing_uplift_multiplier_us": 1.10, "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_pdf_input": true, @@ -19329,6 +19339,8 @@ "mode": "chat", "output_cost_per_token": 1e-05, "output_cost_per_token_batches": 5e-06, + "regional_processing_uplift_multiplier_eu": 1.10, + "regional_processing_uplift_multiplier_us": 1.10, "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_pdf_input": true, @@ -19617,6 +19629,8 @@ "output_cost_per_token": 6e-07, "output_cost_per_token_batches": 3e-07, "output_cost_per_token_priority": 1e-06, + "regional_processing_uplift_multiplier_eu": 1.10, + "regional_processing_uplift_multiplier_us": 1.10, "supports_function_calling": true, "supports_parallel_function_calling": true, "supports_pdf_input": true, @@ -20320,6 +20334,8 @@ "output_cost_per_token": 1e-05, "output_cost_per_token_flex": 5e-06, "output_cost_per_token_priority": 2e-05, + "regional_processing_uplift_multiplier_eu": 1.10, + "regional_processing_uplift_multiplier_us": 1.10, "supported_endpoints": [ "/v1/chat/completions", "/v1/batch", @@ -21242,6 +21258,8 @@ "mode": "responses", "output_cost_per_token": 0.00012, "output_cost_per_token_batches": 6e-05, + "regional_processing_uplift_multiplier_eu": 1.10, + "regional_processing_uplift_multiplier_us": 1.10, "supported_endpoints": [ "/v1/batch", "/v1/responses" @@ -21648,6 +21666,8 @@ "output_cost_per_token": 2e-06, "output_cost_per_token_flex": 1e-06, "output_cost_per_token_priority": 3.6e-06, + "regional_processing_uplift_multiplier_eu": 1.10, + 
"regional_processing_uplift_multiplier_us": 1.10, "supported_endpoints": [ "/v1/chat/completions", "/v1/batch", @@ -21729,6 +21749,8 @@ "max_input_tokens": 272000, "max_output_tokens": 128000, "max_tokens": 128000, + "regional_processing_uplift_multiplier_eu": 1.10, + "regional_processing_uplift_multiplier_us": 1.10, "mode": "chat", "output_cost_per_token": 4e-07, "output_cost_per_token_flex": 2e-07, diff --git a/tests/batches_tests/test_batch_custom_pricing.py b/tests/batches_tests/test_batch_custom_pricing.py index 46870f1227..cb2ca385ff 100644 --- a/tests/batches_tests/test_batch_custom_pricing.py +++ b/tests/batches_tests/test_batch_custom_pricing.py @@ -145,6 +145,37 @@ def test_batch_cost_calculator_func_uses_custom_model_info(): ), f"Expected total cost {expected}, got {cost}" +@pytest.mark.parametrize("data_residency", ["eu", "us"]) +def test_batch_cost_calculator_applies_data_residency_uplift( + data_residency, monkeypatch +): + """batch_cost_calculator should apply the regional uplift multiplier when + data_residency is set and the model carries a configured multiplier.""" + monkeypatch.setenv("LITELLM_LOCAL_MODEL_COST_MAP", "True") + prev_model_cost = litellm.model_cost + litellm.model_cost = litellm.get_model_cost_map(url="") + try: + usage = Usage(prompt_tokens=1000, completion_tokens=500, total_tokens=1500) + + base_prompt, base_completion = batch_cost_calculator( + usage=usage, + model="gpt-5", + custom_llm_provider="openai", + ) + regional_prompt, regional_completion = batch_cost_calculator( + usage=usage, + model="gpt-5", + custom_llm_provider="openai", + data_residency=data_residency, + ) + + assert base_prompt > 0 and base_completion > 0 + assert regional_prompt == pytest.approx(base_prompt * 1.10, rel=1e-9) + assert regional_completion == pytest.approx(base_completion * 1.10, rel=1e-9) + finally: + litellm.model_cost = prev_model_cost + + @pytest.mark.asyncio async def test_calculate_batch_cost_and_usage_uses_custom_model_info(): 
"""calculate_batch_cost_and_usage should thread model_info.""" diff --git a/tests/test_litellm/litellm_core_utils/llm_cost_calc/test_llm_cost_calc_utils.py b/tests/test_litellm/litellm_core_utils/llm_cost_calc/test_llm_cost_calc_utils.py index a7a2b7720d..2b47a23226 100644 --- a/tests/test_litellm/litellm_core_utils/llm_cost_calc/test_llm_cost_calc_utils.py +++ b/tests/test_litellm/litellm_core_utils/llm_cost_calc/test_llm_cost_calc_utils.py @@ -1418,3 +1418,123 @@ def test_image_count_prevents_text_tokens_fallback(): f"got {prompt_cost}. text_tokens fallback may be double-charging." ) assert completion_cost == 0.0 + + +# --------------------------------------------------------------------------- +# Data-residency (OpenAI regional processing) tests +# --------------------------------------------------------------------------- + + +@pytest.fixture +def _local_model_cost_map(): + prev_env = os.environ.get("LITELLM_LOCAL_MODEL_COST_MAP") + prev_model_cost = litellm.model_cost + os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True" + litellm.model_cost = litellm.get_model_cost_map(url="") + try: + yield + finally: + litellm.model_cost = prev_model_cost + if prev_env is None: + os.environ.pop("LITELLM_LOCAL_MODEL_COST_MAP", None) + else: + os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = prev_env + + +@pytest.mark.parametrize("data_residency", ["eu", "us"]) +def test_data_residency_applies_uplift(data_residency, _local_model_cost_map): + """gpt-5 should apply the regional processing uplift multiplier when + data_residency is set.""" + from litellm.types.utils import Usage + + usage = Usage(prompt_tokens=1000, completion_tokens=500, total_tokens=1500) + + base = generic_cost_per_token( + model="gpt-5", + usage=usage, + custom_llm_provider="openai", + ) + regional = generic_cost_per_token( + model="gpt-5", + usage=usage, + custom_llm_provider="openai", + data_residency=data_residency, + ) + + base_total = base[0] + base[1] + regional_total = regional[0] + regional[1] + + assert 
base_total > 0 + assert regional_total == pytest.approx(base_total * 1.10, rel=1e-9) + assert regional[0] == pytest.approx(base[0] * 1.10, rel=1e-9) + assert regional[1] == pytest.approx(base[1] * 1.10, rel=1e-9) + + +def test_data_residency_no_uplift_for_unmarked_model(_local_model_cost_map): + """A model without a regional_processing_uplift_multiplier_* entry should + fall back to base pricing, not error.""" + from litellm.types.utils import Usage + + usage = Usage(prompt_tokens=1000, completion_tokens=500, total_tokens=1500) + + base = generic_cost_per_token( + model="gpt-3.5-turbo", + usage=usage, + custom_llm_provider="openai", + ) + with_residency = generic_cost_per_token( + model="gpt-3.5-turbo", + usage=usage, + custom_llm_provider="openai", + data_residency="eu", + ) + + assert base == with_residency + + +def test_data_residency_none_no_uplift(_local_model_cost_map): + """data_residency=None should be a no-op even for models with a multiplier.""" + from litellm.types.utils import Usage + + usage = Usage(prompt_tokens=1000, completion_tokens=500, total_tokens=1500) + + base = generic_cost_per_token( + model="gpt-5", + usage=usage, + custom_llm_provider="openai", + ) + explicit_none = generic_cost_per_token( + model="gpt-5", + usage=usage, + custom_llm_provider="openai", + data_residency=None, + ) + + assert base == explicit_none + + +def test_data_residency_composes_with_service_tier(_local_model_cost_map): + """The uplift multiplies the priority-tier cost, not the standard one.""" + from litellm.types.utils import Usage + + usage = Usage(prompt_tokens=1000, completion_tokens=500, total_tokens=1500) + + priority_base = generic_cost_per_token( + model="gpt-5", + usage=usage, + custom_llm_provider="openai", + service_tier="priority", + ) + priority_eu = generic_cost_per_token( + model="gpt-5", + usage=usage, + custom_llm_provider="openai", + service_tier="priority", + data_residency="eu", + ) + + priority_base_total = priority_base[0] + priority_base[1] + 
priority_eu_total = priority_eu[0] + priority_eu[1] + + assert priority_base_total > 0 + assert priority_eu_total == pytest.approx(priority_base_total * 1.10, rel=1e-9) diff --git a/tests/test_litellm/litellm_core_utils/test_get_litellm_params.py b/tests/test_litellm/litellm_core_utils/test_get_litellm_params.py index dbcb048c25..55db31efd2 100644 --- a/tests/test_litellm/litellm_core_utils/test_get_litellm_params.py +++ b/tests/test_litellm/litellm_core_utils/test_get_litellm_params.py @@ -125,3 +125,40 @@ class TestGetLitellmParamsExplicitFields: def test_no_log_from_explicit_param(self): result = get_litellm_params(no_log=True) assert result["no-log"] is True + + +class TestGetLitellmParamsDataResidency: + """Verify that data_residency is inferred from OpenAI regional api_base.""" + + def test_eu_host_resolves_to_eu(self): + result = get_litellm_params( + custom_llm_provider="openai", + api_base="https://eu.api.openai.com/v1", + ) + assert result["data_residency"] == "eu" + + def test_us_host_resolves_to_us(self): + result = get_litellm_params( + custom_llm_provider="openai", + api_base="https://us.api.openai.com/v1", + ) + assert result["data_residency"] == "us" + + def test_global_host_resolves_to_none(self): + result = get_litellm_params( + custom_llm_provider="openai", + api_base="https://api.openai.com/v1", + ) + assert result["data_residency"] is None + + def test_no_api_base_is_none(self): + result = get_litellm_params(custom_llm_provider="openai") + assert result["data_residency"] is None + + def test_non_openai_provider_does_not_resolve(self): + """Regional OpenAI host doesn't apply to other providers.""" + result = get_litellm_params( + custom_llm_provider="anthropic", + api_base="https://eu.api.openai.com/v1", + ) + assert result["data_residency"] is None diff --git a/tests/test_litellm/llms/openai/responses/test_openai_responses_data_residency.py b/tests/test_litellm/llms/openai/responses/test_openai_responses_data_residency.py new file mode 100644 
index 0000000000..ac89428617 --- /dev/null +++ b/tests/test_litellm/llms/openai/responses/test_openai_responses_data_residency.py @@ -0,0 +1,134 @@ +""" +Tests that data_residency is correctly populated on the litellm logging +object's litellm_params for OpenAI Responses paths, even when +custom_llm_provider is resolved from the model string inside responses() +rather than passed explicitly. +""" + +import json +from unittest.mock import MagicMock, patch + +import litellm + + +def _make_responses_api_response_body() -> dict: + return { + "id": "resp-test", + "object": "response", + "created_at": 1234567890, + "model": "gpt-4.1", + "output": [ + { + "type": "message", + "id": "msg-test", + "status": "completed", + "role": "assistant", + "content": [ + { + "type": "output_text", + "text": "ok", + "annotations": [], + } + ], + } + ], + "status": "completed", + "usage": { + "input_tokens": 1, + "output_tokens": 1, + "total_tokens": 2, + }, + } + + +def _make_mock_http_client(response_body: dict) -> MagicMock: + mock_client = MagicMock() + mock_response = MagicMock() + mock_response.status_code = 200 + mock_response.headers = {"content-type": "application/json"} + mock_response.json.return_value = response_body + mock_response.text = json.dumps(response_body) + mock_client.post.return_value = mock_response + return mock_client + + +def _capture_logging_obj(): + captured = {} + + real_init = litellm.Logging.__init__ + + def init_spy(self, *args, **kwargs): + real_init(self, *args, **kwargs) + captured["logging_obj"] = self + + return captured, init_spy + + +def test_responses_eu_api_base_sets_data_residency(): + """When api_base is a regional OpenAI host and custom_llm_provider is + inferred from the model (not passed explicitly), data_residency must end + up on the logging object's litellm_params so the cost calculator can apply + the regional uplift.""" + mock_client = _make_mock_http_client(_make_responses_api_response_body()) + captured, init_spy = 
_capture_logging_obj() + + with ( + patch( + "litellm.llms.custom_httpx.llm_http_handler._get_httpx_client", + return_value=mock_client, + ), + patch.object(litellm.Logging, "__init__", init_spy), + ): + litellm.responses( + model="gpt-4.1", + input="hi", + api_base="https://eu.api.openai.com/v1", + api_key="test-key", + ) + + logging_obj = captured["logging_obj"] + assert logging_obj.litellm_params.get("data_residency") == "eu" + + +def test_responses_us_api_base_sets_data_residency(): + mock_client = _make_mock_http_client(_make_responses_api_response_body()) + captured, init_spy = _capture_logging_obj() + + with ( + patch( + "litellm.llms.custom_httpx.llm_http_handler._get_httpx_client", + return_value=mock_client, + ), + patch.object(litellm.Logging, "__init__", init_spy), + ): + litellm.responses( + model="gpt-4.1", + input="hi", + api_base="https://us.api.openai.com/v1", + api_key="test-key", + ) + + logging_obj = captured["logging_obj"] + assert logging_obj.litellm_params.get("data_residency") == "us" + + +def test_responses_global_api_base_leaves_data_residency_none(): + mock_client = _make_mock_http_client(_make_responses_api_response_body()) + captured, init_spy = _capture_logging_obj() + + with ( + patch( + "litellm.llms.custom_httpx.llm_http_handler._get_httpx_client", + return_value=mock_client, + ), + patch.object(litellm.Logging, "__init__", init_spy), + ): + litellm.responses( + model="gpt-4.1", + input="hi", + api_base="https://api.openai.com/v1", + api_key="test-key", + ) + + logging_obj = captured["logging_obj"] + assert logging_obj.litellm_params.get("data_residency") is None diff --git a/tests/test_litellm/llms/openai/test_data_residency.py b/tests/test_litellm/llms/openai/test_data_residency.py new file mode 100644 index 0000000000..ecb5739133 --- /dev/null +++ b/tests/test_litellm/llms/openai/test_data_residency.py @@ -0,0 +1,34 @@ +"""Tests for the OpenAI data-residency inference helper.""" + +import pytest + +from 
litellm.llms.openai.data_residency import infer_openai_data_residency + + +@pytest.mark.parametrize( + "api_base, expected", + [ + ("https://eu.api.openai.com/v1", "eu"), + ("https://eu.api.openai.com", "eu"), + ("https://us.api.openai.com/v1", "us"), + ("https://us.api.openai.com", "us"), + ("https://EU.api.openai.com/v1", "eu"), + ("https://api.openai.com/v1", None), + ("https://api.openai.com", None), + ("https://example.com/v1", None), + ("https://my-azure-endpoint.openai.azure.com/openai/deployments/foo", None), + ("", None), + (None, None), + ("not a url", None), + ], +) +def test_infer_openai_data_residency(api_base, expected): + assert infer_openai_data_residency("openai", api_base) == expected + + +@pytest.mark.parametrize("custom_llm_provider", [None, "anthropic", "azure", "bedrock"]) +def test_infer_openai_data_residency_non_openai_provider(custom_llm_provider): + assert ( + infer_openai_data_residency(custom_llm_provider, "https://eu.api.openai.com/v1") + is None + ) diff --git a/tests/test_litellm/test_utils.py b/tests/test_litellm/test_utils.py index 40d9cf3231..e646c75eda 100644 --- a/tests/test_litellm/test_utils.py +++ b/tests/test_litellm/test_utils.py @@ -737,6 +737,8 @@ def test_aaamodel_prices_and_context_window_json_is_valid(): "output_cost_per_token_priority": {"type": "number"}, "output_cost_per_token_above_200k_tokens_priority": {"type": "number"}, "output_cost_per_token_above_272k_tokens_priority": {"type": "number"}, + "regional_processing_uplift_multiplier_eu": {"type": "number"}, + "regional_processing_uplift_multiplier_us": {"type": "number"}, "input_cost_per_pixel": {"type": "number"}, "input_cost_per_query": {"type": "number"}, "input_cost_per_request": {"type": "number"},