feat(openai): apply regional-processing cost uplift for EU/US data residency (#28626)

* feat(openai): apply regional-processing cost uplift for EU/US data residency OpenAI charges a 10% uplift on the latest GPT models when requests are served from a regionalized hostname (eu./us.api.openai.com). Infer the region from `api_base`, expose it on `kwargs["litellm_params"]["data_residency"]`, and multiply the computed cost by a per-model `regional_processing_uplift_multiplier_<region>` field. https://claude.ai/code/session_012ebH44s7ohYxjoix5CXzTW * test: allow regional_processing_uplift_multiplier_{eu,us} in model_prices schema * fix(cost): tighten data_residency inference and restore model_cost in tests - Only infer OpenAI data_residency when custom_llm_provider == "openai"; drop the implicit None fallback so non-OpenAI callers can't accidentally pick up a regional tag from a stray OpenAI hostname. - _local_model_cost_map fixture now snapshots and restores litellm.model_cost and LITELLM_LOCAL_MODEL_COST_MAP so tests don't leak state across the session. * refactor(openai): move data_residency helper under llms/openai * fix: thread data_residency through realtime stream cost calculation Co-authored-by: Yassin Kortam <yassin@berri.ai> * fix(cost): thread data_residency through batch_cost_calculator Apply the OpenAI regional-processing uplift multiplier to retrieve_batch cost paths so Batch API requests served via eu./us.api.openai.com are priced at the same uplifted token rates as completions/transcriptions. * refactor(openai): encapsulate provider check inside infer_openai_data_residency Move the custom_llm_provider == "openai" guard from get_litellm_params into the helper itself so the core utility no longer carries provider-specific dispatch logic. Callers pass through the provider unconditionally; the helper returns None for any non-OpenAI provider. 
* fix(responses): thread data_residency through Responses logging params The Responses API paths build their logging litellm_params dict after provider resolution but did not include data_residency, so cost calc saw None even when the effective api_base was a regional OpenAI host. --------- Co-authored-by: Claude <noreply@anthropic.com> Co-authored-by: Cursor Agent <cursoragent@cursor.com> Co-authored-by: Yassin Kortam <yassin@berri.ai>
2026-05-25 20:36:14 -07:00 · 2026-05-25 20:36:14 -07:00 · c23b19f09c
commit c23b19f09c
parent f38c16c71e
17 changed files with 652 additions and 13 deletions
--- a/litellm/cost_calculator.py
+++ b/litellm/cost_calculator.py
@ -24,6 +24,7 @@ from litellm.litellm_core_utils.llm_cost_calc.usage_object_transformation import
 from litellm.litellm_core_utils.llm_cost_calc.utils import (
    CostCalculatorUtils,
    _generic_cost_per_character,
+    _get_regional_uplift_multiplier,
    _get_service_tier_cost_key,
    _parse_prompt_tokens_details,
    calculate_cost_component,
@ -312,6 +313,10 @@ def cost_per_token(  # noqa: PLR0915
    audio_transcription_file_duration: float = 0.0,  # for audio transcription calls - the file time in seconds
    ### SERVICE TIER ###
    service_tier: Optional[str] = None,  # for OpenAI service tier pricing
+    ### DATA RESIDENCY ###
+    data_residency: Optional[
+        str
+    ] = None,  # for OpenAI regional-processing uplift (e.g. "eu", "us")
    response: Optional[Any] = None,
    ### REQUEST MODEL ###
    request_model: Optional[str] = None,  # original request model for router detection
@ -493,6 +498,7 @@ def cost_per_token(  # noqa: PLR0915
                usage=usage_block,
                custom_llm_provider=custom_llm_provider,
                service_tier=service_tier,
+                data_residency=data_residency,
            )

        return prompt_cost, completion_cost
@ -521,7 +527,10 @@ def cost_per_token(  # noqa: PLR0915
        or call_type == CallTypes.retrieve_batch
    ):
        return batch_cost_calculator(
-            usage=usage_block, model=model, custom_llm_provider=custom_llm_provider
+            usage=usage_block,
+            model=model,
+            custom_llm_provider=custom_llm_provider,
+            data_residency=data_residency,
        )
    elif call_type == "atranscription" or call_type == "transcription":
        if _transcription_usage_has_token_details(usage_block):
@ -529,6 +538,7 @@ def cost_per_token(  # noqa: PLR0915
                model=model_without_prefix,
                usage=usage_block,
                service_tier=service_tier,
+                data_residency=data_residency,
            )

        return openai_cost_per_second(
@ -579,7 +589,10 @@ def cost_per_token(  # noqa: PLR0915
        )
    elif custom_llm_provider == "openai":
        return openai_cost_per_token(
-            model=model, usage=usage_block, service_tier=service_tier
+            model=model,
+            usage=usage_block,
+            service_tier=service_tier,
+            data_residency=data_residency,
        )
    elif custom_llm_provider == "databricks":
        return databricks_cost_per_token(model=model, usage=usage_block)
@ -631,6 +644,7 @@ def cost_per_token(  # noqa: PLR0915
                usage=usage_block,
                custom_llm_provider=custom_llm_provider,
                service_tier=service_tier,
+                data_residency=data_residency,
            )

        if (
@ -1117,6 +1131,10 @@ def completion_cost(  # noqa: PLR0915
    litellm_logging_obj: Optional[LitellmLoggingObject] = None,
    ### SERVICE TIER ###
    service_tier: Optional[str] = None,  # for OpenAI service tier pricing
+    ### DATA RESIDENCY ###
+    data_residency: Optional[
+        str
+    ] = None,  # for OpenAI regional-processing uplift (e.g. "eu", "us")
 ) -> float:
    """
    Calculate the cost of a given completion call for GPT-3.5-turbo, llama2, any litellm supported llm.
@ -1516,6 +1534,7 @@ def completion_cost(  # noqa: PLR0915
                        combined_usage_object=cost_per_token_usage_object,
                        custom_llm_provider=custom_llm_provider,
                        litellm_model_name=model,
+                        data_residency=data_residency,
                    )
                elif call_type == _MCP_CALL_TYPE:
                    from litellm.proxy._experimental.mcp_server.cost_calculator import (
@ -1600,6 +1619,7 @@ def completion_cost(  # noqa: PLR0915
                    audio_transcription_file_duration=audio_transcription_file_duration,
                    rerank_billed_units=rerank_billed_units,
                    service_tier=service_tier,
+                    data_residency=data_residency,
                    response=completion_response,
                    request_model=request_model_for_cost,
                )
@ -1811,6 +1831,10 @@ def response_cost_calculator(
    litellm_logging_obj: Optional[LitellmLoggingObject] = None,
    ### SERVICE TIER ###
    service_tier: Optional[str] = None,  # for OpenAI service tier pricing
+    ### DATA RESIDENCY ###
+    data_residency: Optional[
+        str
+    ] = None,  # for OpenAI regional-processing uplift (e.g. "eu", "us")
 ) -> float:
    """
    Returns
@ -1844,6 +1868,7 @@ def response_cost_calculator(
                router_model_id=router_model_id,
                litellm_logging_obj=litellm_logging_obj,
                service_tier=service_tier,
+                data_residency=data_residency,
            )
        return response_cost
    except Exception as e:
@ -2202,6 +2227,7 @@ def batch_cost_calculator(
    model: str,
    custom_llm_provider: Optional[str] = None,
    model_info: Optional[ModelInfo] = None,
+    data_residency: Optional[str] = None,
 ) -> Tuple[float, float]:
    """
    Calculate the cost of a batch job.
@ -2286,6 +2312,11 @@ def batch_cost_calculator(
            usage.completion_tokens * (output_cost_per_token) / 2
        )  # batch cost is usually half of the regular token cost

+    uplift = _get_regional_uplift_multiplier(model_info, data_residency)
+    if uplift != 1.0:
+        total_prompt_cost *= uplift
+        total_completion_cost *= uplift
+
    return total_prompt_cost, total_completion_cost


@ -2431,6 +2462,7 @@ def handle_realtime_stream_cost_calculation(
    combined_usage_object: Usage,
    custom_llm_provider: str,
    litellm_model_name: str,
+    data_residency: Optional[str] = None,
 ) -> float:
    """
    Handles the cost calculation for realtime stream responses.
@ -2461,6 +2493,7 @@ def handle_realtime_stream_cost_calculation(
                model=model_name,
                usage=combined_usage_object,
                custom_llm_provider=custom_llm_provider,
+                data_residency=data_residency,
            )
        except Exception:
            continue
--- a/litellm/litellm_core_utils/get_litellm_params.py
+++ b/litellm/litellm_core_utils/get_litellm_params.py
@ -1,5 +1,7 @@
 from typing import Optional

+from litellm.llms.openai.data_residency import infer_openai_data_residency
+
 # Pre-define optional kwargs keys as frozenset for O(1) lookups
 # These are extracted from kwargs only if present, avoiding unnecessary .get() calls
 _OPTIONAL_KWARGS_KEYS = frozenset(
@ -103,6 +105,10 @@ def get_litellm_params(
    if litellm_trace_id is None:
        litellm_trace_id = _meta.get("trace_id") or _meta.get("session_id")

+    data_residency: Optional[str] = infer_openai_data_residency(
+        custom_llm_provider, api_base
+    )
+
    # Build base dict with explicit parameters (always included)
    litellm_params = {
        "acompletion": acompletion,
@ -112,6 +118,7 @@ def get_litellm_params(
        "verbose": verbose,
        "custom_llm_provider": custom_llm_provider,
        "api_base": api_base,
+        "data_residency": data_residency,
        "litellm_call_id": litellm_call_id,
        "model_alias_map": model_alias_map,
        "completion_call_id": completion_call_id,
--- a/litellm/litellm_core_utils/litellm_logging.py
+++ b/litellm/litellm_core_utils/litellm_logging.py
@ -1546,6 +1546,11 @@ class Logging(LiteLLMLoggingBaseClass):
                    if self.optional_params
                    else None
                ),
+                "data_residency": (
+                    self.litellm_params.get("data_residency")
+                    if hasattr(self, "litellm_params") and self.litellm_params
+                    else None
+                ),
            }
        except Exception as e:  # error creating kwargs for cost calculation
            debug_info = StandardLoggingModelCostFailureDebugInformation(
--- a/litellm/litellm_core_utils/llm_cost_calc/utils.py
+++ b/litellm/litellm_core_utils/llm_cost_calc/utils.py
@ -9,6 +9,7 @@ from litellm.types.utils import (
    CacheCreationTokenDetails,
    CallTypes,
    CompletionTokensDetailsWrapper,
+    DataResidency,
    ImageResponse,
    ModelInfo,
    PassthroughCallTypes,
@ -617,11 +618,46 @@ def _calculate_input_cost(
    return prompt_cost


+def _get_regional_uplift_multiplier(
+    model_info: ModelInfo, data_residency: Optional[str]
+) -> float:
+    """
+    Resolve the per-model regional-processing uplift multiplier for a given
+    data-residency region.
+
+    OpenAI applies a flat percentage uplift (e.g. +10%) on all token costs for
+    requests served from a regionalized hostname (eu./us.api.openai.com). The
+    multiplier is stored on the model entry as
+    ``regional_processing_uplift_multiplier_<region>`` (e.g. 1.10).
+
+    ``data_residency`` is matched case-insensitively against the
+    ``DataResidency`` enum values.
+
+    Returns 1.0 (no uplift) when ``data_residency`` is ``None``, when it is
+    not a ``DataResidency`` value, when the model has no multiplier
+    configured for the given region, or when the configured value cannot be
+    converted to ``float`` (that last case is also logged).
+    """
+    if data_residency is None:
+        return 1.0
+    # Lowercase first so the region tag compares equal to the enum values
+    # regardless of the caller's casing.
+    residency = data_residency.lower()
+    # Unknown regions never reach the model_info lookup below.
+    if residency not in {r.value for r in DataResidency}:
+        return 1.0
+    # The field name is built from the region, e.g.
+    # regional_processing_uplift_multiplier_eu.
+    multiplier = model_info.get(f"regional_processing_uplift_multiplier_{residency}")
+    if multiplier is None:
+        return 1.0
+    try:
+        return float(cast(float, multiplier))
+    except (TypeError, ValueError):
+        # A malformed multiplier must not break cost calculation: log it and
+        # price the request without an uplift.
+        verbose_logger.exception(
+            "Invalid regional_processing_uplift_multiplier_%s for model; "
+            "defaulting to 1.0",
+            residency,
+        )
+        return 1.0
+
+
 def generic_cost_per_token(  # noqa: PLR0915
    model: str,
    usage: Usage,
    custom_llm_provider: str,
    service_tier: Optional[str] = None,
+    data_residency: Optional[str] = None,
 ) -> Tuple[float, float]:
    """
    Calculates the cost per token for a given model, prompt tokens, and completion tokens.
@ -631,6 +667,8 @@ def generic_cost_per_token(  # noqa: PLR0915
    Input:
        - model: str, the model name without provider prefix
        - usage: LiteLLM Usage block, containing anthropic caching information
+        - data_residency: optional OpenAI data-residency region (e.g. "eu", "us"),
+          used to apply the per-model regional-processing uplift multiplier.

    Returns:
        Tuple[float, float] - prompt_cost_in_usd, completion_cost_in_usd
@ -781,6 +819,14 @@ def generic_cost_per_token(  # noqa: PLR0915
        )
        completion_cost += float(image_tokens) * _output_cost_per_image_token

+    ## REGIONAL DATA-RESIDENCY UPLIFT
+    # Applied as a flat multiplier across all token costs for the request
+    # when the upstream is a regionalized OpenAI host (eu./us.api.openai.com).
+    uplift = _get_regional_uplift_multiplier(model_info, data_residency)
+    if uplift != 1.0:
+        prompt_cost *= uplift
+        completion_cost *= uplift
+
    return prompt_cost, completion_cost


--- a/litellm/llms/openai/cost_calculation.py
+++ b/litellm/llms/openai/cost_calculation.py
@ -19,7 +19,10 @@ def cost_router(call_type: CallTypes) -> Literal["cost_per_token", "cost_per_sec


 def cost_per_token(
-    model: str, usage: Usage, service_tier: Optional[str] = None
+    model: str,
+    usage: Usage,
+    service_tier: Optional[str] = None,
+    data_residency: Optional[str] = None,
 ) -> Tuple[float, float]:
    """
    Calculates the cost per token for a given model, prompt tokens, and completion tokens.
@ -27,6 +30,9 @@ def cost_per_token(
    Input:
        - model: str, the model name without provider prefix
        - usage: LiteLLM Usage block, containing anthropic caching information
+        - data_residency: optional OpenAI data-residency region (e.g. "eu", "us"),
+          inferred from api_base. Applies the model's regional-processing
+          uplift multiplier when set.

    Returns:
        Tuple[float, float] - prompt_cost_in_usd, completion_cost_in_usd
@ -37,6 +43,7 @@ def cost_per_token(
        usage=usage,
        custom_llm_provider="openai",
        service_tier=service_tier,
+        data_residency=data_residency,
    )
    # ### Non-cached text tokens
    # non_cached_text_tokens = usage.prompt_tokens
--- a/litellm/llms/openai/data_residency.py
+++ b/litellm/llms/openai/data_residency.py
@ -0,0 +1,41 @@
+"""
+Helpers for resolving OpenAI data-residency (regional processing) from an
+api_base URL.
+
+OpenAI enforces hostname-per-region for projects with geography restrictions
+enabled and rejects requests sent to the wrong host, so the api_base hostname
+is the authoritative signal of which region a request was processed in.
+"""
+
+from typing import Dict, Optional
+from urllib.parse import urlparse
+
+# Mapping of OpenAI regional hostnames to the corresponding data-residency
+# value used by the cost calculator. See
+# https://developers.openai.com/api/docs/pricing for the regional-processing
+# uplift these hostnames trigger.
+# Keys must stay lowercase: infer_openai_data_residency lowercases the parsed
+# host before looking it up here, and a host that is not listed yields None
+# from that lookup.
+_OPENAI_REGIONAL_HOSTS: Dict[str, str] = {
+    "eu.api.openai.com": "eu",
+    "us.api.openai.com": "us",
+}
+
+
+def infer_openai_data_residency(
+    custom_llm_provider: Optional[str], api_base: Optional[str]
+) -> Optional[str]:
+    """
+    Derive the OpenAI data-residency region from an api_base URL.
+
+    Returns ``"eu"`` for the EU regional host, ``"us"`` for the US regional
+    host, and ``None`` for the default global host, any non-OpenAI provider,
+    or any non-OpenAI URL.
+
+    ``None`` is also returned when ``api_base`` is empty, when ``urlparse``
+    rejects it, or when no hostname can be extracted from it (for example a
+    value written without a ``//`` network-location prefix). The hostname is
+    compared case-insensitively and must match a regional host exactly.
+    """
+    # The provider guard lives in this helper, so callers can pass whatever
+    # provider they have without checking it first.
+    if custom_llm_provider != "openai" or not api_base:
+        return None
+    try:
+        host = urlparse(api_base).hostname
+    except (TypeError, ValueError):
+        # An api_base that cannot be parsed is treated as "no region" rather
+        # than surfacing the parse error to the caller.
+        return None
+    if not host:
+        return None
+    # Hosts missing from the table fall through to None via dict.get.
+    return _OPENAI_REGIONAL_HOSTS.get(host.lower())
--- a/litellm/model_prices_and_context_window_backup.json
+++ b/litellm/model_prices_and_context_window_backup.json
@ -1011,6 +1011,7 @@
        "supports_vision": true,
        "tool_use_system_prompt_tokens": 346,
        "supports_native_structured_output": true,
+        "supports_output_config": true,
        "supports_max_reasoning_effort": true,
        "supports_minimal_reasoning_effort": true
    },
@ -1041,6 +1042,7 @@
        "supports_vision": true,
        "tool_use_system_prompt_tokens": 346,
        "supports_native_structured_output": true,
+        "supports_output_config": true,
        "supports_max_reasoning_effort": true,
        "supports_minimal_reasoning_effort": true
    },
@ -1071,6 +1073,7 @@
        "supports_vision": true,
        "tool_use_system_prompt_tokens": 346,
        "supports_native_structured_output": true,
+        "supports_output_config": true,
        "supports_max_reasoning_effort": true,
        "supports_minimal_reasoning_effort": true
    },
@ -1100,6 +1103,7 @@
        "supports_vision": true,
        "tool_use_system_prompt_tokens": 346,
        "supports_native_structured_output": true,
+        "supports_output_config": true,
        "supports_max_reasoning_effort": true,
        "supports_minimal_reasoning_effort": true
    },
@ -1129,6 +1133,7 @@
        "supports_vision": true,
        "tool_use_system_prompt_tokens": 346,
        "supports_native_structured_output": true,
+        "supports_output_config": true,
        "supports_max_reasoning_effort": true,
        "supports_minimal_reasoning_effort": true
    },
@ -1328,6 +1333,7 @@
        "supports_vision": true,
        "tool_use_system_prompt_tokens": 346,
        "supports_native_structured_output": true,
+        "supports_output_config": true,
        "supports_minimal_reasoning_effort": true
    },
    "global.anthropic.claude-sonnet-4-6": {
@ -1358,6 +1364,7 @@
        "supports_vision": true,
        "tool_use_system_prompt_tokens": 346,
        "supports_native_structured_output": true,
+        "supports_output_config": true,
        "supports_minimal_reasoning_effort": true
    },
    "us.anthropic.claude-sonnet-4-6": {
@ -1388,6 +1395,7 @@
        "supports_vision": true,
        "tool_use_system_prompt_tokens": 346,
        "supports_native_structured_output": true,
+        "supports_output_config": true,
        "supports_minimal_reasoning_effort": true
    },
    "eu.anthropic.claude-sonnet-4-6": {
@ -1417,6 +1425,7 @@
        "supports_vision": true,
        "tool_use_system_prompt_tokens": 346,
        "supports_native_structured_output": true,
+        "supports_output_config": true,
        "supports_minimal_reasoning_effort": true
    },
    "au.anthropic.claude-sonnet-4-6": {
@ -1446,6 +1455,7 @@
        "supports_vision": true,
        "tool_use_system_prompt_tokens": 346,
        "supports_native_structured_output": true,
+        "supports_output_config": true,
        "supports_minimal_reasoning_effort": true
    },
    "jp.anthropic.claude-sonnet-4-6": {
@ -1475,6 +1485,7 @@
        "supports_vision": true,
        "tool_use_system_prompt_tokens": 346,
        "supports_native_structured_output": true,
+        "supports_output_config": true,
        "supports_minimal_reasoning_effort": true
    },
    "anthropic.claude-sonnet-4-20250514-v1:0": {
@ -1996,6 +2007,7 @@
        "supports_tool_choice": true,
        "supports_vision": true,
        "tool_use_system_prompt_tokens": 159,
+        "supports_output_config": true,
        "supports_max_reasoning_effort": true,
        "supports_minimal_reasoning_effort": true
    },
@ -2093,6 +2105,7 @@
        "supports_tool_choice": true,
        "supports_vision": true,
        "tool_use_system_prompt_tokens": 346,
+        "supports_output_config": true,
        "supports_minimal_reasoning_effort": true
    },
    "azure/computer-use-preview": {
@ -9654,6 +9667,7 @@
        "supports_tool_choice": true,
        "supports_vision": true,
        "tool_use_system_prompt_tokens": 346,
+        "supports_output_config": true,
        "supports_minimal_reasoning_effort": true
    },
    "claude-sonnet-4-5-20250929-v1:0": {
@ -9851,6 +9865,7 @@
            "us": 1.1,
            "fast": 6.0
        },
+        "supports_output_config": true,
        "supports_max_reasoning_effort": true,
        "supports_minimal_reasoning_effort": true
    },
@ -9886,7 +9901,8 @@
            "fast": 6.0
        },
        "supports_max_reasoning_effort": true,
-        "supports_minimal_reasoning_effort": true
+        "supports_minimal_reasoning_effort": true,
+        "supports_output_config": true
    },
    "claude-opus-4-7": {
        "cache_creation_input_token_cost": 6.25e-06,
@ -9921,7 +9937,8 @@
            "us": 1.1,
            "fast": 6.0
        },
-        "supports_minimal_reasoning_effort": true
+        "supports_minimal_reasoning_effort": true,
+        "supports_output_config": true
    },
    "claude-opus-4-7-20260416": {
        "cache_creation_input_token_cost": 6.25e-06,
@ -9956,7 +9973,8 @@
            "us": 1.1,
            "fast": 6.0
        },
-        "supports_minimal_reasoning_effort": true
+        "supports_minimal_reasoning_effort": true,
+        "supports_output_config": true
    },
    "claude-sonnet-4-20250514": {
        "deprecation_date": "2026-05-14",
@ -14958,7 +14976,7 @@
        "mode": "chat",
        "output_cost_per_reasoning_token": 1.5e-06,
        "output_cost_per_token": 1.5e-06,
-        "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing#gemini-models",
+        "source": "https://ai.google.dev/gemini-api/docs/models",
        "supported_endpoints": [
            "/v1/chat/completions",
            "/v1/completions",
@ -19014,6 +19032,8 @@
        "output_cost_per_token": 8e-06,
        "output_cost_per_token_batches": 4e-06,
        "output_cost_per_token_priority": 1.4e-05,
+        "regional_processing_uplift_multiplier_eu": 1.10,
+        "regional_processing_uplift_multiplier_us": 1.10,
        "supported_endpoints": [
            "/v1/chat/completions",
            "/v1/batch",
@ -19087,6 +19107,8 @@
        "output_cost_per_token": 1.6e-06,
        "output_cost_per_token_batches": 8e-07,
        "output_cost_per_token_priority": 2.8e-06,
+        "regional_processing_uplift_multiplier_eu": 1.10,
+        "regional_processing_uplift_multiplier_us": 1.10,
        "supported_endpoints": [
            "/v1/chat/completions",
            "/v1/batch",
@ -19160,6 +19182,8 @@
        "output_cost_per_token": 4e-07,
        "output_cost_per_token_batches": 2e-07,
        "output_cost_per_token_priority": 8e-07,
+        "regional_processing_uplift_multiplier_eu": 1.10,
+        "regional_processing_uplift_multiplier_us": 1.10,
        "supported_endpoints": [
            "/v1/chat/completions",
            "/v1/batch",
@ -19231,6 +19255,8 @@
        "output_cost_per_token": 1e-05,
        "output_cost_per_token_batches": 5e-06,
        "output_cost_per_token_priority": 1.7e-05,
+        "regional_processing_uplift_multiplier_eu": 1.10,
+        "regional_processing_uplift_multiplier_us": 1.10,
        "supports_function_calling": true,
        "supports_parallel_function_calling": true,
        "supports_pdf_input": true,
@ -19272,6 +19298,8 @@
        "mode": "chat",
        "output_cost_per_token": 1e-05,
        "output_cost_per_token_batches": 5e-06,
+        "regional_processing_uplift_multiplier_eu": 1.10,
+        "regional_processing_uplift_multiplier_us": 1.10,
        "supports_function_calling": true,
        "supports_parallel_function_calling": true,
        "supports_pdf_input": true,
@ -19293,6 +19321,8 @@
        "mode": "chat",
        "output_cost_per_token": 1e-05,
        "output_cost_per_token_batches": 5e-06,
+        "regional_processing_uplift_multiplier_eu": 1.10,
+        "regional_processing_uplift_multiplier_us": 1.10,
        "supports_function_calling": true,
        "supports_parallel_function_calling": true,
        "supports_pdf_input": true,
@ -19581,6 +19611,8 @@
        "output_cost_per_token": 6e-07,
        "output_cost_per_token_batches": 3e-07,
        "output_cost_per_token_priority": 1e-06,
+        "regional_processing_uplift_multiplier_eu": 1.10,
+        "regional_processing_uplift_multiplier_us": 1.10,
        "supports_function_calling": true,
        "supports_parallel_function_calling": true,
        "supports_pdf_input": true,
@ -20284,6 +20316,8 @@
        "output_cost_per_token": 1e-05,
        "output_cost_per_token_flex": 5e-06,
        "output_cost_per_token_priority": 2e-05,
+        "regional_processing_uplift_multiplier_eu": 1.10,
+        "regional_processing_uplift_multiplier_us": 1.10,
        "supported_endpoints": [
            "/v1/chat/completions",
            "/v1/batch",
@ -21206,6 +21240,8 @@
        "mode": "responses",
        "output_cost_per_token": 0.00012,
        "output_cost_per_token_batches": 6e-05,
+        "regional_processing_uplift_multiplier_eu": 1.10,
+        "regional_processing_uplift_multiplier_us": 1.10,
        "supported_endpoints": [
            "/v1/batch",
            "/v1/responses"
@ -21612,6 +21648,8 @@
        "output_cost_per_token": 2e-06,
        "output_cost_per_token_flex": 1e-06,
        "output_cost_per_token_priority": 3.6e-06,
+        "regional_processing_uplift_multiplier_eu": 1.10,
+        "regional_processing_uplift_multiplier_us": 1.10,
        "supported_endpoints": [
            "/v1/chat/completions",
            "/v1/batch",
@ -21693,6 +21731,8 @@
        "max_input_tokens": 272000,
        "max_output_tokens": 128000,
        "max_tokens": 128000,
+        "regional_processing_uplift_multiplier_eu": 1.10,
+        "regional_processing_uplift_multiplier_us": 1.10,
        "mode": "chat",
        "output_cost_per_token": 4e-07,
        "output_cost_per_token_flex": 2e-07,
@ -28243,10 +28283,10 @@
        "supports_tool_choice": true
    },
    "openrouter/xiaomi/mimo-v2-flash": {
-        "input_cost_per_token": 9e-08,
-        "output_cost_per_token": 2.9e-07,
+        "input_cost_per_token": 1e-07,
+        "output_cost_per_token": 3e-07,
        "cache_creation_input_token_cost": 0.0,
-        "cache_read_input_token_cost": 0.0,
+        "cache_read_input_token_cost": 1e-08,
        "litellm_provider": "openrouter",
        "max_input_tokens": 262144,
        "max_output_tokens": 16384,
@ -28256,7 +28296,43 @@
        "supports_tool_choice": true,
        "supports_reasoning": true,
        "supports_vision": false,
-        "supports_prompt_caching": false
+        "supports_prompt_caching": true
+    },
+    "openrouter/xiaomi/mimo-v2.5-pro": {
+        "input_cost_per_token": 1e-06,
+        "output_cost_per_token": 3e-06,
+        "cache_creation_input_token_cost": 0.0,
+        "cache_read_input_token_cost": 2e-07,
+        "litellm_provider": "openrouter",
+        "max_input_tokens": 1048576,
+        "max_output_tokens": 16384,
+        "max_tokens": 16384,
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_tool_choice": true,
+        "supports_reasoning": true,
+        "supports_vision": false,
+        "supports_response_schema": true,
+        "supports_prompt_caching": true
+    },
+    "openrouter/xiaomi/mimo-v2.5": {
+        "input_cost_per_token": 4e-07,
+        "output_cost_per_token": 2e-06,
+        "cache_creation_input_token_cost": 0.0,
+        "cache_read_input_token_cost": 8e-08,
+        "litellm_provider": "openrouter",
+        "max_input_tokens": 1048576,
+        "max_output_tokens": 131072,
+        "max_tokens": 131072,
+        "mode": "chat",
+        "supports_function_calling": true,
+        "supports_tool_choice": true,
+        "supports_reasoning": true,
+        "supports_vision": true,
+        "supports_audio_input": true,
+        "supports_video_input": true,
+        "supports_response_schema": true,
+        "supports_prompt_caching": true
    },
    "openrouter/z-ai/glm-4.7": {
        "input_cost_per_token": 4e-07,
@ -28987,14 +29063,16 @@
        "mode": "responses",
        "supports_web_search": true,
        "supports_reasoning": false,
-        "supports_function_calling": true
+        "supports_function_calling": true,
+        "supports_output_config": true
    },
    "perplexity/anthropic/claude-opus-4-7": {
        "litellm_provider": "perplexity",
        "mode": "responses",
        "supports_web_search": true,
        "supports_reasoning": false,
-        "supports_function_calling": true
+        "supports_function_calling": true,
+        "supports_output_config": true
    },
    "perplexity/anthropic/claude-opus-4-5": {
        "litellm_provider": "perplexity",
@ -33405,6 +33483,7 @@
        "supports_tool_choice": true,
        "supports_vision": true,
        "tool_use_system_prompt_tokens": 346,
+        "supports_output_config": true,
        "supports_max_reasoning_effort": true,
        "supports_minimal_reasoning_effort": true
    },
@ -33433,6 +33512,7 @@
        "supports_tool_choice": true,
        "supports_vision": true,
        "tool_use_system_prompt_tokens": 346,
+        "supports_output_config": true,
        "supports_max_reasoning_effort": true,
        "supports_minimal_reasoning_effort": true
    },
@ -33546,6 +33626,7 @@
            "search_context_size_low": 0.01,
            "search_context_size_medium": 0.01
        },
+        "supports_output_config": true,
        "supports_minimal_reasoning_effort": true
    },
    "vertex_ai/claude-sonnet-4-5@20250929": {
@ -40658,6 +40739,7 @@
            "search_context_size_low": 0.01,
            "search_context_size_medium": 0.01
        },
+        "supports_output_config": true,
        "supports_minimal_reasoning_effort": true
    },
    "duckduckgo/search": {
--- a/litellm/responses/main.py
+++ b/litellm/responses/main.py
@ -54,6 +54,7 @@ if TYPE_CHECKING:
 else:
    ResponseText = str  # Fallback for ResponseText import
 from litellm.litellm_core_utils.get_litellm_params import get_litellm_params
+from litellm.llms.openai.data_residency import infer_openai_data_residency
 from litellm.secret_managers.main import get_secret_str
 from litellm.types.responses.main import *
 from litellm.types.router import GenericLiteLLMParams
@ -1139,6 +1140,9 @@ def responses(
                "aresponses": _is_async,
                "litellm_call_id": litellm_call_id,
                "model_info": kwargs.get("model_info"),
+                "data_residency": infer_openai_data_residency(
+                    custom_llm_provider, litellm_params.api_base
+                ),
                "metadata": (
                    kwargs["litellm_metadata"]
                    if "litellm_metadata" in kwargs
@ -2032,6 +2036,9 @@ def compact_responses(
            litellm_params={
                **responses_api_request_params,
                "litellm_call_id": litellm_call_id,
+                "data_residency": infer_openai_data_residency(
+                    custom_llm_provider, litellm_params.api_base
+                ),
            },
            custom_llm_provider=custom_llm_provider,
        )
@ -2129,6 +2136,11 @@ async def _aresponses_websocket(
        api_key=api_key,
    )

+    litellm_params_dict["data_residency"] = infer_openai_data_residency(
+        _custom_llm_provider,
+        dynamic_api_base or litellm_params.api_base or litellm.api_base,
+    )
+
    litellm_logging_obj.update_from_kwargs(
        kwargs=kwargs,
        model=model,
--- a/litellm/types/utils.py
+++ b/litellm/types/utils.py
@ -219,6 +219,12 @@ class ModelInfoBase(ProviderSpecificModelInfo, total=False):
    output_cost_per_token_priority: Optional[
        float
    ]  # OpenAI priority service tier pricing
+    regional_processing_uplift_multiplier_eu: Optional[
+        float
+    ]  # OpenAI EU data-residency uplift multiplier applied to all token costs (e.g. 1.10 = +10%)
+    regional_processing_uplift_multiplier_us: Optional[
+        float
+    ]  # OpenAI US data-residency uplift multiplier applied to all token costs (e.g. 1.10 = +10%)
    output_cost_per_character: Optional[float]  # only for vertex ai models
    output_cost_per_audio_token: Optional[float]
    output_cost_per_token_above_128k_tokens: Optional[
@ -3601,6 +3607,20 @@ class ServiceTier(Enum):
    PRIORITY = "priority"


+class DataResidency(Enum):
+    """
+    OpenAI data-residency / regional-processing regions.
+
+    Inferred from the OpenAI api_base host (eu.api.openai.com -> EU,
+    us.api.openai.com -> US). Used to apply the regional-processing
+    cost uplift (see ``regional_processing_uplift_multiplier_<region>``
+    on ModelInfo).
+    """
+
+    US = "us"  # matches the ``_us`` suffix of regional_processing_uplift_multiplier_us
+    EU = "eu"  # matches the ``_eu`` suffix of regional_processing_uplift_multiplier_eu
+
+
 LLMResponseTypes = Union[
    ModelResponse,
    EmbeddingResponse,
--- a/litellm/utils.py
+++ b/litellm/utils.py
@ -5942,6 +5942,12 @@ def _get_model_info_helper(  # noqa: PLR0915
                output_cost_per_token_priority=_model_info.get(
                    "output_cost_per_token_priority", None
                ),
+                regional_processing_uplift_multiplier_eu=_model_info.get(
+                    "regional_processing_uplift_multiplier_eu", None
+                ),
+                regional_processing_uplift_multiplier_us=_model_info.get(
+                    "regional_processing_uplift_multiplier_us", None
+                ),
                output_cost_per_audio_token=_model_info.get(
                    "output_cost_per_audio_token", None
                ),
--- a/model_prices_and_context_window.json
+++ b/model_prices_and_context_window.json
@ -19050,6 +19050,8 @@
        "output_cost_per_token": 8e-06,
        "output_cost_per_token_batches": 4e-06,
        "output_cost_per_token_priority": 1.4e-05,
+        "regional_processing_uplift_multiplier_eu": 1.10,
+        "regional_processing_uplift_multiplier_us": 1.10,
        "supported_endpoints": [
            "/v1/chat/completions",
            "/v1/batch",
@ -19123,6 +19125,8 @@
        "output_cost_per_token": 1.6e-06,
        "output_cost_per_token_batches": 8e-07,
        "output_cost_per_token_priority": 2.8e-06,
+        "regional_processing_uplift_multiplier_eu": 1.10,
+        "regional_processing_uplift_multiplier_us": 1.10,
        "supported_endpoints": [
            "/v1/chat/completions",
            "/v1/batch",
@ -19196,6 +19200,8 @@
        "output_cost_per_token": 4e-07,
        "output_cost_per_token_batches": 2e-07,
        "output_cost_per_token_priority": 8e-07,
+        "regional_processing_uplift_multiplier_eu": 1.10,
+        "regional_processing_uplift_multiplier_us": 1.10,
        "supported_endpoints": [
            "/v1/chat/completions",
            "/v1/batch",
@ -19267,6 +19273,8 @@
        "output_cost_per_token": 1e-05,
        "output_cost_per_token_batches": 5e-06,
        "output_cost_per_token_priority": 1.7e-05,
+        "regional_processing_uplift_multiplier_eu": 1.10,
+        "regional_processing_uplift_multiplier_us": 1.10,
        "supports_function_calling": true,
        "supports_parallel_function_calling": true,
        "supports_pdf_input": true,
@ -19308,6 +19316,8 @@
        "mode": "chat",
        "output_cost_per_token": 1e-05,
        "output_cost_per_token_batches": 5e-06,
+        "regional_processing_uplift_multiplier_eu": 1.10,
+        "regional_processing_uplift_multiplier_us": 1.10,
        "supports_function_calling": true,
        "supports_parallel_function_calling": true,
        "supports_pdf_input": true,
@ -19329,6 +19339,8 @@
        "mode": "chat",
        "output_cost_per_token": 1e-05,
        "output_cost_per_token_batches": 5e-06,
+        "regional_processing_uplift_multiplier_eu": 1.10,
+        "regional_processing_uplift_multiplier_us": 1.10,
        "supports_function_calling": true,
        "supports_parallel_function_calling": true,
        "supports_pdf_input": true,
@ -19617,6 +19629,8 @@
        "output_cost_per_token": 6e-07,
        "output_cost_per_token_batches": 3e-07,
        "output_cost_per_token_priority": 1e-06,
+        "regional_processing_uplift_multiplier_eu": 1.10,
+        "regional_processing_uplift_multiplier_us": 1.10,
        "supports_function_calling": true,
        "supports_parallel_function_calling": true,
        "supports_pdf_input": true,
@ -20320,6 +20334,8 @@
        "output_cost_per_token": 1e-05,
        "output_cost_per_token_flex": 5e-06,
        "output_cost_per_token_priority": 2e-05,
+        "regional_processing_uplift_multiplier_eu": 1.10,
+        "regional_processing_uplift_multiplier_us": 1.10,
        "supported_endpoints": [
            "/v1/chat/completions",
            "/v1/batch",
@ -21242,6 +21258,8 @@
        "mode": "responses",
        "output_cost_per_token": 0.00012,
        "output_cost_per_token_batches": 6e-05,
+        "regional_processing_uplift_multiplier_eu": 1.10,
+        "regional_processing_uplift_multiplier_us": 1.10,
        "supported_endpoints": [
            "/v1/batch",
            "/v1/responses"
@ -21648,6 +21666,8 @@
        "output_cost_per_token": 2e-06,
        "output_cost_per_token_flex": 1e-06,
        "output_cost_per_token_priority": 3.6e-06,
+        "regional_processing_uplift_multiplier_eu": 1.10,
+        "regional_processing_uplift_multiplier_us": 1.10,
        "supported_endpoints": [
            "/v1/chat/completions",
            "/v1/batch",
@ -21729,6 +21749,8 @@
        "max_input_tokens": 272000,
        "max_output_tokens": 128000,
        "max_tokens": 128000,
+        "regional_processing_uplift_multiplier_eu": 1.10,
+        "regional_processing_uplift_multiplier_us": 1.10,
        "mode": "chat",
        "output_cost_per_token": 4e-07,
        "output_cost_per_token_flex": 2e-07,
--- a/tests/batches_tests/test_batch_custom_pricing.py
+++ b/tests/batches_tests/test_batch_custom_pricing.py
@ -145,6 +145,37 @@ def test_batch_cost_calculator_func_uses_custom_model_info():
    ), f"Expected total cost {expected}, got {cost}"


+@pytest.mark.parametrize("data_residency", ["eu", "us"])
+def test_batch_cost_calculator_applies_data_residency_uplift(
+    data_residency, monkeypatch
+):
+    """batch_cost_calculator should apply the regional uplift multiplier when
+    data_residency is set and the model carries a configured multiplier."""
+    monkeypatch.setenv("LITELLM_LOCAL_MODEL_COST_MAP", "True")  # undone by monkeypatch at teardown
+    prev_model_cost = litellm.model_cost  # snapshot the module-global map; restored in ``finally``
+    litellm.model_cost = litellm.get_model_cost_map(url="")  # NOTE(review): presumably loads the bundled JSON because of the env var above - confirm
+    try:
+        usage = Usage(prompt_tokens=1000, completion_tokens=500, total_tokens=1500)
+
+        base_prompt, base_completion = batch_cost_calculator(  # baseline: no data_residency
+            usage=usage,
+            model="gpt-5",
+            custom_llm_provider="openai",
+        )
+        regional_prompt, regional_completion = batch_cost_calculator(  # same call, region set
+            usage=usage,
+            model="gpt-5",
+            custom_llm_provider="openai",
+            data_residency=data_residency,
+        )
+
+        assert base_prompt > 0 and base_completion > 0  # a zero baseline would make the 1.10x checks vacuous
+        assert regional_prompt == pytest.approx(base_prompt * 1.10, rel=1e-9)  # 1.10 = +10% uplift
+        assert regional_completion == pytest.approx(base_completion * 1.10, rel=1e-9)
+    finally:
+        litellm.model_cost = prev_model_cost  # never leak the swapped map into other tests
+
+
@pytest.mark.asyncio
 async def test_calculate_batch_cost_and_usage_uses_custom_model_info():
    """calculate_batch_cost_and_usage should thread model_info."""
--- a/tests/test_litellm/litellm_core_utils/llm_cost_calc/test_llm_cost_calc_utils.py
+++ b/tests/test_litellm/litellm_core_utils/llm_cost_calc/test_llm_cost_calc_utils.py
@ -1418,3 +1418,123 @@ def test_image_count_prevents_text_tokens_fallback():
        f"got {prompt_cost}. text_tokens fallback may be double-charging."
    )
    assert completion_cost == 0.0
+
+
+# ---------------------------------------------------------------------------
+# Data-residency (OpenAI regional processing) tests
+# ---------------------------------------------------------------------------
+
+
+@pytest.fixture
+def _local_model_cost_map():  # swaps in a freshly loaded cost map for one test, then restores env + map
+    prev_env = os.environ.get("LITELLM_LOCAL_MODEL_COST_MAP")  # None means the variable was unset
+    prev_model_cost = litellm.model_cost  # snapshot of the module-global map
+    os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"  # set before loading; NOTE(review): presumably selects the bundled JSON - confirm
+    litellm.model_cost = litellm.get_model_cost_map(url="")
+    try:
+        yield  # the test body runs here
+    finally:
+        litellm.model_cost = prev_model_cost  # restore even if the test failed
+        if prev_env is None:
+            os.environ.pop("LITELLM_LOCAL_MODEL_COST_MAP", None)  # was unset before: unset it again
+        else:
+            os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = prev_env  # put the caller's value back
+
+
+@pytest.mark.parametrize("data_residency", ["eu", "us"])
+def test_data_residency_applies_uplift(data_residency, _local_model_cost_map):
+    """gpt-5 should apply the regional processing uplift multiplier when
+    data_residency is set."""
+    from litellm.types.utils import Usage
+
+    usage = Usage(prompt_tokens=1000, completion_tokens=500, total_tokens=1500)
+
+    base = generic_cost_per_token(  # baseline: no data_residency argument
+        model="gpt-5",
+        usage=usage,
+        custom_llm_provider="openai",
+    )
+    regional = generic_cost_per_token(  # identical call with the parametrized region
+        model="gpt-5",
+        usage=usage,
+        custom_llm_provider="openai",
+        data_residency=data_residency,
+    )
+
+    base_total = base[0] + base[1]  # NOTE(review): presumably (prompt_cost, completion_cost) - confirm
+    regional_total = regional[0] + regional[1]
+
+    assert base_total > 0  # a zero baseline would make the ratio checks vacuous
+    assert regional_total == pytest.approx(base_total * 1.10, rel=1e-9)  # 1.10 = +10% uplift
+    assert regional[0] == pytest.approx(base[0] * 1.10, rel=1e-9)  # uplift hits each component,
+    assert regional[1] == pytest.approx(base[1] * 1.10, rel=1e-9)  # not just the sum
+
+
+def test_data_residency_no_uplift_for_unmarked_model(_local_model_cost_map):
+    """A model without a regional_processing_uplift_multiplier_* entry should
+    fall back to base pricing, not error."""
+    from litellm.types.utils import Usage
+
+    usage = Usage(prompt_tokens=1000, completion_tokens=500, total_tokens=1500)
+
+    base = generic_cost_per_token(  # baseline: no data_residency argument
+        model="gpt-3.5-turbo",
+        usage=usage,
+        custom_llm_provider="openai",
+    )
+    with_residency = generic_cost_per_token(  # region set, but the model is expected to have no multiplier
+        model="gpt-3.5-turbo",
+        usage=usage,
+        custom_llm_provider="openai",
+        data_residency="eu",
+    )
+
+    assert base == with_residency  # exact equality: the region must be a strict no-op here
+
+
+def test_data_residency_none_no_uplift(_local_model_cost_map):
+    """data_residency=None should be a no-op even for models with a multiplier."""
+    from litellm.types.utils import Usage
+
+    usage = Usage(prompt_tokens=1000, completion_tokens=500, total_tokens=1500)
+
+    base = generic_cost_per_token(  # data_residency omitted
+        model="gpt-5",
+        usage=usage,
+        custom_llm_provider="openai",
+    )
+    explicit_none = generic_cost_per_token(  # data_residency passed explicitly as None
+        model="gpt-5",
+        usage=usage,
+        custom_llm_provider="openai",
+        data_residency=None,
+    )
+
+    assert base == explicit_none  # omitted and explicit None must price identically
+
+
+def test_data_residency_composes_with_service_tier(_local_model_cost_map):
+    """The uplift multiplies the priority-tier cost, not the standard one."""
+    from litellm.types.utils import Usage
+
+    usage = Usage(prompt_tokens=1000, completion_tokens=500, total_tokens=1500)
+
+    priority_base = generic_cost_per_token(  # priority tier, no region
+        model="gpt-5",
+        usage=usage,
+        custom_llm_provider="openai",
+        service_tier="priority",
+    )
+    priority_eu = generic_cost_per_token(  # priority tier + EU region
+        model="gpt-5",
+        usage=usage,
+        custom_llm_provider="openai",
+        service_tier="priority",
+        data_residency="eu",
+    )
+
+    priority_base_total = priority_base[0] + priority_base[1]
+    priority_eu_total = priority_eu[0] + priority_eu[1]
+
+    assert priority_base_total > 0  # a zero baseline would make the ratio check vacuous
+    assert priority_eu_total == pytest.approx(priority_base_total * 1.10, rel=1e-9)  # only totals are compared here
--- a/tests/test_litellm/litellm_core_utils/test_get_litellm_params.py
+++ b/tests/test_litellm/litellm_core_utils/test_get_litellm_params.py
@ -125,3 +125,40 @@ class TestGetLitellmParamsExplicitFields:
    def test_no_log_from_explicit_param(self):
        result = get_litellm_params(no_log=True)
        assert result["no-log"] is True
+
+
+class TestGetLitellmParamsDataResidency:
+    """Verify that data_residency is inferred from OpenAI regional api_base."""
+
+    def test_eu_host_resolves_to_eu(self):  # eu.api.openai.com -> "eu"
+        result = get_litellm_params(
+            custom_llm_provider="openai",
+            api_base="https://eu.api.openai.com/v1",
+        )
+        assert result["data_residency"] == "eu"
+
+    def test_us_host_resolves_to_us(self):  # us.api.openai.com -> "us"
+        result = get_litellm_params(
+            custom_llm_provider="openai",
+            api_base="https://us.api.openai.com/v1",
+        )
+        assert result["data_residency"] == "us"
+
+    def test_global_host_resolves_to_none(self):  # un-prefixed api.openai.com carries no region
+        result = get_litellm_params(
+            custom_llm_provider="openai",
+            api_base="https://api.openai.com/v1",
+        )
+        assert result["data_residency"] is None
+
+    def test_no_api_base_is_none(self):  # api_base omitted; key must still exist (indexed, not .get)
+        result = get_litellm_params(custom_llm_provider="openai")
+        assert result["data_residency"] is None
+
+    def test_non_openai_provider_does_not_resolve(self):
+        """Regional OpenAI host doesn't apply to other providers."""
+        result = get_litellm_params(
+            custom_llm_provider="anthropic",
+            api_base="https://eu.api.openai.com/v1",
+        )
+        assert result["data_residency"] is None
--- a/tests/test_litellm/llms/openai/responses/test_openai_responses_data_residency.py
+++ b/tests/test_litellm/llms/openai/responses/test_openai_responses_data_residency.py
@ -0,0 +1,134 @@
+"""
+Tests that data_residency is correctly populated on the litellm logging
+object's litellm_params for OpenAI Responses paths, even when
+custom_llm_provider is resolved from the model string inside responses()
+rather than passed explicitly.
+"""
+
+import json
+from unittest.mock import MagicMock, patch
+
+import litellm
+
+
+def _make_responses_api_response_body() -> dict:  # canned JSON body served by the mocked HTTP client
+    return {
+        "id": "resp-test",
+        "object": "response",
+        "created_at": 1234567890,
+        "model": "gpt-4.1",
+        "output": [
+            {
+                "type": "message",
+                "id": "msg-test",
+                "status": "completed",
+                "role": "assistant",
+                "content": [
+                    {
+                        "type": "output_text",
+                        "text": "ok",
+                        "annotations": [],
+                    }
+                ],
+            }
+        ],
+        "status": "completed",
+        "usage": {  # total_tokens equals input_tokens + output_tokens
+            "input_tokens": 1,
+            "output_tokens": 1,
+            "total_tokens": 2,
+        },
+    }
+
+
+def _make_mock_http_client(response_body: dict) -> MagicMock:  # client whose .post() returns a canned 200 reply
+    mock_client = MagicMock()
+    mock_response = MagicMock()
+    mock_response.status_code = 200
+    mock_response.headers = {"content-type": "application/json"}
+    mock_response.json.return_value = response_body  # parsed form of the body
+    mock_response.text = json.dumps(response_body)  # raw form, kept consistent with .json()
+    mock_client.post.return_value = mock_response  # only .post is configured with the canned reply
+    return mock_client
+
+
+def _capture_logging_obj():  # returns (capture dict, replacement for litellm.Logging.__init__)
+    captured = {}
+
+    real_init = litellm.Logging.__init__  # grabbed before patching so the spy can delegate to it
+
+    def init_spy(self, *args, **kwargs):
+        real_init(self, *args, **kwargs)  # construct normally first
+        captured["logging_obj"] = self  # each new instance overwrites the last: final one wins
+
+    return captured, init_spy
+
+
+def test_responses_eu_api_base_sets_data_residency():
+    """When api_base is a regional OpenAI host and custom_llm_provider is
+    inferred from the model (not passed explicitly), data_residency must end
+    up on the logging object's litellm_params so the cost calculator can apply
+    the regional uplift."""
+    mock_client = _make_mock_http_client(_make_responses_api_response_body())
+    captured, init_spy = _capture_logging_obj()
+
+    with (
+        patch(  # hand litellm the mock client in place of a real HTTP client
+            "litellm.llms.custom_httpx.llm_http_handler._get_httpx_client",
+            return_value=mock_client,
+        ),
+        patch.object(litellm.Logging, "__init__", init_spy),  # record the Logging instance
+    ):
+        litellm.responses(
+            model="gpt-4.1",  # custom_llm_provider deliberately not passed
+            input="hi",
+            api_base="https://eu.api.openai.com/v1",
+            api_key="test-key",
+        )
+
+    logging_obj = captured["logging_obj"]  # KeyError here means no litellm.Logging was constructed
+    assert logging_obj.litellm_params.get("data_residency") == "eu"
+
+
+def test_responses_us_api_base_sets_data_residency():  # US counterpart of the EU regional-host check
+    mock_client = _make_mock_http_client(_make_responses_api_response_body())
+    captured, init_spy = _capture_logging_obj()
+
+    with (
+        patch(  # hand litellm the mock client in place of a real HTTP client
+            "litellm.llms.custom_httpx.llm_http_handler._get_httpx_client",
+            return_value=mock_client,
+        ),
+        patch.object(litellm.Logging, "__init__", init_spy),  # record the Logging instance
+    ):
+        litellm.responses(
+            model="gpt-4.1",  # custom_llm_provider deliberately not passed
+            input="hi",
+            api_base="https://us.api.openai.com/v1",
+            api_key="test-key",
+        )
+
+    logging_obj = captured["logging_obj"]  # KeyError here means no litellm.Logging was constructed
+    assert logging_obj.litellm_params.get("data_residency") == "us"
+
+
+def test_responses_global_api_base_leaves_data_residency_none():  # negative case: non-regional host
+    mock_client = _make_mock_http_client(_make_responses_api_response_body())
+    captured, init_spy = _capture_logging_obj()
+
+    with (
+        patch(  # hand litellm the mock client in place of a real HTTP client
+            "litellm.llms.custom_httpx.llm_http_handler._get_httpx_client",
+            return_value=mock_client,
+        ),
+        patch.object(litellm.Logging, "__init__", init_spy),  # record the Logging instance
+    ):
+        litellm.responses(
+            model="gpt-4.1",  # custom_llm_provider deliberately not passed
+            input="hi",
+            api_base="https://api.openai.com/v1",
+            api_key="test-key",
+        )
+
+    logging_obj = captured["logging_obj"]  # KeyError here means no litellm.Logging was constructed
+    assert logging_obj.litellm_params.get("data_residency") is None  # .get: passes if the key is absent or None
--- a/tests/test_litellm/llms/openai/test_data_residency.py
+++ b/tests/test_litellm/llms/openai/test_data_residency.py
@ -0,0 +1,34 @@
+"""Tests for the OpenAI data-residency inference helper."""
+
+import pytest
+
+from litellm.llms.openai.data_residency import infer_openai_data_residency
+
+
+@pytest.mark.parametrize(
+    "api_base, expected",
+    [
+        ("https://eu.api.openai.com/v1", "eu"),
+        ("https://eu.api.openai.com", "eu"),  # path is optional
+        ("https://us.api.openai.com/v1", "us"),
+        ("https://us.api.openai.com", "us"),  # path is optional
+        ("https://EU.api.openai.com/v1", "eu"),  # host match is case-insensitive
+        ("https://api.openai.com/v1", None),  # global host: no region
+        ("https://api.openai.com", None),
+        ("https://example.com/v1", None),  # unrelated host
+        ("https://my-azure-endpoint.openai.azure.com/openai/deployments/foo", None),  # has "openai" in the host but is not a regional OpenAI host
+        ("", None),  # empty string
+        (None, None),  # api_base not provided
+        ("not a url", None),  # unparseable input yields None rather than raising
+    ],
+)
+def test_infer_openai_data_residency(api_base, expected):
+    assert infer_openai_data_residency("openai", api_base) == expected  # provider fixed to "openai"; only api_base varies
+
+
+@pytest.mark.parametrize("custom_llm_provider", [None, "anthropic", "azure", "bedrock"])
+def test_infer_openai_data_residency_non_openai_provider(custom_llm_provider):  # provider guard wins over the host
+    assert (
+        infer_openai_data_residency(custom_llm_provider, "https://eu.api.openai.com/v1")  # regional host on purpose
+        is None
+    )
--- a/tests/test_litellm/test_utils.py
+++ b/tests/test_litellm/test_utils.py
@ -737,6 +737,8 @@ def test_aaamodel_prices_and_context_window_json_is_valid():
                "output_cost_per_token_priority": {"type": "number"},
                "output_cost_per_token_above_200k_tokens_priority": {"type": "number"},
                "output_cost_per_token_above_272k_tokens_priority": {"type": "number"},
+                "regional_processing_uplift_multiplier_eu": {"type": "number"},
+                "regional_processing_uplift_multiplier_us": {"type": "number"},
                "input_cost_per_pixel": {"type": "number"},
                "input_cost_per_query": {"type": "number"},
                "input_cost_per_request": {"type": "number"},