Revert "Litellm staging 01 15 2026"

2026-01-17 06:31:34 +09:00 · 2026-01-17 06:31:34 +09:00 · 7aba0f738a
commit 7aba0f738a
parent 3e9e65c123
14 changed files with 60 additions and 416 deletions
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@ -144,8 +144,8 @@ jobs:
            pip install "google-generativeai==0.3.2"
            pip install "google-cloud-aiplatform==1.43.0"
            pip install pyarrow
-            pip install "boto3==1.40.61"
-            pip install "aioboto3==15.5.0"
+            pip install "boto3==1.36.0"
+            pip install "aioboto3==13.4.0"
            pip install langchain
            pip install lunary==0.2.5
            pip install "azure-identity==1.16.1"
@ -260,8 +260,8 @@ jobs:
            pip install "google-generativeai==0.3.2"
            pip install "google-cloud-aiplatform==1.43.0"
            pip install pyarrow
-            pip install "boto3==1.40.61"
-            pip install "aioboto3==15.5.0"
+            pip install "boto3==1.36.0"
+            pip install "aioboto3==13.4.0"
            pip install langchain
            pip install lunary==0.2.5
            pip install "azure-identity==1.16.1"
@ -367,8 +367,8 @@ jobs:
            pip install "google-generativeai==0.3.2"
            pip install "google-cloud-aiplatform==1.43.0"
            pip install pyarrow
-            pip install "boto3==1.40.61"
-            pip install "aioboto3==15.5.0"
+            pip install "boto3==1.36.0"
+            pip install "aioboto3==13.4.0"
            pip install langchain
            pip install lunary==0.2.5
            pip install "azure-identity==1.16.1"
@ -637,8 +637,8 @@ jobs:
            pip install "google-generativeai==0.3.2"
            pip install "google-cloud-aiplatform==1.43.0"
            pip install pyarrow
-            pip install "boto3==1.40.61"
-            pip install "aioboto3==15.5.0"
+            pip install "boto3==1.36.0"
+            pip install "aioboto3==13.4.0"
            pip install langchain
            pip install "langfuse>=2.0.0"
            pip install "logfire==0.29.0"
@ -759,8 +759,8 @@ jobs:
            pip install "google-cloud-aiplatform==1.43.0"
            pip install "google-genai==1.22.0"
            pip install pyarrow
-            pip install "boto3==1.40.61"
-            pip install "aioboto3==15.5.0"
+            pip install "boto3==1.36.0"
+            pip install "aioboto3==13.4.0"
            pip install langchain
            pip install lunary==0.2.5
            pip install "azure-identity==1.16.1"
@ -865,8 +865,8 @@ jobs:
            pip install "google-cloud-aiplatform==1.43.0"
            pip install "google-genai==1.22.0"
            pip install pyarrow
-            pip install "boto3==1.40.61"
-            pip install "aioboto3==15.5.0"
+            pip install "boto3==1.36.0"
+            pip install "aioboto3==13.4.0"
            pip install langchain
            pip install lunary==0.2.5
            pip install "azure-identity==1.16.1"
@ -972,8 +972,8 @@ jobs:
            pip install "google-cloud-aiplatform==1.43.0"
            pip install "google-genai==1.22.0"
            pip install pyarrow
-            pip install "boto3==1.40.61"
-            pip install "aioboto3==15.5.0"
+            pip install "boto3==1.36.0"
+            pip install "aioboto3==13.4.0"
            pip install langchain
            pip install lunary==0.2.5
            pip install "azure-identity==1.16.1"
@ -1198,7 +1198,7 @@ jobs:
            pip install "pytest-asyncio==0.21.1"
            pip install "respx==0.22.0"
            pip install "pydantic==2.10.2"
-            pip install "boto3==1.40.61"
+            pip install "boto3==1.36.0"
      # Run pytest and generate JUnit XML report
      - run:
          name: Run tests
@ -1879,7 +1879,7 @@ jobs:
            pip install aiohttp
            pip install openai
            pip install click
-            pip install "boto3==1.40.61"
+            pip install "boto3==1.36.0"
            pip install jinja2
            pip install "tokenizers==0.20.0"
            pip install "uvloop==0.21.0"
@ -2176,8 +2176,8 @@ jobs:
            pip install "google-generativeai==0.3.2"
            pip install "google-cloud-aiplatform==1.43.0"
            pip install pyarrow
-            pip install "boto3==1.40.61"
-            pip install "aioboto3==15.5.0"
+            pip install "boto3==1.36.0"
+            pip install "aioboto3==13.4.0"
            pip install langchain
            pip install "langfuse>=2.0.0"
            pip install "logfire==0.29.0"
@ -2316,8 +2316,8 @@ jobs:
            pip install "google-generativeai==0.3.2"
            pip install "google-cloud-aiplatform==1.43.0"
            pip install pyarrow
-            pip install "boto3==1.40.61"
-            pip install "aioboto3==15.5.0"
+            pip install "boto3==1.36.0"
+            pip install "aioboto3==13.4.0"
            pip install langchain
            pip install "langchain_mcp_adapters==0.0.5"
            pip install "langfuse>=2.0.0"
@ -2462,8 +2462,8 @@ jobs:
            pip install "google-generativeai==0.3.2"
            pip install "google-cloud-aiplatform==1.43.0"
            pip install pyarrow
-            pip install "boto3==1.40.61"
-            pip install "aioboto3==15.5.0"
+            pip install "boto3==1.36.0"
+            pip install "aioboto3==13.4.0"
            pip install langchain
            pip install "langfuse>=2.0.0"
            pip install "logfire==0.29.0"
@ -3118,7 +3118,7 @@ jobs:
            pip install "pytest==7.3.1"
            pip install "pytest-mock==3.12.0"
            pip install "pytest-asyncio==0.21.1"
-            pip install "boto3==1.40.61"
+            pip install "boto3==1.36.0"
            pip install "mypy==1.18.2"
            pip install pyarrow
            pip install numpydoc
--- a/litellm/proxy/common_request_processing.py
+++ b/litellm/proxy/common_request_processing.py
@ -49,9 +49,7 @@ if TYPE_CHECKING:
    ProxyConfig = _ProxyConfig
 else:
    ProxyConfig = Any
-from litellm.proxy.litellm_pre_call_utils import (
-    add_litellm_data_to_request,
-)
+from litellm.proxy.litellm_pre_call_utils import add_litellm_data_to_request
 from litellm.types.utils import ModelResponse, ModelResponseStream, Usage


--- a/litellm/proxy/litellm_pre_call_utils.py
+++ b/litellm/proxy/litellm_pre_call_utils.py
@ -846,9 +846,7 @@ async def add_litellm_data_to_request(  # noqa: PLR0915

    # Add headers to metadata for guardrails to access (fixes #17477)
    # Guardrails use metadata["headers"] to access request headers (e.g., User-Agent)
-    if _metadata_variable_name in data and isinstance(
-        data[_metadata_variable_name], dict
-    ):
+    if _metadata_variable_name in data and isinstance(data[_metadata_variable_name], dict):
        data[_metadata_variable_name]["headers"] = _headers

    # check for forwardable headers
@ -1316,9 +1314,6 @@ def move_guardrails_to_metadata(

    - If guardrails set on API Key metadata then sets guardrails on request metadata
    - If guardrails not set on API key, then checks request metadata
-
-    Note: We copy (not pop) guardrails from data to metadata to ensure deployment-level
-    guardrails merged by the router remain in kwargs for async_pre_call_deployment_hook.
    """
    # Check key-level guardrails
    _add_guardrails_from_key_or_team_metadata(
@ -1331,25 +1326,15 @@ def move_guardrails_to_metadata(
    #########################################################################################
    # User's might send "guardrails" in the request body, we need to add them to the request metadata.
    # Since downstream logic requires "guardrails" to be in the request metadata
-    #
-    # IMPORTANT: We copy instead of pop to preserve guardrails in kwargs for
-    # async_pre_call_deployment_hook (custom_guardrail.py:290) which checks kwargs.get("guardrails").
-    # This is the event-based approach for deployment-level guardrails.
    #########################################################################################
    if "guardrails" in data:
-        request_body_guardrails = data.get("guardrails")
-        if request_body_guardrails is None:
-            return
+        request_body_guardrails = data.pop("guardrails")
        if "guardrails" in data[_metadata_variable_name] and isinstance(
            data[_metadata_variable_name]["guardrails"], list
        ):
-            # Merge unique guardrails
-            existing = data[_metadata_variable_name]["guardrails"]
-            for g in request_body_guardrails:
-                if g not in existing:
-                    existing.append(g)
+            data[_metadata_variable_name]["guardrails"].extend(request_body_guardrails)
        else:
-            data[_metadata_variable_name]["guardrails"] = list(request_body_guardrails)
+            data[_metadata_variable_name]["guardrails"] = request_body_guardrails

    #########################################################################################
    if "guardrail_config" in data:
--- a/litellm/proxy/prisma_migration.py
+++ b/litellm/proxy/prisma_migration.py
@ -26,5 +26,3 @@ if exit_code != 0:
    verbose_proxy_logger.error(
        f"'prisma generate' stderr: {result.stderr}"
    )  # Log stderr
-
-sys.exit(exit_code)
--- a/litellm/proxy/proxy_cli.py
+++ b/litellm/proxy/proxy_cli.py
@ -187,7 +187,6 @@ class ProxyInitializationHelpers:
        ssl_certfile_path: str,
        ssl_keyfile_path: str,
        max_requests_before_restart: Optional[int] = None,
-        keepalive_timeout: Optional[int] = None,
    ):
        """
        Run litellm with `gunicorn`
@ -268,10 +267,6 @@ class ProxyInitializationHelpers:
            "access_log_format": '%(h)s %(l)s %(u)s %(t)s "%(r)s" %(s)s %(b)s',
        }

-        # Optional: set keepalive timeout if specified by user
-        if keepalive_timeout is not None:
-            gunicorn_options["keepalive"] = keepalive_timeout
-
        # Optional: recycle workers after N requests to mitigate memory growth
        if max_requests_before_restart is not None:
            gunicorn_options["max_requests"] = max_requests_before_restart
@ -494,7 +489,7 @@ class ProxyInitializationHelpers:
    "--keepalive_timeout",
    default=None,
    type=int,
-    help="Set the keepalive timeout in seconds. For Uvicorn: timeout_keep_alive parameter. For Gunicorn: keepalive parameter. Default: Uvicorn uses ~75s, Gunicorn uses 90s",
+    help="Set the uvicorn keepalive timeout in seconds (uvicorn timeout_keep_alive parameter)",
    envvar="KEEPALIVE_TIMEOUT",
 )
@click.option(
@ -864,7 +859,6 @@ def run_server(  # noqa: PLR0915
                ssl_certfile_path=ssl_certfile_path,
                ssl_keyfile_path=ssl_keyfile_path,
                max_requests_before_restart=max_requests_before_restart,
-                keepalive_timeout=keepalive_timeout,
            )
        elif run_hypercorn is True:
            ProxyInitializationHelpers._init_hypercorn_server(
--- a/litellm/proxy/video_endpoints/endpoints.py
+++ b/litellm/proxy/video_endpoints/endpoints.py
@ -256,9 +256,7 @@ async def video_status(
    # Resolve model_name from model_id if available
    # This allows the router to automatically inject litellm_params from the model config
    if model_id_from_decoded and llm_router:
-        resolved_model = llm_router.resolve_model_name_from_model_id(
-            model_id_from_decoded, custom_llm_provider=provider_from_id
-        )
+        resolved_model = llm_router.resolve_model_name_from_model_id(model_id_from_decoded)
        if resolved_model:
            data["model"] = resolved_model

@ -356,9 +354,7 @@ async def video_content(
    # Resolve model_name from model_id if available
    # This allows the router to automatically inject litellm_params from the model config
    if model_id_from_decoded and llm_router:
-        resolved_model = llm_router.resolve_model_name_from_model_id(
-            model_id_from_decoded, custom_llm_provider=provider_from_id
-        )
+        resolved_model = llm_router.resolve_model_name_from_model_id(model_id_from_decoded)
        if resolved_model:
            data["model"] = resolved_model
    # Process request using ProxyBaseLLMRequestProcessing
@ -470,9 +466,7 @@ async def video_remix(
    # Resolve model_name from model_id if available
    # This allows the router to automatically inject litellm_params from the model config
    if model_id_from_decoded and llm_router:
-        resolved_model = llm_router.resolve_model_name_from_model_id(
-            model_id_from_decoded, custom_llm_provider=provider_from_id
-        )
+        resolved_model = llm_router.resolve_model_name_from_model_id(model_id_from_decoded)
        if resolved_model:
            data["model"] = resolved_model

--- a/litellm/router.py
+++ b/litellm/router.py
@ -6971,7 +6971,7 @@ class Router:
        return candidate_id in self.model_id_to_deployment_index_map

    def resolve_model_name_from_model_id(
-        self, model_id: Optional[str], custom_llm_provider: Optional[str] = None
+        self, model_id: Optional[str]
    ) -> Optional[str]:
        """
        Resolve model_name from model_id.
@ -6981,15 +6981,12 @@ class Router:

        Strategy:
        1. First, check if model_id directly matches a model_name or deployment ID
-        2. If custom_llm_provider is provided, check with provider prefix
-        3. Search through router's model_list to find a match by litellm_params.model
-        4. If custom_llm_provider is provided, try to find a wildcard pattern match
-        5. Return the model_name if found, None otherwise
+        2. If not, search through router's model_list to find a match by litellm_params.model
+        3. Return the model_name if found, None otherwise

        Args:
            model_id: The model_id extracted from decoded video_id
                     (could be model_name or litellm_params.model value)
-            custom_llm_provider: The provider name (e.g., "vertex_ai") for wildcard matching

        Returns:
            model_name if found, None otherwise. If None, the request will fall through
@ -7002,26 +6999,15 @@ class Router:
        if model_id in self.model_names or self.has_model_id(model_id):
            return model_id

-        # Strategy 2: Check with provider prefix (e.g., "vertex_ai/veo-3.0-generate-preview")
-        if custom_llm_provider:
-            full_model_name = f"{custom_llm_provider}/{model_id}"
-            if full_model_name in self.model_names or self.has_model_id(full_model_name):
-                return full_model_name
-
-        # Strategy 3: Search through router's model_list to find by litellm_params.model
+        # Strategy 2: Search through router's model_list to find by litellm_params.model
        all_models = self.get_model_list(model_name=None)
        if not all_models:
            return None

-        # First pass: exact matches (non-wildcard)
        for deployment in all_models:
            litellm_params = deployment.get("litellm_params", {})
            actual_model = litellm_params.get("model")

-            # Skip wildcard patterns in first pass
-            if actual_model and actual_model.endswith("/*"):
-                continue
-
            # Match by exact match or by checking if actual_model ends with /model_id or :model_id
            # e.g., model_id="veo-2.0-generate-001" matches actual_model="vertex_ai/veo-2.0-generate-001"
            matches = (
@ -7035,19 +7021,6 @@ class Router:
                if model_name:
                    return model_name

-        # Strategy 4: Wildcard patterns using PatternMatchRouter
-        # For video status/content, we need to match model_id like "veo-3.0-generate-preview"
-        # to wildcard patterns like "vertex_ai/*"
-        if custom_llm_provider:
-            full_model_name = f"{custom_llm_provider}/{model_id}"
-            pattern_deployments = self.pattern_router.route(full_model_name)
-            if pattern_deployments:
-                # Return the first matching wildcard model_name
-                for pattern_deployment in pattern_deployments:
-                    matched_model_name = pattern_deployment.get("model_name")
-                    if matched_model_name:
-                        return matched_model_name
-
        # No match found
        return None

--- a/model_prices_and_context_window.json
+++ b/model_prices_and_context_window.json
@ -10201,48 +10201,6 @@
        "mode": "completion",
        "output_cost_per_token": 5e-07
    },
-    "deepseek-v3-2-251201": {
-        "input_cost_per_token": 0.0,
-        "litellm_provider": "volcengine",
-        "max_input_tokens": 98304,
-        "max_output_tokens": 32768,
-        "max_tokens": 32768,
-        "mode": "chat",
-        "output_cost_per_token": 0.0,
-        "supports_assistant_prefill": true,
-        "supports_function_calling": true,
-        "supports_prompt_caching": true,
-        "supports_reasoning": true,
-        "supports_tool_choice": true
-    },
-    "glm-4-7-251222": {
-        "input_cost_per_token": 0.0,
-        "litellm_provider": "volcengine",
-        "max_input_tokens": 204800,
-        "max_output_tokens": 131072,
-        "max_tokens": 131072,
-        "mode": "chat",
-        "output_cost_per_token": 0.0,
-        "supports_assistant_prefill": true,
-        "supports_function_calling": true,
-        "supports_prompt_caching": true,
-        "supports_reasoning": true,
-        "supports_tool_choice": true
-    },
-    "kimi-k2-thinking-251104": {
-        "input_cost_per_token": 0.0,
-        "litellm_provider": "volcengine",
-        "max_input_tokens": 229376,
-        "max_output_tokens": 32768,
-        "max_tokens": 32768,
-        "mode": "chat",
-        "output_cost_per_token": 0.0,
-        "supports_assistant_prefill": true,
-        "supports_function_calling": true,
-        "supports_prompt_caching": true,
-        "supports_reasoning": true,
-        "supports_tool_choice": true
-    },
    "doubao-embedding": {
        "input_cost_per_token": 0.0,
        "litellm_provider": "volcengine",
--- a/poetry.lock
+++ b/poetry.lock
@ -1,4 +1,4 @@
-# This file is automatically @generated by Poetry 2.2.1 and should not be changed by hand.
+# This file is automatically @generated by Poetry 2.2.0 and should not be changed by hand.

 [[package]]
 name = "aiofiles"
@ -525,36 +525,36 @@ files = [

 [[package]]
 name = "boto3"
-version = "1.40.61"
+version = "1.36.0"
 description = "The AWS SDK for Python"
 optional = true
-python-versions = ">=3.9"
+python-versions = ">=3.8"
 groups = ["main"]
 markers = "extra == \"proxy\""
 files = [
-    {file = "boto3-1.40.61-py3-none-any.whl", hash = "sha256:6b9c57b2a922b5d8c17766e29ed792586a818098efe84def27c8f582b33f898c"},
-    {file = "boto3-1.40.61.tar.gz", hash = "sha256:d6c56277251adf6c2bdd25249feae625abe4966831676689ff23b4694dea5b12"},
+    {file = "boto3-1.36.0-py3-none-any.whl", hash = "sha256:d0ca7a58ce25701a52232cc8df9d87854824f1f2964b929305722ebc7959d5a9"},
+    {file = "boto3-1.36.0.tar.gz", hash = "sha256:159898f51c2997a12541c0e02d6e5a8fe2993ddb307b9478fd9a339f98b57e00"},
 ]

 [package.dependencies]
-botocore = ">=1.40.61,<1.41.0"
+botocore = ">=1.36.0,<1.37.0"
 jmespath = ">=0.7.1,<2.0.0"
-s3transfer = ">=0.14.0,<0.15.0"
+s3transfer = ">=0.11.0,<0.12.0"

 [package.extras]
 crt = ["botocore[crt] (>=1.21.0,<2.0a0)"]

 [[package]]
 name = "botocore"
-version = "1.40.76"
+version = "1.36.26"
 description = "Low-level, data-driven core of boto 3."
 optional = true
-python-versions = ">=3.9"
+python-versions = ">=3.8"
 groups = ["main"]
 markers = "extra == \"proxy\""
 files = [
-    {file = "botocore-1.40.76-py3-none-any.whl", hash = "sha256:fe425d386e48ac64c81cbb4a7181688d813df2e2b4c78b95ebe833c9e868c6f4"},
-    {file = "botocore-1.40.76.tar.gz", hash = "sha256:2b16024d68b29b973005adfb5039adfe9099ebe772d40a90ca89f2e165c495dc"},
+    {file = "botocore-1.36.26-py3-none-any.whl", hash = "sha256:4e3f19913887a58502e71ef8d696fe7eaa54de7813ff73390cd5883f837dfa6e"},
+    {file = "botocore-1.36.26.tar.gz", hash = "sha256:4a63bcef7ecf6146fd3a61dc4f9b33b7473b49bdaf1770e9aaca6eee0c9eab62"},
 ]

 [package.dependencies]
@ -566,7 +566,7 @@ urllib3 = [
 ]

 [package.extras]
-crt = ["awscrt (==0.28.4)"]
+crt = ["awscrt (==0.23.8)"]

 [[package]]
 name = "cachetools"
@ -6255,22 +6255,22 @@ files = [

 [[package]]
 name = "s3transfer"
-version = "0.14.0"
+version = "0.11.3"
 description = "An Amazon S3 Transfer Manager"
 optional = true
-python-versions = ">=3.9"
+python-versions = ">=3.8"
 groups = ["main"]
 markers = "extra == \"proxy\""
 files = [
-    {file = "s3transfer-0.14.0-py3-none-any.whl", hash = "sha256:ea3b790c7077558ed1f02a3072fb3cb992bbbd253392f4b6e9e8976941c7d456"},
-    {file = "s3transfer-0.14.0.tar.gz", hash = "sha256:eff12264e7c8b4985074ccce27a3b38a485bb7f7422cc8046fee9be4983e4125"},
+    {file = "s3transfer-0.11.3-py3-none-any.whl", hash = "sha256:ca855bdeb885174b5ffa95b9913622459d4ad8e331fc98eb01e6d5eb6a30655d"},
+    {file = "s3transfer-0.11.3.tar.gz", hash = "sha256:edae4977e3a122445660c7c114bba949f9d191bae3b34a096f18a1c8c354527a"},
 ]

 [package.dependencies]
-botocore = ">=1.37.4,<2.0a.0"
+botocore = ">=1.36.0,<2.0a.0"

 [package.extras]
-crt = ["botocore[crt] (>=1.37.4,<2.0a.0)"]
+crt = ["botocore[crt] (>=1.36.0,<2.0a.0)"]

 [[package]]
 name = "scikit-learn"
@ -7981,4 +7981,4 @@ utils = ["numpydoc"]
 [metadata]
 lock-version = "2.1"
 python-versions = ">=3.9,<4.0"
-content-hash = "f391c702cf58ef2ba7641acdc3ae13d7c8e672faede68c0a624bd2ba0fb46b12"
+content-hash = "ea62b77c662ab9fc486e421c576f0868bcde16d62a24703ee1f4916a0465ffb2"
--- a/pyproject.toml
+++ b/pyproject.toml
@ -56,7 +56,7 @@ google-cloud-iam = {version = "^2.19.1", optional = true}
 resend = {version = ">=0.8.0", optional = true}
 pynacl = {version = "^1.5.0", optional = true}
 websockets = {version = "^15.0.1", optional = true}
-boto3 = {version = "1.40.61", optional = true}
+boto3 = {version = "1.36.0", optional = true}
 redisvl = {version = "^0.4.1", optional = true, markers = "python_version >= '3.9' and python_version < '3.14'"}
 mcp = {version = "^1.21.2", optional = true, python = ">=3.10"}
 litellm-proxy-extras = {version = "0.4.22", optional = true}
--- a/requirements.txt
+++ b/requirements.txt
@ -10,7 +10,7 @@ uvicorn==0.31.1 # server dep
 gunicorn==23.0.0 # server dep
 fastuuid==0.13.5 # for uuid4
 uvloop==0.21.0 # uvicorn dep, gives us much better performance under load
-boto3==1.40.61 # aws bedrock/sagemaker calls
+boto3==1.36.0 # aws bedrock/sagemaker calls
 redis==5.2.1 # redis caching
 prisma==0.11.0 # for db
 nodejs-wheel-binaries==24.12.0 ## required by prisma for migrations, prevents runtime download (updated from nodejs-bin for security fixes)
@ -59,7 +59,7 @@ click==8.1.7 # for proxy cli
 rich==13.7.1 # for litellm proxy cli
 jinja2==3.1.6 # for prompt templates
 aiohttp==3.13.3 # for network calls
-aioboto3==15.5.0 # for async sagemaker calls
+aioboto3==13.4.0 # for async sagemaker calls
 tenacity==8.5.0  # for retrying requests, when litellm.num_retries set
 pydantic>=2.11,<3 # proxy + openai req. + mcp
 jsonschema>=4.23.0,<5.0.0 # validating json schema - aligned with openapi-core + mcp
--- a/tests/code_coverage_tests/license_cache.json
+++ b/tests/code_coverage_tests/license_cache.json
--- a/tests/test_litellm/proxy/test_proxy_cli.py
+++ b/tests/test_litellm/proxy/test_proxy_cli.py
@ -483,75 +483,6 @@ class TestProxyInitializationHelpers:
                # Verify that uvicorn.run was called again
                mock_uvicorn_run.assert_called_once()

-    @patch("litellm.proxy.proxy_cli.ProxyInitializationHelpers._run_gunicorn_server")
-    @patch("builtins.print")
-    def test_gunicorn_keepalive_timeout_flag(self, mock_print, mock_gunicorn):
-        """Test that the keepalive_timeout flag is properly passed to Gunicorn"""
-        from click.testing import CliRunner
-
-        from litellm.proxy.proxy_cli import run_server
-
-        runner = CliRunner()
-
-        mock_app = MagicMock()
-        mock_proxy_config = MagicMock()
-        mock_key_mgmt = MagicMock()
-        mock_save_worker_config = MagicMock()
-
-        with patch.dict(
-            "sys.modules",
-            {
-                "proxy_server": MagicMock(
-                    app=mock_app,
-                    ProxyConfig=mock_proxy_config,
-                    KeyManagementSettings=mock_key_mgmt,
-                    save_worker_config=mock_save_worker_config,
-                )
-            },
-        ):
-            result = runner.invoke(
-                run_server, ["--local", "--run_gunicorn", "--keepalive_timeout", "120"]
-            )
-            assert result.exit_code == 0
-
-            # Verify _run_gunicorn_server was called with keepalive_timeout
-            mock_gunicorn.assert_called_once()
-            call_kwargs = mock_gunicorn.call_args.kwargs
-            assert call_kwargs["keepalive_timeout"] == 120
-
-    @patch("litellm.proxy.proxy_cli.ProxyInitializationHelpers._run_gunicorn_server")
-    @patch("builtins.print")
-    def test_gunicorn_keepalive_default(self, mock_print, mock_gunicorn):
-        """Test that Gunicorn uses default 90s when keepalive_timeout not specified"""
-        from click.testing import CliRunner
-
-        from litellm.proxy.proxy_cli import run_server
-
-        runner = CliRunner()
-
-        mock_app = MagicMock()
-        mock_proxy_config = MagicMock()
-        mock_key_mgmt = MagicMock()
-        mock_save_worker_config = MagicMock()
-
-        with patch.dict(
-            "sys.modules",
-            {
-                "proxy_server": MagicMock(
-                    app=mock_app,
-                    ProxyConfig=mock_proxy_config,
-                    KeyManagementSettings=mock_key_mgmt,
-                    save_worker_config=mock_save_worker_config,
-                )
-            },
-        ):
-            result = runner.invoke(run_server, ["--local", "--run_gunicorn"])
-            assert result.exit_code == 0
-
-            # Verify default behavior (keepalive_timeout is None, Gunicorn will use 90)
-            call_kwargs = mock_gunicorn.call_args.kwargs
-            assert call_kwargs.get("keepalive_timeout") is None
-

 class TestHealthAppFactory:
    """Test cases for the health app factory module"""
--- a/tests/test_litellm/test_router.py
+++ b/tests/test_litellm/test_router.py
@ -2054,190 +2054,3 @@ async def test_aguardrail():

    assert result["result"] == "success"
    assert result["selected_guardrail"]["id"] == "guardrail-1"
-
-
-def test_resolve_model_name_from_model_id_wildcard_pattern():
-    """
-    Test that resolve_model_name_from_model_id correctly resolves model names
-    for wildcard patterns using PatternMatchRouter.
-
-    This is critical for video status/content endpoints where model_id extracted
-    from video_id (e.g., "veo-3.0-generate-preview") needs to match wildcard
-    patterns like "vertex_ai/*" to inject credentials from the model config.
-    """
-    # Set up router with wildcard pattern
-    router = litellm.Router(
-        model_list=[
-            {
-                "model_name": "vertex_ai/*",
-                "litellm_params": {
-                    "model": "vertex_ai/*",
-                    "vertex_project": "test-project",
-                    "vertex_location": "us-central1",
-                },
-            },
-            {
-                "model_name": "specific-model",
-                "litellm_params": {
-                    "model": "vertex_ai/gemini-pro",
-                    "vertex_project": "specific-project",
-                    "vertex_location": "us-east1",
-                },
-            },
-        ],
-    )
-
-    # Test Case 1: Wildcard pattern matching with custom_llm_provider
-    # This simulates video_id like "vertex_ai:veo-3.0-generate-preview:..."
-    result = router.resolve_model_name_from_model_id(
-        model_id="veo-3.0-generate-preview",
-        custom_llm_provider="vertex_ai",
-    )
-    assert result == "vertex_ai/*", f"Expected 'vertex_ai/*', got '{result}'"
-
-    # Test Case 2: Different model name should also match wildcard
-    result = router.resolve_model_name_from_model_id(
-        model_id="gemini-2.0-flash",
-        custom_llm_provider="vertex_ai",
-    )
-    assert result == "vertex_ai/*", f"Expected 'vertex_ai/*', got '{result}'"
-
-    # Test Case 3: Without custom_llm_provider, should not match wildcard
-    result = router.resolve_model_name_from_model_id(
-        model_id="veo-3.0-generate-preview",
-        custom_llm_provider=None,
-    )
-    assert result is None, f"Expected None without provider, got '{result}'"
-
-    # Test Case 4: Exact model_name match should take precedence
-    result = router.resolve_model_name_from_model_id(
-        model_id="specific-model",
-        custom_llm_provider="vertex_ai",
-    )
-    assert result == "specific-model", f"Expected 'specific-model', got '{result}'"
-
-
-def test_resolve_model_name_from_model_id_exact_match():
-    """
-    Test that resolve_model_name_from_model_id correctly resolves exact model names.
-    """
-    router = litellm.Router(
-        model_list=[
-            {
-                "model_name": "my-gpt-model",
-                "litellm_params": {
-                    "model": "azure/gpt-4",
-                    "api_key": "test-key",
-                },
-            },
-            {
-                "model_name": "veo-model",
-                "litellm_params": {
-                    "model": "vertex_ai/veo-2.0-generate-001",
-                    "vertex_project": "test-project",
-                },
-            },
-        ],
-    )
-
-    # Test Case 1: Direct model_name match
-    result = router.resolve_model_name_from_model_id(model_id="my-gpt-model")
-    assert result == "my-gpt-model", f"Expected 'my-gpt-model', got '{result}'"
-
-    # Test Case 2: Match by litellm_params.model suffix
-    result = router.resolve_model_name_from_model_id(model_id="veo-2.0-generate-001")
-    assert result == "veo-model", f"Expected 'veo-model', got '{result}'"
-
-    # Test Case 3: Non-existent model should return None
-    result = router.resolve_model_name_from_model_id(model_id="non-existent-model")
-    assert result is None, f"Expected None, got '{result}'"
-
-
-def test_resolve_model_name_from_model_id_provider_prefix():
-    """
-    Test that resolve_model_name_from_model_id handles provider prefix correctly.
-    """
-    router = litellm.Router(
-        model_list=[
-            {
-                "model_name": "vertex_ai/gemini-pro",
-                "litellm_params": {
-                    "model": "vertex_ai/gemini-pro",
-                    "vertex_project": "test-project",
-                },
-            },
-        ],
-    )
-
-    # Test Case 1: Full model name with provider prefix as model_name
-    result = router.resolve_model_name_from_model_id(
-        model_id="vertex_ai/gemini-pro",
-        custom_llm_provider=None,
-    )
-    assert result == "vertex_ai/gemini-pro", f"Expected 'vertex_ai/gemini-pro', got '{result}'"
-
-    # Test Case 2: Model ID with provider prefix constructed from custom_llm_provider
-    result = router.resolve_model_name_from_model_id(
-        model_id="gemini-pro",
-        custom_llm_provider="vertex_ai",
-    )
-    assert result == "vertex_ai/gemini-pro", f"Expected 'vertex_ai/gemini-pro', got '{result}'"
-
-
-def test_resolve_model_name_from_model_id_multiple_wildcards():
-    """
-    Test that resolve_model_name_from_model_id works with multiple wildcard patterns.
-    """
-    router = litellm.Router(
-        model_list=[
-            {
-                "model_name": "vertex_ai/*",
-                "litellm_params": {
-                    "model": "vertex_ai/*",
-                    "vertex_project": "vertex-project",
-                },
-            },
-            {
-                "model_name": "openai/*",
-                "litellm_params": {
-                    "model": "openai/*",
-                    "api_key": "openai-key",
-                },
-            },
-            {
-                "model_name": "anthropic/*",
-                "litellm_params": {
-                    "model": "anthropic/*",
-                    "api_key": "anthropic-key",
-                },
-            },
-        ],
-    )
-
-    # Test Case 1: Match vertex_ai wildcard
-    result = router.resolve_model_name_from_model_id(
-        model_id="veo-3.0-generate-preview",
-        custom_llm_provider="vertex_ai",
-    )
-    assert result == "vertex_ai/*", f"Expected 'vertex_ai/*', got '{result}'"
-
-    # Test Case 2: Match openai wildcard
-    result = router.resolve_model_name_from_model_id(
-        model_id="gpt-4o",
-        custom_llm_provider="openai",
-    )
-    assert result == "openai/*", f"Expected 'openai/*', got '{result}'"
-
-    # Test Case 3: Match anthropic wildcard
-    result = router.resolve_model_name_from_model_id(
-        model_id="claude-3-opus",
-        custom_llm_provider="anthropic",
-    )
-    assert result == "anthropic/*", f"Expected 'anthropic/*', got '{result}'"
-
-    # Test Case 4: Non-matching provider should return None
-    result = router.resolve_model_name_from_model_id(
-        model_id="some-model",
-        custom_llm_provider="bedrock",
-    )
-    assert result is None, f"Expected None for non-matching provider, got '{result}'"