Revert "Litellm staging 01 15 2026"

This commit is contained in:
YutaSaito 2026-01-17 06:31:34 +09:00 committed by GitHub
parent 3e9e65c123
commit 7aba0f738a
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
14 changed files with 60 additions and 416 deletions

View File

@ -144,8 +144,8 @@ jobs:
pip install "google-generativeai==0.3.2"
pip install "google-cloud-aiplatform==1.43.0"
pip install pyarrow
pip install "boto3==1.40.61"
pip install "aioboto3==15.5.0"
pip install "boto3==1.36.0"
pip install "aioboto3==13.4.0"
pip install langchain
pip install lunary==0.2.5
pip install "azure-identity==1.16.1"
@ -260,8 +260,8 @@ jobs:
pip install "google-generativeai==0.3.2"
pip install "google-cloud-aiplatform==1.43.0"
pip install pyarrow
pip install "boto3==1.40.61"
pip install "aioboto3==15.5.0"
pip install "boto3==1.36.0"
pip install "aioboto3==13.4.0"
pip install langchain
pip install lunary==0.2.5
pip install "azure-identity==1.16.1"
@ -367,8 +367,8 @@ jobs:
pip install "google-generativeai==0.3.2"
pip install "google-cloud-aiplatform==1.43.0"
pip install pyarrow
pip install "boto3==1.40.61"
pip install "aioboto3==15.5.0"
pip install "boto3==1.36.0"
pip install "aioboto3==13.4.0"
pip install langchain
pip install lunary==0.2.5
pip install "azure-identity==1.16.1"
@ -637,8 +637,8 @@ jobs:
pip install "google-generativeai==0.3.2"
pip install "google-cloud-aiplatform==1.43.0"
pip install pyarrow
pip install "boto3==1.40.61"
pip install "aioboto3==15.5.0"
pip install "boto3==1.36.0"
pip install "aioboto3==13.4.0"
pip install langchain
pip install "langfuse>=2.0.0"
pip install "logfire==0.29.0"
@ -759,8 +759,8 @@ jobs:
pip install "google-cloud-aiplatform==1.43.0"
pip install "google-genai==1.22.0"
pip install pyarrow
pip install "boto3==1.40.61"
pip install "aioboto3==15.5.0"
pip install "boto3==1.36.0"
pip install "aioboto3==13.4.0"
pip install langchain
pip install lunary==0.2.5
pip install "azure-identity==1.16.1"
@ -865,8 +865,8 @@ jobs:
pip install "google-cloud-aiplatform==1.43.0"
pip install "google-genai==1.22.0"
pip install pyarrow
pip install "boto3==1.40.61"
pip install "aioboto3==15.5.0"
pip install "boto3==1.36.0"
pip install "aioboto3==13.4.0"
pip install langchain
pip install lunary==0.2.5
pip install "azure-identity==1.16.1"
@ -972,8 +972,8 @@ jobs:
pip install "google-cloud-aiplatform==1.43.0"
pip install "google-genai==1.22.0"
pip install pyarrow
pip install "boto3==1.40.61"
pip install "aioboto3==15.5.0"
pip install "boto3==1.36.0"
pip install "aioboto3==13.4.0"
pip install langchain
pip install lunary==0.2.5
pip install "azure-identity==1.16.1"
@ -1198,7 +1198,7 @@ jobs:
pip install "pytest-asyncio==0.21.1"
pip install "respx==0.22.0"
pip install "pydantic==2.10.2"
pip install "boto3==1.40.61"
pip install "boto3==1.36.0"
# Run pytest and generate JUnit XML report
- run:
name: Run tests
@ -1879,7 +1879,7 @@ jobs:
pip install aiohttp
pip install openai
pip install click
pip install "boto3==1.40.61"
pip install "boto3==1.36.0"
pip install jinja2
pip install "tokenizers==0.20.0"
pip install "uvloop==0.21.0"
@ -2176,8 +2176,8 @@ jobs:
pip install "google-generativeai==0.3.2"
pip install "google-cloud-aiplatform==1.43.0"
pip install pyarrow
pip install "boto3==1.40.61"
pip install "aioboto3==15.5.0"
pip install "boto3==1.36.0"
pip install "aioboto3==13.4.0"
pip install langchain
pip install "langfuse>=2.0.0"
pip install "logfire==0.29.0"
@ -2316,8 +2316,8 @@ jobs:
pip install "google-generativeai==0.3.2"
pip install "google-cloud-aiplatform==1.43.0"
pip install pyarrow
pip install "boto3==1.40.61"
pip install "aioboto3==15.5.0"
pip install "boto3==1.36.0"
pip install "aioboto3==13.4.0"
pip install langchain
pip install "langchain_mcp_adapters==0.0.5"
pip install "langfuse>=2.0.0"
@ -2462,8 +2462,8 @@ jobs:
pip install "google-generativeai==0.3.2"
pip install "google-cloud-aiplatform==1.43.0"
pip install pyarrow
pip install "boto3==1.40.61"
pip install "aioboto3==15.5.0"
pip install "boto3==1.36.0"
pip install "aioboto3==13.4.0"
pip install langchain
pip install "langfuse>=2.0.0"
pip install "logfire==0.29.0"
@ -3118,7 +3118,7 @@ jobs:
pip install "pytest==7.3.1"
pip install "pytest-mock==3.12.0"
pip install "pytest-asyncio==0.21.1"
pip install "boto3==1.40.61"
pip install "boto3==1.36.0"
pip install "mypy==1.18.2"
pip install pyarrow
pip install numpydoc

View File

@ -49,9 +49,7 @@ if TYPE_CHECKING:
ProxyConfig = _ProxyConfig
else:
ProxyConfig = Any
from litellm.proxy.litellm_pre_call_utils import (
add_litellm_data_to_request,
)
from litellm.proxy.litellm_pre_call_utils import add_litellm_data_to_request
from litellm.types.utils import ModelResponse, ModelResponseStream, Usage

View File

@ -846,9 +846,7 @@ async def add_litellm_data_to_request( # noqa: PLR0915
# Add headers to metadata for guardrails to access (fixes #17477)
# Guardrails use metadata["headers"] to access request headers (e.g., User-Agent)
if _metadata_variable_name in data and isinstance(
data[_metadata_variable_name], dict
):
if _metadata_variable_name in data and isinstance(data[_metadata_variable_name], dict):
data[_metadata_variable_name]["headers"] = _headers
# check for forwardable headers
@ -1316,9 +1314,6 @@ def move_guardrails_to_metadata(
- If guardrails set on API Key metadata then sets guardrails on request metadata
- If guardrails not set on API key, then checks request metadata
Note: We copy (not pop) guardrails from data to metadata to ensure deployment-level
guardrails merged by the router remain in kwargs for async_pre_call_deployment_hook.
"""
# Check key-level guardrails
_add_guardrails_from_key_or_team_metadata(
@ -1331,25 +1326,15 @@ def move_guardrails_to_metadata(
#########################################################################################
# Users might send "guardrails" in the request body; we need to add them to the request metadata.
# Since downstream logic requires "guardrails" to be in the request metadata
#
# IMPORTANT: We copy instead of pop to preserve guardrails in kwargs for
# async_pre_call_deployment_hook (custom_guardrail.py:290) which checks kwargs.get("guardrails").
# This is the event-based approach for deployment-level guardrails.
#########################################################################################
if "guardrails" in data:
request_body_guardrails = data.get("guardrails")
if request_body_guardrails is None:
return
request_body_guardrails = data.pop("guardrails")
if "guardrails" in data[_metadata_variable_name] and isinstance(
data[_metadata_variable_name]["guardrails"], list
):
# Merge unique guardrails
existing = data[_metadata_variable_name]["guardrails"]
for g in request_body_guardrails:
if g not in existing:
existing.append(g)
data[_metadata_variable_name]["guardrails"].extend(request_body_guardrails)
else:
data[_metadata_variable_name]["guardrails"] = list(request_body_guardrails)
data[_metadata_variable_name]["guardrails"] = request_body_guardrails
#########################################################################################
if "guardrail_config" in data:

View File

@ -26,5 +26,3 @@ if exit_code != 0:
verbose_proxy_logger.error(
f"'prisma generate' stderr: {result.stderr}"
) # Log stderr
sys.exit(exit_code)

View File

@ -187,7 +187,6 @@ class ProxyInitializationHelpers:
ssl_certfile_path: str,
ssl_keyfile_path: str,
max_requests_before_restart: Optional[int] = None,
keepalive_timeout: Optional[int] = None,
):
"""
Run litellm with `gunicorn`
@ -268,10 +267,6 @@ class ProxyInitializationHelpers:
"access_log_format": '%(h)s %(l)s %(u)s %(t)s "%(r)s" %(s)s %(b)s',
}
# Optional: set keepalive timeout if specified by user
if keepalive_timeout is not None:
gunicorn_options["keepalive"] = keepalive_timeout
# Optional: recycle workers after N requests to mitigate memory growth
if max_requests_before_restart is not None:
gunicorn_options["max_requests"] = max_requests_before_restart
@ -494,7 +489,7 @@ class ProxyInitializationHelpers:
"--keepalive_timeout",
default=None,
type=int,
help="Set the keepalive timeout in seconds. For Uvicorn: timeout_keep_alive parameter. For Gunicorn: keepalive parameter. Default: Uvicorn uses ~75s, Gunicorn uses 90s",
help="Set the uvicorn keepalive timeout in seconds (uvicorn timeout_keep_alive parameter)",
envvar="KEEPALIVE_TIMEOUT",
)
@click.option(
@ -864,7 +859,6 @@ def run_server( # noqa: PLR0915
ssl_certfile_path=ssl_certfile_path,
ssl_keyfile_path=ssl_keyfile_path,
max_requests_before_restart=max_requests_before_restart,
keepalive_timeout=keepalive_timeout,
)
elif run_hypercorn is True:
ProxyInitializationHelpers._init_hypercorn_server(

View File

@ -256,9 +256,7 @@ async def video_status(
# Resolve model_name from model_id if available
# This allows the router to automatically inject litellm_params from the model config
if model_id_from_decoded and llm_router:
resolved_model = llm_router.resolve_model_name_from_model_id(
model_id_from_decoded, custom_llm_provider=provider_from_id
)
resolved_model = llm_router.resolve_model_name_from_model_id(model_id_from_decoded)
if resolved_model:
data["model"] = resolved_model
@ -356,9 +354,7 @@ async def video_content(
# Resolve model_name from model_id if available
# This allows the router to automatically inject litellm_params from the model config
if model_id_from_decoded and llm_router:
resolved_model = llm_router.resolve_model_name_from_model_id(
model_id_from_decoded, custom_llm_provider=provider_from_id
)
resolved_model = llm_router.resolve_model_name_from_model_id(model_id_from_decoded)
if resolved_model:
data["model"] = resolved_model
# Process request using ProxyBaseLLMRequestProcessing
@ -470,9 +466,7 @@ async def video_remix(
# Resolve model_name from model_id if available
# This allows the router to automatically inject litellm_params from the model config
if model_id_from_decoded and llm_router:
resolved_model = llm_router.resolve_model_name_from_model_id(
model_id_from_decoded, custom_llm_provider=provider_from_id
)
resolved_model = llm_router.resolve_model_name_from_model_id(model_id_from_decoded)
if resolved_model:
data["model"] = resolved_model

View File

@ -6971,7 +6971,7 @@ class Router:
return candidate_id in self.model_id_to_deployment_index_map
def resolve_model_name_from_model_id(
self, model_id: Optional[str], custom_llm_provider: Optional[str] = None
self, model_id: Optional[str]
) -> Optional[str]:
"""
Resolve model_name from model_id.
@ -6981,15 +6981,12 @@ class Router:
Strategy:
1. First, check if model_id directly matches a model_name or deployment ID
2. If custom_llm_provider is provided, check with provider prefix
3. Search through router's model_list to find a match by litellm_params.model
4. If custom_llm_provider is provided, try to find a wildcard pattern match
5. Return the model_name if found, None otherwise
2. If not, search through router's model_list to find a match by litellm_params.model
3. Return the model_name if found, None otherwise
Args:
model_id: The model_id extracted from decoded video_id
(could be model_name or litellm_params.model value)
custom_llm_provider: The provider name (e.g., "vertex_ai") for wildcard matching
Returns:
model_name if found, None otherwise. If None, the request will fall through
@ -7002,26 +6999,15 @@ class Router:
if model_id in self.model_names or self.has_model_id(model_id):
return model_id
# Strategy 2: Check with provider prefix (e.g., "vertex_ai/veo-3.0-generate-preview")
if custom_llm_provider:
full_model_name = f"{custom_llm_provider}/{model_id}"
if full_model_name in self.model_names or self.has_model_id(full_model_name):
return full_model_name
# Strategy 3: Search through router's model_list to find by litellm_params.model
# Strategy 2: Search through router's model_list to find by litellm_params.model
all_models = self.get_model_list(model_name=None)
if not all_models:
return None
# First pass: exact matches (non-wildcard)
for deployment in all_models:
litellm_params = deployment.get("litellm_params", {})
actual_model = litellm_params.get("model")
# Skip wildcard patterns in first pass
if actual_model and actual_model.endswith("/*"):
continue
# Match by exact match or by checking if actual_model ends with /model_id or :model_id
# e.g., model_id="veo-2.0-generate-001" matches actual_model="vertex_ai/veo-2.0-generate-001"
matches = (
@ -7035,19 +7021,6 @@ class Router:
if model_name:
return model_name
# Strategy 4: Wildcard patterns using PatternMatchRouter
# For video status/content, we need to match model_id like "veo-3.0-generate-preview"
# to wildcard patterns like "vertex_ai/*"
if custom_llm_provider:
full_model_name = f"{custom_llm_provider}/{model_id}"
pattern_deployments = self.pattern_router.route(full_model_name)
if pattern_deployments:
# Return the first matching wildcard model_name
for pattern_deployment in pattern_deployments:
matched_model_name = pattern_deployment.get("model_name")
if matched_model_name:
return matched_model_name
# No match found
return None

View File

@ -10201,48 +10201,6 @@
"mode": "completion",
"output_cost_per_token": 5e-07
},
"deepseek-v3-2-251201": {
"input_cost_per_token": 0.0,
"litellm_provider": "volcengine",
"max_input_tokens": 98304,
"max_output_tokens": 32768,
"max_tokens": 32768,
"mode": "chat",
"output_cost_per_token": 0.0,
"supports_assistant_prefill": true,
"supports_function_calling": true,
"supports_prompt_caching": true,
"supports_reasoning": true,
"supports_tool_choice": true
},
"glm-4-7-251222": {
"input_cost_per_token": 0.0,
"litellm_provider": "volcengine",
"max_input_tokens": 204800,
"max_output_tokens": 131072,
"max_tokens": 131072,
"mode": "chat",
"output_cost_per_token": 0.0,
"supports_assistant_prefill": true,
"supports_function_calling": true,
"supports_prompt_caching": true,
"supports_reasoning": true,
"supports_tool_choice": true
},
"kimi-k2-thinking-251104": {
"input_cost_per_token": 0.0,
"litellm_provider": "volcengine",
"max_input_tokens": 229376,
"max_output_tokens": 32768,
"max_tokens": 32768,
"mode": "chat",
"output_cost_per_token": 0.0,
"supports_assistant_prefill": true,
"supports_function_calling": true,
"supports_prompt_caching": true,
"supports_reasoning": true,
"supports_tool_choice": true
},
"doubao-embedding": {
"input_cost_per_token": 0.0,
"litellm_provider": "volcengine",

38
poetry.lock generated
View File

@ -1,4 +1,4 @@
# This file is automatically @generated by Poetry 2.2.1 and should not be changed by hand.
# This file is automatically @generated by Poetry 2.2.0 and should not be changed by hand.
[[package]]
name = "aiofiles"
@ -525,36 +525,36 @@ files = [
[[package]]
name = "boto3"
version = "1.40.61"
version = "1.36.0"
description = "The AWS SDK for Python"
optional = true
python-versions = ">=3.9"
python-versions = ">=3.8"
groups = ["main"]
markers = "extra == \"proxy\""
files = [
{file = "boto3-1.40.61-py3-none-any.whl", hash = "sha256:6b9c57b2a922b5d8c17766e29ed792586a818098efe84def27c8f582b33f898c"},
{file = "boto3-1.40.61.tar.gz", hash = "sha256:d6c56277251adf6c2bdd25249feae625abe4966831676689ff23b4694dea5b12"},
{file = "boto3-1.36.0-py3-none-any.whl", hash = "sha256:d0ca7a58ce25701a52232cc8df9d87854824f1f2964b929305722ebc7959d5a9"},
{file = "boto3-1.36.0.tar.gz", hash = "sha256:159898f51c2997a12541c0e02d6e5a8fe2993ddb307b9478fd9a339f98b57e00"},
]
[package.dependencies]
botocore = ">=1.40.61,<1.41.0"
botocore = ">=1.36.0,<1.37.0"
jmespath = ">=0.7.1,<2.0.0"
s3transfer = ">=0.14.0,<0.15.0"
s3transfer = ">=0.11.0,<0.12.0"
[package.extras]
crt = ["botocore[crt] (>=1.21.0,<2.0a0)"]
[[package]]
name = "botocore"
version = "1.40.76"
version = "1.36.26"
description = "Low-level, data-driven core of boto 3."
optional = true
python-versions = ">=3.9"
python-versions = ">=3.8"
groups = ["main"]
markers = "extra == \"proxy\""
files = [
{file = "botocore-1.40.76-py3-none-any.whl", hash = "sha256:fe425d386e48ac64c81cbb4a7181688d813df2e2b4c78b95ebe833c9e868c6f4"},
{file = "botocore-1.40.76.tar.gz", hash = "sha256:2b16024d68b29b973005adfb5039adfe9099ebe772d40a90ca89f2e165c495dc"},
{file = "botocore-1.36.26-py3-none-any.whl", hash = "sha256:4e3f19913887a58502e71ef8d696fe7eaa54de7813ff73390cd5883f837dfa6e"},
{file = "botocore-1.36.26.tar.gz", hash = "sha256:4a63bcef7ecf6146fd3a61dc4f9b33b7473b49bdaf1770e9aaca6eee0c9eab62"},
]
[package.dependencies]
@ -566,7 +566,7 @@ urllib3 = [
]
[package.extras]
crt = ["awscrt (==0.28.4)"]
crt = ["awscrt (==0.23.8)"]
[[package]]
name = "cachetools"
@ -6255,22 +6255,22 @@ files = [
[[package]]
name = "s3transfer"
version = "0.14.0"
version = "0.11.3"
description = "An Amazon S3 Transfer Manager"
optional = true
python-versions = ">=3.9"
python-versions = ">=3.8"
groups = ["main"]
markers = "extra == \"proxy\""
files = [
{file = "s3transfer-0.14.0-py3-none-any.whl", hash = "sha256:ea3b790c7077558ed1f02a3072fb3cb992bbbd253392f4b6e9e8976941c7d456"},
{file = "s3transfer-0.14.0.tar.gz", hash = "sha256:eff12264e7c8b4985074ccce27a3b38a485bb7f7422cc8046fee9be4983e4125"},
{file = "s3transfer-0.11.3-py3-none-any.whl", hash = "sha256:ca855bdeb885174b5ffa95b9913622459d4ad8e331fc98eb01e6d5eb6a30655d"},
{file = "s3transfer-0.11.3.tar.gz", hash = "sha256:edae4977e3a122445660c7c114bba949f9d191bae3b34a096f18a1c8c354527a"},
]
[package.dependencies]
botocore = ">=1.37.4,<2.0a.0"
botocore = ">=1.36.0,<2.0a.0"
[package.extras]
crt = ["botocore[crt] (>=1.37.4,<2.0a.0)"]
crt = ["botocore[crt] (>=1.36.0,<2.0a.0)"]
[[package]]
name = "scikit-learn"
@ -7981,4 +7981,4 @@ utils = ["numpydoc"]
[metadata]
lock-version = "2.1"
python-versions = ">=3.9,<4.0"
content-hash = "f391c702cf58ef2ba7641acdc3ae13d7c8e672faede68c0a624bd2ba0fb46b12"
content-hash = "ea62b77c662ab9fc486e421c576f0868bcde16d62a24703ee1f4916a0465ffb2"

View File

@ -56,7 +56,7 @@ google-cloud-iam = {version = "^2.19.1", optional = true}
resend = {version = ">=0.8.0", optional = true}
pynacl = {version = "^1.5.0", optional = true}
websockets = {version = "^15.0.1", optional = true}
boto3 = {version = "1.40.61", optional = true}
boto3 = {version = "1.36.0", optional = true}
redisvl = {version = "^0.4.1", optional = true, markers = "python_version >= '3.9' and python_version < '3.14'"}
mcp = {version = "^1.21.2", optional = true, python = ">=3.10"}
litellm-proxy-extras = {version = "0.4.22", optional = true}

View File

@ -10,7 +10,7 @@ uvicorn==0.31.1 # server dep
gunicorn==23.0.0 # server dep
fastuuid==0.13.5 # for uuid4
uvloop==0.21.0 # uvicorn dep, gives us much better performance under load
boto3==1.40.61 # aws bedrock/sagemaker calls
boto3==1.36.0 # aws bedrock/sagemaker calls
redis==5.2.1 # redis caching
prisma==0.11.0 # for db
nodejs-wheel-binaries==24.12.0 ## required by prisma for migrations, prevents runtime download (updated from nodejs-bin for security fixes)
@ -59,7 +59,7 @@ click==8.1.7 # for proxy cli
rich==13.7.1 # for litellm proxy cli
jinja2==3.1.6 # for prompt templates
aiohttp==3.13.3 # for network calls
aioboto3==15.5.0 # for async sagemaker calls
aioboto3==13.4.0 # for async sagemaker calls
tenacity==8.5.0 # for retrying requests, when litellm.num_retries set
pydantic>=2.11,<3 # proxy + openai req. + mcp
jsonschema>=4.23.0,<5.0.0 # validating json schema - aligned with openapi-core + mcp

File diff suppressed because one or more lines are too long

View File

@ -483,75 +483,6 @@ class TestProxyInitializationHelpers:
# Verify that uvicorn.run was called again
mock_uvicorn_run.assert_called_once()
@patch("litellm.proxy.proxy_cli.ProxyInitializationHelpers._run_gunicorn_server")
@patch("builtins.print")
def test_gunicorn_keepalive_timeout_flag(self, mock_print, mock_gunicorn):
"""Test that the keepalive_timeout flag is properly passed to Gunicorn"""
from click.testing import CliRunner
from litellm.proxy.proxy_cli import run_server
runner = CliRunner()
mock_app = MagicMock()
mock_proxy_config = MagicMock()
mock_key_mgmt = MagicMock()
mock_save_worker_config = MagicMock()
with patch.dict(
"sys.modules",
{
"proxy_server": MagicMock(
app=mock_app,
ProxyConfig=mock_proxy_config,
KeyManagementSettings=mock_key_mgmt,
save_worker_config=mock_save_worker_config,
)
},
):
result = runner.invoke(
run_server, ["--local", "--run_gunicorn", "--keepalive_timeout", "120"]
)
assert result.exit_code == 0
# Verify _run_gunicorn_server was called with keepalive_timeout
mock_gunicorn.assert_called_once()
call_kwargs = mock_gunicorn.call_args.kwargs
assert call_kwargs["keepalive_timeout"] == 120
@patch("litellm.proxy.proxy_cli.ProxyInitializationHelpers._run_gunicorn_server")
@patch("builtins.print")
def test_gunicorn_keepalive_default(self, mock_print, mock_gunicorn):
"""Test that Gunicorn uses default 90s when keepalive_timeout not specified"""
from click.testing import CliRunner
from litellm.proxy.proxy_cli import run_server
runner = CliRunner()
mock_app = MagicMock()
mock_proxy_config = MagicMock()
mock_key_mgmt = MagicMock()
mock_save_worker_config = MagicMock()
with patch.dict(
"sys.modules",
{
"proxy_server": MagicMock(
app=mock_app,
ProxyConfig=mock_proxy_config,
KeyManagementSettings=mock_key_mgmt,
save_worker_config=mock_save_worker_config,
)
},
):
result = runner.invoke(run_server, ["--local", "--run_gunicorn"])
assert result.exit_code == 0
# Verify default behavior (keepalive_timeout is None, Gunicorn will use 90)
call_kwargs = mock_gunicorn.call_args.kwargs
assert call_kwargs.get("keepalive_timeout") is None
class TestHealthAppFactory:
"""Test cases for the health app factory module"""

View File

@ -2054,190 +2054,3 @@ async def test_aguardrail():
assert result["result"] == "success"
assert result["selected_guardrail"]["id"] == "guardrail-1"
def test_resolve_model_name_from_model_id_wildcard_pattern():
"""
Test that resolve_model_name_from_model_id correctly resolves model names
for wildcard patterns using PatternMatchRouter.
This is critical for video status/content endpoints where model_id extracted
from video_id (e.g., "veo-3.0-generate-preview") needs to match wildcard
patterns like "vertex_ai/*" to inject credentials from the model config.
"""
# Set up router with wildcard pattern
router = litellm.Router(
model_list=[
{
"model_name": "vertex_ai/*",
"litellm_params": {
"model": "vertex_ai/*",
"vertex_project": "test-project",
"vertex_location": "us-central1",
},
},
{
"model_name": "specific-model",
"litellm_params": {
"model": "vertex_ai/gemini-pro",
"vertex_project": "specific-project",
"vertex_location": "us-east1",
},
},
],
)
# Test Case 1: Wildcard pattern matching with custom_llm_provider
# This simulates video_id like "vertex_ai:veo-3.0-generate-preview:..."
result = router.resolve_model_name_from_model_id(
model_id="veo-3.0-generate-preview",
custom_llm_provider="vertex_ai",
)
assert result == "vertex_ai/*", f"Expected 'vertex_ai/*', got '{result}'"
# Test Case 2: Different model name should also match wildcard
result = router.resolve_model_name_from_model_id(
model_id="gemini-2.0-flash",
custom_llm_provider="vertex_ai",
)
assert result == "vertex_ai/*", f"Expected 'vertex_ai/*', got '{result}'"
# Test Case 3: Without custom_llm_provider, should not match wildcard
result = router.resolve_model_name_from_model_id(
model_id="veo-3.0-generate-preview",
custom_llm_provider=None,
)
assert result is None, f"Expected None without provider, got '{result}'"
# Test Case 4: Exact model_name match should take precedence
result = router.resolve_model_name_from_model_id(
model_id="specific-model",
custom_llm_provider="vertex_ai",
)
assert result == "specific-model", f"Expected 'specific-model', got '{result}'"
def test_resolve_model_name_from_model_id_exact_match():
"""
Test that resolve_model_name_from_model_id correctly resolves exact model names.
"""
router = litellm.Router(
model_list=[
{
"model_name": "my-gpt-model",
"litellm_params": {
"model": "azure/gpt-4",
"api_key": "test-key",
},
},
{
"model_name": "veo-model",
"litellm_params": {
"model": "vertex_ai/veo-2.0-generate-001",
"vertex_project": "test-project",
},
},
],
)
# Test Case 1: Direct model_name match
result = router.resolve_model_name_from_model_id(model_id="my-gpt-model")
assert result == "my-gpt-model", f"Expected 'my-gpt-model', got '{result}'"
# Test Case 2: Match by litellm_params.model suffix
result = router.resolve_model_name_from_model_id(model_id="veo-2.0-generate-001")
assert result == "veo-model", f"Expected 'veo-model', got '{result}'"
# Test Case 3: Non-existent model should return None
result = router.resolve_model_name_from_model_id(model_id="non-existent-model")
assert result is None, f"Expected None, got '{result}'"
def test_resolve_model_name_from_model_id_provider_prefix():
"""
Test that resolve_model_name_from_model_id handles provider prefix correctly.
"""
router = litellm.Router(
model_list=[
{
"model_name": "vertex_ai/gemini-pro",
"litellm_params": {
"model": "vertex_ai/gemini-pro",
"vertex_project": "test-project",
},
},
],
)
# Test Case 1: Full model name with provider prefix as model_name
result = router.resolve_model_name_from_model_id(
model_id="vertex_ai/gemini-pro",
custom_llm_provider=None,
)
assert result == "vertex_ai/gemini-pro", f"Expected 'vertex_ai/gemini-pro', got '{result}'"
# Test Case 2: Model ID with provider prefix constructed from custom_llm_provider
result = router.resolve_model_name_from_model_id(
model_id="gemini-pro",
custom_llm_provider="vertex_ai",
)
assert result == "vertex_ai/gemini-pro", f"Expected 'vertex_ai/gemini-pro', got '{result}'"
def test_resolve_model_name_from_model_id_multiple_wildcards():
"""
Test that resolve_model_name_from_model_id works with multiple wildcard patterns.
"""
router = litellm.Router(
model_list=[
{
"model_name": "vertex_ai/*",
"litellm_params": {
"model": "vertex_ai/*",
"vertex_project": "vertex-project",
},
},
{
"model_name": "openai/*",
"litellm_params": {
"model": "openai/*",
"api_key": "openai-key",
},
},
{
"model_name": "anthropic/*",
"litellm_params": {
"model": "anthropic/*",
"api_key": "anthropic-key",
},
},
],
)
# Test Case 1: Match vertex_ai wildcard
result = router.resolve_model_name_from_model_id(
model_id="veo-3.0-generate-preview",
custom_llm_provider="vertex_ai",
)
assert result == "vertex_ai/*", f"Expected 'vertex_ai/*', got '{result}'"
# Test Case 2: Match openai wildcard
result = router.resolve_model_name_from_model_id(
model_id="gpt-4o",
custom_llm_provider="openai",
)
assert result == "openai/*", f"Expected 'openai/*', got '{result}'"
# Test Case 3: Match anthropic wildcard
result = router.resolve_model_name_from_model_id(
model_id="claude-3-opus",
custom_llm_provider="anthropic",
)
assert result == "anthropic/*", f"Expected 'anthropic/*', got '{result}'"
# Test Case 4: Non-matching provider should return None
result = router.resolve_model_name_from_model_id(
model_id="some-model",
custom_llm_provider="bedrock",
)
assert result is None, f"Expected None for non-matching provider, got '{result}'"