Revert "Litellm staging 01 15 2026"
This commit is contained in:
parent
3e9e65c123
commit
7aba0f738a
@ -144,8 +144,8 @@ jobs:
|
||||
pip install "google-generativeai==0.3.2"
|
||||
pip install "google-cloud-aiplatform==1.43.0"
|
||||
pip install pyarrow
|
||||
pip install "boto3==1.40.61"
|
||||
pip install "aioboto3==15.5.0"
|
||||
pip install "boto3==1.36.0"
|
||||
pip install "aioboto3==13.4.0"
|
||||
pip install langchain
|
||||
pip install lunary==0.2.5
|
||||
pip install "azure-identity==1.16.1"
|
||||
@ -260,8 +260,8 @@ jobs:
|
||||
pip install "google-generativeai==0.3.2"
|
||||
pip install "google-cloud-aiplatform==1.43.0"
|
||||
pip install pyarrow
|
||||
pip install "boto3==1.40.61"
|
||||
pip install "aioboto3==15.5.0"
|
||||
pip install "boto3==1.36.0"
|
||||
pip install "aioboto3==13.4.0"
|
||||
pip install langchain
|
||||
pip install lunary==0.2.5
|
||||
pip install "azure-identity==1.16.1"
|
||||
@ -367,8 +367,8 @@ jobs:
|
||||
pip install "google-generativeai==0.3.2"
|
||||
pip install "google-cloud-aiplatform==1.43.0"
|
||||
pip install pyarrow
|
||||
pip install "boto3==1.40.61"
|
||||
pip install "aioboto3==15.5.0"
|
||||
pip install "boto3==1.36.0"
|
||||
pip install "aioboto3==13.4.0"
|
||||
pip install langchain
|
||||
pip install lunary==0.2.5
|
||||
pip install "azure-identity==1.16.1"
|
||||
@ -637,8 +637,8 @@ jobs:
|
||||
pip install "google-generativeai==0.3.2"
|
||||
pip install "google-cloud-aiplatform==1.43.0"
|
||||
pip install pyarrow
|
||||
pip install "boto3==1.40.61"
|
||||
pip install "aioboto3==15.5.0"
|
||||
pip install "boto3==1.36.0"
|
||||
pip install "aioboto3==13.4.0"
|
||||
pip install langchain
|
||||
pip install "langfuse>=2.0.0"
|
||||
pip install "logfire==0.29.0"
|
||||
@ -759,8 +759,8 @@ jobs:
|
||||
pip install "google-cloud-aiplatform==1.43.0"
|
||||
pip install "google-genai==1.22.0"
|
||||
pip install pyarrow
|
||||
pip install "boto3==1.40.61"
|
||||
pip install "aioboto3==15.5.0"
|
||||
pip install "boto3==1.36.0"
|
||||
pip install "aioboto3==13.4.0"
|
||||
pip install langchain
|
||||
pip install lunary==0.2.5
|
||||
pip install "azure-identity==1.16.1"
|
||||
@ -865,8 +865,8 @@ jobs:
|
||||
pip install "google-cloud-aiplatform==1.43.0"
|
||||
pip install "google-genai==1.22.0"
|
||||
pip install pyarrow
|
||||
pip install "boto3==1.40.61"
|
||||
pip install "aioboto3==15.5.0"
|
||||
pip install "boto3==1.36.0"
|
||||
pip install "aioboto3==13.4.0"
|
||||
pip install langchain
|
||||
pip install lunary==0.2.5
|
||||
pip install "azure-identity==1.16.1"
|
||||
@ -972,8 +972,8 @@ jobs:
|
||||
pip install "google-cloud-aiplatform==1.43.0"
|
||||
pip install "google-genai==1.22.0"
|
||||
pip install pyarrow
|
||||
pip install "boto3==1.40.61"
|
||||
pip install "aioboto3==15.5.0"
|
||||
pip install "boto3==1.36.0"
|
||||
pip install "aioboto3==13.4.0"
|
||||
pip install langchain
|
||||
pip install lunary==0.2.5
|
||||
pip install "azure-identity==1.16.1"
|
||||
@ -1198,7 +1198,7 @@ jobs:
|
||||
pip install "pytest-asyncio==0.21.1"
|
||||
pip install "respx==0.22.0"
|
||||
pip install "pydantic==2.10.2"
|
||||
pip install "boto3==1.40.61"
|
||||
pip install "boto3==1.36.0"
|
||||
# Run pytest and generate JUnit XML report
|
||||
- run:
|
||||
name: Run tests
|
||||
@ -1879,7 +1879,7 @@ jobs:
|
||||
pip install aiohttp
|
||||
pip install openai
|
||||
pip install click
|
||||
pip install "boto3==1.40.61"
|
||||
pip install "boto3==1.36.0"
|
||||
pip install jinja2
|
||||
pip install "tokenizers==0.20.0"
|
||||
pip install "uvloop==0.21.0"
|
||||
@ -2176,8 +2176,8 @@ jobs:
|
||||
pip install "google-generativeai==0.3.2"
|
||||
pip install "google-cloud-aiplatform==1.43.0"
|
||||
pip install pyarrow
|
||||
pip install "boto3==1.40.61"
|
||||
pip install "aioboto3==15.5.0"
|
||||
pip install "boto3==1.36.0"
|
||||
pip install "aioboto3==13.4.0"
|
||||
pip install langchain
|
||||
pip install "langfuse>=2.0.0"
|
||||
pip install "logfire==0.29.0"
|
||||
@ -2316,8 +2316,8 @@ jobs:
|
||||
pip install "google-generativeai==0.3.2"
|
||||
pip install "google-cloud-aiplatform==1.43.0"
|
||||
pip install pyarrow
|
||||
pip install "boto3==1.40.61"
|
||||
pip install "aioboto3==15.5.0"
|
||||
pip install "boto3==1.36.0"
|
||||
pip install "aioboto3==13.4.0"
|
||||
pip install langchain
|
||||
pip install "langchain_mcp_adapters==0.0.5"
|
||||
pip install "langfuse>=2.0.0"
|
||||
@ -2462,8 +2462,8 @@ jobs:
|
||||
pip install "google-generativeai==0.3.2"
|
||||
pip install "google-cloud-aiplatform==1.43.0"
|
||||
pip install pyarrow
|
||||
pip install "boto3==1.40.61"
|
||||
pip install "aioboto3==15.5.0"
|
||||
pip install "boto3==1.36.0"
|
||||
pip install "aioboto3==13.4.0"
|
||||
pip install langchain
|
||||
pip install "langfuse>=2.0.0"
|
||||
pip install "logfire==0.29.0"
|
||||
@ -3118,7 +3118,7 @@ jobs:
|
||||
pip install "pytest==7.3.1"
|
||||
pip install "pytest-mock==3.12.0"
|
||||
pip install "pytest-asyncio==0.21.1"
|
||||
pip install "boto3==1.40.61"
|
||||
pip install "boto3==1.36.0"
|
||||
pip install "mypy==1.18.2"
|
||||
pip install pyarrow
|
||||
pip install numpydoc
|
||||
|
||||
@ -49,9 +49,7 @@ if TYPE_CHECKING:
|
||||
ProxyConfig = _ProxyConfig
|
||||
else:
|
||||
ProxyConfig = Any
|
||||
from litellm.proxy.litellm_pre_call_utils import (
|
||||
add_litellm_data_to_request,
|
||||
)
|
||||
from litellm.proxy.litellm_pre_call_utils import add_litellm_data_to_request
|
||||
from litellm.types.utils import ModelResponse, ModelResponseStream, Usage
|
||||
|
||||
|
||||
|
||||
@ -846,9 +846,7 @@ async def add_litellm_data_to_request( # noqa: PLR0915
|
||||
|
||||
# Add headers to metadata for guardrails to access (fixes #17477)
|
||||
# Guardrails use metadata["headers"] to access request headers (e.g., User-Agent)
|
||||
if _metadata_variable_name in data and isinstance(
|
||||
data[_metadata_variable_name], dict
|
||||
):
|
||||
if _metadata_variable_name in data and isinstance(data[_metadata_variable_name], dict):
|
||||
data[_metadata_variable_name]["headers"] = _headers
|
||||
|
||||
# check for forwardable headers
|
||||
@ -1316,9 +1314,6 @@ def move_guardrails_to_metadata(
|
||||
|
||||
- If guardrails set on API Key metadata then sets guardrails on request metadata
|
||||
- If guardrails not set on API key, then checks request metadata
|
||||
|
||||
Note: We copy (not pop) guardrails from data to metadata to ensure deployment-level
|
||||
guardrails merged by the router remain in kwargs for async_pre_call_deployment_hook.
|
||||
"""
|
||||
# Check key-level guardrails
|
||||
_add_guardrails_from_key_or_team_metadata(
|
||||
@ -1331,25 +1326,15 @@ def move_guardrails_to_metadata(
|
||||
#########################################################################################
|
||||
# User's might send "guardrails" in the request body, we need to add them to the request metadata.
|
||||
# Since downstream logic requires "guardrails" to be in the request metadata
|
||||
#
|
||||
# IMPORTANT: We copy instead of pop to preserve guardrails in kwargs for
|
||||
# async_pre_call_deployment_hook (custom_guardrail.py:290) which checks kwargs.get("guardrails").
|
||||
# This is the event-based approach for deployment-level guardrails.
|
||||
#########################################################################################
|
||||
if "guardrails" in data:
|
||||
request_body_guardrails = data.get("guardrails")
|
||||
if request_body_guardrails is None:
|
||||
return
|
||||
request_body_guardrails = data.pop("guardrails")
|
||||
if "guardrails" in data[_metadata_variable_name] and isinstance(
|
||||
data[_metadata_variable_name]["guardrails"], list
|
||||
):
|
||||
# Merge unique guardrails
|
||||
existing = data[_metadata_variable_name]["guardrails"]
|
||||
for g in request_body_guardrails:
|
||||
if g not in existing:
|
||||
existing.append(g)
|
||||
data[_metadata_variable_name]["guardrails"].extend(request_body_guardrails)
|
||||
else:
|
||||
data[_metadata_variable_name]["guardrails"] = list(request_body_guardrails)
|
||||
data[_metadata_variable_name]["guardrails"] = request_body_guardrails
|
||||
|
||||
#########################################################################################
|
||||
if "guardrail_config" in data:
|
||||
|
||||
@ -26,5 +26,3 @@ if exit_code != 0:
|
||||
verbose_proxy_logger.error(
|
||||
f"'prisma generate' stderr: {result.stderr}"
|
||||
) # Log stderr
|
||||
|
||||
sys.exit(exit_code)
|
||||
@ -187,7 +187,6 @@ class ProxyInitializationHelpers:
|
||||
ssl_certfile_path: str,
|
||||
ssl_keyfile_path: str,
|
||||
max_requests_before_restart: Optional[int] = None,
|
||||
keepalive_timeout: Optional[int] = None,
|
||||
):
|
||||
"""
|
||||
Run litellm with `gunicorn`
|
||||
@ -268,10 +267,6 @@ class ProxyInitializationHelpers:
|
||||
"access_log_format": '%(h)s %(l)s %(u)s %(t)s "%(r)s" %(s)s %(b)s',
|
||||
}
|
||||
|
||||
# Optional: set keepalive timeout if specified by user
|
||||
if keepalive_timeout is not None:
|
||||
gunicorn_options["keepalive"] = keepalive_timeout
|
||||
|
||||
# Optional: recycle workers after N requests to mitigate memory growth
|
||||
if max_requests_before_restart is not None:
|
||||
gunicorn_options["max_requests"] = max_requests_before_restart
|
||||
@ -494,7 +489,7 @@ class ProxyInitializationHelpers:
|
||||
"--keepalive_timeout",
|
||||
default=None,
|
||||
type=int,
|
||||
help="Set the keepalive timeout in seconds. For Uvicorn: timeout_keep_alive parameter. For Gunicorn: keepalive parameter. Default: Uvicorn uses ~75s, Gunicorn uses 90s",
|
||||
help="Set the uvicorn keepalive timeout in seconds (uvicorn timeout_keep_alive parameter)",
|
||||
envvar="KEEPALIVE_TIMEOUT",
|
||||
)
|
||||
@click.option(
|
||||
@ -864,7 +859,6 @@ def run_server( # noqa: PLR0915
|
||||
ssl_certfile_path=ssl_certfile_path,
|
||||
ssl_keyfile_path=ssl_keyfile_path,
|
||||
max_requests_before_restart=max_requests_before_restart,
|
||||
keepalive_timeout=keepalive_timeout,
|
||||
)
|
||||
elif run_hypercorn is True:
|
||||
ProxyInitializationHelpers._init_hypercorn_server(
|
||||
|
||||
@ -256,9 +256,7 @@ async def video_status(
|
||||
# Resolve model_name from model_id if available
|
||||
# This allows the router to automatically inject litellm_params from the model config
|
||||
if model_id_from_decoded and llm_router:
|
||||
resolved_model = llm_router.resolve_model_name_from_model_id(
|
||||
model_id_from_decoded, custom_llm_provider=provider_from_id
|
||||
)
|
||||
resolved_model = llm_router.resolve_model_name_from_model_id(model_id_from_decoded)
|
||||
if resolved_model:
|
||||
data["model"] = resolved_model
|
||||
|
||||
@ -356,9 +354,7 @@ async def video_content(
|
||||
# Resolve model_name from model_id if available
|
||||
# This allows the router to automatically inject litellm_params from the model config
|
||||
if model_id_from_decoded and llm_router:
|
||||
resolved_model = llm_router.resolve_model_name_from_model_id(
|
||||
model_id_from_decoded, custom_llm_provider=provider_from_id
|
||||
)
|
||||
resolved_model = llm_router.resolve_model_name_from_model_id(model_id_from_decoded)
|
||||
if resolved_model:
|
||||
data["model"] = resolved_model
|
||||
# Process request using ProxyBaseLLMRequestProcessing
|
||||
@ -470,9 +466,7 @@ async def video_remix(
|
||||
# Resolve model_name from model_id if available
|
||||
# This allows the router to automatically inject litellm_params from the model config
|
||||
if model_id_from_decoded and llm_router:
|
||||
resolved_model = llm_router.resolve_model_name_from_model_id(
|
||||
model_id_from_decoded, custom_llm_provider=provider_from_id
|
||||
)
|
||||
resolved_model = llm_router.resolve_model_name_from_model_id(model_id_from_decoded)
|
||||
if resolved_model:
|
||||
data["model"] = resolved_model
|
||||
|
||||
|
||||
@ -6971,7 +6971,7 @@ class Router:
|
||||
return candidate_id in self.model_id_to_deployment_index_map
|
||||
|
||||
def resolve_model_name_from_model_id(
|
||||
self, model_id: Optional[str], custom_llm_provider: Optional[str] = None
|
||||
self, model_id: Optional[str]
|
||||
) -> Optional[str]:
|
||||
"""
|
||||
Resolve model_name from model_id.
|
||||
@ -6981,15 +6981,12 @@ class Router:
|
||||
|
||||
Strategy:
|
||||
1. First, check if model_id directly matches a model_name or deployment ID
|
||||
2. If custom_llm_provider is provided, check with provider prefix
|
||||
3. Search through router's model_list to find a match by litellm_params.model
|
||||
4. If custom_llm_provider is provided, try to find a wildcard pattern match
|
||||
5. Return the model_name if found, None otherwise
|
||||
2. If not, search through router's model_list to find a match by litellm_params.model
|
||||
3. Return the model_name if found, None otherwise
|
||||
|
||||
Args:
|
||||
model_id: The model_id extracted from decoded video_id
|
||||
(could be model_name or litellm_params.model value)
|
||||
custom_llm_provider: The provider name (e.g., "vertex_ai") for wildcard matching
|
||||
|
||||
Returns:
|
||||
model_name if found, None otherwise. If None, the request will fall through
|
||||
@ -7002,26 +6999,15 @@ class Router:
|
||||
if model_id in self.model_names or self.has_model_id(model_id):
|
||||
return model_id
|
||||
|
||||
# Strategy 2: Check with provider prefix (e.g., "vertex_ai/veo-3.0-generate-preview")
|
||||
if custom_llm_provider:
|
||||
full_model_name = f"{custom_llm_provider}/{model_id}"
|
||||
if full_model_name in self.model_names or self.has_model_id(full_model_name):
|
||||
return full_model_name
|
||||
|
||||
# Strategy 3: Search through router's model_list to find by litellm_params.model
|
||||
# Strategy 2: Search through router's model_list to find by litellm_params.model
|
||||
all_models = self.get_model_list(model_name=None)
|
||||
if not all_models:
|
||||
return None
|
||||
|
||||
# First pass: exact matches (non-wildcard)
|
||||
for deployment in all_models:
|
||||
litellm_params = deployment.get("litellm_params", {})
|
||||
actual_model = litellm_params.get("model")
|
||||
|
||||
# Skip wildcard patterns in first pass
|
||||
if actual_model and actual_model.endswith("/*"):
|
||||
continue
|
||||
|
||||
# Match by exact match or by checking if actual_model ends with /model_id or :model_id
|
||||
# e.g., model_id="veo-2.0-generate-001" matches actual_model="vertex_ai/veo-2.0-generate-001"
|
||||
matches = (
|
||||
@ -7035,19 +7021,6 @@ class Router:
|
||||
if model_name:
|
||||
return model_name
|
||||
|
||||
# Strategy 4: Wildcard patterns using PatternMatchRouter
|
||||
# For video status/content, we need to match model_id like "veo-3.0-generate-preview"
|
||||
# to wildcard patterns like "vertex_ai/*"
|
||||
if custom_llm_provider:
|
||||
full_model_name = f"{custom_llm_provider}/{model_id}"
|
||||
pattern_deployments = self.pattern_router.route(full_model_name)
|
||||
if pattern_deployments:
|
||||
# Return the first matching wildcard model_name
|
||||
for pattern_deployment in pattern_deployments:
|
||||
matched_model_name = pattern_deployment.get("model_name")
|
||||
if matched_model_name:
|
||||
return matched_model_name
|
||||
|
||||
# No match found
|
||||
return None
|
||||
|
||||
|
||||
@ -10201,48 +10201,6 @@
|
||||
"mode": "completion",
|
||||
"output_cost_per_token": 5e-07
|
||||
},
|
||||
"deepseek-v3-2-251201": {
|
||||
"input_cost_per_token": 0.0,
|
||||
"litellm_provider": "volcengine",
|
||||
"max_input_tokens": 98304,
|
||||
"max_output_tokens": 32768,
|
||||
"max_tokens": 32768,
|
||||
"mode": "chat",
|
||||
"output_cost_per_token": 0.0,
|
||||
"supports_assistant_prefill": true,
|
||||
"supports_function_calling": true,
|
||||
"supports_prompt_caching": true,
|
||||
"supports_reasoning": true,
|
||||
"supports_tool_choice": true
|
||||
},
|
||||
"glm-4-7-251222": {
|
||||
"input_cost_per_token": 0.0,
|
||||
"litellm_provider": "volcengine",
|
||||
"max_input_tokens": 204800,
|
||||
"max_output_tokens": 131072,
|
||||
"max_tokens": 131072,
|
||||
"mode": "chat",
|
||||
"output_cost_per_token": 0.0,
|
||||
"supports_assistant_prefill": true,
|
||||
"supports_function_calling": true,
|
||||
"supports_prompt_caching": true,
|
||||
"supports_reasoning": true,
|
||||
"supports_tool_choice": true
|
||||
},
|
||||
"kimi-k2-thinking-251104": {
|
||||
"input_cost_per_token": 0.0,
|
||||
"litellm_provider": "volcengine",
|
||||
"max_input_tokens": 229376,
|
||||
"max_output_tokens": 32768,
|
||||
"max_tokens": 32768,
|
||||
"mode": "chat",
|
||||
"output_cost_per_token": 0.0,
|
||||
"supports_assistant_prefill": true,
|
||||
"supports_function_calling": true,
|
||||
"supports_prompt_caching": true,
|
||||
"supports_reasoning": true,
|
||||
"supports_tool_choice": true
|
||||
},
|
||||
"doubao-embedding": {
|
||||
"input_cost_per_token": 0.0,
|
||||
"litellm_provider": "volcengine",
|
||||
|
||||
38
poetry.lock
generated
38
poetry.lock
generated
@ -1,4 +1,4 @@
|
||||
# This file is automatically @generated by Poetry 2.2.1 and should not be changed by hand.
|
||||
# This file is automatically @generated by Poetry 2.2.0 and should not be changed by hand.
|
||||
|
||||
[[package]]
|
||||
name = "aiofiles"
|
||||
@ -525,36 +525,36 @@ files = [
|
||||
|
||||
[[package]]
|
||||
name = "boto3"
|
||||
version = "1.40.61"
|
||||
version = "1.36.0"
|
||||
description = "The AWS SDK for Python"
|
||||
optional = true
|
||||
python-versions = ">=3.9"
|
||||
python-versions = ">=3.8"
|
||||
groups = ["main"]
|
||||
markers = "extra == \"proxy\""
|
||||
files = [
|
||||
{file = "boto3-1.40.61-py3-none-any.whl", hash = "sha256:6b9c57b2a922b5d8c17766e29ed792586a818098efe84def27c8f582b33f898c"},
|
||||
{file = "boto3-1.40.61.tar.gz", hash = "sha256:d6c56277251adf6c2bdd25249feae625abe4966831676689ff23b4694dea5b12"},
|
||||
{file = "boto3-1.36.0-py3-none-any.whl", hash = "sha256:d0ca7a58ce25701a52232cc8df9d87854824f1f2964b929305722ebc7959d5a9"},
|
||||
{file = "boto3-1.36.0.tar.gz", hash = "sha256:159898f51c2997a12541c0e02d6e5a8fe2993ddb307b9478fd9a339f98b57e00"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
botocore = ">=1.40.61,<1.41.0"
|
||||
botocore = ">=1.36.0,<1.37.0"
|
||||
jmespath = ">=0.7.1,<2.0.0"
|
||||
s3transfer = ">=0.14.0,<0.15.0"
|
||||
s3transfer = ">=0.11.0,<0.12.0"
|
||||
|
||||
[package.extras]
|
||||
crt = ["botocore[crt] (>=1.21.0,<2.0a0)"]
|
||||
|
||||
[[package]]
|
||||
name = "botocore"
|
||||
version = "1.40.76"
|
||||
version = "1.36.26"
|
||||
description = "Low-level, data-driven core of boto 3."
|
||||
optional = true
|
||||
python-versions = ">=3.9"
|
||||
python-versions = ">=3.8"
|
||||
groups = ["main"]
|
||||
markers = "extra == \"proxy\""
|
||||
files = [
|
||||
{file = "botocore-1.40.76-py3-none-any.whl", hash = "sha256:fe425d386e48ac64c81cbb4a7181688d813df2e2b4c78b95ebe833c9e868c6f4"},
|
||||
{file = "botocore-1.40.76.tar.gz", hash = "sha256:2b16024d68b29b973005adfb5039adfe9099ebe772d40a90ca89f2e165c495dc"},
|
||||
{file = "botocore-1.36.26-py3-none-any.whl", hash = "sha256:4e3f19913887a58502e71ef8d696fe7eaa54de7813ff73390cd5883f837dfa6e"},
|
||||
{file = "botocore-1.36.26.tar.gz", hash = "sha256:4a63bcef7ecf6146fd3a61dc4f9b33b7473b49bdaf1770e9aaca6eee0c9eab62"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
@ -566,7 +566,7 @@ urllib3 = [
|
||||
]
|
||||
|
||||
[package.extras]
|
||||
crt = ["awscrt (==0.28.4)"]
|
||||
crt = ["awscrt (==0.23.8)"]
|
||||
|
||||
[[package]]
|
||||
name = "cachetools"
|
||||
@ -6255,22 +6255,22 @@ files = [
|
||||
|
||||
[[package]]
|
||||
name = "s3transfer"
|
||||
version = "0.14.0"
|
||||
version = "0.11.3"
|
||||
description = "An Amazon S3 Transfer Manager"
|
||||
optional = true
|
||||
python-versions = ">=3.9"
|
||||
python-versions = ">=3.8"
|
||||
groups = ["main"]
|
||||
markers = "extra == \"proxy\""
|
||||
files = [
|
||||
{file = "s3transfer-0.14.0-py3-none-any.whl", hash = "sha256:ea3b790c7077558ed1f02a3072fb3cb992bbbd253392f4b6e9e8976941c7d456"},
|
||||
{file = "s3transfer-0.14.0.tar.gz", hash = "sha256:eff12264e7c8b4985074ccce27a3b38a485bb7f7422cc8046fee9be4983e4125"},
|
||||
{file = "s3transfer-0.11.3-py3-none-any.whl", hash = "sha256:ca855bdeb885174b5ffa95b9913622459d4ad8e331fc98eb01e6d5eb6a30655d"},
|
||||
{file = "s3transfer-0.11.3.tar.gz", hash = "sha256:edae4977e3a122445660c7c114bba949f9d191bae3b34a096f18a1c8c354527a"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
botocore = ">=1.37.4,<2.0a.0"
|
||||
botocore = ">=1.36.0,<2.0a.0"
|
||||
|
||||
[package.extras]
|
||||
crt = ["botocore[crt] (>=1.37.4,<2.0a.0)"]
|
||||
crt = ["botocore[crt] (>=1.36.0,<2.0a.0)"]
|
||||
|
||||
[[package]]
|
||||
name = "scikit-learn"
|
||||
@ -7981,4 +7981,4 @@ utils = ["numpydoc"]
|
||||
[metadata]
|
||||
lock-version = "2.1"
|
||||
python-versions = ">=3.9,<4.0"
|
||||
content-hash = "f391c702cf58ef2ba7641acdc3ae13d7c8e672faede68c0a624bd2ba0fb46b12"
|
||||
content-hash = "ea62b77c662ab9fc486e421c576f0868bcde16d62a24703ee1f4916a0465ffb2"
|
||||
|
||||
@ -56,7 +56,7 @@ google-cloud-iam = {version = "^2.19.1", optional = true}
|
||||
resend = {version = ">=0.8.0", optional = true}
|
||||
pynacl = {version = "^1.5.0", optional = true}
|
||||
websockets = {version = "^15.0.1", optional = true}
|
||||
boto3 = {version = "1.40.61", optional = true}
|
||||
boto3 = {version = "1.36.0", optional = true}
|
||||
redisvl = {version = "^0.4.1", optional = true, markers = "python_version >= '3.9' and python_version < '3.14'"}
|
||||
mcp = {version = "^1.21.2", optional = true, python = ">=3.10"}
|
||||
litellm-proxy-extras = {version = "0.4.22", optional = true}
|
||||
|
||||
@ -10,7 +10,7 @@ uvicorn==0.31.1 # server dep
|
||||
gunicorn==23.0.0 # server dep
|
||||
fastuuid==0.13.5 # for uuid4
|
||||
uvloop==0.21.0 # uvicorn dep, gives us much better performance under load
|
||||
boto3==1.40.61 # aws bedrock/sagemaker calls
|
||||
boto3==1.36.0 # aws bedrock/sagemaker calls
|
||||
redis==5.2.1 # redis caching
|
||||
prisma==0.11.0 # for db
|
||||
nodejs-wheel-binaries==24.12.0 ## required by prisma for migrations, prevents runtime download (updated from nodejs-bin for security fixes)
|
||||
@ -59,7 +59,7 @@ click==8.1.7 # for proxy cli
|
||||
rich==13.7.1 # for litellm proxy cli
|
||||
jinja2==3.1.6 # for prompt templates
|
||||
aiohttp==3.13.3 # for network calls
|
||||
aioboto3==15.5.0 # for async sagemaker calls
|
||||
aioboto3==13.4.0 # for async sagemaker calls
|
||||
tenacity==8.5.0 # for retrying requests, when litellm.num_retries set
|
||||
pydantic>=2.11,<3 # proxy + openai req. + mcp
|
||||
jsonschema>=4.23.0,<5.0.0 # validating json schema - aligned with openapi-core + mcp
|
||||
|
||||
File diff suppressed because one or more lines are too long
@ -483,75 +483,6 @@ class TestProxyInitializationHelpers:
|
||||
# Verify that uvicorn.run was called again
|
||||
mock_uvicorn_run.assert_called_once()
|
||||
|
||||
@patch("litellm.proxy.proxy_cli.ProxyInitializationHelpers._run_gunicorn_server")
|
||||
@patch("builtins.print")
|
||||
def test_gunicorn_keepalive_timeout_flag(self, mock_print, mock_gunicorn):
|
||||
"""Test that the keepalive_timeout flag is properly passed to Gunicorn"""
|
||||
from click.testing import CliRunner
|
||||
|
||||
from litellm.proxy.proxy_cli import run_server
|
||||
|
||||
runner = CliRunner()
|
||||
|
||||
mock_app = MagicMock()
|
||||
mock_proxy_config = MagicMock()
|
||||
mock_key_mgmt = MagicMock()
|
||||
mock_save_worker_config = MagicMock()
|
||||
|
||||
with patch.dict(
|
||||
"sys.modules",
|
||||
{
|
||||
"proxy_server": MagicMock(
|
||||
app=mock_app,
|
||||
ProxyConfig=mock_proxy_config,
|
||||
KeyManagementSettings=mock_key_mgmt,
|
||||
save_worker_config=mock_save_worker_config,
|
||||
)
|
||||
},
|
||||
):
|
||||
result = runner.invoke(
|
||||
run_server, ["--local", "--run_gunicorn", "--keepalive_timeout", "120"]
|
||||
)
|
||||
assert result.exit_code == 0
|
||||
|
||||
# Verify _run_gunicorn_server was called with keepalive_timeout
|
||||
mock_gunicorn.assert_called_once()
|
||||
call_kwargs = mock_gunicorn.call_args.kwargs
|
||||
assert call_kwargs["keepalive_timeout"] == 120
|
||||
|
||||
@patch("litellm.proxy.proxy_cli.ProxyInitializationHelpers._run_gunicorn_server")
|
||||
@patch("builtins.print")
|
||||
def test_gunicorn_keepalive_default(self, mock_print, mock_gunicorn):
|
||||
"""Test that Gunicorn uses default 90s when keepalive_timeout not specified"""
|
||||
from click.testing import CliRunner
|
||||
|
||||
from litellm.proxy.proxy_cli import run_server
|
||||
|
||||
runner = CliRunner()
|
||||
|
||||
mock_app = MagicMock()
|
||||
mock_proxy_config = MagicMock()
|
||||
mock_key_mgmt = MagicMock()
|
||||
mock_save_worker_config = MagicMock()
|
||||
|
||||
with patch.dict(
|
||||
"sys.modules",
|
||||
{
|
||||
"proxy_server": MagicMock(
|
||||
app=mock_app,
|
||||
ProxyConfig=mock_proxy_config,
|
||||
KeyManagementSettings=mock_key_mgmt,
|
||||
save_worker_config=mock_save_worker_config,
|
||||
)
|
||||
},
|
||||
):
|
||||
result = runner.invoke(run_server, ["--local", "--run_gunicorn"])
|
||||
assert result.exit_code == 0
|
||||
|
||||
# Verify default behavior (keepalive_timeout is None, Gunicorn will use 90)
|
||||
call_kwargs = mock_gunicorn.call_args.kwargs
|
||||
assert call_kwargs.get("keepalive_timeout") is None
|
||||
|
||||
|
||||
class TestHealthAppFactory:
|
||||
"""Test cases for the health app factory module"""
|
||||
|
||||
@ -2054,190 +2054,3 @@ async def test_aguardrail():
|
||||
|
||||
assert result["result"] == "success"
|
||||
assert result["selected_guardrail"]["id"] == "guardrail-1"
|
||||
|
||||
|
||||
def test_resolve_model_name_from_model_id_wildcard_pattern():
|
||||
"""
|
||||
Test that resolve_model_name_from_model_id correctly resolves model names
|
||||
for wildcard patterns using PatternMatchRouter.
|
||||
|
||||
This is critical for video status/content endpoints where model_id extracted
|
||||
from video_id (e.g., "veo-3.0-generate-preview") needs to match wildcard
|
||||
patterns like "vertex_ai/*" to inject credentials from the model config.
|
||||
"""
|
||||
# Set up router with wildcard pattern
|
||||
router = litellm.Router(
|
||||
model_list=[
|
||||
{
|
||||
"model_name": "vertex_ai/*",
|
||||
"litellm_params": {
|
||||
"model": "vertex_ai/*",
|
||||
"vertex_project": "test-project",
|
||||
"vertex_location": "us-central1",
|
||||
},
|
||||
},
|
||||
{
|
||||
"model_name": "specific-model",
|
||||
"litellm_params": {
|
||||
"model": "vertex_ai/gemini-pro",
|
||||
"vertex_project": "specific-project",
|
||||
"vertex_location": "us-east1",
|
||||
},
|
||||
},
|
||||
],
|
||||
)
|
||||
|
||||
# Test Case 1: Wildcard pattern matching with custom_llm_provider
|
||||
# This simulates video_id like "vertex_ai:veo-3.0-generate-preview:..."
|
||||
result = router.resolve_model_name_from_model_id(
|
||||
model_id="veo-3.0-generate-preview",
|
||||
custom_llm_provider="vertex_ai",
|
||||
)
|
||||
assert result == "vertex_ai/*", f"Expected 'vertex_ai/*', got '{result}'"
|
||||
|
||||
# Test Case 2: Different model name should also match wildcard
|
||||
result = router.resolve_model_name_from_model_id(
|
||||
model_id="gemini-2.0-flash",
|
||||
custom_llm_provider="vertex_ai",
|
||||
)
|
||||
assert result == "vertex_ai/*", f"Expected 'vertex_ai/*', got '{result}'"
|
||||
|
||||
# Test Case 3: Without custom_llm_provider, should not match wildcard
|
||||
result = router.resolve_model_name_from_model_id(
|
||||
model_id="veo-3.0-generate-preview",
|
||||
custom_llm_provider=None,
|
||||
)
|
||||
assert result is None, f"Expected None without provider, got '{result}'"
|
||||
|
||||
# Test Case 4: Exact model_name match should take precedence
|
||||
result = router.resolve_model_name_from_model_id(
|
||||
model_id="specific-model",
|
||||
custom_llm_provider="vertex_ai",
|
||||
)
|
||||
assert result == "specific-model", f"Expected 'specific-model', got '{result}'"
|
||||
|
||||
|
||||
def test_resolve_model_name_from_model_id_exact_match():
|
||||
"""
|
||||
Test that resolve_model_name_from_model_id correctly resolves exact model names.
|
||||
"""
|
||||
router = litellm.Router(
|
||||
model_list=[
|
||||
{
|
||||
"model_name": "my-gpt-model",
|
||||
"litellm_params": {
|
||||
"model": "azure/gpt-4",
|
||||
"api_key": "test-key",
|
||||
},
|
||||
},
|
||||
{
|
||||
"model_name": "veo-model",
|
||||
"litellm_params": {
|
||||
"model": "vertex_ai/veo-2.0-generate-001",
|
||||
"vertex_project": "test-project",
|
||||
},
|
||||
},
|
||||
],
|
||||
)
|
||||
|
||||
# Test Case 1: Direct model_name match
|
||||
result = router.resolve_model_name_from_model_id(model_id="my-gpt-model")
|
||||
assert result == "my-gpt-model", f"Expected 'my-gpt-model', got '{result}'"
|
||||
|
||||
# Test Case 2: Match by litellm_params.model suffix
|
||||
result = router.resolve_model_name_from_model_id(model_id="veo-2.0-generate-001")
|
||||
assert result == "veo-model", f"Expected 'veo-model', got '{result}'"
|
||||
|
||||
# Test Case 3: Non-existent model should return None
|
||||
result = router.resolve_model_name_from_model_id(model_id="non-existent-model")
|
||||
assert result is None, f"Expected None, got '{result}'"
|
||||
|
||||
|
||||
def test_resolve_model_name_from_model_id_provider_prefix():
|
||||
"""
|
||||
Test that resolve_model_name_from_model_id handles provider prefix correctly.
|
||||
"""
|
||||
router = litellm.Router(
|
||||
model_list=[
|
||||
{
|
||||
"model_name": "vertex_ai/gemini-pro",
|
||||
"litellm_params": {
|
||||
"model": "vertex_ai/gemini-pro",
|
||||
"vertex_project": "test-project",
|
||||
},
|
||||
},
|
||||
],
|
||||
)
|
||||
|
||||
# Test Case 1: Full model name with provider prefix as model_name
|
||||
result = router.resolve_model_name_from_model_id(
|
||||
model_id="vertex_ai/gemini-pro",
|
||||
custom_llm_provider=None,
|
||||
)
|
||||
assert result == "vertex_ai/gemini-pro", f"Expected 'vertex_ai/gemini-pro', got '{result}'"
|
||||
|
||||
# Test Case 2: Model ID with provider prefix constructed from custom_llm_provider
|
||||
result = router.resolve_model_name_from_model_id(
|
||||
model_id="gemini-pro",
|
||||
custom_llm_provider="vertex_ai",
|
||||
)
|
||||
assert result == "vertex_ai/gemini-pro", f"Expected 'vertex_ai/gemini-pro', got '{result}'"
|
||||
|
||||
|
||||
def test_resolve_model_name_from_model_id_multiple_wildcards():
|
||||
"""
|
||||
Test that resolve_model_name_from_model_id works with multiple wildcard patterns.
|
||||
"""
|
||||
router = litellm.Router(
|
||||
model_list=[
|
||||
{
|
||||
"model_name": "vertex_ai/*",
|
||||
"litellm_params": {
|
||||
"model": "vertex_ai/*",
|
||||
"vertex_project": "vertex-project",
|
||||
},
|
||||
},
|
||||
{
|
||||
"model_name": "openai/*",
|
||||
"litellm_params": {
|
||||
"model": "openai/*",
|
||||
"api_key": "openai-key",
|
||||
},
|
||||
},
|
||||
{
|
||||
"model_name": "anthropic/*",
|
||||
"litellm_params": {
|
||||
"model": "anthropic/*",
|
||||
"api_key": "anthropic-key",
|
||||
},
|
||||
},
|
||||
],
|
||||
)
|
||||
|
||||
# Test Case 1: Match vertex_ai wildcard
|
||||
result = router.resolve_model_name_from_model_id(
|
||||
model_id="veo-3.0-generate-preview",
|
||||
custom_llm_provider="vertex_ai",
|
||||
)
|
||||
assert result == "vertex_ai/*", f"Expected 'vertex_ai/*', got '{result}'"
|
||||
|
||||
# Test Case 2: Match openai wildcard
|
||||
result = router.resolve_model_name_from_model_id(
|
||||
model_id="gpt-4o",
|
||||
custom_llm_provider="openai",
|
||||
)
|
||||
assert result == "openai/*", f"Expected 'openai/*', got '{result}'"
|
||||
|
||||
# Test Case 3: Match anthropic wildcard
|
||||
result = router.resolve_model_name_from_model_id(
|
||||
model_id="claude-3-opus",
|
||||
custom_llm_provider="anthropic",
|
||||
)
|
||||
assert result == "anthropic/*", f"Expected 'anthropic/*', got '{result}'"
|
||||
|
||||
# Test Case 4: Non-matching provider should return None
|
||||
result = router.resolve_model_name_from_model_id(
|
||||
model_id="some-model",
|
||||
custom_llm_provider="bedrock",
|
||||
)
|
||||
assert result is None, f"Expected None for non-matching provider, got '{result}'"
|
||||
|
||||
Loading…
Reference in New Issue
Block a user