Merge remote-tracking branch 'upstream/litellm_internal_staging' into codex/skills-containers-tenant-guard
# Conflicts: # litellm/proxy/auth/auth_utils.py
This commit is contained in:
commit
3dcb6bd3f9
@ -1 +1 @@
|
||||
litellm==1.83.5
|
||||
litellm==1.83.14
|
||||
|
||||
@ -11,6 +11,10 @@ from typing import Literal
|
||||
import litellm
|
||||
from litellm.caching.caching import DualCache
|
||||
from litellm.proxy._types import UserAPIKeyAuth
|
||||
from litellm.proxy.guardrails._content_utils import (
|
||||
is_text_content_call_type,
|
||||
iter_message_text,
|
||||
)
|
||||
from litellm.integrations.custom_logger import CustomLogger
|
||||
from litellm._logging import verbose_proxy_logger
|
||||
from fastapi import HTTPException
|
||||
@ -73,10 +77,9 @@ class _ENTERPRISE_BannedKeywords(CustomLogger):
|
||||
- check if user id part of blocked list
|
||||
"""
|
||||
self.print_verbose("Inside Banned Keyword List Pre-Call Hook")
|
||||
if call_type == "completion" and "messages" in data:
|
||||
for m in data["messages"]:
|
||||
if "content" in m and isinstance(m["content"], str):
|
||||
self.test_violation(test_str=m["content"])
|
||||
if is_text_content_call_type(call_type):
|
||||
for text in iter_message_text(data):
|
||||
self.test_violation(test_str=text)
|
||||
|
||||
except HTTPException as e:
|
||||
raise e
|
||||
@ -93,11 +96,16 @@ class _ENTERPRISE_BannedKeywords(CustomLogger):
|
||||
user_api_key_dict: UserAPIKeyAuth,
|
||||
response,
|
||||
):
|
||||
if isinstance(response, litellm.ModelResponse) and isinstance(
|
||||
response.choices[0], litellm.utils.Choices
|
||||
):
|
||||
for word in self.banned_keywords_list:
|
||||
self.test_violation(test_str=response.choices[0].message.content or "")
|
||||
if not isinstance(response, litellm.ModelResponse):
|
||||
return
|
||||
|
||||
for choice in response.choices:
|
||||
if not isinstance(choice, litellm.utils.Choices):
|
||||
continue
|
||||
message = getattr(choice, "message", None)
|
||||
content = getattr(message, "content", None)
|
||||
if isinstance(content, str):
|
||||
self.test_violation(test_str=content)
|
||||
|
||||
async def async_post_call_streaming_hook(
|
||||
self,
|
||||
|
||||
@ -12,6 +12,7 @@ import litellm
|
||||
from litellm._logging import verbose_proxy_logger
|
||||
from litellm.integrations.custom_logger import CustomLogger
|
||||
from litellm.proxy._types import UserAPIKeyAuth
|
||||
from litellm.proxy.guardrails._content_utils import iter_message_text
|
||||
from litellm.types.utils import CallTypesLiteral
|
||||
|
||||
|
||||
@ -94,11 +95,9 @@ class _ENTERPRISE_GoogleTextModeration(CustomLogger):
|
||||
- Calls Google's Text Moderation API
|
||||
- Rejects request if it fails safety check
|
||||
"""
|
||||
if "messages" in data and isinstance(data["messages"], list):
|
||||
text = ""
|
||||
for m in data["messages"]: # assume messages is a list
|
||||
if "content" in m and isinstance(m["content"], str):
|
||||
text += m["content"]
|
||||
# Covers multimodal list content + Responses-API input.
|
||||
text = "".join(iter_message_text(data))
|
||||
if text:
|
||||
document = self.language_document(content=text, type_=self.document_type)
|
||||
|
||||
request = self.moderate_text_request(
|
||||
|
||||
@ -19,6 +19,7 @@ import litellm
|
||||
from litellm._logging import verbose_proxy_logger
|
||||
from litellm.integrations.custom_logger import CustomLogger
|
||||
from litellm.proxy._types import UserAPIKeyAuth
|
||||
from litellm.proxy.guardrails._content_utils import iter_message_text
|
||||
from litellm.types.utils import CallTypesLiteral
|
||||
|
||||
|
||||
@ -37,11 +38,8 @@ class _ENTERPRISE_OpenAI_Moderation(CustomLogger):
|
||||
user_api_key_dict: UserAPIKeyAuth,
|
||||
call_type: CallTypesLiteral,
|
||||
):
|
||||
text = ""
|
||||
if "messages" in data and isinstance(data["messages"], list):
|
||||
for m in data["messages"]: # assume messages is a list
|
||||
if "content" in m and isinstance(m["content"], str):
|
||||
text += m["content"]
|
||||
# Covers multimodal list content + Responses-API input.
|
||||
text = "".join(iter_message_text(data))
|
||||
|
||||
from litellm.proxy.proxy_server import llm_router
|
||||
|
||||
|
||||
@ -18,6 +18,7 @@ from litellm._logging import verbose_proxy_logger
|
||||
from litellm.caching.caching import DualCache
|
||||
from litellm.integrations.custom_guardrail import CustomGuardrail
|
||||
from litellm.proxy._types import UserAPIKeyAuth
|
||||
from litellm.proxy.guardrails._content_utils import walk_user_text
|
||||
|
||||
GUARDRAIL_NAME = "hide_secrets"
|
||||
|
||||
@ -473,23 +474,19 @@ class _ENTERPRISE_SecretDetection(CustomGuardrail):
|
||||
if await self.should_run_check(user_api_key_dict) is False:
|
||||
return
|
||||
|
||||
if "messages" in data and isinstance(data["messages"], list):
|
||||
for message in data["messages"]:
|
||||
if "content" in message and isinstance(message["content"], str):
|
||||
detected_secrets = self.scan_message_for_secrets(message["content"])
|
||||
# Covers multimodal list content + Responses-API input.
|
||||
def _redact_message_text(text: str) -> str:
|
||||
detected_secrets = self.scan_message_for_secrets(text)
|
||||
for secret in detected_secrets:
|
||||
text = text.replace(secret["value"], "[REDACTED]")
|
||||
if detected_secrets:
|
||||
secret_types = [secret["type"] for secret in detected_secrets]
|
||||
verbose_proxy_logger.warning(
|
||||
f"Detected and redacted secrets in message: {secret_types}"
|
||||
)
|
||||
return text
|
||||
|
||||
for secret in detected_secrets:
|
||||
message["content"] = message["content"].replace(
|
||||
secret["value"], "[REDACTED]"
|
||||
)
|
||||
|
||||
if len(detected_secrets) > 0:
|
||||
secret_types = [secret["type"] for secret in detected_secrets]
|
||||
verbose_proxy_logger.warning(
|
||||
f"Detected and redacted secrets in message: {secret_types}"
|
||||
)
|
||||
else:
|
||||
verbose_proxy_logger.debug("No secrets detected on input.")
|
||||
walk_user_text(data, _redact_message_text)
|
||||
|
||||
if "prompt" in data:
|
||||
if isinstance(data["prompt"], str):
|
||||
@ -504,11 +501,15 @@ class _ENTERPRISE_SecretDetection(CustomGuardrail):
|
||||
f"Detected and redacted secrets in prompt: {secret_types}"
|
||||
)
|
||||
elif isinstance(data["prompt"], list):
|
||||
for item in data["prompt"]:
|
||||
# Index back into the list — assigning to ``item`` would only
|
||||
# rebind the loop variable and leave ``data["prompt"]``
|
||||
# carrying the unredacted secret.
|
||||
for idx, item in enumerate(data["prompt"]):
|
||||
if isinstance(item, str):
|
||||
detected_secrets = self.scan_message_for_secrets(item)
|
||||
for secret in detected_secrets:
|
||||
item = item.replace(secret["value"], "[REDACTED]")
|
||||
data["prompt"][idx] = item
|
||||
if len(detected_secrets) > 0:
|
||||
secret_types = [
|
||||
secret["type"] for secret in detected_secrets
|
||||
@ -517,31 +518,6 @@ class _ENTERPRISE_SecretDetection(CustomGuardrail):
|
||||
f"Detected and redacted secrets in prompt: {secret_types}"
|
||||
)
|
||||
|
||||
if "input" in data:
|
||||
if isinstance(data["input"], str):
|
||||
detected_secrets = self.scan_message_for_secrets(data["input"])
|
||||
for secret in detected_secrets:
|
||||
data["input"] = data["input"].replace(secret["value"], "[REDACTED]")
|
||||
if len(detected_secrets) > 0:
|
||||
secret_types = [secret["type"] for secret in detected_secrets]
|
||||
verbose_proxy_logger.warning(
|
||||
f"Detected and redacted secrets in input: {secret_types}"
|
||||
)
|
||||
elif isinstance(data["input"], list):
|
||||
_input_in_request = data["input"]
|
||||
for idx, item in enumerate(_input_in_request):
|
||||
if isinstance(item, str):
|
||||
detected_secrets = self.scan_message_for_secrets(item)
|
||||
for secret in detected_secrets:
|
||||
_input_in_request[idx] = item.replace(
|
||||
secret["value"], "[REDACTED]"
|
||||
)
|
||||
if len(detected_secrets) > 0:
|
||||
secret_types = [
|
||||
secret["type"] for secret in detected_secrets
|
||||
]
|
||||
verbose_proxy_logger.warning(
|
||||
f"Detected and redacted secrets in input: {secret_types}"
|
||||
)
|
||||
verbose_proxy_logger.debug("Data after redacting input %s", data)
|
||||
# ``data["input"]`` (Responses API and embeddings/moderation) is
|
||||
# already covered by ``walk_user_text`` above.
|
||||
return
|
||||
|
||||
@ -16,7 +16,7 @@ Repository = "https://github.com/BerriAI/litellm"
|
||||
Documentation = "https://docs.litellm.ai"
|
||||
|
||||
[build-system]
|
||||
requires = ["uv_build==0.10.7"]
|
||||
requires = ["uv_build==0.11.8"]
|
||||
build-backend = "uv_build"
|
||||
|
||||
[tool.uv]
|
||||
|
||||
2054
litellm-js/proxy/package-lock.json
generated
Normal file
2054
litellm-js/proxy/package-lock.json
generated
Normal file
File diff suppressed because it is too large
Load Diff
@ -4,11 +4,11 @@
|
||||
"deploy": "wrangler deploy --minify src/index.ts"
|
||||
},
|
||||
"dependencies": {
|
||||
"hono": "4.12.12",
|
||||
"hono": "4.12.16",
|
||||
"openai": "4.29.2"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@cloudflare/workers-types": "4.20240208.0",
|
||||
"wrangler": "3.32.0"
|
||||
"@cloudflare/workers-types": "4.20260501.1",
|
||||
"wrangler": "4.87.0"
|
||||
}
|
||||
}
|
||||
|
||||
8
litellm-js/spend-logs/package-lock.json
generated
8
litellm-js/spend-logs/package-lock.json
generated
@ -6,7 +6,7 @@
|
||||
"": {
|
||||
"dependencies": {
|
||||
"@hono/node-server": "1.19.13",
|
||||
"hono": "4.12.12"
|
||||
"hono": "4.12.16"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@types/node": "20.19.25",
|
||||
@ -548,9 +548,9 @@
|
||||
}
|
||||
},
|
||||
"node_modules/hono": {
|
||||
"version": "4.12.12",
|
||||
"resolved": "https://registry.npmjs.org/hono/-/hono-4.12.12.tgz",
|
||||
"integrity": "sha512-p1JfQMKaceuCbpJKAPKVqyqviZdS0eUxH9v82oWo1kb9xjQ5wA6iP3FNVAPDFlz5/p7d45lO+BpSk1tuSZMF4Q==",
|
||||
"version": "4.12.16",
|
||||
"resolved": "https://registry.npmjs.org/hono/-/hono-4.12.16.tgz",
|
||||
"integrity": "sha512-jN0ZewiNAWSe5khM3EyCmBb250+b40wWbwNILNfEvq84VREWwOIkuUsFONk/3i3nqkz7Oe1PcpM2mwQEK2L9Kg==",
|
||||
"license": "MIT",
|
||||
"engines": {
|
||||
"node": ">=16.9.0"
|
||||
|
||||
@ -4,7 +4,7 @@
|
||||
},
|
||||
"dependencies": {
|
||||
"@hono/node-server": "1.19.13",
|
||||
"hono": "4.12.12"
|
||||
"hono": "4.12.16"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@types/node": "20.19.25",
|
||||
|
||||
@ -16,7 +16,7 @@ Repository = "https://github.com/BerriAI/litellm"
|
||||
Documentation = "https://docs.litellm.ai"
|
||||
|
||||
[build-system]
|
||||
requires = ["uv_build==0.10.7"]
|
||||
requires = ["uv_build==0.11.8"]
|
||||
build-backend = "uv_build"
|
||||
|
||||
[tool.uv]
|
||||
|
||||
@ -166,7 +166,7 @@ langfuse_default_tags: Optional[List[str]] = None
|
||||
langsmith_batch_size: Optional[int] = None
|
||||
prometheus_initialize_budget_metrics: Optional[bool] = False
|
||||
prometheus_latency_buckets: Optional[List[float]] = None
|
||||
require_auth_for_metrics_endpoint: Optional[bool] = False
|
||||
require_auth_for_metrics_endpoint: Optional[bool] = True
|
||||
argilla_batch_size: Optional[int] = None
|
||||
datadog_use_v1: Optional[bool] = False # if you want to use v1 datadog logged payload.
|
||||
gcs_pub_sub_use_v1: Optional[bool] = (
|
||||
|
||||
@ -5,7 +5,8 @@ Fetches prompt versions from Arize Phoenix and provides workspace-based access c
|
||||
|
||||
from typing import Any, Dict, List, Optional, Tuple, Union
|
||||
|
||||
from jinja2 import DictLoader, Environment, select_autoescape
|
||||
from jinja2 import DictLoader, select_autoescape
|
||||
from jinja2.sandbox import ImmutableSandboxedEnvironment
|
||||
|
||||
from litellm.integrations.custom_prompt_management import CustomPromptManagement
|
||||
from litellm.integrations.prompt_management_base import (
|
||||
@ -74,7 +75,13 @@ class ArizePhoenixTemplateManager:
|
||||
api_key=self.api_key, api_base=self.api_base
|
||||
)
|
||||
|
||||
self.jinja_env = Environment(
|
||||
# Templates fetched from Arize Phoenix come from external workspace
|
||||
# users; in a plain `Environment()` a malicious template could reach
|
||||
# `__class__.__init__.__globals__` and execute arbitrary code on the
|
||||
# proxy host. The sandbox blocks that attribute traversal while
|
||||
# leaving normal `{{ var }}` substitution intact. Matches the
|
||||
# dotprompt manager's hardening.
|
||||
self.jinja_env = ImmutableSandboxedEnvironment(
|
||||
loader=DictLoader({}),
|
||||
autoescape=select_autoescape(["html", "xml"]),
|
||||
# Use Mustache/Handlebars-style delimiters
|
||||
|
||||
@ -5,7 +5,8 @@ Fetches .prompt files from BitBucket repositories and provides team-based access
|
||||
|
||||
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Union
|
||||
|
||||
from jinja2 import DictLoader, Environment, select_autoescape
|
||||
from jinja2 import DictLoader, select_autoescape
|
||||
from jinja2.sandbox import ImmutableSandboxedEnvironment
|
||||
|
||||
from litellm.integrations.custom_prompt_management import CustomPromptManagement
|
||||
|
||||
@ -74,7 +75,13 @@ class BitBucketTemplateManager:
|
||||
self.prompts: Dict[str, BitBucketPromptTemplate] = {}
|
||||
self.bitbucket_client = BitBucketClient(bitbucket_config)
|
||||
|
||||
self.jinja_env = Environment(
|
||||
# Templates fetched from a BitBucket repo are not trustworthy:
|
||||
# anyone with repo write access can ship Jinja syntax that, in a
|
||||
# plain `Environment()`, would reach `__class__.__init__.__globals__`
|
||||
# and pivot into RCE on the proxy host. The sandbox blocks that
|
||||
# attribute traversal while leaving normal `{{ var }}` substitution
|
||||
# intact. Matches the dotprompt manager's hardening.
|
||||
self.jinja_env = ImmutableSandboxedEnvironment(
|
||||
loader=DictLoader({}),
|
||||
autoescape=select_autoescape(["html", "xml"]),
|
||||
# Use Handlebars-style delimiters to match Dotprompt spec
|
||||
|
||||
@ -4,7 +4,8 @@ GitLab prompt manager with configurable prompts folder.
|
||||
|
||||
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Union
|
||||
|
||||
from jinja2 import DictLoader, Environment, select_autoescape
|
||||
from jinja2 import DictLoader, select_autoescape
|
||||
from jinja2.sandbox import ImmutableSandboxedEnvironment
|
||||
|
||||
from litellm.integrations.custom_prompt_management import CustomPromptManagement
|
||||
|
||||
@ -90,7 +91,13 @@ class GitLabTemplateManager:
|
||||
or ""
|
||||
).strip("/")
|
||||
|
||||
self.jinja_env = Environment(
|
||||
# Templates fetched from a GitLab repo are not trustworthy:
|
||||
# anyone with repo write access can ship Jinja syntax that, in a
|
||||
# plain `Environment()`, would reach `__class__.__init__.__globals__`
|
||||
# and pivot into RCE on the proxy host. The sandbox blocks that
|
||||
# attribute traversal while leaving normal `{{ var }}` substitution
|
||||
# intact. Matches the dotprompt manager's hardening.
|
||||
self.jinja_env = ImmutableSandboxedEnvironment(
|
||||
loader=DictLoader({}),
|
||||
autoescape=select_autoescape(["html", "xml"]),
|
||||
variable_start_string="{{",
|
||||
|
||||
@ -617,13 +617,12 @@ class LiteLLMRoutes(enum.Enum):
|
||||
"/",
|
||||
"/health/liveliness",
|
||||
"/health/liveness",
|
||||
"/health/readiness",
|
||||
"/test",
|
||||
"/config/yaml",
|
||||
"/metrics",
|
||||
"/litellm/.well-known/litellm-ui-config",
|
||||
"/.well-known/litellm-ui-config",
|
||||
"/public/model_hub",
|
||||
"/public/model_hub/info",
|
||||
"/public/agent_hub",
|
||||
"/public/mcp_hub",
|
||||
"/public/skill_hub",
|
||||
|
||||
@ -216,20 +216,15 @@ _EXTRA_BANNED_OBSERVABILITY_PARAMS: FrozenSet[str] = frozenset(
|
||||
def _build_banned_observability_params() -> FrozenSet[str]:
|
||||
"""Derive the observability ban list from the canonical allowlist.
|
||||
|
||||
``_supported_callback_params`` in
|
||||
``_supported_callback_params`` and ``_request_blocked_callback_params`` in
|
||||
``litellm/litellm_core_utils/initialize_dynamic_callback_params.py`` is
|
||||
the single place that enumerates every observability field
|
||||
integrations resolve from kwargs/metadata. Subtract the small set of
|
||||
informational fields (``_SAFE_CLIENT_CALLBACK_PARAMS``) and union with
|
||||
the extras the canonical allowlist hasn't caught up to yet. New
|
||||
integrations added to the canonical allowlist are banned by default,
|
||||
which is the safe failure mode.
|
||||
|
||||
``_request_blocked_callback_params`` (e.g. ``gcs_bucket_name``,
|
||||
``gcs_path_service_account``) is the GCS-logging-specific deny list
|
||||
that lives alongside the allowlist; fold it in here so a single
|
||||
declaration of "this field must not be caller-supplied" covers both
|
||||
the request-body bouncer and the dynamic callback initializer.
|
||||
the single place that enumerates every observability field integrations
|
||||
resolve from kwargs/metadata, plus fields that integration code explicitly
|
||||
blocks from request-supplied callback params. Subtract the small set of
|
||||
informational fields (``_SAFE_CLIENT_CALLBACK_PARAMS``) and union with the
|
||||
extras the canonical allowlist hasn't caught up to yet. New integrations
|
||||
added to the canonical allowlist are banned by default, which is the safe
|
||||
failure mode.
|
||||
"""
|
||||
from litellm.litellm_core_utils.initialize_dynamic_callback_params import (
|
||||
_request_blocked_callback_params,
|
||||
@ -238,8 +233,8 @@ def _build_banned_observability_params() -> FrozenSet[str]:
|
||||
|
||||
return (
|
||||
(frozenset(_supported_callback_params) - _SAFE_CLIENT_CALLBACK_PARAMS)
|
||||
| _EXTRA_BANNED_OBSERVABILITY_PARAMS
|
||||
| frozenset(_request_blocked_callback_params)
|
||||
| _EXTRA_BANNED_OBSERVABILITY_PARAMS
|
||||
)
|
||||
|
||||
|
||||
|
||||
@ -87,6 +87,23 @@ except ImportError as e:
|
||||
|
||||
user_api_key_service_logger_obj = ServiceLogging() # used for tracking latency on OTEL
|
||||
|
||||
|
||||
def _normalize_public_auth_route(route: str) -> str:
|
||||
if route != "/" and route.endswith("/"):
|
||||
return route.rstrip("/")
|
||||
return route
|
||||
|
||||
|
||||
def _route_requires_auth_despite_public(
    route: str, general_settings: Optional[dict]
) -> bool:
    """Tell whether a route must be authenticated even if listed as public.

    Only ``/metrics`` (after trailing-slash normalization) is considered:
    it requires auth unless ``litellm.require_auth_for_metrics_endpoint``
    is exactly ``False``. Every other route returns ``False``.

    Args:
        route: Request path being checked.
        general_settings: Accepted for the caller's convenience; not read
            by this function.
    """
    if _normalize_public_auth_route(route) != "/metrics":
        return False
    # Anything other than an explicit ``False`` (including ``None``) keeps
    # the metrics endpoint behind auth.
    return litellm.require_auth_for_metrics_endpoint is not False
|
||||
|
||||
|
||||
custom_litellm_key_header = APIKeyHeader(
|
||||
name=SpecialHeaders.custom_litellm_api_key.value,
|
||||
auto_error=False,
|
||||
@ -714,7 +731,9 @@ async def _user_api_key_auth_builder( # noqa: PLR0915
|
||||
"""
|
||||
|
||||
######## Route Checks Before Reading DB / Cache for "token" ################
|
||||
if (
|
||||
if not _route_requires_auth_despite_public(
|
||||
route=route, general_settings=general_settings
|
||||
) and (
|
||||
route in LiteLLMRoutes.public_routes.value # type: ignore
|
||||
or route_in_additonal_public_routes(current_route=route)
|
||||
):
|
||||
@ -1698,7 +1717,7 @@ async def _run_centralized_common_checks(
|
||||
user_custom_auth,
|
||||
)
|
||||
|
||||
# Public routes (e.g. /health/readiness, /metrics) are exempt from
|
||||
# Public routes (e.g. /health/liveness) are exempt from
|
||||
# auth in the builder — the wrapper must not retroactively apply
|
||||
# authz on top, or k8s readiness probes and other unauthenticated
|
||||
# callers get 401.
|
||||
|
||||
@ -50,7 +50,10 @@ def configure_gc_thresholds():
|
||||
configure_gc_thresholds()
|
||||
|
||||
|
||||
@router.get("/debug/asyncio-tasks")
|
||||
@router.get(
|
||||
"/debug/asyncio-tasks",
|
||||
dependencies=[Depends(user_api_key_auth)],
|
||||
)
|
||||
async def get_active_tasks_stats():
|
||||
"""
|
||||
Returns:
|
||||
@ -103,7 +106,11 @@ if os.environ.get("LITELLM_PROFILE", "false").lower() == "true":
|
||||
|
||||
tracemalloc.start(10)
|
||||
|
||||
@router.get("/memory-usage", include_in_schema=False)
|
||||
@router.get(
|
||||
"/memory-usage",
|
||||
dependencies=[Depends(user_api_key_auth)],
|
||||
include_in_schema=False,
|
||||
)
|
||||
async def memory_usage():
|
||||
# Take a snapshot of the current memory usage
|
||||
snapshot = tracemalloc.take_snapshot()
|
||||
@ -711,7 +718,11 @@ async def configure_gc_thresholds_endpoint(
|
||||
}
|
||||
|
||||
|
||||
@router.get("/otel-spans", include_in_schema=False)
|
||||
@router.get(
|
||||
"/otel-spans",
|
||||
dependencies=[Depends(user_api_key_auth)],
|
||||
include_in_schema=False,
|
||||
)
|
||||
async def get_otel_spans():
|
||||
from litellm.proxy.proxy_server import open_telemetry_logger
|
||||
|
||||
|
||||
236
litellm/proxy/guardrails/_content_utils.py
Normal file
236
litellm/proxy/guardrails/_content_utils.py
Normal file
@ -0,0 +1,236 @@
|
||||
"""
|
||||
Shared helpers for guardrail hooks: extract text from a request body
|
||||
regardless of whether it uses Chat Completions ``messages``, Responses-API
|
||||
``input``, or multimodal list-format ``content`` parts.
|
||||
|
||||
Hooks that only check ``data["messages"]`` for string content silently
|
||||
skip the other shapes — these helpers normalise that so every hook sees
|
||||
every text fragment.
|
||||
"""
|
||||
|
||||
from typing import Any, Callable, Dict, FrozenSet, Iterator, List
|
||||
|
||||
|
||||
# Call types whose body carries free-form chat / prompt text that
|
||||
# text-content guardrails (banned keywords, content moderation, secret
|
||||
# detection, …) should inspect. The proxy ingress passes ``route_type``
|
||||
# straight through as ``call_type``, so the literal values here are
|
||||
# what the guardrail dispatcher actually receives:
|
||||
#
|
||||
# /v1/chat/completions -> "acompletion"
|
||||
# /v1/responses -> "aresponses"
|
||||
#
|
||||
# ``"completion"`` is included for SDK / internal callers that invoke
|
||||
# ``pre_call_hook`` directly with the sync name. Embedding, moderation,
|
||||
# audio, and transcription endpoints are deliberately excluded — text
|
||||
# guardrails on those paths are a separate scope.
|
||||
TEXT_CONTENT_CALL_TYPES: FrozenSet[str] = frozenset(
    ("completion", "acompletion", "aresponses")
)


def is_text_content_call_type(call_type: str) -> bool:
    """Report whether ``call_type`` is one that text guardrails inspect.

    Returns True exactly when ``call_type`` is a member of
    ``TEXT_CONTENT_CALL_TYPES`` (Chat Completions or Responses API).
    """
    carries_text = call_type in TEXT_CONTENT_CALL_TYPES
    return carries_text
|
||||
|
||||
|
||||
def _iter_text_parts_in_content(content: Any) -> Iterator[str]:
|
||||
"""Yield text fragments from a ``message.content`` value (string or
|
||||
multimodal list). Non-text parts (images, audio, …) are skipped."""
|
||||
if isinstance(content, str):
|
||||
if content:
|
||||
yield content
|
||||
elif isinstance(content, list):
|
||||
for part in content:
|
||||
if isinstance(part, str):
|
||||
# A bare string in a content/input list is itself a text
|
||||
# fragment (Responses-API mixed-list shape).
|
||||
if part:
|
||||
yield part
|
||||
continue
|
||||
if not isinstance(part, dict):
|
||||
continue
|
||||
if part.get("type") == "text":
|
||||
text = part.get("text")
|
||||
if isinstance(text, str) and text:
|
||||
yield text
|
||||
|
||||
|
||||
def _coerce_input_to_messages(input_value: Any) -> List[Dict[str, Any]]:
|
||||
"""Coerce a Responses-API ``data["input"]`` value into chat-style messages."""
|
||||
if isinstance(input_value, str):
|
||||
return [{"role": "user", "content": input_value}]
|
||||
if isinstance(input_value, list):
|
||||
if input_value and all(
|
||||
isinstance(item, dict) and "role" in item for item in input_value
|
||||
):
|
||||
return list(input_value)
|
||||
# Mixed lists (content-part dicts + bare strings) and pure
|
||||
# string/dict lists all become a single user message; the content
|
||||
# iterator below handles each element type uniformly.
|
||||
return [{"role": "user", "content": input_value}]
|
||||
return []
|
||||
|
||||
|
||||
def _iter_inspection_messages(data: Dict[str, Any]) -> Iterator[Dict[str, Any]]:
    """Yield message-like entries from ``messages`` first, then ``input``.

    ``data["messages"]`` is walked only when it is a list; its elements
    are yielded as-is. ``data["input"]`` is always passed through
    ``_coerce_input_to_messages`` and the resulting messages follow.
    """
    chat_messages = data.get("messages")
    if isinstance(chat_messages, list):
        for entry in chat_messages:
            yield entry
    for entry in _coerce_input_to_messages(data.get("input")):
        yield entry
|
||||
|
||||
|
||||
def iter_message_text(data: Dict[str, Any]) -> Iterator[str]:
    """Yield every text fragment found in ``messages`` and ``input``.

    No role filtering is applied: user, assistant, system and any other
    role are all walked, so guardrails see the whole conversation.
    Entries that are not dicts are ignored.
    """
    for entry in _iter_inspection_messages(data):
        if isinstance(entry, dict):
            yield from _iter_text_parts_in_content(entry.get("content"))
|
||||
|
||||
|
||||
def walk_user_text(data: Dict[str, Any], visit: Callable[[str], str]) -> int:
    """Rewrite every text fragment in place via ``visit``.

    Mutates ``data["messages"]`` and ``data["input"]``.

    Args:
        data: Request body. ``messages`` is processed when it is a list of
            dicts with a ``content`` key; ``input`` may be a string or a
            list of messages, content parts and/or bare strings.
        visit: Called once per non-empty text fragment; its return value
            replaces the fragment.

    Returns:
        The number of fragments visited, so callers can short-circuit when
        nothing was inspected.

    Notes:
        * Text parts are dicts whose ``type`` is ``"text"``,
          ``"input_text"`` or ``"output_text"`` (the latter two are the
          Responses-API spellings) and whose ``text`` is a non-empty string.
        * Items of an ``input`` list are classified one by one, so a
          message (dict with ``"role"``) is still rewritten when the list
          also contains role-less items.
    """
    visited = 0
    # Tuple (not a set) so an unhashable ``type`` value cannot raise.
    text_part_types = ("text", "input_text", "output_text")

    def _is_text_part(part: Any) -> bool:
        """Return True for a content-part dict carrying non-empty text."""
        return (
            isinstance(part, dict)
            and part.get("type") in text_part_types
            and isinstance(part.get("text"), str)
            and bool(part["text"])
        )

    def _rewrite_content(content: Any) -> Any:
        """Return ``content`` with every text fragment passed through ``visit``."""
        nonlocal visited
        if isinstance(content, str):
            if content:
                visited += 1
                return visit(content)
            return content
        if isinstance(content, list):
            new_parts: List[Any] = []
            for part in content:
                if isinstance(part, str) and part:
                    visited += 1
                    new_parts.append(visit(part))
                elif _is_text_part(part):
                    visited += 1
                    # Copy so non-text keys on the part are preserved.
                    new_parts.append({**part, "text": visit(part["text"])})
                else:
                    new_parts.append(part)
            return new_parts
        return content

    messages = data.get("messages")
    if isinstance(messages, list):
        for message in messages:
            if isinstance(message, dict) and "content" in message:
                message["content"] = _rewrite_content(message["content"])

    input_value = data.get("input")
    if isinstance(input_value, str):
        if input_value:
            visited += 1
            data["input"] = visit(input_value)
    elif isinstance(input_value, list):
        for idx, item in enumerate(input_value):
            if isinstance(item, str):
                if item:
                    visited += 1
                    input_value[idx] = visit(item)
            elif isinstance(item, dict):
                # Full message: rewrite its content in place.
                if "role" in item and "content" in item:
                    item["content"] = _rewrite_content(item["content"])
                # Bare content part: replace the list slot with a rewritten copy.
                if _is_text_part(item):
                    visited += 1
                    input_value[idx] = {**item, "text": visit(item["text"])}

    return visited
|
||||
|
||||
|
||||
def apply_redacted_messages_back(
    data: Dict[str, Any], redacted_messages: List[Dict[str, Any]]
) -> None:
    """Copy redacted messages into the request field(s) the caller used.

    Mask/anonymize flows send a synthesised message list to a guardrail
    and receive a redacted list back. Writing that only to
    ``data["messages"]`` would leave a Responses-API ``data["input"]``
    untouched, so both fields are handled here:

    * If ``"messages"`` is a key of ``data`` it is replaced by
      ``redacted_messages``.
    * If ``data["input"]`` is a string it is replaced by the text
      fragments of all redacted messages joined with newlines.
      Non-string ``input`` values are left as they are.
    """
    if "messages" in data:
        data["messages"] = redacted_messages

    if not isinstance(data.get("input"), str):
        return

    fragments: List[str] = []
    for entry in redacted_messages:
        if isinstance(entry, dict):
            fragments.extend(_iter_text_parts_in_content(entry.get("content")))
    data["input"] = "\n".join(fragments)
|
||||
|
||||
|
||||
def has_non_string_content(data: Dict[str, Any]) -> bool:
    """Report whether any inspected content is something other than a string.

    Intended for hooks that mask by string offset and so cannot keep
    multimodal parts intact; they can switch to block-on-detect when this
    is True instead of silently dropping image/audio parts.

    Returns True when a dict in ``data["messages"]`` has a ``content``
    that is neither ``None`` nor a string, or when ``data["input"]`` is
    neither ``None`` nor a string. Returns False otherwise.
    """
    chat_messages = data.get("messages")
    if isinstance(chat_messages, list):
        for entry in chat_messages:
            if not isinstance(entry, dict):
                continue
            body = entry.get("content")
            if body is not None and not isinstance(body, str):
                return True

    raw_input = data.get("input")
    return raw_input is not None and not isinstance(raw_input, str)
|
||||
|
||||
|
||||
def build_inspection_messages(data: Dict[str, Any]) -> List[Dict[str, str]]:
    """Build a flat, string-only messages list to send to a guardrail API.

    Every entry from ``messages`` and ``input`` is reduced to
    ``{"role": ..., "content": <str>}``: the entry's text fragments are
    joined with newlines, and entries with no text are left out. A missing
    or falsy ``role`` is reported as ``"user"``.

    Hooks that POST ``{"messages": [...]}`` to an external service should
    use this rather than ``data.get("messages", [])`` so Responses-API and
    multimodal requests are inspected too.
    """
    synthesized: List[Dict[str, str]] = []
    for entry in _iter_inspection_messages(data):
        if not isinstance(entry, dict):
            continue
        joined = "\n".join(_iter_text_parts_in_content(entry.get("content")))
        if joined:
            synthesized.append(
                {"role": entry.get("role", "user") or "user", "content": joined}
            )
    return synthesized
|
||||
@ -22,6 +22,11 @@ from litellm.llms.custom_httpx.http_handler import (
|
||||
httpxSpecialProvider,
|
||||
)
|
||||
from litellm.proxy._types import UserAPIKeyAuth
|
||||
from litellm.proxy.guardrails._content_utils import (
|
||||
apply_redacted_messages_back,
|
||||
build_inspection_messages,
|
||||
has_non_string_content,
|
||||
)
|
||||
from litellm.types.utils import (
|
||||
CallTypesLiteral,
|
||||
Choices,
|
||||
@ -101,10 +106,11 @@ class AimGuardrail(CustomGuardrail):
|
||||
user_email=user_email,
|
||||
litellm_call_id=call_id,
|
||||
)
|
||||
# Covers multimodal list content + Responses-API input.
|
||||
response = await self.async_handler.post(
|
||||
f"{self.api_base}/fw/v1/analyze",
|
||||
headers=headers,
|
||||
json={"messages": data.get("messages", [])},
|
||||
json={"messages": build_inspection_messages(data)},
|
||||
)
|
||||
response.raise_for_status()
|
||||
res = response.json()
|
||||
@ -137,13 +143,31 @@ class AimGuardrail(CustomGuardrail):
|
||||
redacted_chat = res.get("redacted_chat")
|
||||
if not redacted_chat:
|
||||
return data
|
||||
data["messages"] = [
|
||||
# Aim returns text-only redacted messages. Overwriting
|
||||
# ``data["messages"]`` with that would silently strip image/audio
|
||||
# parts from a multimodal request — degrade to block so the
|
||||
# multimodal payload is never silently rewritten.
|
||||
if has_non_string_content(data):
|
||||
raise HTTPException(
|
||||
status_code=400,
|
||||
detail=(
|
||||
"Aim: anonymize action requested for multimodal input "
|
||||
"but mask-in-place would drop non-text parts. Send the "
|
||||
"request with plain string content to use anonymize, "
|
||||
"or rely on block-mode policies."
|
||||
),
|
||||
)
|
||||
redacted_messages = [
|
||||
{
|
||||
"role": message["role"],
|
||||
"content": message["content"],
|
||||
}
|
||||
for message in redacted_chat["all_redacted_messages"]
|
||||
]
|
||||
# Write back to ``messages`` AND ``input``. The Responses-API
|
||||
# backend reads ``input``; writing only to ``messages`` would let
|
||||
# unredacted text reach the LLM for ``/v1/responses`` calls.
|
||||
apply_redacted_messages_back(data, redacted_messages)
|
||||
return data
|
||||
|
||||
async def call_aim_guardrail_on_output(
|
||||
@ -162,7 +186,7 @@ class AimGuardrail(CustomGuardrail):
|
||||
litellm_call_id=call_id,
|
||||
),
|
||||
json={
|
||||
"messages": request_data.get("messages", [])
|
||||
"messages": build_inspection_messages(request_data)
|
||||
+ [{"role": "assistant", "content": output}]
|
||||
},
|
||||
)
|
||||
@ -233,15 +257,33 @@ class AimGuardrail(CustomGuardrail):
|
||||
user_api_key_dict: UserAPIKeyAuth,
|
||||
response: Union[Any, ModelResponse, EmbeddingResponse, ImageResponse],
|
||||
) -> Any:
|
||||
if (
|
||||
isinstance(response, ModelResponse)
|
||||
and response.choices
|
||||
and isinstance(response.choices[0], Choices)
|
||||
):
|
||||
content = response.choices[0].message.content or ""
|
||||
aim_output_guardrail_result = await self.call_aim_guardrail_on_output(
|
||||
data, content, hook="output", key_alias=user_api_key_dict.key_alias
|
||||
)
|
||||
if not (isinstance(response, ModelResponse) and response.choices):
|
||||
return response
|
||||
# Inspect every choice — when ``n>1`` the additional completions
|
||||
# used to bypass Aim entirely because the hook only inspected
|
||||
# ``choices[0]``. Run inspections concurrently so multi-completion
|
||||
# responses don't pay an n× latency penalty.
|
||||
choices_to_inspect = [c for c in response.choices if isinstance(c, Choices)]
|
||||
if not choices_to_inspect:
|
||||
return response
|
||||
# ``return_exceptions=True`` lets every inspection finish even if
|
||||
# one fails — without it, the first exception would propagate and
|
||||
# leave the remaining tasks running in the background.
|
||||
results = await asyncio.gather(
|
||||
*(
|
||||
self.call_aim_guardrail_on_output(
|
||||
data,
|
||||
choice.message.content or "",
|
||||
hook="output",
|
||||
key_alias=user_api_key_dict.key_alias,
|
||||
)
|
||||
for choice in choices_to_inspect
|
||||
),
|
||||
return_exceptions=True,
|
||||
)
|
||||
for choice, aim_output_guardrail_result in zip(choices_to_inspect, results):
|
||||
if isinstance(aim_output_guardrail_result, BaseException):
|
||||
raise aim_output_guardrail_result
|
||||
if aim_output_guardrail_result and aim_output_guardrail_result.get(
|
||||
"detection_message"
|
||||
):
|
||||
@ -252,7 +294,7 @@ class AimGuardrail(CustomGuardrail):
|
||||
if aim_output_guardrail_result and aim_output_guardrail_result.get(
|
||||
"redacted_output"
|
||||
):
|
||||
response.choices[0].message.content = aim_output_guardrail_result.get(
|
||||
choice.message.content = aim_output_guardrail_result.get(
|
||||
"redacted_output"
|
||||
)
|
||||
return response
|
||||
|
||||
@ -254,15 +254,16 @@ class AzureContentSafetyTextModerationGuardrail(AzureGuardrailBase, CustomGuardr
|
||||
) -> Any:
|
||||
from litellm.types.utils import Choices, ModelResponse
|
||||
|
||||
if (
|
||||
isinstance(response, ModelResponse)
|
||||
and response.choices
|
||||
and isinstance(response.choices[0], Choices)
|
||||
):
|
||||
content = response.choices[0].message.content or ""
|
||||
await self.async_make_request(
|
||||
text=content,
|
||||
)
|
||||
if isinstance(response, ModelResponse) and response.choices:
|
||||
for choice in response.choices:
|
||||
if not isinstance(choice, Choices):
|
||||
continue
|
||||
content = _message_content_to_text(choice.message.content)
|
||||
if not content:
|
||||
continue
|
||||
await self.async_make_request(
|
||||
text=content,
|
||||
)
|
||||
return response
|
||||
|
||||
async def async_post_call_streaming_hook(
|
||||
@ -279,3 +280,16 @@ class AzureContentSafetyTextModerationGuardrail(AzureGuardrailBase, CustomGuardr
|
||||
|
||||
error_returned = json.dumps({"error": e.detail})
|
||||
return f"data: {error_returned}\n\n"
|
||||
|
||||
|
||||
def _message_content_to_text(content: Any) -> str:
|
||||
if isinstance(content, str):
|
||||
return content
|
||||
if isinstance(content, list):
|
||||
text_parts = [
|
||||
item.get("text")
|
||||
for item in content
|
||||
if isinstance(item, dict) and isinstance(item.get("text"), str)
|
||||
]
|
||||
return "\n".join(part for part in text_parts if part)
|
||||
return ""
|
||||
|
||||
@ -20,6 +20,7 @@ from litellm.llms.custom_httpx.http_handler import (
|
||||
httpxSpecialProvider,
|
||||
)
|
||||
from litellm.proxy._types import UserAPIKeyAuth
|
||||
from litellm.proxy.guardrails._content_utils import iter_message_text
|
||||
from litellm.types.guardrails import GuardrailEventHooks
|
||||
from litellm.types.proxy.guardrails.guardrail_hooks.ibm import (
|
||||
IBMDetectorDetection,
|
||||
@ -463,65 +464,53 @@ class IBMGuardrailDetector(CustomGuardrail):
|
||||
if self.should_run_guardrail(data=data, event_type=event_type) is not True:
|
||||
return data
|
||||
|
||||
_messages = data.get("messages")
|
||||
if _messages:
|
||||
contents_to_check: List[str] = []
|
||||
for message in _messages:
|
||||
_content = message.get("content")
|
||||
if isinstance(_content, str):
|
||||
contents_to_check.append(_content)
|
||||
# Covers multimodal list content + Responses-API input.
|
||||
contents_to_check: List[str] = list(iter_message_text(data))
|
||||
if contents_to_check:
|
||||
if self.is_detector_server:
|
||||
# Call detector server with all contents at once
|
||||
result = await self._call_detector_server(
|
||||
contents=contents_to_check,
|
||||
request_data=data,
|
||||
event_type=GuardrailEventHooks.pre_call,
|
||||
)
|
||||
|
||||
if contents_to_check:
|
||||
if self.is_detector_server:
|
||||
# Call detector server with all contents at once
|
||||
result = await self._call_detector_server(
|
||||
contents=contents_to_check,
|
||||
verbose_proxy_logger.debug(
|
||||
"IBM Detector Server async_pre_call_hook result: %s", result
|
||||
)
|
||||
|
||||
# Check if any detections were found
|
||||
has_violations = False
|
||||
for message_detections in result:
|
||||
filtered = self._filter_detections_by_threshold(message_detections)
|
||||
if filtered:
|
||||
has_violations = True
|
||||
break
|
||||
|
||||
if has_violations and self.block_on_detection:
|
||||
error_message = self._create_error_message_detector_server(result)
|
||||
raise ValueError(error_message)
|
||||
|
||||
else:
|
||||
# Call orchestrator for each content separately
|
||||
for content in contents_to_check:
|
||||
orchestrator_result = await self._call_orchestrator(
|
||||
content=content,
|
||||
request_data=data,
|
||||
event_type=GuardrailEventHooks.pre_call,
|
||||
)
|
||||
|
||||
verbose_proxy_logger.debug(
|
||||
"IBM Detector Server async_pre_call_hook result: %s", result
|
||||
"IBM Orchestrator async_pre_call_hook result: %s",
|
||||
orchestrator_result,
|
||||
)
|
||||
|
||||
# Check if any detections were found
|
||||
has_violations = False
|
||||
for message_detections in result:
|
||||
filtered = self._filter_detections_by_threshold(
|
||||
message_detections
|
||||
)
|
||||
if filtered:
|
||||
has_violations = True
|
||||
break
|
||||
|
||||
if has_violations and self.block_on_detection:
|
||||
error_message = self._create_error_message_detector_server(
|
||||
result
|
||||
)
|
||||
raise ValueError(error_message)
|
||||
|
||||
else:
|
||||
# Call orchestrator for each content separately
|
||||
for content in contents_to_check:
|
||||
orchestrator_result = await self._call_orchestrator(
|
||||
content=content,
|
||||
request_data=data,
|
||||
event_type=GuardrailEventHooks.pre_call,
|
||||
)
|
||||
|
||||
verbose_proxy_logger.debug(
|
||||
"IBM Orchestrator async_pre_call_hook result: %s",
|
||||
orchestrator_result,
|
||||
)
|
||||
|
||||
filtered = self._filter_detections_by_threshold(
|
||||
filtered = self._filter_detections_by_threshold(orchestrator_result)
|
||||
if filtered and self.block_on_detection:
|
||||
error_message = self._create_error_message_orchestrator(
|
||||
orchestrator_result
|
||||
)
|
||||
if filtered and self.block_on_detection:
|
||||
error_message = self._create_error_message_orchestrator(
|
||||
orchestrator_result
|
||||
)
|
||||
raise ValueError(error_message)
|
||||
raise ValueError(error_message)
|
||||
|
||||
# Add guardrail to applied guardrails header
|
||||
add_guardrail_to_applied_guardrails_header(
|
||||
@ -550,65 +539,53 @@ class IBMGuardrailDetector(CustomGuardrail):
|
||||
if self.should_run_guardrail(data=data, event_type=event_type) is not True:
|
||||
return
|
||||
|
||||
_messages = data.get("messages")
|
||||
if _messages:
|
||||
contents_to_check: List[str] = []
|
||||
for message in _messages:
|
||||
_content = message.get("content")
|
||||
if isinstance(_content, str):
|
||||
contents_to_check.append(_content)
|
||||
# Covers multimodal list content + Responses-API input.
|
||||
contents_to_check: List[str] = list(iter_message_text(data))
|
||||
if contents_to_check:
|
||||
if self.is_detector_server:
|
||||
# Call detector server with all contents at once
|
||||
result = await self._call_detector_server(
|
||||
contents=contents_to_check,
|
||||
request_data=data,
|
||||
event_type=GuardrailEventHooks.during_call,
|
||||
)
|
||||
|
||||
if contents_to_check:
|
||||
if self.is_detector_server:
|
||||
# Call detector server with all contents at once
|
||||
result = await self._call_detector_server(
|
||||
contents=contents_to_check,
|
||||
verbose_proxy_logger.debug(
|
||||
"IBM Detector Server async_moderation_hook result: %s", result
|
||||
)
|
||||
|
||||
# Check if any detections were found
|
||||
has_violations = False
|
||||
for message_detections in result:
|
||||
filtered = self._filter_detections_by_threshold(message_detections)
|
||||
if filtered:
|
||||
has_violations = True
|
||||
break
|
||||
|
||||
if has_violations and self.block_on_detection:
|
||||
error_message = self._create_error_message_detector_server(result)
|
||||
raise ValueError(error_message)
|
||||
|
||||
else:
|
||||
# Call orchestrator for each content separately
|
||||
for content in contents_to_check:
|
||||
orchestrator_result = await self._call_orchestrator(
|
||||
content=content,
|
||||
request_data=data,
|
||||
event_type=GuardrailEventHooks.during_call,
|
||||
)
|
||||
|
||||
verbose_proxy_logger.debug(
|
||||
"IBM Detector Server async_moderation_hook result: %s", result
|
||||
"IBM Orchestrator async_moderation_hook result: %s",
|
||||
orchestrator_result,
|
||||
)
|
||||
|
||||
# Check if any detections were found
|
||||
has_violations = False
|
||||
for message_detections in result:
|
||||
filtered = self._filter_detections_by_threshold(
|
||||
message_detections
|
||||
)
|
||||
if filtered:
|
||||
has_violations = True
|
||||
break
|
||||
|
||||
if has_violations and self.block_on_detection:
|
||||
error_message = self._create_error_message_detector_server(
|
||||
result
|
||||
)
|
||||
raise ValueError(error_message)
|
||||
|
||||
else:
|
||||
# Call orchestrator for each content separately
|
||||
for content in contents_to_check:
|
||||
orchestrator_result = await self._call_orchestrator(
|
||||
content=content,
|
||||
request_data=data,
|
||||
event_type=GuardrailEventHooks.during_call,
|
||||
)
|
||||
|
||||
verbose_proxy_logger.debug(
|
||||
"IBM Orchestrator async_moderation_hook result: %s",
|
||||
orchestrator_result,
|
||||
)
|
||||
|
||||
filtered = self._filter_detections_by_threshold(
|
||||
filtered = self._filter_detections_by_threshold(orchestrator_result)
|
||||
if filtered and self.block_on_detection:
|
||||
error_message = self._create_error_message_orchestrator(
|
||||
orchestrator_result
|
||||
)
|
||||
if filtered and self.block_on_detection:
|
||||
error_message = self._create_error_message_orchestrator(
|
||||
orchestrator_result
|
||||
)
|
||||
raise ValueError(error_message)
|
||||
raise ValueError(error_message)
|
||||
|
||||
# Add guardrail to applied guardrails header
|
||||
add_guardrail_to_applied_guardrails_header(
|
||||
|
||||
@ -13,6 +13,11 @@ from litellm.llms.custom_httpx.http_handler import (
|
||||
httpxSpecialProvider,
|
||||
)
|
||||
from litellm.proxy._types import UserAPIKeyAuth
|
||||
from litellm.proxy.guardrails._content_utils import (
|
||||
apply_redacted_messages_back,
|
||||
build_inspection_messages,
|
||||
has_non_string_content,
|
||||
)
|
||||
from litellm.secret_managers.main import get_secret_str
|
||||
from litellm.types.guardrails import GuardrailEventHooks
|
||||
from litellm.types.llms.openai import AllMessageValues
|
||||
@ -214,18 +219,26 @@ class LakeraAIGuardrail(CustomGuardrail):
|
||||
)
|
||||
return data
|
||||
|
||||
new_messages: Optional[List[AllMessageValues]] = data.get("messages")
|
||||
if new_messages is None:
|
||||
# Covers multimodal list content + Responses-API input.
|
||||
new_messages = build_inspection_messages(data)
|
||||
if not new_messages:
|
||||
verbose_proxy_logger.warning(
|
||||
"Lakera AI: not running guardrail. No messages in data"
|
||||
"Lakera AI: not running guardrail. No inspectable text in data"
|
||||
)
|
||||
return data
|
||||
|
||||
# Mask-in-place uses offsets returned by Lakera and can only
|
||||
# preserve non-text parts (images, audio, …) when the original
|
||||
# content is a plain string. For multimodal/Responses-API input
|
||||
# we degrade to block-on-detect so we never silently strip image
|
||||
# parts while attempting to redact text.
|
||||
is_multimodal_input = has_non_string_content(data)
|
||||
|
||||
#########################################################
|
||||
########## 1. Make the Lakera AI v2 guard API request ##########
|
||||
#########################################################
|
||||
lakera_guardrail_response, masked_entity_count = await self.call_v2_guard(
|
||||
messages=new_messages,
|
||||
messages=new_messages, # type: ignore[arg-type]
|
||||
request_data=data,
|
||||
event_type=GuardrailEventHooks.pre_call,
|
||||
)
|
||||
@ -234,13 +247,20 @@ class LakeraAIGuardrail(CustomGuardrail):
|
||||
########## 2. Handle flagged content ##########
|
||||
#########################################################
|
||||
if lakera_guardrail_response.get("flagged") is True:
|
||||
# If only PII violations exist, mask the PII
|
||||
if self._is_only_pii_violation(lakera_guardrail_response):
|
||||
data["messages"] = self._mask_pii_in_messages(
|
||||
messages=new_messages,
|
||||
# If only PII violations exist, mask the PII (string input only).
|
||||
if (
|
||||
self._is_only_pii_violation(lakera_guardrail_response)
|
||||
and not is_multimodal_input
|
||||
):
|
||||
redacted_messages = self._mask_pii_in_messages(
|
||||
messages=new_messages, # type: ignore[arg-type]
|
||||
lakera_response=lakera_guardrail_response,
|
||||
masked_entity_count=masked_entity_count,
|
||||
)
|
||||
# Write back to ``messages`` AND ``input``. The Responses-API
|
||||
# backend reads ``input``; writing only to ``messages``
|
||||
# would let unredacted PII reach the LLM for /v1/responses.
|
||||
apply_redacted_messages_back(data, list(redacted_messages)) # type: ignore[arg-type]
|
||||
verbose_proxy_logger.debug(
|
||||
"Lakera AI: Masked PII in messages instead of blocking request"
|
||||
)
|
||||
@ -252,7 +272,9 @@ class LakeraAIGuardrail(CustomGuardrail):
|
||||
)
|
||||
# Log violation but continue
|
||||
elif self.on_flagged == "block":
|
||||
# If there are other violations or not set to mask PII, raise exception
|
||||
# Either non-PII violations, or PII on multimodal input
|
||||
# (which cannot be masked in place without dropping
|
||||
# image/audio parts) — raise the standard block error.
|
||||
raise self._get_http_exception_for_blocked_guardrail(
|
||||
lakera_guardrail_response
|
||||
)
|
||||
@ -280,18 +302,22 @@ class LakeraAIGuardrail(CustomGuardrail):
|
||||
if self.should_run_guardrail(data=data, event_type=event_type) is not True:
|
||||
return
|
||||
|
||||
new_messages: Optional[List[AllMessageValues]] = data.get("messages")
|
||||
if new_messages is None:
|
||||
new_messages = build_inspection_messages(data)
|
||||
if not new_messages:
|
||||
verbose_proxy_logger.warning(
|
||||
"Lakera AI: not running guardrail. No messages in data"
|
||||
"Lakera AI: not running guardrail. No inspectable text in data"
|
||||
)
|
||||
return
|
||||
|
||||
# See ``async_pre_call_hook`` — multimodal input degrades to
|
||||
# block-on-detect because mask-in-place would drop image parts.
|
||||
is_multimodal_input = has_non_string_content(data)
|
||||
|
||||
#########################################################
|
||||
########## 1. Make the Lakera AI v2 guard API request ##########
|
||||
#########################################################
|
||||
lakera_guardrail_response, masked_entity_count = await self.call_v2_guard(
|
||||
messages=new_messages,
|
||||
messages=new_messages, # type: ignore[arg-type]
|
||||
request_data=data,
|
||||
event_type=GuardrailEventHooks.during_call,
|
||||
)
|
||||
@ -300,25 +326,28 @@ class LakeraAIGuardrail(CustomGuardrail):
|
||||
########## 2. Handle flagged content ##########
|
||||
#########################################################
|
||||
if lakera_guardrail_response.get("flagged") is True:
|
||||
# If only PII violations exist, mask the PII
|
||||
if self._is_only_pii_violation(lakera_guardrail_response):
|
||||
data["messages"] = self._mask_pii_in_messages(
|
||||
messages=new_messages,
|
||||
if (
|
||||
self._is_only_pii_violation(lakera_guardrail_response)
|
||||
and not is_multimodal_input
|
||||
):
|
||||
redacted_messages = self._mask_pii_in_messages(
|
||||
messages=new_messages, # type: ignore[arg-type]
|
||||
lakera_response=lakera_guardrail_response,
|
||||
masked_entity_count=masked_entity_count,
|
||||
)
|
||||
# Write back to ``messages`` AND ``input``. The Responses-API
|
||||
# backend reads ``input``; writing only to ``messages``
|
||||
# would let unredacted PII reach the LLM for /v1/responses.
|
||||
apply_redacted_messages_back(data, list(redacted_messages)) # type: ignore[arg-type]
|
||||
verbose_proxy_logger.debug(
|
||||
"Lakera AI: Masked PII in messages instead of blocking request"
|
||||
)
|
||||
else:
|
||||
# Check on_flagged setting
|
||||
if self.on_flagged == "monitor":
|
||||
verbose_proxy_logger.warning(
|
||||
"Lakera Guardrail: Monitoring mode - violation detected but allowing request"
|
||||
)
|
||||
# Log violation but continue
|
||||
elif self.on_flagged == "block":
|
||||
# If there are other violations or not set to mask PII, raise exception
|
||||
raise self._get_http_exception_for_blocked_guardrail(
|
||||
lakera_guardrail_response
|
||||
)
|
||||
|
||||
@ -50,6 +50,11 @@ from litellm.llms.custom_httpx.http_handler import (
|
||||
httpxSpecialProvider,
|
||||
)
|
||||
from litellm.proxy._types import UserAPIKeyAuth
|
||||
from litellm.proxy.guardrails._content_utils import (
|
||||
apply_redacted_messages_back,
|
||||
build_inspection_messages,
|
||||
has_non_string_content,
|
||||
)
|
||||
from litellm.types.guardrails import GuardrailEventHooks
|
||||
import litellm
|
||||
|
||||
@ -366,16 +371,19 @@ class LassoGuardrail(CustomGuardrail):
|
||||
LassoGuardrailAPIError: If the Lasso API call fails
|
||||
HTTPException: If blocking violations are detected
|
||||
"""
|
||||
messages: List[Dict[str, str]] = data.get("messages", [])
|
||||
# Covers multimodal list content + Responses-API input.
|
||||
messages: List[Dict[str, str]] = build_inspection_messages(data)
|
||||
if not messages:
|
||||
return data
|
||||
|
||||
if self.mask:
|
||||
# Lasso's classifix endpoint returns masked text that we copy back
|
||||
# into ``data["messages"]``. For multimodal/Responses-API input we
|
||||
# would silently strip image/audio parts, so fall back to the
|
||||
# classify endpoint (which still raises on BLOCK actions) and
|
||||
# leave the original payload intact.
|
||||
if self.mask and not has_non_string_content(data):
|
||||
return await self._handle_masking(data, cache, message_type, messages)
|
||||
else:
|
||||
return await self._handle_classification(
|
||||
data, cache, message_type, messages
|
||||
)
|
||||
return await self._handle_classification(data, cache, message_type, messages)
|
||||
|
||||
async def _handle_classification(
|
||||
self,
|
||||
@ -413,8 +421,9 @@ class LassoGuardrail(CustomGuardrail):
|
||||
self._process_lasso_response(response)
|
||||
|
||||
# Apply masking to messages if violations detected and masked messages are available
|
||||
if response.get("violations_detected") and response.get("messages"):
|
||||
data["messages"] = response["messages"]
|
||||
redacted_messages = response.get("messages")
|
||||
if response.get("violations_detected") and redacted_messages:
|
||||
apply_redacted_messages_back(data, list(redacted_messages))
|
||||
self._log_masking_applied(message_type, dict(response))
|
||||
|
||||
return data
|
||||
|
||||
@ -1873,8 +1873,9 @@ class ContentFilterGuardrail(CustomGuardrail):
|
||||
and the UI Request Lifecycle panel. Mirrors apply_guardrail's finally-block
|
||||
contract.
|
||||
"""
|
||||
accumulated_full_text = ""
|
||||
yielded_masked_text_len = 0
|
||||
accumulated_text_by_choice: Dict[int, str] = {}
|
||||
yielded_masked_text_len_by_choice: Dict[int, int] = {}
|
||||
latest_detections_by_choice: Dict[int, List[ContentFilterDetection]] = {}
|
||||
buffer_size = 50 # Increased buffer to catch patterns split across many chunks
|
||||
|
||||
start_time = datetime.now()
|
||||
@ -1890,79 +1891,90 @@ class ContentFilterGuardrail(CustomGuardrail):
|
||||
try:
|
||||
async for item in response:
|
||||
if isinstance(item, ModelResponseStream) and item.choices:
|
||||
delta_content = ""
|
||||
is_final = False
|
||||
for choice in item.choices:
|
||||
if hasattr(choice, "delta") and choice.delta:
|
||||
content = getattr(choice.delta, "content", None)
|
||||
if content and isinstance(content, str):
|
||||
delta_content += content
|
||||
if getattr(choice, "finish_reason", None):
|
||||
is_final = True
|
||||
if not (hasattr(choice, "delta") and choice.delta):
|
||||
continue
|
||||
|
||||
accumulated_full_text += delta_content
|
||||
choice_index = getattr(choice, "index", 0)
|
||||
if not isinstance(choice_index, int):
|
||||
choice_index = 0
|
||||
|
||||
# Check for blocking or apply masking
|
||||
# Add a space at the end if it's the final chunk to trigger word boundaries (\b)
|
||||
text_to_check = accumulated_full_text
|
||||
if is_final:
|
||||
text_to_check += " "
|
||||
content = getattr(choice.delta, "content", None)
|
||||
is_final = bool(getattr(choice, "finish_reason", None))
|
||||
if isinstance(content, str) and content:
|
||||
accumulated_text_by_choice[choice_index] = (
|
||||
accumulated_text_by_choice.get(choice_index, "")
|
||||
+ content
|
||||
)
|
||||
elif not is_final:
|
||||
continue
|
||||
|
||||
try:
|
||||
# Reset before each scan: _filter_single_text scans the
|
||||
# whole accumulated buffer every chunk, so previous-chunk
|
||||
# matches are guaranteed to be re-found. Keeping only the
|
||||
# latest scan's detections avoids N× duplication in the
|
||||
# final log row. BLOCK still records correctly because
|
||||
# handlers append to detections before raising.
|
||||
detections.clear()
|
||||
masked_text = self._filter_single_text(
|
||||
text_to_check, detections=detections
|
||||
text_to_check = accumulated_text_by_choice.get(choice_index, "")
|
||||
if not text_to_check:
|
||||
continue
|
||||
|
||||
# Add a space at the end if it's the final chunk to trigger word boundaries (\b)
|
||||
text_to_scan = text_to_check + (" " if is_final else "")
|
||||
choice_detections: List[ContentFilterDetection] = []
|
||||
|
||||
try:
|
||||
# _filter_single_text scans the whole accumulated
|
||||
# choice buffer every chunk, so previous-chunk
|
||||
# matches are guaranteed to be re-found. Keeping
|
||||
# only each choice's latest scan avoids duplicate
|
||||
# detections in the final log row.
|
||||
masked_text = self._filter_single_text(
|
||||
text_to_scan, detections=choice_detections
|
||||
)
|
||||
if is_final and masked_text.endswith(" "):
|
||||
masked_text = masked_text[:-1]
|
||||
latest_detections_by_choice[choice_index] = (
|
||||
choice_detections
|
||||
)
|
||||
except HTTPException:
|
||||
latest_detections_by_choice[choice_index] = (
|
||||
choice_detections
|
||||
)
|
||||
raise
|
||||
except Exception as e:
|
||||
verbose_proxy_logger.error(
|
||||
f"ContentFilterGuardrail: Error in masking: {e}"
|
||||
)
|
||||
masked_text = text_to_scan # Fallback to current text
|
||||
|
||||
# Determine how much can be safely yielded
|
||||
if is_final:
|
||||
safe_to_yield_len = len(masked_text)
|
||||
else:
|
||||
safe_to_yield_len = max(0, len(masked_text) - buffer_size)
|
||||
|
||||
yielded_masked_text_len = yielded_masked_text_len_by_choice.get(
|
||||
choice_index, 0
|
||||
)
|
||||
if is_final and masked_text.endswith(" "):
|
||||
masked_text = masked_text[:-1]
|
||||
except HTTPException:
|
||||
raise
|
||||
except Exception as e:
|
||||
verbose_proxy_logger.error(
|
||||
f"ContentFilterGuardrail: Error in masking: {e}"
|
||||
)
|
||||
masked_text = text_to_check # Fallback to current text
|
||||
if safe_to_yield_len > yielded_masked_text_len:
|
||||
new_masked_content = masked_text[
|
||||
yielded_masked_text_len:safe_to_yield_len
|
||||
]
|
||||
choice.delta.content = new_masked_content
|
||||
yielded_masked_text_len_by_choice[choice_index] = (
|
||||
safe_to_yield_len
|
||||
)
|
||||
else:
|
||||
# Hold content by yielding empty content on this choice
|
||||
# while preserving chunk metadata and other choices.
|
||||
choice.delta.content = ""
|
||||
|
||||
# Determine how much can be safely yielded
|
||||
if is_final:
|
||||
safe_to_yield_len = len(masked_text)
|
||||
else:
|
||||
safe_to_yield_len = max(0, len(masked_text) - buffer_size)
|
||||
|
||||
if safe_to_yield_len > yielded_masked_text_len:
|
||||
new_masked_content = masked_text[
|
||||
yielded_masked_text_len:safe_to_yield_len
|
||||
]
|
||||
# Modify the chunk to contain only the new masked content
|
||||
if (
|
||||
item.choices
|
||||
and hasattr(item.choices[0], "delta")
|
||||
and item.choices[0].delta
|
||||
):
|
||||
item.choices[0].delta.content = new_masked_content
|
||||
yielded_masked_text_len = safe_to_yield_len
|
||||
yield item
|
||||
else:
|
||||
# Hold content by yielding empty content chunk (keeps metadata/structure)
|
||||
if (
|
||||
item.choices
|
||||
and hasattr(item.choices[0], "delta")
|
||||
and item.choices[0].delta
|
||||
):
|
||||
item.choices[0].delta.content = ""
|
||||
yield item
|
||||
yield item
|
||||
else:
|
||||
# Not a ModelResponseStream or no choices - yield as is
|
||||
yield item
|
||||
|
||||
# Any remaining content (should have been handled by is_final, but just in case)
|
||||
if yielded_masked_text_len < len(accumulated_full_text):
|
||||
if any(
|
||||
yielded_masked_text_len_by_choice.get(choice_index, 0)
|
||||
< len(accumulated_text)
|
||||
for choice_index, accumulated_text in accumulated_text_by_choice.items()
|
||||
):
|
||||
# We already reached the end of the generator
|
||||
pass
|
||||
except HTTPException:
|
||||
@ -1973,6 +1985,11 @@ class ContentFilterGuardrail(CustomGuardrail):
|
||||
exception_str = str(e)
|
||||
raise e
|
||||
finally:
|
||||
detections = [
|
||||
detection
|
||||
for choice_detections in latest_detections_by_choice.values()
|
||||
for detection in choice_detections
|
||||
]
|
||||
self._count_masked_entities(detections, masked_entity_count)
|
||||
self._log_guardrail_information(
|
||||
request_data=request_data,
|
||||
|
||||
@ -187,11 +187,28 @@ def _extract_user_text(messages: List) -> str:
|
||||
|
||||
|
||||
def _extract_response_text(response: Any) -> str:
|
||||
"""Extract text from LLM response object."""
|
||||
"""Extract text from every LLM response choice."""
|
||||
if hasattr(response, "choices") and response.choices:
|
||||
choice = response.choices[0]
|
||||
if hasattr(choice, "message") and choice.message:
|
||||
return choice.message.content or ""
|
||||
text_parts: List[str] = []
|
||||
for choice in response.choices:
|
||||
if hasattr(choice, "message") and choice.message:
|
||||
text = _content_to_text(choice.message.content)
|
||||
if text:
|
||||
text_parts.append(text)
|
||||
return "\n".join(text_parts)
|
||||
return ""
|
||||
|
||||
|
||||
def _content_to_text(content: Any) -> str:
|
||||
if isinstance(content, str):
|
||||
return content
|
||||
if isinstance(content, list):
|
||||
text_parts = [
|
||||
block.get("text")
|
||||
for block in content
|
||||
if isinstance(block, dict) and isinstance(block.get("text"), str)
|
||||
]
|
||||
return " ".join(part for part in text_parts if part)
|
||||
return ""
|
||||
|
||||
|
||||
|
||||
@ -480,21 +480,32 @@ class XecGuardGuardrail(CustomGuardrail):
|
||||
choices = response.get("choices")
|
||||
if not choices:
|
||||
return None
|
||||
first = choices[0]
|
||||
if hasattr(first, "message"):
|
||||
message = first.message
|
||||
elif isinstance(first, dict):
|
||||
message = first.get("message")
|
||||
text_parts: List[str] = []
|
||||
for choice in choices:
|
||||
content = XecGuardGuardrail._extract_choice_content(choice)
|
||||
text = XecGuardGuardrail._content_to_text(content)
|
||||
if text:
|
||||
text_parts.append(text)
|
||||
return "\n".join(text_parts) or None
|
||||
|
||||
@staticmethod
|
||||
def _extract_choice_content(choice: Any) -> Any:
|
||||
if hasattr(choice, "message"):
|
||||
message = choice.message
|
||||
elif isinstance(choice, dict):
|
||||
message = choice.get("message")
|
||||
else:
|
||||
return None
|
||||
if message is None:
|
||||
return None
|
||||
if hasattr(message, "content"):
|
||||
content = message.content
|
||||
elif isinstance(message, dict):
|
||||
content = message.get("content")
|
||||
else:
|
||||
return None
|
||||
return message.content
|
||||
if isinstance(message, dict):
|
||||
return message.get("content")
|
||||
return None
|
||||
|
||||
@staticmethod
|
||||
def _content_to_text(content: Any) -> Optional[str]:
|
||||
if isinstance(content, str) and content:
|
||||
return content
|
||||
if isinstance(content, list):
|
||||
|
||||
@ -1447,14 +1447,11 @@ def callback_name(callback):
|
||||
return str(callback)
|
||||
|
||||
|
||||
@router.get(
|
||||
"/health/readiness",
|
||||
tags=["health"],
|
||||
dependencies=[Depends(user_api_key_auth)],
|
||||
)
|
||||
async def health_readiness(response: Response):
|
||||
async def _get_health_readiness_details(
|
||||
response: Optional[Response] = None,
|
||||
) -> Dict[str, Any]:
|
||||
"""
|
||||
Unprotected endpoint for checking if worker can receive requests
|
||||
Detailed health payload for authenticated diagnostics.
|
||||
"""
|
||||
from litellm.proxy.proxy_server import prisma_client, version
|
||||
|
||||
@ -1473,7 +1470,7 @@ async def health_readiness(response: Response):
|
||||
success_callback_names = litellm.success_callback
|
||||
|
||||
# check Cache
|
||||
cache_type = None
|
||||
cache_type: Any = None
|
||||
if litellm.cache is not None:
|
||||
from litellm.caching.caching import RedisSemanticCache
|
||||
|
||||
@ -1482,6 +1479,7 @@ async def health_readiness(response: Response):
|
||||
if isinstance(litellm.cache.cache, RedisSemanticCache):
|
||||
# ping the cache
|
||||
# TODO: @ishaan-jaff - we should probably not ping the cache on every /health/readiness check
|
||||
index_info: Any
|
||||
try:
|
||||
index_info = await litellm.cache.cache._index_info()
|
||||
except Exception as e:
|
||||
@ -1499,7 +1497,7 @@ async def health_readiness(response: Response):
|
||||
# serve requests that depend on persisted state (keys, budgets,
|
||||
# spend logs). Return 503 so orchestrators take this pod out of
|
||||
# rotation; "Not connected" (no DB configured at all) stays 200.
|
||||
if db_health_status["status"] != "connected":
|
||||
if response is not None and db_health_status["status"] != "connected":
|
||||
response.status_code = status.HTTP_503_SERVICE_UNAVAILABLE
|
||||
return {
|
||||
"status": "healthy",
|
||||
@ -1526,6 +1524,52 @@ async def health_readiness(response: Response):
|
||||
raise HTTPException(status_code=503, detail=f"Service Unhealthy ({str(e)})")
|
||||
|
||||
|
||||
def _allow_public_health_readiness_details() -> bool:
    """Report whether the public readiness probe may return the detailed payload.

    Returns:
        True only when ``general_settings`` holds the literal value ``True``
        under ``allow_public_health_readiness_details``; any other value
        (missing key, truthy non-bool, ``False``) yields False.
    """
    # Imported lazily inside the function, as in the rest of this module.
    from litellm.proxy.proxy_server import general_settings

    flag_value = general_settings.get("allow_public_health_readiness_details")
    return flag_value is True
|
||||
|
||||
|
||||
async def _set_public_readiness_status(response: Response) -> None:
    """Mark the public readiness response as 503 when the configured DB is not connected.

    Args:
        response: Response object whose ``status_code`` is set to 503 when the
            DB readiness check reports anything other than ``"connected"``.

    When no ``prisma_client`` is configured the response is left untouched.
    """
    from litellm.proxy.proxy_server import prisma_client

    if prisma_client is not None:
        check_result = await _db_health_readiness_check()
        if check_result["status"] != "connected":
            response.status_code = status.HTTP_503_SERVICE_UNAVAILABLE
|
||||
|
||||
|
||||
@router.get(
    "/health/readiness",
    tags=["health"],
)
async def health_readiness(response: Response):
    """
    Public readiness probe.

    By default this returns only ``{"status": "healthy"}`` so unauthenticated
    load balancers see a low-detail payload; the response status code is
    adjusted by ``_set_public_readiness_status``. Admins can opt into the
    legacy detailed public payload with
    general_settings.allow_public_health_readiness_details.
    """
    if not _allow_public_health_readiness_details():
        # Default path: minimal body, status code reflects DB readiness.
        await _set_public_readiness_status(response=response)
        return {"status": "healthy"}

    # Opt-in path: same payload as the authenticated details endpoint.
    return await _get_health_readiness_details(response=response)
|
||||
|
||||
|
||||
@router.get(
    "/health/readiness/details",
    tags=["health"],
    dependencies=[Depends(user_api_key_auth)],
)
async def health_readiness_details(response: Response):
    """
    Authenticated readiness diagnostics with DB/cache/callback metadata.

    Requires a valid API key (``user_api_key_auth`` dependency) and delegates
    to ``_get_health_readiness_details`` for the payload.
    """
    details = await _get_health_readiness_details(response=response)
    return details
|
||||
|
||||
|
||||
@router.get(
|
||||
"/health/backlog",
|
||||
tags=["health"],
|
||||
@ -1561,7 +1605,6 @@ async def health_liveliness():
|
||||
@router.options(
|
||||
"/health/readiness",
|
||||
tags=["health"],
|
||||
dependencies=[Depends(user_api_key_auth)],
|
||||
)
|
||||
async def health_readiness_options():
|
||||
"""
|
||||
|
||||
@ -8,6 +8,10 @@ from litellm._logging import verbose_proxy_logger
|
||||
from litellm.caching.caching import DualCache
|
||||
from litellm.integrations.custom_logger import CustomLogger
|
||||
from litellm.proxy._types import UserAPIKeyAuth
|
||||
from litellm.proxy.guardrails._content_utils import (
|
||||
is_text_content_call_type,
|
||||
iter_message_text,
|
||||
)
|
||||
|
||||
|
||||
class _PROXY_AzureContentSafety(
|
||||
@ -118,10 +122,9 @@ class _PROXY_AzureContentSafety(
|
||||
):
|
||||
verbose_proxy_logger.debug("Inside Azure Content-Safety Pre-Call Hook")
|
||||
try:
|
||||
if call_type == "completion" and "messages" in data:
|
||||
for m in data["messages"]:
|
||||
if "content" in m and isinstance(m["content"], str):
|
||||
await self.test_violation(content=m["content"], source="input")
|
||||
if is_text_content_call_type(call_type):
|
||||
for text in iter_message_text(data):
|
||||
await self.test_violation(content=text, source="input")
|
||||
|
||||
except HTTPException as e:
|
||||
raise e
|
||||
@ -140,12 +143,16 @@ class _PROXY_AzureContentSafety(
|
||||
response,
|
||||
):
|
||||
verbose_proxy_logger.debug("Inside Azure Content-Safety Post-Call Hook")
|
||||
if isinstance(response, litellm.ModelResponse) and isinstance(
|
||||
response.choices[0], litellm.utils.Choices
|
||||
):
|
||||
await self.test_violation(
|
||||
content=response.choices[0].message.content or "", source="output"
|
||||
)
|
||||
if not isinstance(response, litellm.ModelResponse):
|
||||
return
|
||||
|
||||
for choice in response.choices:
|
||||
if not isinstance(choice, litellm.utils.Choices):
|
||||
continue
|
||||
message = getattr(choice, "message", None)
|
||||
content = getattr(message, "content", None)
|
||||
if isinstance(content, str):
|
||||
await self.test_violation(content=content, source="output")
|
||||
|
||||
# async def async_post_call_streaming_hook(
|
||||
# self,
|
||||
|
||||
@ -61,6 +61,7 @@ from litellm.secret_managers.main import get_secret_bool
|
||||
from litellm.types.llms.anthropic import ANTHROPIC_API_HEADERS
|
||||
from litellm.types.services import ServiceTypes
|
||||
from litellm.types.utils import (
|
||||
CustomPricingLiteLLMParams,
|
||||
LlmProviders,
|
||||
ProviderSpecificHeader,
|
||||
StandardLoggingUserAPIKeyMetadata,
|
||||
@ -168,6 +169,20 @@ _ALLOW_CLIENT_MESSAGE_REDACTION_OPT_OUT_METADATA_KEY = (
|
||||
"allow_client_message_redaction_opt_out"
|
||||
)
|
||||
|
||||
# Per-request pricing parameters mutate cost-tracking output and (via
|
||||
# ``litellm.completion`` → ``register_model``) the process-wide
|
||||
# ``litellm.model_cost`` map. Both effects belong to deployment configuration,
|
||||
# not to user-supplied request bodies, so the proxy strips them before they
|
||||
# reach the call path. Built from the Pydantic model so newly-added pricing
|
||||
# fields are covered automatically.
|
||||
_CLIENT_PRICING_CONTROL_FIELDS = frozenset(
|
||||
CustomPricingLiteLLMParams.model_fields.keys()
|
||||
)
|
||||
# ``model_info`` carries the same pricing fields when read by
|
||||
# ``use_custom_pricing_for_model``; strip from metadata for the same reason.
|
||||
_CLIENT_PRICING_METADATA_FIELDS = frozenset({"model_info"})
|
||||
_ALLOW_CLIENT_PRICING_OVERRIDE_METADATA_KEY = "allow_client_pricing_override"
|
||||
|
||||
# Request fields whose value, when URL-valued, becomes the outbound destination
|
||||
# for a provider call. Letting a proxy caller pin the destination is an SSRF
|
||||
# primitive (HuggingFace/Oobabooga `model`, Gemini files `file_id`); guard
|
||||
@ -265,6 +280,46 @@ def _key_or_team_allows_client_message_redaction_opt_out(
|
||||
)
|
||||
|
||||
|
||||
def _key_or_team_allows_client_pricing_override(
    user_api_key_dict: UserAPIKeyAuth,
) -> bool:
    """Return whether the key or team metadata opts in to client pricing overrides.

    Delegates to ``_key_or_team_metadata_flag_is_true`` with the
    ``allow_client_pricing_override`` metadata key.
    """
    override_allowed = _key_or_team_metadata_flag_is_true(
        user_api_key_dict=user_api_key_dict,
        metadata_key=_ALLOW_CLIENT_PRICING_OVERRIDE_METADATA_KEY,
    )
    return override_allowed
|
||||
|
||||
|
||||
def _strip_client_pricing_overrides(data: Dict[str, Any]) -> None:
    """Remove pricing overrides from the request body and its metadata dicts, in place.

    Every key of ``_CLIENT_PRICING_CONTROL_FIELDS`` present at the top level of
    ``data`` is removed, and every key of ``_CLIENT_PRICING_METADATA_FIELDS``
    is removed from ``data["metadata"]`` and ``data["litellm_metadata"]`` when
    those values are dicts (non-dict values are left alone).

    This function strips unconditionally; the caller decides whether the
    key/team carries ``allow_client_pricing_override: True`` and should be
    exempt. When anything was removed, a ``debug``-level log line names the
    dropped fields so operators can trace why a client-supplied pricing
    override stopped being applied.

    Args:
        data: Parsed request body; mutated in place.
    """
    removed: List[str] = []

    # Top-level pricing fields on the request body.
    top_level_hits = [
        name for name in _CLIENT_PRICING_CONTROL_FIELDS if name in data
    ]
    for name in top_level_hits:
        data.pop(name, None)
    removed.extend(top_level_hits)

    # Pricing carried inside either metadata variant.
    for container_key in ("metadata", "litellm_metadata"):
        container = data.get(container_key)
        if isinstance(container, dict):
            nested_hits = [
                name for name in _CLIENT_PRICING_METADATA_FIELDS if name in container
            ]
            for name in nested_hits:
                removed.append(f"{container_key}.{name}")
                container.pop(name, None)

    if not removed:
        return

    verbose_proxy_logger.debug(
        "Stripped client-supplied pricing fields from request body: %s. "
        "Set `allow_client_pricing_override: true` on the key or team "
        "metadata to keep these values.",
        ", ".join(removed),
    )
|
||||
|
||||
|
||||
def _get_metadata_variable_name(request: Request) -> str:
|
||||
"""
|
||||
Helper to return what the "metadata" field should be called in the request data
|
||||
@ -1364,6 +1419,14 @@ async def add_litellm_data_to_request( # noqa: PLR0915
|
||||
]:
|
||||
_user_meta.pop(_k, None)
|
||||
|
||||
# Strip pricing overrides AFTER the litellm_metadata string-to-dict parse
|
||||
# above, for the same reason as the user_api_key_* strip — JSON-string
|
||||
# metadata (sent via multipart/form-data or extra_body) wouldn't be a
|
||||
# dict yet at the earlier strip point and the isinstance(dict) guard
|
||||
# would silently skip the field.
|
||||
if not _key_or_team_allows_client_pricing_override(user_api_key_dict):
|
||||
_strip_client_pricing_overrides(data)
|
||||
|
||||
# Strip caller-supplied routing/budget tags unless the admin has opted
|
||||
# this key or team in via metadata.allow_client_tags=True. Tags drive
|
||||
# tag-based routing and tag budget attribution — accepting them from
|
||||
|
||||
@ -104,11 +104,16 @@ async def get_router_settings(
|
||||
config = await proxy_config.get_config()
|
||||
router_settings_from_config = config.get("router_settings", {})
|
||||
|
||||
# Get current values from llm_router if initialized
|
||||
current_values = {}
|
||||
current_values: Dict[str, Any] = {}
|
||||
if llm_router is not None:
|
||||
# Check all field names from the fields list
|
||||
# Router exposes routing groups as private `_routing_groups`; the
|
||||
# generic `hasattr` loop below would miss them.
|
||||
current_values["routing_groups"] = [
|
||||
group.model_dump() for group in llm_router._routing_groups.values()
|
||||
]
|
||||
for field in router_fields:
|
||||
if field.field_name == "routing_groups":
|
||||
continue
|
||||
if hasattr(llm_router, field.field_name):
|
||||
value = getattr(llm_router, field.field_name)
|
||||
current_values[field.field_name] = value
|
||||
|
||||
@ -20,13 +20,13 @@ class PrometheusAuthMiddleware:
|
||||
"""
|
||||
Middleware to authenticate requests to the metrics endpoint.
|
||||
|
||||
By default, auth is not run on the metrics endpoint.
|
||||
By default, auth is run on the metrics endpoint.
|
||||
|
||||
Enabled by setting the following in proxy_config.yaml:
|
||||
To allow unauthenticated metrics in proxy_config.yaml:
|
||||
|
||||
```yaml
|
||||
litellm_settings:
|
||||
require_auth_for_metrics_endpoint: true
|
||||
require_auth_for_metrics_endpoint: false
|
||||
```
|
||||
"""
|
||||
|
||||
@ -39,8 +39,8 @@ class PrometheusAuthMiddleware:
|
||||
await self.app(scope, receive, send)
|
||||
return
|
||||
|
||||
# Only run auth if configured to do so
|
||||
if litellm.require_auth_for_metrics_endpoint is True:
|
||||
# Run auth by default; allow legacy public metrics only when explicitly disabled.
|
||||
if litellm.require_auth_for_metrics_endpoint is not False:
|
||||
# user_api_key_auth reads the request body, which consumes ASGI `receive`.
|
||||
# Buffer those messages and replay them for the inner app; otherwise a
|
||||
# successful auth would forward an exhausted receive and /metrics hangs.
|
||||
@ -52,10 +52,29 @@ class PrometheusAuthMiddleware:
|
||||
return message
|
||||
|
||||
request = Request(scope, receive_for_auth)
|
||||
api_key = request.headers.get(_AUTHORIZATION_HEADER) or ""
|
||||
|
||||
try:
|
||||
await user_api_key_auth(request=request, api_key=api_key)
|
||||
await user_api_key_auth(
|
||||
request=request,
|
||||
api_key=request.headers.get(_AUTHORIZATION_HEADER) or "",
|
||||
azure_api_key_header=request.headers.get(
|
||||
SpecialHeaders.azure_authorization.value
|
||||
)
|
||||
or "",
|
||||
anthropic_api_key_header=request.headers.get(
|
||||
SpecialHeaders.anthropic_authorization.value
|
||||
),
|
||||
google_ai_studio_api_key_header=request.headers.get(
|
||||
SpecialHeaders.google_ai_studio_authorization.value
|
||||
),
|
||||
azure_apim_header=request.headers.get(
|
||||
SpecialHeaders.azure_apim_authorization.value
|
||||
)
|
||||
or "",
|
||||
custom_litellm_key_header=request.headers.get(
|
||||
SpecialHeaders.custom_litellm_api_key.value
|
||||
),
|
||||
)
|
||||
except Exception as e:
|
||||
# Send 401 response directly via ASGI protocol
|
||||
error_message = getattr(e, "message", str(e))
|
||||
|
||||
@ -5,7 +5,7 @@ from importlib.resources import files
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
import litellm
|
||||
from fastapi import APIRouter, Depends, HTTPException
|
||||
from fastapi import APIRouter, HTTPException
|
||||
|
||||
from litellm._logging import verbose_logger
|
||||
from litellm.litellm_core_utils.get_blog_posts import (
|
||||
@ -14,8 +14,9 @@ from litellm.litellm_core_utils.get_blog_posts import (
|
||||
GetBlogPosts,
|
||||
get_blog_posts,
|
||||
)
|
||||
from litellm.proxy._types import CommonProxyErrors
|
||||
from litellm.proxy.auth.user_api_key_auth import user_api_key_auth
|
||||
from litellm.proxy._types import (
|
||||
CommonProxyErrors,
|
||||
)
|
||||
from litellm.types.agents import AgentCard
|
||||
from litellm.types.mcp import MCPPublicServer
|
||||
from litellm.types.proxy.management_endpoints.model_management_endpoints import (
|
||||
@ -31,6 +32,7 @@ from litellm.types.utils import LlmProviders
|
||||
|
||||
router = APIRouter()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# /public/endpoints — helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
@ -153,7 +155,6 @@ def _load_endpoints() -> List[Dict[str, Any]]:
|
||||
@router.get(
|
||||
"/public/model_hub",
|
||||
tags=["public", "model management"],
|
||||
dependencies=[Depends(user_api_key_auth)],
|
||||
response_model=List[ModelGroupInfoProxy],
|
||||
)
|
||||
async def public_model_hub():
|
||||
@ -208,7 +209,6 @@ async def public_model_hub():
|
||||
@router.get(
|
||||
"/public/agent_hub",
|
||||
tags=["[beta] Agents", "public"],
|
||||
dependencies=[Depends(user_api_key_auth)],
|
||||
response_model=List[AgentCard],
|
||||
)
|
||||
async def get_agents():
|
||||
@ -230,7 +230,6 @@ async def get_agents():
|
||||
@router.get(
|
||||
"/public/mcp_hub",
|
||||
tags=["[beta] MCP", "public"],
|
||||
dependencies=[Depends(user_api_key_auth)],
|
||||
response_model=List[MCPPublicServer],
|
||||
)
|
||||
async def get_mcp_servers():
|
||||
|
||||
@ -3079,7 +3079,11 @@ async def global_spend_models(
|
||||
return response
|
||||
|
||||
|
||||
@router.get("/provider/budgets", response_model=ProviderBudgetResponse)
|
||||
@router.get(
|
||||
"/provider/budgets",
|
||||
dependencies=[Depends(user_api_key_auth)],
|
||||
response_model=ProviderBudgetResponse,
|
||||
)
|
||||
async def provider_budgets() -> ProviderBudgetResponse:
|
||||
"""
|
||||
Provider Budget Routing - Get Budget, Spend Details https://docs.litellm.ai/docs/proxy/provider_budget_routing
|
||||
|
||||
@ -99,6 +99,11 @@ class UISettings(BaseModel):
|
||||
description="If true, requires authentication for accessing the public AI Hub.",
|
||||
)
|
||||
|
||||
allow_public_health_readiness_details: bool = Field(
|
||||
default=False,
|
||||
description="If true, returns the legacy detailed payload from the unauthenticated /health/readiness endpoint.",
|
||||
)
|
||||
|
||||
forward_client_headers_to_llm_api: bool = Field(
|
||||
default=False,
|
||||
description=(
|
||||
@ -169,6 +174,7 @@ ALLOWED_UI_SETTINGS_FIELDS = {
|
||||
"disable_team_admin_delete_team_user",
|
||||
"enabled_ui_pages_internal_users",
|
||||
"require_auth_for_public_ai_hub",
|
||||
"allow_public_health_readiness_details",
|
||||
"forward_client_headers_to_llm_api",
|
||||
"forward_llm_provider_auth_headers",
|
||||
"disable_agents_for_internal_users",
|
||||
@ -183,6 +189,7 @@ ALLOWED_UI_SETTINGS_FIELDS = {
|
||||
# Flags that must be synced from the persisted UISettings into
|
||||
# general_settings at runtime (on both read and write).
|
||||
_RUNTIME_GENERAL_SETTINGS_FLAGS = [
|
||||
"allow_public_health_readiness_details",
|
||||
"forward_client_headers_to_llm_api",
|
||||
"forward_llm_provider_auth_headers",
|
||||
"disable_agents_for_internal_users",
|
||||
|
||||
@ -1052,11 +1052,17 @@ class Router:
|
||||
strategy = self._normalize_strategy(self.routing_strategy)
|
||||
attr = self._DEFAULT_SELECTOR_ATTR_BY_STRATEGY.get(strategy or "")
|
||||
selector = getattr(self, attr, None) if attr is not None else None
|
||||
verbose_router_logger.debug(
|
||||
"routing_group=default model=%s strategy=%s", model, strategy
|
||||
)
|
||||
return strategy, selector
|
||||
|
||||
group = self._routing_groups[group_name]
|
||||
strategy = self._normalize_strategy(group.routing_strategy)
|
||||
selector = self._group_selectors.get(group_name, {}).get(strategy or "")
|
||||
verbose_router_logger.debug(
|
||||
"routing_group=%s model=%s strategy=%s", group_name, model, strategy
|
||||
)
|
||||
return strategy, selector
|
||||
|
||||
async def _select_deployment_async(
|
||||
|
||||
@ -112,6 +112,14 @@ ROUTER_SETTINGS_FIELDS: List[RouterSettingsField] = [
|
||||
field_default={},
|
||||
ui_field_name="Routing Strategy Args",
|
||||
),
|
||||
RouterSettingsField(
|
||||
field_name="routing_groups",
|
||||
field_type="List",
|
||||
field_value=None,
|
||||
field_description="Named subsets of model_names that share a routing strategy. Models not claimed by an explicit group fall through to the top-level routing_strategy.",
|
||||
field_default=[],
|
||||
ui_field_name="Routing Groups",
|
||||
),
|
||||
RouterSettingsField(
|
||||
field_name="num_retries",
|
||||
field_type="Integer",
|
||||
|
||||
@ -2243,12 +2243,52 @@ def encode(model="", text="", custom_tokenizer: Optional[dict] = None):
|
||||
return enc
|
||||
|
||||
|
||||
def decode(model="", tokens: List[int] = [], custom_tokenizer: Optional[dict] = None):
|
||||
def decode(
    model="",
    tokens: Optional[List[int]] = None,
    custom_tokenizer: Optional[dict] = None,
    skip_special_tokens: bool = True,
):
    """
    Decodes token ids using the selected tokenizer.

    Args:
        model: Model name used to select a tokenizer when ``custom_tokenizer``
            is not provided.
        tokens: Token ids to decode. ``None`` (the default) is treated as an
            empty list.
        custom_tokenizer: Optional dict with ``"type"`` and ``"tokenizer"``
            keys; when given it is used instead of the model-based selection.
        skip_special_tokens: For HuggingFace tokenizers, keep the historical
            LiteLLM round-trip behavior by omitting special tokens by default.
            Set to False to inspect decoded BOS/EOS tokens.

    Returns:
        Whatever the selected tokenizer's ``decode`` method returns.
    """
    # Use a fresh list per call: a mutable ``[]`` default would be shared
    # across calls and handed to the tokenizer, which could mutate it.
    if tokens is None:
        tokens = []

    tokenizer_json = custom_tokenizer or _select_tokenizer(model=model)
    if tokenizer_json["type"] != "huggingface_tokenizer":
        return tokenizer_json["tokenizer"].decode(tokens)

    hf_tokenizer = tokenizer_json["tokenizer"]
    if skip_special_tokens:
        # Drop special-token ids up front, then also ask the tokenizer to
        # skip them during decoding.
        tokens = _strip_huggingface_special_token_ids(hf_tokenizer, tokens)
    return hf_tokenizer.decode(tokens, skip_special_tokens=skip_special_tokens)
|
||||
|
||||
|
||||
def _strip_huggingface_special_token_ids(
    tokenizer: Tokenizer, tokens: List[int]
) -> List[int]:
    """Return ``tokens`` without the ids the tokenizer marks as special.

    Args:
        tokenizer: Tokenizer exposing ``get_added_tokens_decoder()``, a
            mapping of token id to added-token object.
        tokens: Token ids to filter.

    Returns:
        The original ``tokens`` object unchanged when the added-token lookup
        fails or reports no special tokens; otherwise a new list with every
        special id removed.
    """
    try:
        added_by_id = tokenizer.get_added_tokens_decoder()
    except Exception:
        # Best effort: leave the ids as they are if the lookup fails.
        return tokens

    special_ids = set()
    for candidate_id, added in added_by_id.items():
        if getattr(added, "special", False):
            special_ids.add(candidate_id)

    if special_ids:
        return [tok_id for tok_id in tokens if tok_id not in special_ids]
    return tokens
|
||||
|
||||
|
||||
def create_pretrained_tokenizer(
|
||||
identifier: str, revision="main", auth_token: Optional[str] = None
|
||||
):
|
||||
|
||||
172
package-lock.json
generated
172
package-lock.json
generated
@ -1,19 +1,19 @@
|
||||
{
|
||||
"name": "litellm",
|
||||
"name": "litellm-dependency-refresh",
|
||||
"lockfileVersion": 3,
|
||||
"requires": true,
|
||||
"packages": {
|
||||
"": {
|
||||
"dependencies": {
|
||||
"prism-react-renderer": "^2.4.1",
|
||||
"prisma": "^5.17.0",
|
||||
"react-copy-to-clipboard": "^5.1.0"
|
||||
"prism-react-renderer": "2.4.1",
|
||||
"prisma": "5.17.0",
|
||||
"react-copy-to-clipboard": "5.1.1"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@testing-library/jest-dom": "^6.8.0",
|
||||
"@testing-library/react": "^14.3.1",
|
||||
"@types/react-copy-to-clipboard": "^5.0.7",
|
||||
"jest": "^29.7.0"
|
||||
"@testing-library/jest-dom": "6.8.0",
|
||||
"@testing-library/react": "14.3.1",
|
||||
"@types/react-copy-to-clipboard": "5.0.7",
|
||||
"jest": "29.7.0"
|
||||
}
|
||||
},
|
||||
"node_modules/@adobe/css-tools": {
|
||||
@ -529,29 +529,6 @@
|
||||
"dev": true,
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/@isaacs/balanced-match": {
|
||||
"version": "4.0.1",
|
||||
"resolved": "https://registry.npmjs.org/@isaacs/balanced-match/-/balanced-match-4.0.1.tgz",
|
||||
"integrity": "sha512-yzMTt9lEb8Gv7zRioUilSglI0c0smZ9k5D65677DLWLtWJaXIS3CqcGyUFByYKlnUj6TkjLVs54fBl6+TiGQDQ==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"engines": {
|
||||
"node": "20 || >=22"
|
||||
}
|
||||
},
|
||||
"node_modules/@isaacs/brace-expansion": {
|
||||
"version": "5.0.0",
|
||||
"resolved": "https://registry.npmjs.org/@isaacs/brace-expansion/-/brace-expansion-5.0.0.tgz",
|
||||
"integrity": "sha512-ZT55BDLV0yv0RBm2czMiZ+SqCGO7AvmOM3G/w2xhVPH+te0aKgFjmBvGlL1dH+ql2tgGO3MVrbb3jCKyvpgnxA==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"@isaacs/balanced-match": "^4.0.1"
|
||||
},
|
||||
"engines": {
|
||||
"node": "20 || >=22"
|
||||
}
|
||||
},
|
||||
"node_modules/@istanbuljs/load-nyc-config": {
|
||||
"version": "1.1.0",
|
||||
"resolved": "https://registry.npmjs.org/@istanbuljs/load-nyc-config/-/load-nyc-config-1.1.0.tgz",
|
||||
@ -957,48 +934,48 @@
|
||||
}
|
||||
},
|
||||
"node_modules/@prisma/debug": {
|
||||
"version": "5.22.0",
|
||||
"resolved": "https://registry.npmjs.org/@prisma/debug/-/debug-5.22.0.tgz",
|
||||
"integrity": "sha512-AUt44v3YJeggO2ZU5BkXI7M4hu9BF2zzH2iF2V5pyXT/lRTyWiElZ7It+bRH1EshoMRxHgpYg4VB6rCM+mG5jQ==",
|
||||
"version": "5.17.0",
|
||||
"resolved": "https://registry.npmjs.org/@prisma/debug/-/debug-5.17.0.tgz",
|
||||
"integrity": "sha512-l7+AteR3P8FXiYyo496zkuoiJ5r9jLQEdUuxIxNCN1ud8rdbH3GTxm+f+dCyaSv9l9WY+29L9czaVRXz9mULfg==",
|
||||
"license": "Apache-2.0"
|
||||
},
|
||||
"node_modules/@prisma/engines": {
|
||||
"version": "5.22.0",
|
||||
"resolved": "https://registry.npmjs.org/@prisma/engines/-/engines-5.22.0.tgz",
|
||||
"integrity": "sha512-UNjfslWhAt06kVL3CjkuYpHAWSO6L4kDCVPegV6itt7nD1kSJavd3vhgAEhjglLJJKEdJ7oIqDJ+yHk6qO8gPA==",
|
||||
"version": "5.17.0",
|
||||
"resolved": "https://registry.npmjs.org/@prisma/engines/-/engines-5.17.0.tgz",
|
||||
"integrity": "sha512-+r+Nf+JP210Jur+/X8SIPLtz+uW9YA4QO5IXA+KcSOBe/shT47bCcRMTYCbOESw3FFYFTwe7vU6KTWHKPiwvtg==",
|
||||
"hasInstallScript": true,
|
||||
"license": "Apache-2.0",
|
||||
"dependencies": {
|
||||
"@prisma/debug": "5.22.0",
|
||||
"@prisma/engines-version": "5.22.0-44.605197351a3c8bdd595af2d2a9bc3025bca48ea2",
|
||||
"@prisma/fetch-engine": "5.22.0",
|
||||
"@prisma/get-platform": "5.22.0"
|
||||
"@prisma/debug": "5.17.0",
|
||||
"@prisma/engines-version": "5.17.0-31.393aa359c9ad4a4bb28630fb5613f9c281cde053",
|
||||
"@prisma/fetch-engine": "5.17.0",
|
||||
"@prisma/get-platform": "5.17.0"
|
||||
}
|
||||
},
|
||||
"node_modules/@prisma/engines-version": {
|
||||
"version": "5.22.0-44.605197351a3c8bdd595af2d2a9bc3025bca48ea2",
|
||||
"resolved": "https://registry.npmjs.org/@prisma/engines-version/-/engines-version-5.22.0-44.605197351a3c8bdd595af2d2a9bc3025bca48ea2.tgz",
|
||||
"integrity": "sha512-2PTmxFR2yHW/eB3uqWtcgRcgAbG1rwG9ZriSvQw+nnb7c4uCr3RAcGMb6/zfE88SKlC1Nj2ziUvc96Z379mHgQ==",
|
||||
"version": "5.17.0-31.393aa359c9ad4a4bb28630fb5613f9c281cde053",
|
||||
"resolved": "https://registry.npmjs.org/@prisma/engines-version/-/engines-version-5.17.0-31.393aa359c9ad4a4bb28630fb5613f9c281cde053.tgz",
|
||||
"integrity": "sha512-tUuxZZysZDcrk5oaNOdrBnnkoTtmNQPkzINFDjz7eG6vcs9AVDmA/F6K5Plsb2aQc/l5M2EnFqn3htng9FA4hg==",
|
||||
"license": "Apache-2.0"
|
||||
},
|
||||
"node_modules/@prisma/fetch-engine": {
|
||||
"version": "5.22.0",
|
||||
"resolved": "https://registry.npmjs.org/@prisma/fetch-engine/-/fetch-engine-5.22.0.tgz",
|
||||
"integrity": "sha512-bkrD/Mc2fSvkQBV5EpoFcZ87AvOgDxbG99488a5cexp5Ccny+UM6MAe/UFkUC0wLYD9+9befNOqGiIJhhq+HbA==",
|
||||
"version": "5.17.0",
|
||||
"resolved": "https://registry.npmjs.org/@prisma/fetch-engine/-/fetch-engine-5.17.0.tgz",
|
||||
"integrity": "sha512-ESxiOaHuC488ilLPnrv/tM2KrPhQB5TRris/IeIV4ZvUuKeaicCl4Xj/JCQeG9IlxqOgf1cCg5h5vAzlewN91Q==",
|
||||
"license": "Apache-2.0",
|
||||
"dependencies": {
|
||||
"@prisma/debug": "5.22.0",
|
||||
"@prisma/engines-version": "5.22.0-44.605197351a3c8bdd595af2d2a9bc3025bca48ea2",
|
||||
"@prisma/get-platform": "5.22.0"
|
||||
"@prisma/debug": "5.17.0",
|
||||
"@prisma/engines-version": "5.17.0-31.393aa359c9ad4a4bb28630fb5613f9c281cde053",
|
||||
"@prisma/get-platform": "5.17.0"
|
||||
}
|
||||
},
|
||||
"node_modules/@prisma/get-platform": {
|
||||
"version": "5.22.0",
|
||||
"resolved": "https://registry.npmjs.org/@prisma/get-platform/-/get-platform-5.22.0.tgz",
|
||||
"integrity": "sha512-pHhpQdr1UPFpt+zFfnPazhulaZYCUqeIcPpJViYoq9R+D/yw4fjE+CtnsnKzPYm0ddUbeXUzjGVGIRVgPDCk4Q==",
|
||||
"version": "5.17.0",
|
||||
"resolved": "https://registry.npmjs.org/@prisma/get-platform/-/get-platform-5.17.0.tgz",
|
||||
"integrity": "sha512-UlDgbRozCP1rfJ5Tlkf3Cnftb6srGrEQ4Nm3og+1Se2gWmCZ0hmPIi+tQikGDUVLlvOWx3Gyi9LzgRP+HTXV9w==",
|
||||
"license": "Apache-2.0",
|
||||
"dependencies": {
|
||||
"@prisma/debug": "5.22.0"
|
||||
"@prisma/debug": "5.17.0"
|
||||
}
|
||||
},
|
||||
"node_modules/@sinclair/typebox": {
|
||||
@ -1066,9 +1043,9 @@
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/@testing-library/jest-dom": {
|
||||
"version": "6.9.1",
|
||||
"resolved": "https://registry.npmjs.org/@testing-library/jest-dom/-/jest-dom-6.9.1.tgz",
|
||||
"integrity": "sha512-zIcONa+hVtVSSep9UT3jZ5rizo2BsxgyDYU7WFD5eICBE7no3881HGeb/QkGfsJs6JTkY1aQhT7rIPC7e+0nnA==",
|
||||
"version": "6.8.0",
|
||||
"resolved": "https://registry.npmjs.org/@testing-library/jest-dom/-/jest-dom-6.8.0.tgz",
|
||||
"integrity": "sha512-WgXcWzVM6idy5JaftTVC8Vs83NKRmGJz4Hqs4oyOuO2J4r/y79vvKZsb+CaGyCSEbUPI6OsewfPd0G1A0/TUZQ==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
@ -1497,11 +1474,14 @@
|
||||
}
|
||||
},
|
||||
"node_modules/balanced-match": {
|
||||
"version": "1.0.2",
|
||||
"resolved": "https://registry.npmjs.org/balanced-match/-/balanced-match-1.0.2.tgz",
|
||||
"integrity": "sha512-3oSeUO0TMV67hN1AmbXsK4yaqU7tjiHlbxRDZOpH0KW9+CeX4bRAaX0Anxt0tx2MrpRpWwQaPwIlISEJhYU5Pw==",
|
||||
"version": "4.0.4",
|
||||
"resolved": "https://registry.npmjs.org/balanced-match/-/balanced-match-4.0.4.tgz",
|
||||
"integrity": "sha512-BLrgEcRTwX2o6gGxGOCNyMvGSp35YofuYzw9h1IMTRmKqttAZZVU67bdb9Pr2vUHA8+j3i2tJfjO6C6+4myGTA==",
|
||||
"dev": true,
|
||||
"license": "MIT"
|
||||
"license": "MIT",
|
||||
"engines": {
|
||||
"node": "18 || 20 || >=22"
|
||||
}
|
||||
},
|
||||
"node_modules/baseline-browser-mapping": {
|
||||
"version": "2.8.30",
|
||||
@ -1514,14 +1494,16 @@
|
||||
}
|
||||
},
|
||||
"node_modules/brace-expansion": {
|
||||
"version": "1.1.12",
|
||||
"resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.12.tgz",
|
||||
"integrity": "sha512-9T9UjW3r0UW5c1Q7GTwllptXwhvYmEzFhzMfZ9H7FQWt+uZePjZPjBP/W1ZEyZ1twGWom5/56TF4lPcqjnDHcg==",
|
||||
"version": "5.0.5",
|
||||
"resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-5.0.5.tgz",
|
||||
"integrity": "sha512-VZznLgtwhn+Mact9tfiwx64fA9erHH/MCXEUfB/0bX/6Fz6ny5EGTXYltMocqg4xFAQZtnO3DHWWXi8RiuN7cQ==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"balanced-match": "^1.0.0",
|
||||
"concat-map": "0.0.1"
|
||||
"balanced-match": "^4.0.2"
|
||||
},
|
||||
"engines": {
|
||||
"node": "18 || 20 || >=22"
|
||||
}
|
||||
},
|
||||
"node_modules/braces": {
|
||||
@ -1791,13 +1773,6 @@
|
||||
"dev": true,
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/concat-map": {
|
||||
"version": "0.0.1",
|
||||
"resolved": "https://registry.npmjs.org/concat-map/-/concat-map-0.0.1.tgz",
|
||||
"integrity": "sha512-/Srv4dswyQNBfohGpz9o6Yb3Gz3SrUDqBH5rTuhGR7ahtlbYKnVxw2bCFMRljaA7EXHaXZ8wsHdodFvbkhKmqg==",
|
||||
"dev": true,
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/convert-source-map": {
|
||||
"version": "2.0.0",
|
||||
"resolved": "https://registry.npmjs.org/convert-source-map/-/convert-source-map-2.0.0.tgz",
|
||||
@ -2258,6 +2233,7 @@
|
||||
"version": "2.3.3",
|
||||
"resolved": "https://registry.npmjs.org/fsevents/-/fsevents-2.3.3.tgz",
|
||||
"integrity": "sha512-5xoDfX+fL7faATnagmWPpbFtwh/R77WmMMqqHGS65C3vvB0YHrgF+B1YmZ3441tMj5n63k0212XNoJwzlhffQw==",
|
||||
"dev": true,
|
||||
"hasInstallScript": true,
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
@ -4090,16 +4066,16 @@
|
||||
}
|
||||
},
|
||||
"node_modules/minimatch": {
|
||||
"version": "10.1.1",
|
||||
"resolved": "https://registry.npmjs.org/minimatch/-/minimatch-10.1.1.tgz",
|
||||
"integrity": "sha512-enIvLvRAFZYXJzkCYG5RKmPfrFArdLv+R+lbQ53BmIMLIry74bjKzX6iHAm8WYamJkhSSEabrWN5D97XnKObjQ==",
|
||||
"version": "10.2.5",
|
||||
"resolved": "https://registry.npmjs.org/minimatch/-/minimatch-10.2.5.tgz",
|
||||
"integrity": "sha512-MULkVLfKGYDFYejP07QOurDLLQpcjk7Fw+7jXS2R2czRQzR56yHRveU5NDJEOviH+hETZKSkIk5c+T23GjFUMg==",
|
||||
"dev": true,
|
||||
"license": "BlueOak-1.0.0",
|
||||
"dependencies": {
|
||||
"@isaacs/brace-expansion": "^5.0.0"
|
||||
"brace-expansion": "^5.0.5"
|
||||
},
|
||||
"engines": {
|
||||
"node": "20 || >=22"
|
||||
"node": "18 || 20 || >=22"
|
||||
},
|
||||
"funding": {
|
||||
"url": "https://github.com/sponsors/isaacs"
|
||||
@ -4388,9 +4364,9 @@
|
||||
"license": "ISC"
|
||||
},
|
||||
"node_modules/picomatch": {
|
||||
"version": "2.3.1",
|
||||
"resolved": "https://registry.npmjs.org/picomatch/-/picomatch-2.3.1.tgz",
|
||||
"integrity": "sha512-JU3teHTNjmE2VCGFzuY8EXzCDVwEqB2a8fsIvwaStHhAWJEeVd1o1QD80CU6+ZdEXXSLbSsuLwJjkCBWqRQUVA==",
|
||||
"version": "2.3.2",
|
||||
"resolved": "https://registry.npmjs.org/picomatch/-/picomatch-2.3.2.tgz",
|
||||
"integrity": "sha512-V7+vQEJ06Z+c5tSye8S+nHUfI51xoXIXjHQ99cQtKUkQqqO1kO/KCJUfZXuB47h/YBlDhah2H3hdUGXn8ie0oA==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"engines": {
|
||||
@ -4475,22 +4451,19 @@
|
||||
}
|
||||
},
|
||||
"node_modules/prisma": {
|
||||
"version": "5.22.0",
|
||||
"resolved": "https://registry.npmjs.org/prisma/-/prisma-5.22.0.tgz",
|
||||
"integrity": "sha512-vtpjW3XuYCSnMsNVBjLMNkTj6OZbudcPPTPYHqX0CJfpcdWciI1dM8uHETwmDxxiqEwCIE6WvXucWUetJgfu/A==",
|
||||
"version": "5.17.0",
|
||||
"resolved": "https://registry.npmjs.org/prisma/-/prisma-5.17.0.tgz",
|
||||
"integrity": "sha512-m4UWkN5lBE6yevqeOxEvmepnL5cNPEjzMw2IqDB59AcEV6w7D8vGljDLd1gPFH+W6gUxw9x7/RmN5dCS/WTPxA==",
|
||||
"hasInstallScript": true,
|
||||
"license": "Apache-2.0",
|
||||
"dependencies": {
|
||||
"@prisma/engines": "5.22.0"
|
||||
"@prisma/engines": "5.17.0"
|
||||
},
|
||||
"bin": {
|
||||
"prisma": "build/index.js"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=16.13"
|
||||
},
|
||||
"optionalDependencies": {
|
||||
"fsevents": "2.3.3"
|
||||
}
|
||||
},
|
||||
"node_modules/prompts": {
|
||||
@ -4555,16 +4528,16 @@
|
||||
}
|
||||
},
|
||||
"node_modules/react-copy-to-clipboard": {
|
||||
"version": "5.1.0",
|
||||
"resolved": "https://registry.npmjs.org/react-copy-to-clipboard/-/react-copy-to-clipboard-5.1.0.tgz",
|
||||
"integrity": "sha512-k61RsNgAayIJNoy9yDsYzDe/yAZAzEbEgcz3DZMhF686LEyukcE1hzurxe85JandPUG+yTfGVFzuEw3xt8WP/A==",
|
||||
"version": "5.1.1",
|
||||
"resolved": "https://registry.npmjs.org/react-copy-to-clipboard/-/react-copy-to-clipboard-5.1.1.tgz",
|
||||
"integrity": "sha512-s+HrzLyJBxrpGTYXF15dTgMjAJpEPZT/Yp6NytAtZMRngejxt6Pt5WrfFxLAcsqUDU6sY1Jz6tyHwIicE1U2Xg==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"copy-to-clipboard": "^3.3.1",
|
||||
"copy-to-clipboard": "^3.3.3",
|
||||
"prop-types": "^15.8.1"
|
||||
},
|
||||
"peerDependencies": {
|
||||
"react": "^15.3.0 || 16 || 17 || 18"
|
||||
"react": ">=15.3.0"
|
||||
}
|
||||
},
|
||||
"node_modules/react-dom": {
|
||||
@ -5068,19 +5041,6 @@
|
||||
"node": ">=8"
|
||||
}
|
||||
},
|
||||
"node_modules/test-exclude/node_modules/minimatch": {
|
||||
"version": "3.1.2",
|
||||
"resolved": "https://registry.npmjs.org/minimatch/-/minimatch-3.1.2.tgz",
|
||||
"integrity": "sha512-J7p63hRiAjw1NDEww1W7i37+ByIrOWO5XQQAzZ3VOcL0PNybwpfmV/N05zFAzwQ9USyEcX6t3UO+K5aqBQOIHw==",
|
||||
"dev": true,
|
||||
"license": "ISC",
|
||||
"dependencies": {
|
||||
"brace-expansion": "^1.1.7"
|
||||
},
|
||||
"engines": {
|
||||
"node": "*"
|
||||
}
|
||||
},
|
||||
"node_modules/tmpl": {
|
||||
"version": "1.0.5",
|
||||
"resolved": "https://registry.npmjs.org/tmpl/-/tmpl-1.0.5.tgz",
|
||||
|
||||
@ -2,7 +2,7 @@
|
||||
"dependencies": {
|
||||
"prism-react-renderer": "2.4.1",
|
||||
"prisma": "5.17.0",
|
||||
"react-copy-to-clipboard": "5.1.0"
|
||||
"react-copy-to-clipboard": "5.1.1"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@testing-library/jest-dom": "6.8.0",
|
||||
@ -12,7 +12,8 @@
|
||||
},
|
||||
"overrides": {
|
||||
"glob": "13.0.0",
|
||||
"minimatch": "10.1.1",
|
||||
"minimatch": "10.2.5",
|
||||
"picomatch": "2.3.2",
|
||||
"@isaacs/brace-expansion": "5.0.0",
|
||||
"@babel/traverse": "7.28.5",
|
||||
"braces": "3.0.3"
|
||||
|
||||
@ -12,11 +12,11 @@ authors = [
|
||||
dependencies = [
|
||||
"fastuuid==0.14.0",
|
||||
"httpx==0.28.1",
|
||||
"openai==2.24.0",
|
||||
"openai==2.33.0",
|
||||
"python-dotenv==1.2.2",
|
||||
"tiktoken==0.12.0",
|
||||
"importlib-metadata==8.5.0",
|
||||
"tokenizers==0.22.2",
|
||||
"tokenizers==0.23.1",
|
||||
"click==8.1.8",
|
||||
"jinja2==3.1.6",
|
||||
"aiohttp==3.13.4",
|
||||
@ -44,11 +44,11 @@ proxy = [
|
||||
"apscheduler==3.11.2",
|
||||
"fastapi-sso==0.19.0",
|
||||
"PyJWT==2.12.0",
|
||||
"python-multipart==0.0.26",
|
||||
"python-multipart==0.0.27",
|
||||
"cryptography==46.0.7",
|
||||
"pynacl==1.6.2",
|
||||
"websockets==15.0.1",
|
||||
"boto3==1.42.59",
|
||||
"boto3==1.43.1",
|
||||
"azure-identity==1.25.2",
|
||||
"azure-storage-blob==12.28.0",
|
||||
"mcp==1.26.0",
|
||||
@ -120,9 +120,9 @@ dev = [
|
||||
"flake8==7.3.0",
|
||||
"black==24.10.0",
|
||||
"mypy==1.19.0",
|
||||
"pytest==8.3.5",
|
||||
"pytest==9.0.3",
|
||||
"pytest-mock==3.15.1",
|
||||
"pytest-asyncio==1.2.0",
|
||||
"pytest-asyncio==1.3.0",
|
||||
"pytest-postgresql==7.0.2",
|
||||
# pytest-postgresql imports psycopg v3 during pytest startup. Keep the base
|
||||
# package and the binary wheel in the default dev environment so local
|
||||
@ -191,7 +191,7 @@ ci = [
|
||||
"pylint==4.0.5",
|
||||
"pyright==1.1.408",
|
||||
"langchain-mcp-adapters==0.2.1",
|
||||
"langchain-openai==1.1.10",
|
||||
"langchain-openai==1.1.14",
|
||||
"langgraph==1.0.10",
|
||||
# langgraph-prebuilt 1.0.9 imports ExecutionInfo/ServerInfo from
|
||||
# langgraph.runtime, which is not exported until langgraph 1.1.0.
|
||||
@ -205,7 +205,7 @@ healthcheck = [
|
||||
]
|
||||
|
||||
[build-system]
|
||||
requires = ["uv_build==0.10.7"]
|
||||
requires = ["uv_build==0.11.8"]
|
||||
build-backend = "uv_build"
|
||||
|
||||
[tool.uv]
|
||||
|
||||
@ -150,6 +150,7 @@ jaraco.context: >=6.1.0 # Unknown license
|
||||
pypdf: >=6.6.2 # BSD-3-Clause license - https://github.com/py-pdf/pypdf/blob/main/LICENSE
|
||||
hf-xet: >=1.4.2 # Apache 2.0 License - https://github.com/huggingface/xet-tools/blob/main/LICENSE
|
||||
pytest-asyncio: >=1.2.0 # Apache 2.0 license
|
||||
pytest: >=9.0.3 # MIT license
|
||||
pytest-postgresql: >=7.0.2 # LGPLv3+ license
|
||||
pytest-xdist: >=3.8.0 # MIT License
|
||||
ruff: >=0.15.3 # MIT License
|
||||
|
||||
@ -171,6 +171,25 @@ class TestHelperFunctions:
|
||||
mock_response.choices[0].message.content = "Hello from LLM"
|
||||
assert _extract_response_text(mock_response) == "Hello from LLM"
|
||||
|
||||
def test_extract_response_text_combines_all_choices(self):
|
||||
from litellm.proxy.guardrails.guardrail_hooks.semantic_guard.semantic_guard import (
|
||||
_extract_response_text,
|
||||
)
|
||||
|
||||
first_choice = MagicMock()
|
||||
first_choice.message.content = "first response"
|
||||
second_choice = MagicMock()
|
||||
second_choice.message.content = [
|
||||
{"type": "text", "text": "second"},
|
||||
{"type": "text", "text": "response"},
|
||||
]
|
||||
mock_response = MagicMock()
|
||||
mock_response.choices = [first_choice, second_choice]
|
||||
|
||||
assert (
|
||||
_extract_response_text(mock_response) == "first response\nsecond response"
|
||||
)
|
||||
|
||||
def test_extract_response_text_empty(self):
|
||||
from litellm.proxy.guardrails.guardrail_hooks.semantic_guard.semantic_guard import (
|
||||
_extract_response_text,
|
||||
|
||||
@ -18,6 +18,7 @@ from litellm import Router
|
||||
|
||||
# this tests debug logs from litellm router and litellm proxy server
|
||||
from litellm._logging import verbose_logger, verbose_proxy_logger, verbose_router_logger
|
||||
from litellm.llms.custom_httpx.async_client_cleanup import close_litellm_async_clients
|
||||
|
||||
|
||||
# this tests debug logs from litellm router and litellm proxy server
|
||||
@ -74,6 +75,9 @@ def test_async_fallbacks(caplog):
|
||||
pytest.fail(f"An exception occurred: {e}")
|
||||
finally:
|
||||
router.reset()
|
||||
# Close cached aiohttp/httpx clients before the event loop ends
|
||||
# to prevent "Unclosed client session" / "Unclosed connector" warnings.
|
||||
await close_litellm_async_clients()
|
||||
|
||||
asyncio.run(_make_request())
|
||||
captured_logs = [rec.message for rec in caplog.records]
|
||||
|
||||
@ -3640,7 +3640,7 @@ def test_mock_response_iterator_tool_use():
|
||||
[
|
||||
# "deepseek/deepseek-reasoner",
|
||||
# "anthropic/claude-3-7-sonnet-20250219",
|
||||
"openrouter/anthropic/claude-3.7-sonnet",
|
||||
"openrouter/anthropic/claude-sonnet-4.5",
|
||||
],
|
||||
)
|
||||
def test_reasoning_content_completion(model):
|
||||
|
||||
368
tests/pass_through_tests/package-lock.json
generated
368
tests/pass_through_tests/package-lock.json
generated
@ -8,8 +8,8 @@
|
||||
"name": "litellm-pass-through-tests",
|
||||
"version": "0.0.0",
|
||||
"dependencies": {
|
||||
"@google-cloud/vertexai": "1.9.3",
|
||||
"@google/generative-ai": "0.21.0"
|
||||
"@google-cloud/vertexai": "1.12.0",
|
||||
"@google/generative-ai": "0.24.1"
|
||||
},
|
||||
"devDependencies": {
|
||||
"jest": "29.7.0"
|
||||
@ -512,21 +512,46 @@
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/@google-cloud/vertexai": {
|
||||
"version": "1.9.3",
|
||||
"resolved": "https://registry.npmjs.org/@google-cloud/vertexai/-/vertexai-1.9.3.tgz",
|
||||
"integrity": "sha512-35o5tIEMLW3JeFJOaaMNR2e5sq+6rpnhrF97PuAxeOm0GlqVTESKhkGj7a5B5mmJSSSU3hUfIhcQCRRsw4Ipzg==",
|
||||
"version": "1.12.0",
|
||||
"resolved": "https://registry.npmjs.org/@google-cloud/vertexai/-/vertexai-1.12.0.tgz",
|
||||
"integrity": "sha512-XMJIk7GIeavFLP5A3YEUlowKa5Y5PZRrnnuTJcqR0k+lFKkv7+IWpdRp+Xbqb8xNDrvQaE2hP2RYPUylyD5EdA==",
|
||||
"license": "Apache-2.0",
|
||||
"dependencies": {
|
||||
"@google/genai": "^1.45.0",
|
||||
"google-auth-library": "^9.1.0"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=18.0.0"
|
||||
}
|
||||
},
|
||||
"node_modules/@google/genai": {
|
||||
"version": "1.51.0",
|
||||
"resolved": "https://registry.npmjs.org/@google/genai/-/genai-1.51.0.tgz",
|
||||
"integrity": "sha512-vTZZF3CSimN7cn2zsLpW2p5WF0eZa5Gz69ITMPCNHpPrDlAstOfGifSfi0p/s9Z9400f7xJRkgvkQNrcM7pJ6w==",
|
||||
"hasInstallScript": true,
|
||||
"license": "Apache-2.0",
|
||||
"dependencies": {
|
||||
"google-auth-library": "^10.3.0",
|
||||
"p-retry": "^4.6.2",
|
||||
"protobufjs": "^7.5.4",
|
||||
"ws": "^8.18.0"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=20.0.0"
|
||||
},
|
||||
"peerDependencies": {
|
||||
"@modelcontextprotocol/sdk": "^1.25.2"
|
||||
},
|
||||
"peerDependenciesMeta": {
|
||||
"@modelcontextprotocol/sdk": {
|
||||
"optional": true
|
||||
}
|
||||
}
|
||||
},
|
||||
"node_modules/@google/generative-ai": {
|
||||
"version": "0.21.0",
|
||||
"resolved": "https://registry.npmjs.org/@google/generative-ai/-/generative-ai-0.21.0.tgz",
|
||||
"integrity": "sha512-7XhUbtnlkSEZK15kN3t+tzIMxsbKm/dSkKBFalj+20NvPKe1kBY7mR2P7vuijEn+f06z5+A8bVGKO0v39cr6Wg==",
|
||||
"version": "0.24.1",
|
||||
"resolved": "https://registry.npmjs.org/@google/generative-ai/-/generative-ai-0.24.1.tgz",
|
||||
"integrity": "sha512-MqO+MLfM6kjxcKoy0p1wRzG3b4ZZXtPI+z2IE26UogS2Cm/XHO+7gGRBh6gcJsOiIVoH93UwKvW4HdgiOZCy9Q==",
|
||||
"license": "Apache-2.0",
|
||||
"engines": {
|
||||
"node": ">=18.0.0"
|
||||
@ -901,6 +926,70 @@
|
||||
"@jridgewell/sourcemap-codec": "^1.4.14"
|
||||
}
|
||||
},
|
||||
"node_modules/@protobufjs/aspromise": {
|
||||
"version": "1.1.2",
|
||||
"resolved": "https://registry.npmjs.org/@protobufjs/aspromise/-/aspromise-1.1.2.tgz",
|
||||
"integrity": "sha512-j+gKExEuLmKwvz3OgROXtrJ2UG2x8Ch2YZUxahh+s1F2HZ+wAceUNLkvy6zKCPVRkU++ZWQrdxsUeQXmcg4uoQ==",
|
||||
"license": "BSD-3-Clause"
|
||||
},
|
||||
"node_modules/@protobufjs/base64": {
|
||||
"version": "1.1.2",
|
||||
"resolved": "https://registry.npmjs.org/@protobufjs/base64/-/base64-1.1.2.tgz",
|
||||
"integrity": "sha512-AZkcAA5vnN/v4PDqKyMR5lx7hZttPDgClv83E//FMNhR2TMcLUhfRUBHCmSl0oi9zMgDDqRUJkSxO3wm85+XLg==",
|
||||
"license": "BSD-3-Clause"
|
||||
},
|
||||
"node_modules/@protobufjs/codegen": {
|
||||
"version": "2.0.5",
|
||||
"resolved": "https://registry.npmjs.org/@protobufjs/codegen/-/codegen-2.0.5.tgz",
|
||||
"integrity": "sha512-zgXFLzW3Ap33e6d0Wlj4MGIm6Ce8O89n/apUaGNB/jx+hw+ruWEp7EwGUshdLKVRCxZW12fp9r40E1mQrf/34g==",
|
||||
"license": "BSD-3-Clause"
|
||||
},
|
||||
"node_modules/@protobufjs/eventemitter": {
|
||||
"version": "1.1.0",
|
||||
"resolved": "https://registry.npmjs.org/@protobufjs/eventemitter/-/eventemitter-1.1.0.tgz",
|
||||
"integrity": "sha512-j9ednRT81vYJ9OfVuXG6ERSTdEL1xVsNgqpkxMsbIabzSo3goCjDIveeGv5d03om39ML71RdmrGNjG5SReBP/Q==",
|
||||
"license": "BSD-3-Clause"
|
||||
},
|
||||
"node_modules/@protobufjs/fetch": {
|
||||
"version": "1.1.0",
|
||||
"resolved": "https://registry.npmjs.org/@protobufjs/fetch/-/fetch-1.1.0.tgz",
|
||||
"integrity": "sha512-lljVXpqXebpsijW71PZaCYeIcE5on1w5DlQy5WH6GLbFryLUrBD4932W/E2BSpfRJWseIL4v/KPgBFxDOIdKpQ==",
|
||||
"license": "BSD-3-Clause",
|
||||
"dependencies": {
|
||||
"@protobufjs/aspromise": "^1.1.1",
|
||||
"@protobufjs/inquire": "^1.1.0"
|
||||
}
|
||||
},
|
||||
"node_modules/@protobufjs/float": {
|
||||
"version": "1.0.2",
|
||||
"resolved": "https://registry.npmjs.org/@protobufjs/float/-/float-1.0.2.tgz",
|
||||
"integrity": "sha512-Ddb+kVXlXst9d+R9PfTIxh1EdNkgoRe5tOX6t01f1lYWOvJnSPDBlG241QLzcyPdoNTsblLUdujGSE4RzrTZGQ==",
|
||||
"license": "BSD-3-Clause"
|
||||
},
|
||||
"node_modules/@protobufjs/inquire": {
|
||||
"version": "1.1.1",
|
||||
"resolved": "https://registry.npmjs.org/@protobufjs/inquire/-/inquire-1.1.1.tgz",
|
||||
"integrity": "sha512-mnzgDV26ueAvk7rsbt9L7bE0SuAoqyuys/sMMrmVcN5x9VsxpcG3rqAUSgDyLp0UZlmNfIbQ4fHfCtreVBk8Ew==",
|
||||
"license": "BSD-3-Clause"
|
||||
},
|
||||
"node_modules/@protobufjs/path": {
|
||||
"version": "1.1.2",
|
||||
"resolved": "https://registry.npmjs.org/@protobufjs/path/-/path-1.1.2.tgz",
|
||||
"integrity": "sha512-6JOcJ5Tm08dOHAbdR3GrvP+yUUfkjG5ePsHYczMFLq3ZmMkAD98cDgcT2iA1lJ9NVwFd4tH/iSSoe44YWkltEA==",
|
||||
"license": "BSD-3-Clause"
|
||||
},
|
||||
"node_modules/@protobufjs/pool": {
|
||||
"version": "1.1.0",
|
||||
"resolved": "https://registry.npmjs.org/@protobufjs/pool/-/pool-1.1.0.tgz",
|
||||
"integrity": "sha512-0kELaGSIDBKvcgS4zkjz1PeddatrjYcmMWOlAuAPwAeccUrPHdUqo/J6LiymHHEiJT5NrF1UVwxY14f+fy4WQw==",
|
||||
"license": "BSD-3-Clause"
|
||||
},
|
||||
"node_modules/@protobufjs/utf8": {
|
||||
"version": "1.1.1",
|
||||
"resolved": "https://registry.npmjs.org/@protobufjs/utf8/-/utf8-1.1.1.tgz",
|
||||
"integrity": "sha512-oOAWABowe8EAbMyWKM0tYDKi8Yaox52D+HWZhAIJqQXbqe0xI/GV7FhLWqlEKreMkfDjshR5FKgi3mnle0h6Eg==",
|
||||
"license": "BSD-3-Clause"
|
||||
},
|
||||
"node_modules/@sinclair/typebox": {
|
||||
"version": "0.27.10",
|
||||
"resolved": "https://registry.npmjs.org/@sinclair/typebox/-/typebox-0.27.10.tgz",
|
||||
@ -1014,12 +1103,17 @@
|
||||
"version": "25.6.0",
|
||||
"resolved": "https://registry.npmjs.org/@types/node/-/node-25.6.0.tgz",
|
||||
"integrity": "sha512-+qIYRKdNYJwY3vRCZMdJbPLJAtGjQBudzZzdzwQYkEPQd+PJGixUL5QfvCLDaULoLv+RhT3LDkwEfKaAkgSmNQ==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"undici-types": "~7.19.0"
|
||||
}
|
||||
},
|
||||
"node_modules/@types/retry": {
|
||||
"version": "0.12.0",
|
||||
"resolved": "https://registry.npmjs.org/@types/retry/-/retry-0.12.0.tgz",
|
||||
"integrity": "sha512-wWKOClTTiizcZhXnPY4wikVAwmdYHp8q6DmC+EJUzAMsycb7HB32Kh9RN4+0gExjmPmZSAQjgURXIGATPegAvA==",
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/@types/stack-utils": {
|
||||
"version": "2.0.3",
|
||||
"resolved": "https://registry.npmjs.org/@types/stack-utils/-/stack-utils-2.0.3.tgz",
|
||||
@ -1560,6 +1654,15 @@
|
||||
"node": ">= 8"
|
||||
}
|
||||
},
|
||||
"node_modules/data-uri-to-buffer": {
|
||||
"version": "4.0.1",
|
||||
"resolved": "https://registry.npmjs.org/data-uri-to-buffer/-/data-uri-to-buffer-4.0.1.tgz",
|
||||
"integrity": "sha512-0R9ikRb668HB7QDxT1vkpuUBtqc53YyAwMwGeUFKRojY/NWKvdZ+9UYtRfGmhqNbRkTSVpMbmyhXipFFv2cb/A==",
|
||||
"license": "MIT",
|
||||
"engines": {
|
||||
"node": ">= 12"
|
||||
}
|
||||
},
|
||||
"node_modules/debug": {
|
||||
"version": "4.4.3",
|
||||
"resolved": "https://registry.npmjs.org/debug/-/debug-4.4.3.tgz",
|
||||
@ -1785,6 +1888,29 @@
|
||||
"bser": "2.1.1"
|
||||
}
|
||||
},
|
||||
"node_modules/fetch-blob": {
|
||||
"version": "3.2.0",
|
||||
"resolved": "https://registry.npmjs.org/fetch-blob/-/fetch-blob-3.2.0.tgz",
|
||||
"integrity": "sha512-7yAQpD2UMJzLi1Dqv7qFYnPbaPx7ZfFK6PiIxQ4PfkGPyNyl2Ugx+a/umUonmKqjhM4DnfbMvdX6otXq83soQQ==",
|
||||
"funding": [
|
||||
{
|
||||
"type": "github",
|
||||
"url": "https://github.com/sponsors/jimmywarting"
|
||||
},
|
||||
{
|
||||
"type": "paypal",
|
||||
"url": "https://paypal.me/jimmywarting"
|
||||
}
|
||||
],
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"node-domexception": "^1.0.0",
|
||||
"web-streams-polyfill": "^3.0.3"
|
||||
},
|
||||
"engines": {
|
||||
"node": "^12.20 || >= 14.13"
|
||||
}
|
||||
},
|
||||
"node_modules/fill-range": {
|
||||
"version": "7.1.1",
|
||||
"resolved": "https://registry.npmjs.org/fill-range/-/fill-range-7.1.1.tgz",
|
||||
@ -1812,6 +1938,18 @@
|
||||
"node": ">=8"
|
||||
}
|
||||
},
|
||||
"node_modules/formdata-polyfill": {
|
||||
"version": "4.0.10",
|
||||
"resolved": "https://registry.npmjs.org/formdata-polyfill/-/formdata-polyfill-4.0.10.tgz",
|
||||
"integrity": "sha512-buewHzMvYL29jdeQTVILecSaZKnt/RJWjoZCF5OW60Z67/GmSLBkOFM7qh1PI3zFNtJbaZL5eQu1vLfazOwj4g==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"fetch-blob": "^3.1.2"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=12.20.0"
|
||||
}
|
||||
},
|
||||
"node_modules/fs.realpath": {
|
||||
"version": "1.0.0",
|
||||
"resolved": "https://registry.npmjs.org/fs.realpath/-/fs.realpath-1.0.0.tgz",
|
||||
@ -1845,33 +1983,31 @@
|
||||
}
|
||||
},
|
||||
"node_modules/gaxios": {
|
||||
"version": "6.7.1",
|
||||
"resolved": "https://registry.npmjs.org/gaxios/-/gaxios-6.7.1.tgz",
|
||||
"integrity": "sha512-LDODD4TMYx7XXdpwxAVRAIAuB0bzv0s+ywFonY46k126qzQHT9ygyoa9tncmOiQmmDrik65UYsEkv3lbfqQ3yQ==",
|
||||
"version": "7.1.4",
|
||||
"resolved": "https://registry.npmjs.org/gaxios/-/gaxios-7.1.4.tgz",
|
||||
"integrity": "sha512-bTIgTsM2bWn3XklZISBTQX7ZSddGW+IO3bMdGaemHZ3tbqExMENHLx6kKZ/KlejgrMtj8q7wBItt51yegqalrA==",
|
||||
"license": "Apache-2.0",
|
||||
"dependencies": {
|
||||
"extend": "^3.0.2",
|
||||
"https-proxy-agent": "^7.0.1",
|
||||
"is-stream": "^2.0.0",
|
||||
"node-fetch": "^2.6.9",
|
||||
"uuid": "^9.0.1"
|
||||
"node-fetch": "^3.3.2"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=14"
|
||||
"node": ">=18"
|
||||
}
|
||||
},
|
||||
"node_modules/gcp-metadata": {
|
||||
"version": "6.1.1",
|
||||
"resolved": "https://registry.npmjs.org/gcp-metadata/-/gcp-metadata-6.1.1.tgz",
|
||||
"integrity": "sha512-a4tiq7E0/5fTjxPAaH4jpjkSv/uCaU2p5KC6HVGrvl0cDjA8iBZv4vv1gyzlmK0ZUKqwpOyQMKzZQe3lTit77A==",
|
||||
"version": "8.1.2",
|
||||
"resolved": "https://registry.npmjs.org/gcp-metadata/-/gcp-metadata-8.1.2.tgz",
|
||||
"integrity": "sha512-zV/5HKTfCeKWnxG0Dmrw51hEWFGfcF2xiXqcA3+J90WDuP0SvoiSO5ORvcBsifmx/FoIjgQN3oNOGaQ5PhLFkg==",
|
||||
"license": "Apache-2.0",
|
||||
"dependencies": {
|
||||
"gaxios": "^6.1.1",
|
||||
"google-logging-utils": "^0.0.2",
|
||||
"gaxios": "^7.0.0",
|
||||
"google-logging-utils": "^1.0.0",
|
||||
"json-bigint": "^1.0.0"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=14"
|
||||
"node": ">=18"
|
||||
}
|
||||
},
|
||||
"node_modules/gensync": {
|
||||
@ -1940,26 +2076,26 @@
|
||||
}
|
||||
},
|
||||
"node_modules/google-auth-library": {
|
||||
"version": "9.15.1",
|
||||
"resolved": "https://registry.npmjs.org/google-auth-library/-/google-auth-library-9.15.1.tgz",
|
||||
"integrity": "sha512-Jb6Z0+nvECVz+2lzSMt9u98UsoakXxA2HGHMCxh+so3n90XgYWkq5dur19JAJV7ONiJY22yBTyJB1TSkvPq9Ng==",
|
||||
"version": "10.6.2",
|
||||
"resolved": "https://registry.npmjs.org/google-auth-library/-/google-auth-library-10.6.2.tgz",
|
||||
"integrity": "sha512-e27Z6EThmVNNvtYASwQxose/G57rkRuaRbQyxM2bvYLLX/GqWZ5chWq2EBoUchJbCc57eC9ArzO5wMsEmWftCw==",
|
||||
"license": "Apache-2.0",
|
||||
"dependencies": {
|
||||
"base64-js": "^1.3.0",
|
||||
"ecdsa-sig-formatter": "^1.0.11",
|
||||
"gaxios": "^6.1.1",
|
||||
"gcp-metadata": "^6.1.0",
|
||||
"gtoken": "^7.0.0",
|
||||
"gaxios": "^7.1.4",
|
||||
"gcp-metadata": "8.1.2",
|
||||
"google-logging-utils": "1.1.3",
|
||||
"jws": "^4.0.0"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=14"
|
||||
"node": ">=18"
|
||||
}
|
||||
},
|
||||
"node_modules/google-logging-utils": {
|
||||
"version": "0.0.2",
|
||||
"resolved": "https://registry.npmjs.org/google-logging-utils/-/google-logging-utils-0.0.2.tgz",
|
||||
"integrity": "sha512-NEgUnEcBiP5HrPzufUkBzJOD/Sxsco3rLNo1F1TNf7ieU8ryUzBhqba8r756CjLX7rn3fHl6iLEwPYuqpoKgQQ==",
|
||||
"version": "1.1.3",
|
||||
"resolved": "https://registry.npmjs.org/google-logging-utils/-/google-logging-utils-1.1.3.tgz",
|
||||
"integrity": "sha512-eAmLkjDjAFCVXg7A1unxHsLf961m6y17QFqXqAXGj/gVkKFrEICfStRfwUlGNfeCEjNRa32JEWOUTlYXPyyKvA==",
|
||||
"license": "Apache-2.0",
|
||||
"engines": {
|
||||
"node": ">=14"
|
||||
@ -1972,19 +2108,6 @@
|
||||
"dev": true,
|
||||
"license": "ISC"
|
||||
},
|
||||
"node_modules/gtoken": {
|
||||
"version": "7.1.0",
|
||||
"resolved": "https://registry.npmjs.org/gtoken/-/gtoken-7.1.0.tgz",
|
||||
"integrity": "sha512-pCcEwRi+TKpMlxAQObHDQ56KawURgyAf6jtIY046fJ5tIv3zDe/LEIubckAO8fj6JnAxLdmWkUfNyulQ2iKdEw==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"gaxios": "^6.0.0",
|
||||
"jws": "^4.0.0"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=14.0.0"
|
||||
}
|
||||
},
|
||||
"node_modules/has-flag": {
|
||||
"version": "4.0.0",
|
||||
"resolved": "https://registry.npmjs.org/has-flag/-/has-flag-4.0.0.tgz",
|
||||
@ -2144,6 +2267,7 @@
|
||||
"version": "2.0.1",
|
||||
"resolved": "https://registry.npmjs.org/is-stream/-/is-stream-2.0.1.tgz",
|
||||
"integrity": "sha512-hFoiJiTl63nn+kstHGBtewWSKnQLpyb155KHheA1l39uvtO9nWIop1p3udqPcUd/xbF1VLMO4n7OI6p7RbngDg==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"engines": {
|
||||
"node": ">=8"
|
||||
@ -2963,6 +3087,12 @@
|
||||
"node": ">=8"
|
||||
}
|
||||
},
|
||||
"node_modules/long": {
|
||||
"version": "5.3.2",
|
||||
"resolved": "https://registry.npmjs.org/long/-/long-5.3.2.tgz",
|
||||
"integrity": "sha512-mNAgZ1GmyNhD7AuqnTG3/VQ26o760+ZYBPKjPvugO8+nLbYfX6TVpJPseBvopbdY+qpZ/lKUnmEc1LeZYS3QAA==",
|
||||
"license": "Apache-2.0"
|
||||
},
|
||||
"node_modules/lru-cache": {
|
||||
"version": "5.1.1",
|
||||
"resolved": "https://registry.npmjs.org/lru-cache/-/lru-cache-5.1.1.tgz",
|
||||
@ -3069,24 +3199,42 @@
|
||||
"dev": true,
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/node-domexception": {
|
||||
"version": "1.0.0",
|
||||
"resolved": "https://registry.npmjs.org/node-domexception/-/node-domexception-1.0.0.tgz",
|
||||
"integrity": "sha512-/jKZoMpw0F8GRwl4/eLROPA3cfcXtLApP0QzLmUT/HuPCZWyB7IY9ZrMeKw2O/nFIqPQB3PVM9aYm0F312AXDQ==",
|
||||
"deprecated": "Use your platform's native DOMException instead",
|
||||
"funding": [
|
||||
{
|
||||
"type": "github",
|
||||
"url": "https://github.com/sponsors/jimmywarting"
|
||||
},
|
||||
{
|
||||
"type": "github",
|
||||
"url": "https://paypal.me/jimmywarting"
|
||||
}
|
||||
],
|
||||
"license": "MIT",
|
||||
"engines": {
|
||||
"node": ">=10.5.0"
|
||||
}
|
||||
},
|
||||
"node_modules/node-fetch": {
|
||||
"version": "2.7.0",
|
||||
"resolved": "https://registry.npmjs.org/node-fetch/-/node-fetch-2.7.0.tgz",
|
||||
"integrity": "sha512-c4FRfUm/dbcWZ7U+1Wq0AwCyFL+3nt2bEw05wfxSz+DWpWsitgmSgYmy2dQdWyKC1694ELPqMs/YzUSNozLt8A==",
|
||||
"version": "3.3.2",
|
||||
"resolved": "https://registry.npmjs.org/node-fetch/-/node-fetch-3.3.2.tgz",
|
||||
"integrity": "sha512-dRB78srN/l6gqWulah9SrxeYnxeddIG30+GOqK/9OlLVyLg3HPnr6SqOWTWOXKRwC2eGYCkZ59NNuSgvSrpgOA==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"whatwg-url": "^5.0.0"
|
||||
"data-uri-to-buffer": "^4.0.0",
|
||||
"fetch-blob": "^3.1.4",
|
||||
"formdata-polyfill": "^4.0.10"
|
||||
},
|
||||
"engines": {
|
||||
"node": "4.x || >=6.0.0"
|
||||
"node": "^12.20.0 || ^14.13.1 || >=16.0.0"
|
||||
},
|
||||
"peerDependencies": {
|
||||
"encoding": "^0.1.0"
|
||||
},
|
||||
"peerDependenciesMeta": {
|
||||
"encoding": {
|
||||
"optional": true
|
||||
}
|
||||
"funding": {
|
||||
"type": "opencollective",
|
||||
"url": "https://opencollective.com/node-fetch"
|
||||
}
|
||||
},
|
||||
"node_modules/node-int64": {
|
||||
@ -3197,6 +3345,19 @@
|
||||
"url": "https://github.com/sponsors/sindresorhus"
|
||||
}
|
||||
},
|
||||
"node_modules/p-retry": {
|
||||
"version": "4.6.2",
|
||||
"resolved": "https://registry.npmjs.org/p-retry/-/p-retry-4.6.2.tgz",
|
||||
"integrity": "sha512-312Id396EbJdvRONlngUx0NydfrIQ5lsYu0znKVUzVvArzEIt08V1qhtyESbGVd1FGX7UKtiFp5uwKZdM8wIuQ==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"@types/retry": "0.12.0",
|
||||
"retry": "^0.13.1"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=8"
|
||||
}
|
||||
},
|
||||
"node_modules/p-try": {
|
||||
"version": "2.2.0",
|
||||
"resolved": "https://registry.npmjs.org/p-try/-/p-try-2.2.0.tgz",
|
||||
@ -3348,6 +3509,30 @@
|
||||
"node": ">= 6"
|
||||
}
|
||||
},
|
||||
"node_modules/protobufjs": {
|
||||
"version": "7.5.6",
|
||||
"resolved": "https://registry.npmjs.org/protobufjs/-/protobufjs-7.5.6.tgz",
|
||||
"integrity": "sha512-M71sTMB146U3u0di3yup8iM+zv8yPRNQVr1KK4tyBitl3qFvEGucq/rGDRShD2rsJhtN02RJaJ7j5X5hmy8SJg==",
|
||||
"hasInstallScript": true,
|
||||
"license": "BSD-3-Clause",
|
||||
"dependencies": {
|
||||
"@protobufjs/aspromise": "^1.1.2",
|
||||
"@protobufjs/base64": "^1.1.2",
|
||||
"@protobufjs/codegen": "^2.0.5",
|
||||
"@protobufjs/eventemitter": "^1.1.0",
|
||||
"@protobufjs/fetch": "^1.1.0",
|
||||
"@protobufjs/float": "^1.0.2",
|
||||
"@protobufjs/inquire": "^1.1.1",
|
||||
"@protobufjs/path": "^1.1.2",
|
||||
"@protobufjs/pool": "^1.1.0",
|
||||
"@protobufjs/utf8": "^1.1.1",
|
||||
"@types/node": ">=13.7.0",
|
||||
"long": "^5.0.0"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=12.0.0"
|
||||
}
|
||||
},
|
||||
"node_modules/pure-rand": {
|
||||
"version": "6.1.0",
|
||||
"resolved": "https://registry.npmjs.org/pure-rand/-/pure-rand-6.1.0.tgz",
|
||||
@ -3437,6 +3622,15 @@
|
||||
"node": ">=10"
|
||||
}
|
||||
},
|
||||
"node_modules/retry": {
|
||||
"version": "0.13.1",
|
||||
"resolved": "https://registry.npmjs.org/retry/-/retry-0.13.1.tgz",
|
||||
"integrity": "sha512-XQBQ3I8W1Cge0Seh+6gjj03LbmRFWuoszgK9ooCpwYIrhhoO80pfq4cUkU5DkknwfOfFteRwlZ56PYOGYyFWdg==",
|
||||
"license": "MIT",
|
||||
"engines": {
|
||||
"node": ">= 4"
|
||||
}
|
||||
},
|
||||
"node_modules/safe-buffer": {
|
||||
"version": "5.2.1",
|
||||
"resolved": "https://registry.npmjs.org/safe-buffer/-/safe-buffer-5.2.1.tgz",
|
||||
@ -3691,12 +3885,6 @@
|
||||
"node": ">=8.0"
|
||||
}
|
||||
},
|
||||
"node_modules/tr46": {
|
||||
"version": "0.0.3",
|
||||
"resolved": "https://registry.npmjs.org/tr46/-/tr46-0.0.3.tgz",
|
||||
"integrity": "sha512-N3WMsuqV66lT30CrXNbEjx4GEwlow3v6rr4mCcv6prnfwhS01rkgyFdjPNBYd9br7LpXV1+Emh01fHnq2Gdgrw==",
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/type-detect": {
|
||||
"version": "4.0.8",
|
||||
"resolved": "https://registry.npmjs.org/type-detect/-/type-detect-4.0.8.tgz",
|
||||
@ -3724,7 +3912,6 @@
|
||||
"version": "7.19.2",
|
||||
"resolved": "https://registry.npmjs.org/undici-types/-/undici-types-7.19.2.tgz",
|
||||
"integrity": "sha512-qYVnV5OEm2AW8cJMCpdV20CDyaN3g0AjDlOGf1OW4iaDEx8MwdtChUp4zu4H0VP3nDRF/8RKWH+IPp9uW0YGZg==",
|
||||
"dev": true,
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/update-browserslist-db": {
|
||||
@ -3758,19 +3945,6 @@
|
||||
"browserslist": ">= 4.21.0"
|
||||
}
|
||||
},
|
||||
"node_modules/uuid": {
|
||||
"version": "9.0.1",
|
||||
"resolved": "https://registry.npmjs.org/uuid/-/uuid-9.0.1.tgz",
|
||||
"integrity": "sha512-b+1eJOlsR9K8HJpow9Ok3fiWOWSIcIzXodvv0rQjVoOVNpWMpxf1wZNpt4y9h10odCNrqnYp1OBzRktckBe3sA==",
|
||||
"funding": [
|
||||
"https://github.com/sponsors/broofa",
|
||||
"https://github.com/sponsors/ctavan"
|
||||
],
|
||||
"license": "MIT",
|
||||
"bin": {
|
||||
"uuid": "dist/bin/uuid"
|
||||
}
|
||||
},
|
||||
"node_modules/v8-to-istanbul": {
|
||||
"version": "9.3.0",
|
||||
"resolved": "https://registry.npmjs.org/v8-to-istanbul/-/v8-to-istanbul-9.3.0.tgz",
|
||||
@ -3796,20 +3970,13 @@
|
||||
"makeerror": "1.0.12"
|
||||
}
|
||||
},
|
||||
"node_modules/webidl-conversions": {
|
||||
"version": "3.0.1",
|
||||
"resolved": "https://registry.npmjs.org/webidl-conversions/-/webidl-conversions-3.0.1.tgz",
|
||||
"integrity": "sha512-2JAn3z8AR6rjK8Sm8orRC0h/bcl/DqL7tRPdGZ4I1CjdF+EaMLmYxBHyXuKL849eucPFhvBoxMsflfOb8kxaeQ==",
|
||||
"license": "BSD-2-Clause"
|
||||
},
|
||||
"node_modules/whatwg-url": {
|
||||
"version": "5.0.0",
|
||||
"resolved": "https://registry.npmjs.org/whatwg-url/-/whatwg-url-5.0.0.tgz",
|
||||
"integrity": "sha512-saE57nupxk6v3HY35+jzBwYa0rKSy0XR8JSxZPwgLr7ys0IBzhGviA1/TUGJLmSVqs8pb9AnvICXEuOHLprYTw==",
|
||||
"node_modules/web-streams-polyfill": {
|
||||
"version": "3.3.3",
|
||||
"resolved": "https://registry.npmjs.org/web-streams-polyfill/-/web-streams-polyfill-3.3.3.tgz",
|
||||
"integrity": "sha512-d2JWLCivmZYTSIoge9MsgFCZrt571BikcWGYkjC1khllbTeDlGqZ2D8vD8E/lJa8WGWbb7Plm8/XJYV7IJHZZw==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"tr46": "~0.0.3",
|
||||
"webidl-conversions": "^3.0.0"
|
||||
"engines": {
|
||||
"node": ">= 8"
|
||||
}
|
||||
},
|
||||
"node_modules/which": {
|
||||
@ -3867,6 +4034,27 @@
|
||||
"node": "^12.13.0 || ^14.15.0 || >=16.0.0"
|
||||
}
|
||||
},
|
||||
"node_modules/ws": {
|
||||
"version": "8.20.0",
|
||||
"resolved": "https://registry.npmjs.org/ws/-/ws-8.20.0.tgz",
|
||||
"integrity": "sha512-sAt8BhgNbzCtgGbt2OxmpuryO63ZoDk/sqaB/znQm94T4fCEsy/yV+7CdC1kJhOU9lboAEU7R3kquuycDoibVA==",
|
||||
"license": "MIT",
|
||||
"engines": {
|
||||
"node": ">=10.0.0"
|
||||
},
|
||||
"peerDependencies": {
|
||||
"bufferutil": "^4.0.1",
|
||||
"utf-8-validate": ">=5.0.2"
|
||||
},
|
||||
"peerDependenciesMeta": {
|
||||
"bufferutil": {
|
||||
"optional": true
|
||||
},
|
||||
"utf-8-validate": {
|
||||
"optional": true
|
||||
}
|
||||
}
|
||||
},
|
||||
"node_modules/y18n": {
|
||||
"version": "5.0.8",
|
||||
"resolved": "https://registry.npmjs.org/y18n/-/y18n-5.0.8.tgz",
|
||||
|
||||
@ -4,10 +4,15 @@
|
||||
"private": true,
|
||||
"description": "JS pass-through tests for Vertex AI / Google AI Studio routes. CI-only; not published.",
|
||||
"dependencies": {
|
||||
"@google-cloud/vertexai": "1.9.3",
|
||||
"@google/generative-ai": "0.21.0"
|
||||
"@google-cloud/vertexai": "1.12.0",
|
||||
"@google/generative-ai": "0.24.1"
|
||||
},
|
||||
"devDependencies": {
|
||||
"jest": "29.7.0"
|
||||
},
|
||||
"overrides": {
|
||||
"@google-cloud/vertexai": {
|
||||
"google-auth-library": "10.6.2"
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -8,22 +8,22 @@
|
||||
"name": "ui-unit-tests",
|
||||
"version": "1.0.0",
|
||||
"dependencies": {
|
||||
"@ant-design/icons": "^5.0.0",
|
||||
"antd": "^5.12.5",
|
||||
"react": "^18.2.0",
|
||||
"react-dom": "^18.2.0"
|
||||
"@ant-design/icons": "5.6.1",
|
||||
"antd": "5.29.1",
|
||||
"react": "18.3.1",
|
||||
"react-dom": "18.3.1"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@testing-library/jest-dom": "^6.0.0",
|
||||
"@testing-library/react": "^14.0.0",
|
||||
"@types/jest": "^29.5.0",
|
||||
"@types/react": "^18.2.0",
|
||||
"@types/react-dom": "^18.2.0",
|
||||
"identity-obj-proxy": "^3.0.0",
|
||||
"jest": "^29.5.0",
|
||||
"jest-environment-jsdom": "^29.5.0",
|
||||
"ts-jest": "^29.1.0",
|
||||
"typescript": "^5.0.0"
|
||||
"@testing-library/jest-dom": "6.9.1",
|
||||
"@testing-library/react": "14.3.1",
|
||||
"@types/jest": "29.5.14",
|
||||
"@types/react": "18.3.27",
|
||||
"@types/react-dom": "18.3.7",
|
||||
"identity-obj-proxy": "3.0.0",
|
||||
"jest": "29.7.0",
|
||||
"jest-environment-jsdom": "29.7.0",
|
||||
"ts-jest": "29.4.5",
|
||||
"typescript": "5.9.3"
|
||||
}
|
||||
},
|
||||
"node_modules/@adobe/css-tools": {
|
||||
@ -647,29 +647,6 @@
|
||||
"integrity": "sha512-OWORNpfjMsSSUBVrRBVGECkhWcULOAJz9ZW8uK9qgxD+87M7jHRcvh/A96XXNhXTLmKcoYSQtBEX7lHMO7YRwg==",
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/@isaacs/balanced-match": {
|
||||
"version": "4.0.1",
|
||||
"resolved": "https://registry.npmjs.org/@isaacs/balanced-match/-/balanced-match-4.0.1.tgz",
|
||||
"integrity": "sha512-yzMTt9lEb8Gv7zRioUilSglI0c0smZ9k5D65677DLWLtWJaXIS3CqcGyUFByYKlnUj6TkjLVs54fBl6+TiGQDQ==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"engines": {
|
||||
"node": "20 || >=22"
|
||||
}
|
||||
},
|
||||
"node_modules/@isaacs/brace-expansion": {
|
||||
"version": "5.0.0",
|
||||
"resolved": "https://registry.npmjs.org/@isaacs/brace-expansion/-/brace-expansion-5.0.0.tgz",
|
||||
"integrity": "sha512-ZT55BDLV0yv0RBm2czMiZ+SqCGO7AvmOM3G/w2xhVPH+te0aKgFjmBvGlL1dH+ql2tgGO3MVrbb3jCKyvpgnxA==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"@isaacs/balanced-match": "^4.0.1"
|
||||
},
|
||||
"engines": {
|
||||
"node": "20 || >=22"
|
||||
}
|
||||
},
|
||||
"node_modules/@istanbuljs/load-nyc-config": {
|
||||
"version": "1.1.0",
|
||||
"resolved": "https://registry.npmjs.org/@istanbuljs/load-nyc-config/-/load-nyc-config-1.1.0.tgz",
|
||||
@ -1326,9 +1303,9 @@
|
||||
}
|
||||
},
|
||||
"node_modules/@tootallnate/once": {
|
||||
"version": "2.0.0",
|
||||
"resolved": "https://registry.npmjs.org/@tootallnate/once/-/once-2.0.0.tgz",
|
||||
"integrity": "sha512-XCuKFP5PS55gnMVu3dty8KPatLqUoy/ZYzDzAGCQ8JNFCkLXzmI7vNHCR+XpbZaMWQK/vQubr7PkYq8g470J/A==",
|
||||
"version": "3.0.1",
|
||||
"resolved": "https://registry.npmjs.org/@tootallnate/once/-/once-3.0.1.tgz",
|
||||
"integrity": "sha512-VyMVKRrpHTT8PnotUeV8L/mDaMwD5DaAKCFLP73zAqAtvF0FCqky+Ki7BYbFCYQmqFyTe9316Ed5zS70QUR9eg==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"engines": {
|
||||
@ -1907,11 +1884,14 @@
|
||||
}
|
||||
},
|
||||
"node_modules/balanced-match": {
|
||||
"version": "1.0.2",
|
||||
"resolved": "https://registry.npmjs.org/balanced-match/-/balanced-match-1.0.2.tgz",
|
||||
"integrity": "sha512-3oSeUO0TMV67hN1AmbXsK4yaqU7tjiHlbxRDZOpH0KW9+CeX4bRAaX0Anxt0tx2MrpRpWwQaPwIlISEJhYU5Pw==",
|
||||
"version": "4.0.4",
|
||||
"resolved": "https://registry.npmjs.org/balanced-match/-/balanced-match-4.0.4.tgz",
|
||||
"integrity": "sha512-BLrgEcRTwX2o6gGxGOCNyMvGSp35YofuYzw9h1IMTRmKqttAZZVU67bdb9Pr2vUHA8+j3i2tJfjO6C6+4myGTA==",
|
||||
"dev": true,
|
||||
"license": "MIT"
|
||||
"license": "MIT",
|
||||
"engines": {
|
||||
"node": "18 || 20 || >=22"
|
||||
}
|
||||
},
|
||||
"node_modules/baseline-browser-mapping": {
|
||||
"version": "2.8.30",
|
||||
@ -1924,14 +1904,16 @@
|
||||
}
|
||||
},
|
||||
"node_modules/brace-expansion": {
|
||||
"version": "1.1.12",
|
||||
"resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.12.tgz",
|
||||
"integrity": "sha512-9T9UjW3r0UW5c1Q7GTwllptXwhvYmEzFhzMfZ9H7FQWt+uZePjZPjBP/W1ZEyZ1twGWom5/56TF4lPcqjnDHcg==",
|
||||
"version": "5.0.5",
|
||||
"resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-5.0.5.tgz",
|
||||
"integrity": "sha512-VZznLgtwhn+Mact9tfiwx64fA9erHH/MCXEUfB/0bX/6Fz6ny5EGTXYltMocqg4xFAQZtnO3DHWWXi8RiuN7cQ==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"balanced-match": "^1.0.0",
|
||||
"concat-map": "0.0.1"
|
||||
"balanced-match": "^4.0.2"
|
||||
},
|
||||
"engines": {
|
||||
"node": "18 || 20 || >=22"
|
||||
}
|
||||
},
|
||||
"node_modules/braces": {
|
||||
@ -2230,13 +2212,6 @@
|
||||
"integrity": "sha512-VRhuHOLoKYOy4UbilLbUzbYg93XLjv2PncJC50EuTWPA3gaja1UjBsUP/D/9/juV3vQFr6XBEzn9KCAHdUvOHw==",
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/concat-map": {
|
||||
"version": "0.0.1",
|
||||
"resolved": "https://registry.npmjs.org/concat-map/-/concat-map-0.0.1.tgz",
|
||||
"integrity": "sha512-/Srv4dswyQNBfohGpz9o6Yb3Gz3SrUDqBH5rTuhGR7ahtlbYKnVxw2bCFMRljaA7EXHaXZ8wsHdodFvbkhKmqg==",
|
||||
"dev": true,
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/convert-source-map": {
|
||||
"version": "2.0.0",
|
||||
"resolved": "https://registry.npmjs.org/convert-source-map/-/convert-source-map-2.0.0.tgz",
|
||||
@ -3015,9 +2990,9 @@
|
||||
"license": "ISC"
|
||||
},
|
||||
"node_modules/handlebars": {
|
||||
"version": "4.7.8",
|
||||
"resolved": "https://registry.npmjs.org/handlebars/-/handlebars-4.7.8.tgz",
|
||||
"integrity": "sha512-vafaFqs8MZkRrSX7sFVUdo3ap/eNiLnb4IakshzvP56X5Nr1iGKAIqdX6tMlm6HcNRIkr6AxO5jFEoJzzpT8aQ==",
|
||||
"version": "4.7.9",
|
||||
"resolved": "https://registry.npmjs.org/handlebars/-/handlebars-4.7.9.tgz",
|
||||
"integrity": "sha512-4E71E0rpOaQuJR2A3xDZ+GM1HyWYv1clR58tC8emQNeQe3RH7MAzSbat+V0wG78LQBo6m6bzSG/L4pBuCsgnUQ==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
@ -4920,16 +4895,16 @@
|
||||
}
|
||||
},
|
||||
"node_modules/minimatch": {
|
||||
"version": "10.1.1",
|
||||
"resolved": "https://registry.npmjs.org/minimatch/-/minimatch-10.1.1.tgz",
|
||||
"integrity": "sha512-enIvLvRAFZYXJzkCYG5RKmPfrFArdLv+R+lbQ53BmIMLIry74bjKzX6iHAm8WYamJkhSSEabrWN5D97XnKObjQ==",
|
||||
"version": "10.2.5",
|
||||
"resolved": "https://registry.npmjs.org/minimatch/-/minimatch-10.2.5.tgz",
|
||||
"integrity": "sha512-MULkVLfKGYDFYejP07QOurDLLQpcjk7Fw+7jXS2R2czRQzR56yHRveU5NDJEOviH+hETZKSkIk5c+T23GjFUMg==",
|
||||
"dev": true,
|
||||
"license": "BlueOak-1.0.0",
|
||||
"dependencies": {
|
||||
"@isaacs/brace-expansion": "^5.0.0"
|
||||
"brace-expansion": "^5.0.5"
|
||||
},
|
||||
"engines": {
|
||||
"node": "20 || >=22"
|
||||
"node": "18 || 20 || >=22"
|
||||
},
|
||||
"funding": {
|
||||
"url": "https://github.com/sponsors/isaacs"
|
||||
@ -5246,9 +5221,9 @@
|
||||
"license": "ISC"
|
||||
},
|
||||
"node_modules/picomatch": {
|
||||
"version": "2.3.1",
|
||||
"resolved": "https://registry.npmjs.org/picomatch/-/picomatch-2.3.1.tgz",
|
||||
"integrity": "sha512-JU3teHTNjmE2VCGFzuY8EXzCDVwEqB2a8fsIvwaStHhAWJEeVd1o1QD80CU6+ZdEXXSLbSsuLwJjkCBWqRQUVA==",
|
||||
"version": "2.3.2",
|
||||
"resolved": "https://registry.npmjs.org/picomatch/-/picomatch-2.3.2.tgz",
|
||||
"integrity": "sha512-V7+vQEJ06Z+c5tSye8S+nHUfI51xoXIXjHQ99cQtKUkQqqO1kO/KCJUfZXuB47h/YBlDhah2H3hdUGXn8ie0oA==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"engines": {
|
||||
@ -6562,19 +6537,6 @@
|
||||
"node": ">=8"
|
||||
}
|
||||
},
|
||||
"node_modules/test-exclude/node_modules/minimatch": {
|
||||
"version": "3.1.2",
|
||||
"resolved": "https://registry.npmjs.org/minimatch/-/minimatch-3.1.2.tgz",
|
||||
"integrity": "sha512-J7p63hRiAjw1NDEww1W7i37+ByIrOWO5XQQAzZ3VOcL0PNybwpfmV/N05zFAzwQ9USyEcX6t3UO+K5aqBQOIHw==",
|
||||
"dev": true,
|
||||
"license": "ISC",
|
||||
"dependencies": {
|
||||
"brace-expansion": "^1.1.7"
|
||||
},
|
||||
"engines": {
|
||||
"node": "*"
|
||||
}
|
||||
},
|
||||
"node_modules/throttle-debounce": {
|
||||
"version": "5.0.2",
|
||||
"resolved": "https://registry.npmjs.org/throttle-debounce/-/throttle-debounce-5.0.2.tgz",
|
||||
|
||||
@ -25,10 +25,13 @@
|
||||
},
|
||||
"overrides": {
|
||||
"glob": "13.0.0",
|
||||
"minimatch": "10.1.1",
|
||||
"minimatch": "10.2.5",
|
||||
"picomatch": "2.3.2",
|
||||
"handlebars": "4.7.9",
|
||||
"@tootallnate/once": "3.0.1",
|
||||
"@isaacs/brace-expansion": "5.0.0",
|
||||
"@babel/traverse": "7.28.5",
|
||||
"ws": "8.18.3",
|
||||
"braces": "3.0.3"
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -128,8 +128,8 @@ async def get_spend_info(session, entity_type: str, entity_id: str):
|
||||
|
||||
|
||||
async def get_proxy_readiness(session):
|
||||
"""Fetch /health/readiness. Used both as a fail-fast gate and as a diagnostic on poll timeout."""
|
||||
url = "http://0.0.0.0:4000/health/readiness"
|
||||
"""Fetch authenticated readiness details. Used both as a fail-fast gate and as a diagnostic on poll timeout."""
|
||||
url = "http://0.0.0.0:4000/health/readiness/details"
|
||||
headers = {"Authorization": "Bearer sk-1234"}
|
||||
async with session.get(url, headers=headers) as response:
|
||||
return response.status, await response.json()
|
||||
@ -140,7 +140,7 @@ async def assert_proxy_healthy(session):
|
||||
status, body = await get_proxy_readiness(session)
|
||||
if status != 200 or body.get("db") != "connected":
|
||||
pytest.fail(
|
||||
f"Proxy /health/readiness unhealthy (status={status}). "
|
||||
f"Proxy /health/readiness/details unhealthy (status={status}). "
|
||||
f"Cannot run spend accuracy test. Response: {body}"
|
||||
)
|
||||
print(f"Proxy readiness OK: {body}")
|
||||
|
||||
@ -73,13 +73,32 @@ async def test_health_readiness():
|
||||
response_json = await response.json()
|
||||
|
||||
print(response_json)
|
||||
assert "litellm_version" in response_json
|
||||
assert "status" in response_json
|
||||
|
||||
if status != 200:
|
||||
raise Exception(f"Request did not return a 200 status code: {status}")
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_health_readiness_details():
    """Check that the authenticated readiness-details endpoint exposes metadata.

    Sends a GET with the test bearer key to ``/health/readiness/details`` on
    the local proxy, asserts that the JSON body carries both ``status`` and
    ``litellm_version``, and finally raises if the HTTP status was not 200.
    """
    details_url = "http://0.0.0.0:4000/health/readiness/details"
    auth_headers = {"Authorization": "Bearer sk-1234"}

    async with aiohttp.ClientSession() as session:
        async with session.get(details_url, headers=auth_headers) as response:
            status = response.status
            response_json = await response.json()

            print(response_json)
            for required_key in ("status", "litellm_version"):
                assert required_key in response_json

            if status != 200:
                raise Exception(f"Request did not return a 200 status code: {status}")
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_health_liveliness():
|
||||
"""
|
||||
|
||||
125
tests/test_litellm/integrations/test_prompt_manager_ssti.py
Normal file
125
tests/test_litellm/integrations/test_prompt_manager_ssti.py
Normal file
@ -0,0 +1,125 @@
|
||||
"""SSTI regression coverage for non-dotprompt prompt managers.
|
||||
|
||||
DotpromptManager was hardened to render through
|
||||
``ImmutableSandboxedEnvironment``. The sibling managers (gitlab, arize,
|
||||
bitbucket) ship the exact same attacker-controlled-template surface —
|
||||
repository write access or workspace edit access turns into RCE on the
|
||||
proxy host if the renderer is unsandboxed. This suite locks in the sandbox
|
||||
so the regression can't recur.
|
||||
"""
|
||||
|
||||
from unittest.mock import MagicMock
|
||||
|
||||
import pytest
|
||||
from jinja2.exceptions import SecurityError
|
||||
from jinja2.sandbox import ImmutableSandboxedEnvironment
|
||||
|
||||
from litellm.integrations.arize.arize_phoenix_prompt_manager import (
|
||||
ArizePhoenixTemplateManager,
|
||||
)
|
||||
from litellm.integrations.bitbucket.bitbucket_prompt_manager import (
|
||||
BitBucketTemplateManager,
|
||||
)
|
||||
from litellm.integrations.gitlab.gitlab_prompt_manager import GitLabTemplateManager
|
||||
|
||||
# Classic Jinja2 SSTI payloads, each built on dunder attribute traversal.
# If any of them renders to something other than the literal string (or
# fails to raise), the sandbox is not engaged.
_SSTI_PAYLOADS = [
    "{{ ''.__class__.__mro__[1].__subclasses__() }}",
    "{{ config.__class__.__init__.__globals__['os'].popen('id').read() }}",
    "{{ cycler.__init__.__globals__.os.popen('id').read() }}",
    "{{ ().__class__.__bases__[0].__subclasses__() }}",
]
|
||||
|
||||
|
||||
def _build_gitlab_manager() -> GitLabTemplateManager:
    """Return a ``GitLabTemplateManager`` backed by a mock GitLab client.

    ``prompt_id`` is ``None`` on purpose: per the original note, the
    constructor calls into the GitLab client when a prompt id is set, and
    with ``None`` it stops at ``jinja_env`` construction, which is what the
    tests assert on.
    """
    config = {"project": "p", "access_token": "t", "branch": "main"}
    fake_client = MagicMock()
    return GitLabTemplateManager(
        gitlab_config=config,
        prompt_id=None,
        gitlab_client=fake_client,
    )
|
||||
|
||||
|
||||
def _build_bitbucket_manager(monkeypatch) -> BitBucketTemplateManager:
    """Return a ``BitBucketTemplateManager`` whose client class is stubbed out.

    ``BitBucketClient`` in the prompt-manager module is replaced with a
    factory returning a ``MagicMock``, so no network or real config is needed.
    """
    from litellm.integrations.bitbucket import bitbucket_prompt_manager

    def _fake_client(*a, **kw):
        return MagicMock()

    monkeypatch.setattr(bitbucket_prompt_manager, "BitBucketClient", _fake_client)

    config = {"workspace": "w", "repository": "r", "access_token": "t"}
    return BitBucketTemplateManager(bitbucket_config=config, prompt_id=None)
|
||||
|
||||
|
||||
def _build_arize_manager(monkeypatch) -> ArizePhoenixTemplateManager:
    """Return an ``ArizePhoenixTemplateManager`` whose client class is stubbed out.

    ``ArizePhoenixClient`` in the prompt-manager module is replaced with a
    factory returning a ``MagicMock`` before the manager is constructed.
    """
    from litellm.integrations.arize import arize_phoenix_prompt_manager

    def _fake_client(*a, **kw):
        return MagicMock()

    monkeypatch.setattr(
        arize_phoenix_prompt_manager, "ArizePhoenixClient", _fake_client
    )

    return ArizePhoenixTemplateManager(
        api_key="k",
        api_base="https://example.test",
        prompt_id=None,
    )
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "manager_factory",
    [
        ("gitlab", lambda mp: _build_gitlab_manager()),
        ("bitbucket", _build_bitbucket_manager),
        ("arize", _build_arize_manager),
    ],
    ids=lambda value: value[0] if isinstance(value, tuple) else value,
)
def test_jinja_env_is_sandboxed(manager_factory, monkeypatch):
    """Each manager's ``jinja_env`` must be an ``ImmutableSandboxedEnvironment``.

    ``manager_factory`` is a ``(label, builder)`` pair; the builder receives
    the ``monkeypatch`` fixture and returns the manager under test.
    """
    _label, build_manager = manager_factory
    jinja_env = build_manager(monkeypatch).jinja_env
    assert isinstance(jinja_env, ImmutableSandboxedEnvironment)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "manager_factory",
    [
        ("gitlab", lambda mp: _build_gitlab_manager()),
        ("bitbucket", _build_bitbucket_manager),
        ("arize", _build_arize_manager),
    ],
    ids=lambda value: value[0] if isinstance(value, tuple) else value,
)
@pytest.mark.parametrize("payload", _SSTI_PAYLOADS)
def test_jinja_env_blocks_ssti_payloads(manager_factory, payload, monkeypatch):
    """Rendering an attribute-traversal payload must raise ``SecurityError``.

    The payload is compiled through the manager's own ``jinja_env`` so the
    test exercises the environment the manager would really render with.
    """
    _label, build_manager = manager_factory
    jinja_env = build_manager(monkeypatch).jinja_env
    compiled = jinja_env.from_string(payload)
    with pytest.raises(SecurityError):
        compiled.render()
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "manager_factory",
    [
        ("gitlab", lambda mp: _build_gitlab_manager()),
        ("bitbucket", _build_bitbucket_manager),
        ("arize", _build_arize_manager),
    ],
    ids=lambda value: value[0] if isinstance(value, tuple) else value,
)
def test_jinja_env_still_renders_normal_variables(manager_factory, monkeypatch):
    """Plain ``{{ var }}`` substitution keeps working through the manager's env."""
    _label, build_manager = manager_factory
    jinja_env = build_manager(monkeypatch).jinja_env
    rendered = jinja_env.from_string("Hello {{ name }}!").render(name="world")
    assert rendered == "Hello world!"
|
||||
@ -0,0 +1,37 @@
|
||||
from tokenizers import AddedToken, Tokenizer
|
||||
from tokenizers.models import WordLevel
|
||||
from tokenizers.pre_tokenizers import Whitespace
|
||||
from tokenizers.processors import TemplateProcessing
|
||||
|
||||
from litellm import decode, encode
|
||||
|
||||
|
||||
def _create_custom_tokenizer():
    """Build a tiny word-level HuggingFace tokenizer that prepends ``[BOS]``.

    Returns:
        A dict with ``type`` set to ``"huggingface_tokenizer"`` and the
        configured ``Tokenizer`` under ``tokenizer``.
    """
    vocab = {"[UNK]": 0, "Hello": 1, "World": 2}
    tokenizer = Tokenizer(WordLevel(vocab, unk_token="[UNK]"))
    tokenizer.pre_tokenizer = Whitespace()

    # Register [BOS] as a special token, then look up the id it was given.
    bos_token = AddedToken("[BOS]", special=True)
    tokenizer.add_special_tokens([bos_token])
    bos_token_id = tokenizer.token_to_id("[BOS]")
    assert bos_token_id is not None

    # Post-processing template: every single-sequence encoding starts with [BOS].
    tokenizer.post_processor = TemplateProcessing(
        single="[BOS] $A",
        special_tokens=[("[BOS]", bos_token_id)],
    )
    return {"type": "huggingface_tokenizer", "tokenizer": tokenizer}
|
||||
|
||||
|
||||
def test_decode_can_preserve_huggingface_special_tokens():
    """``decode`` drops special tokens by default and keeps them on request.

    The same token ids are decoded twice: once with default arguments and
    once with ``skip_special_tokens=False``; only the latter may contain
    the ``[BOS]`` prefix.
    """
    custom_tokenizer = _create_custom_tokenizer()
    sample_text = "Hello World"
    token_ids = encode(text=sample_text, custom_tokenizer=custom_tokenizer)

    plain = decode(tokens=token_ids, custom_tokenizer=custom_tokenizer)
    with_specials = decode(
        tokens=token_ids,
        custom_tokenizer=custom_tokenizer,
        skip_special_tokens=False,
    )

    assert plain == sample_text
    assert with_specials == "[BOS] Hello World"
|
||||
@ -1493,6 +1493,7 @@ def test_observability_ban_covers_canonical_supported_callback_params():
|
||||
safe is an explicit decision recorded in
|
||||
``_SAFE_CLIENT_CALLBACK_PARAMS``."""
|
||||
from litellm.litellm_core_utils.initialize_dynamic_callback_params import (
|
||||
_request_blocked_callback_params,
|
||||
_supported_callback_params,
|
||||
)
|
||||
from litellm.proxy.auth.auth_utils import (
|
||||
@ -1508,3 +1509,8 @@ def test_observability_ban_covers_canonical_supported_callback_params():
|
||||
f"informational per-request field; otherwise the derivation will "
|
||||
f"ban it automatically."
|
||||
)
|
||||
for param in _request_blocked_callback_params:
|
||||
assert param in banned, (
|
||||
f"{param} is in _request_blocked_callback_params but is not banned "
|
||||
"at the proxy request-body boundary."
|
||||
)
|
||||
|
||||
@ -10,9 +10,11 @@ sys.path.insert(
|
||||
|
||||
import pytest
|
||||
|
||||
import litellm
|
||||
import litellm.proxy.proxy_server
|
||||
from litellm.caching.dual_cache import DualCache
|
||||
from litellm.proxy._types import (
|
||||
LiteLLMRoutes,
|
||||
LiteLLM_JWTAuth,
|
||||
LiteLLM_BudgetTable,
|
||||
LiteLLM_EndUserTable,
|
||||
@ -27,6 +29,7 @@ from litellm.proxy.auth.handle_jwt import JWTHandler
|
||||
from litellm.proxy.auth.auth_checks import get_key_object, _cache_key_object
|
||||
from litellm.proxy.auth.route_checks import RouteChecks
|
||||
from litellm.proxy.auth.user_api_key_auth import (
|
||||
_route_requires_auth_despite_public,
|
||||
_reserve_budget_after_common_checks,
|
||||
_run_centralized_common_checks,
|
||||
_run_post_custom_auth_checks,
|
||||
@ -59,6 +62,29 @@ def test_get_api_key():
|
||||
) == (api_key, passed_in_key)
|
||||
|
||||
|
||||
def test_route_requires_auth_despite_public_for_metrics(monkeypatch):
    """``/metrics`` requires auth only while the litellm flag is enabled.

    With ``litellm.require_auth_for_metrics_endpoint`` set to ``True`` both
    ``/metrics`` and ``/metrics/`` must report ``True``; once it is set to
    ``False``, ``/metrics`` must report ``False``.
    """
    flag_name = "require_auth_for_metrics_endpoint"

    monkeypatch.setattr(litellm, flag_name, True)
    for metrics_route in ("/metrics", "/metrics/"):
        assert _route_requires_auth_despite_public(metrics_route, {}) is True

    monkeypatch.setattr(litellm, flag_name, False)
    assert _route_requires_auth_despite_public("/metrics", {}) is False
|
||||
|
||||
|
||||
def test_public_ai_hub_routes_remain_public():
    """Each hub route is listed as public and is not forced behind auth."""
    hub_routes = (
        "/public/model_hub",
        "/public/model_hub/info",
        "/public/agent_hub",
        "/public/mcp_hub",
        "/public/skill_hub",
    )
    for hub_route in hub_routes:
        assert hub_route in LiteLLMRoutes.public_routes.value
        assert _route_requires_auth_despite_public(hub_route, {}) is False
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_should_clear_stale_budget_reservation_when_budget_checks_skip():
|
||||
user_api_key_auth_obj = UserAPIKeyAuth(
|
||||
@ -2352,18 +2378,18 @@ async def test_centralized_common_checks_short_circuits_when_master_key_unset():
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_centralized_common_checks_skips_public_routes():
|
||||
"""Regression: public routes (e.g. /health/readiness) are exempted
|
||||
"""Regression: public routes (e.g. /health/liveness) are exempted
|
||||
by the builder fast-path. The wrapper must not retroactively run
|
||||
common_checks on top — the synthetic INTERNAL_USER_VIEW_ONLY token
|
||||
has no user_id, so common_checks would reject the request as
|
||||
admin-only. Breaks k8s readiness probes when master_key is set."""
|
||||
admin-only."""
|
||||
import litellm.proxy.proxy_server as _proxy_server_mod
|
||||
from fastapi import Request
|
||||
from starlette.datastructures import URL
|
||||
|
||||
token = UserAPIKeyAuth(user_role=LitellmUserRoles.INTERNAL_USER_VIEW_ONLY)
|
||||
request = Request(scope={"type": "http"})
|
||||
request._url = URL(url="/health/readiness")
|
||||
request._url = URL(url="/health/liveness")
|
||||
|
||||
attrs = _proxy_attrs_for_centralized_checks(user_custom_auth=None)
|
||||
originals = {a: getattr(_proxy_server_mod, a, None) for a in attrs}
|
||||
@ -2378,7 +2404,7 @@ async def test_centralized_common_checks_skips_public_routes():
|
||||
user_api_key_auth_obj=token,
|
||||
request=request,
|
||||
request_data={},
|
||||
route="/health/readiness",
|
||||
route="/health/liveness",
|
||||
)
|
||||
mock_checks.assert_not_awaited()
|
||||
finally:
|
||||
|
||||
@ -232,6 +232,52 @@ async def test_azure_text_moderation_guardrail_post_call_success_hook():
|
||||
assert mock_async_make_request.call_args.kwargs["text"] == "Hello world"
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_azure_text_moderation_guardrail_post_call_checks_all_choices():
    """The post-call hook submits every response choice for moderation.

    The mocked ``async_make_request`` returns a clean analysis for the first
    call and raises ``HTTPException`` on the second. The hook must therefore
    propagate the exception and must have been called once per choice, in
    choice order.
    """
    guardrail = AzureContentSafetyTextModerationGuardrail(
        guardrail_name="azure_text_moderation",
        api_key="azure_text_moderation_api_key",
        api_base="azure_text_moderation_api_base",
    )
    clean_analysis = {
        "blocklistsMatch": [],
        "categoriesAnalysis": [{"category": "Hate", "severity": 0}],
    }

    with patch.object(guardrail, "async_make_request") as mock_async_make_request:
        mock_async_make_request.side_effect = [
            clean_analysis,
            HTTPException(
                status_code=400,
                detail={"error": "blocked second choice"},
            ),
        ]

        with pytest.raises(HTTPException):
            await guardrail.async_post_call_success_hook(
                data={},
                user_api_key_dict=UserAPIKeyAuth(
                    api_key="azure_text_moderation_api_key"
                ),
                response=ModelResponse(
                    choices=[
                        Choices(index=0, message=Message(content="safe response")),
                        Choices(index=1, message=Message(content="unsafe response")),
                    ]
                ),
            )

        # One moderation request per choice, in the order the choices appear.
        moderated_texts = [
            request.kwargs["text"]
            for request in mock_async_make_request.call_args_list
        ]
        assert moderated_texts == ["safe response", "unsafe response"]
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_azure_text_moderation_guardrail_post_call_streaming_hook():
|
||||
|
||||
|
||||
@ -453,6 +453,71 @@ class TestContentFilterGuardrail:
|
||||
assert "[EMAIL_REDACTED]" in full_content
|
||||
assert "Contact me at [EMAIL_REDACTED] for info" in full_content
|
||||
|
||||
@pytest.mark.asyncio
async def test_streaming_hook_mask_checks_all_choices(self):
    """Streaming email masking is applied to every choice, not only the first.

    Two choices each stream an email address split across two chunks. After
    passing through the guardrail's streaming iterator hook, the text
    reassembled per choice index must carry ``[EMAIL_REDACTED]`` in place
    of the address.
    """
    from litellm.types.utils import Delta, ModelResponseStream, StreamingChoices

    email_mask = ContentFilterPattern(
        pattern_type="prebuilt",
        pattern_name="email",
        action=ContentFilterAction.MASK,
    )
    guardrail = ContentFilterGuardrail(
        guardrail_name="test-streaming-mask-all-choices",
        patterns=[email_mask],
        event_hook=GuardrailEventHooks.during_call,
    )

    # (chunk id, text per choice in index order, extra StreamingChoices kwargs)
    chunk_specs = [
        ("chunk1", ["Contact first@ex", "Email second@ex"], {}),
        (
            "chunk2",
            ["ample.com for help", "ample.com for support"],
            {"finish_reason": "stop"},
        ),
    ]

    async def mock_stream():
        for chunk_id, texts, extra_kwargs in chunk_specs:
            yield ModelResponseStream(
                id=chunk_id,
                choices=[
                    StreamingChoices(
                        delta=Delta(content=text),
                        index=position,
                        **extra_kwargs,
                    )
                    for position, text in enumerate(texts)
                ],
                model="gpt-4",
            )

    content_by_choice = {0: "", 1: ""}
    hooked_stream = guardrail.async_post_call_streaming_iterator_hook(
        user_api_key_dict=MagicMock(),
        response=mock_stream(),
        request_data={},
    )
    async for chunk in hooked_stream:
        for choice in chunk.choices:
            if choice.delta.content:
                content_by_choice[choice.index] += choice.delta.content

    assert "first@example.com" not in content_by_choice[0]
    assert "second@example.com" not in content_by_choice[1]
    assert content_by_choice[0] == "Contact [EMAIL_REDACTED] for help"
    assert content_by_choice[1] == "Email [EMAIL_REDACTED] for support"
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_streaming_hook_block(self):
|
||||
"""
|
||||
|
||||
@ -6,7 +6,6 @@ branch coverage. Network calls are always mocked; the companion live
|
||||
suite lives in ``test_xecguard_live.py``.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import os
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
@ -1196,6 +1195,26 @@ class TestXecGuardMessageAssembly:
|
||||
is None
|
||||
)
|
||||
|
||||
def test_extract_assistant_text_combines_all_choices(self, xecguard_guardrail):
    """Text from every choice is extracted and joined with newlines.

    The first choice has string content; the second has list content with
    two text parts. All three fragments must appear, newline-separated.
    """
    response = {
        "choices": [
            {"message": {"content": "first response"}},
            {
                "message": {
                    "content": [
                        {"type": "text", "text": "second"},
                        {"type": "text", "text": "response"},
                    ]
                }
            },
        ]
    }
    extracted = xecguard_guardrail._extract_assistant_text_from_response(response)
    assert extracted == "first response\nsecond\nresponse"
|
||||
|
||||
def test_synthesize_user_inputs_not_dict(self, xecguard_guardrail):
|
||||
assert xecguard_guardrail._synthesize_user_from_inputs("not-dict") is None
|
||||
|
||||
|
||||
303
tests/test_litellm/proxy/guardrails/test_content_utils.py
Normal file
303
tests/test_litellm/proxy/guardrails/test_content_utils.py
Normal file
@ -0,0 +1,303 @@
|
||||
"""Tests for the shared guardrail content extraction helpers."""
|
||||
|
||||
from litellm.proxy.guardrails._content_utils import (
|
||||
apply_redacted_messages_back,
|
||||
build_inspection_messages,
|
||||
has_non_string_content,
|
||||
iter_message_text,
|
||||
walk_user_text,
|
||||
)
|
||||
|
||||
|
||||
# ── iter_message_text ────────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
def test_iter_message_text_string_messages():
    """String ``content`` values are yielded in message order."""
    conversation = [
        {"role": "user", "content": "hello"},
        {"role": "assistant", "content": "hi"},
    ]
    extracted = list(iter_message_text({"messages": conversation}))
    assert extracted == ["hello", "hi"]
|
||||
|
||||
|
||||
def test_iter_message_text_multimodal_list_content():
    """VERIA-11: text parts of list-format content are yielded, not skipped.

    The ``image_url`` part between the two text parts contributes nothing.
    """
    content_parts = [
        {"type": "text", "text": "AWS_KEY=AKIA..."},
        {"type": "image_url", "image_url": {"url": "..."}},
        {"type": "text", "text": "more secrets"},
    ]
    data = {"messages": [{"role": "user", "content": content_parts}]}
    extracted = list(iter_message_text(data))
    assert extracted == ["AWS_KEY=AKIA...", "more secrets"]
|
||||
|
||||
|
||||
def test_iter_message_text_responses_api_string_input():
    """fniVO9-F: a string ``input`` is yielded when ``messages`` is absent."""
    extracted = list(iter_message_text({"input": "tell me a secret"}))
    assert extracted == ["tell me a secret"]
|
||||
|
||||
|
||||
def test_iter_message_text_responses_api_list_input_messages():
    """Message-shaped dicts inside a list ``input`` yield their content in order."""
    input_items = [
        {"role": "user", "content": "first"},
        {"role": "user", "content": "second"},
    ]
    extracted = list(iter_message_text({"input": input_items}))
    assert extracted == ["first", "second"]
|
||||
|
||||
|
||||
def test_iter_message_text_responses_api_list_input_content_parts():
    """Text content parts inside a list ``input`` are yielded; image parts are not."""
    input_items = [
        {"type": "text", "text": "alpha"},
        {"type": "image_url", "image_url": {"url": "..."}},
        {"type": "text", "text": "beta"},
    ]
    extracted = list(iter_message_text({"input": input_items}))
    assert extracted == ["alpha", "beta"]
|
||||
|
||||
|
||||
def test_iter_message_text_responses_api_list_input_mixed_dicts_and_strings():
    """Greptile P2: a mixed list ``input`` yields dict text parts and bare strings.

    Every text fragment must come out in list order; the image part is the
    only item that contributes nothing.
    """
    input_items = [
        {"type": "text", "text": "from-dict"},
        "from-bare-string",
        {"type": "image_url", "image_url": {"url": "..."}},
        "another-bare-string",
    ]
    expected = ["from-dict", "from-bare-string", "another-bare-string"]
    assert list(iter_message_text({"input": input_items})) == expected
|
||||
|
||||
|
||||
def test_iter_message_text_walks_messages_and_input_independently():
    """With both fields present, text from ``messages`` and ``input`` is yielded.

    This is stricter than "first one wins": the ``messages`` fragment comes
    first, followed by the ``input`` fragment.
    """
    data = {
        "messages": [{"role": "user", "content": "msg-content"}],
        "input": "input-content",
    }
    extracted = list(iter_message_text(data))
    assert extracted == ["msg-content", "input-content"]
|
||||
|
||||
|
||||
def test_iter_message_text_empty_data():
    """Empty dict, empty ``messages`` and empty-string ``input`` yield nothing."""
    for empty_payload in ({}, {"messages": []}, {"input": ""}):
        assert list(iter_message_text(empty_payload)) == []
|
||||
|
||||
|
||||
# ── walk_user_text ────────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
def test_walk_user_text_redacts_string_messages_in_place():
    """String message contents are rewritten in place; the visit count is returned."""

    def redact(text):
        return text.replace("AKIAEXAMPLE", "[REDACTED]")

    data = {
        "messages": [
            {"role": "user", "content": "leak: AKIAEXAMPLE"},
            {"role": "assistant", "content": "ok"},
        ]
    }
    visited = walk_user_text(data, redact)

    assert visited == 2
    messages = data["messages"]
    assert messages[0]["content"] == "leak: [REDACTED]"
    assert messages[1]["content"] == "ok"
|
||||
|
||||
|
||||
def test_walk_user_text_redacts_multimodal_text_parts():
    """VERIA-11: text parts of list content are redacted in place.

    Only the two text parts count as visited; the ``image_url`` part must
    come back unchanged.
    """

    def redact(text):
        return text.replace("AKIAEXAMPLE", "[REDACTED]")

    image_part = {"type": "image_url", "image_url": {"url": "..."}}
    data = {
        "messages": [
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": "AKIAEXAMPLE here"},
                    {"type": "image_url", "image_url": {"url": "..."}},
                    {"type": "text", "text": "no secret"},
                ],
            }
        ]
    }
    visited = walk_user_text(data, redact)

    assert visited == 2
    parts = data["messages"][0]["content"]
    assert parts[0] == {"type": "text", "text": "[REDACTED] here"}
    # Non-text part must be left untouched.
    assert parts[1] == image_part
    assert parts[2] == {"type": "text", "text": "no secret"}
|
||||
|
||||
|
||||
def test_walk_user_text_redacts_responses_api_string_input():
    """A string ``input`` is replaced by the transformed text and counted once."""

    def redact(text):
        return text.replace("AKIAEXAMPLE", "[REDACTED]")

    data = {"input": "leak AKIAEXAMPLE"}
    visited = walk_user_text(data, redact)

    assert visited == 1
    assert data["input"] == "leak [REDACTED]"
|
||||
|
||||
|
||||
def test_walk_user_text_redacts_responses_api_list_input():
    """Text parts in a list ``input`` are rewritten; image parts are left alone."""

    def wrap(text):
        return f"[redacted]{text}[/]"

    data = {
        "input": [
            {"type": "text", "text": "AKIAEXAMPLE"},
            {"type": "image_url", "image_url": {"url": "..."}},
        ]
    }
    visited = walk_user_text(data, wrap)

    assert visited == 1
    text_part, image_part = data["input"][0], data["input"][1]
    assert text_part == {"type": "text", "text": "[redacted]AKIAEXAMPLE[/]"}
    assert image_part == {"type": "image_url", "image_url": {"url": "..."}}
|
||||
|
||||
|
||||
def test_walk_user_text_redacts_mixed_list_input():
    """Bare strings and text-part dicts in a mixed ``input`` list are both rewritten.

    This is the write-side counterpart of the mixed-list read test: two
    items are visited and the image part is unchanged.
    """

    def bracket(text):
        return f"<{text}>"

    data = {
        "input": [
            {"type": "text", "text": "secret-one"},
            "secret-two",
            {"type": "image_url", "image_url": {"url": "..."}},
        ]
    }
    visited = walk_user_text(data, bracket)

    assert visited == 2
    items = data["input"]
    assert items[0] == {"type": "text", "text": "<secret-one>"}
    assert items[1] == "<secret-two>"
    assert items[2] == {"type": "image_url", "image_url": {"url": "..."}}
|
||||
|
||||
|
||||
# ── build_inspection_messages ─────────────────────────────────────────────────
|
||||
|
||||
|
||||
def test_build_inspection_messages_chat_completion_passthrough():
    """String-content chat messages come back as equal role/content dicts."""
    data = {
        "messages": [
            {"role": "system", "content": "be helpful"},
            {"role": "user", "content": "hi"},
        ]
    }
    expected = [
        {"role": "system", "content": "be helpful"},
        {"role": "user", "content": "hi"},
    ]
    assert build_inspection_messages(data) == expected
|
||||
|
||||
|
||||
def test_build_inspection_messages_joins_multimodal_text_parts():
    """Text parts of list content are joined with a newline into one string."""
    content_parts = [
        {"type": "text", "text": "first part"},
        {"type": "image_url", "image_url": {"url": "..."}},
        {"type": "text", "text": "second part"},
    ]
    data = {"messages": [{"role": "user", "content": content_parts}]}
    inspected = build_inspection_messages(data)
    assert inspected == [{"role": "user", "content": "first part\nsecond part"}]
|
||||
|
||||
|
||||
def test_build_inspection_messages_lifts_responses_api_input():
    """fniVO9-F: a string ``input`` is surfaced as a single ``user`` message."""
    inspected = build_inspection_messages({"input": "responses-api content"})
    assert inspected == [{"role": "user", "content": "responses-api content"}]
|
||||
|
||||
|
||||
def test_build_inspection_messages_drops_messages_with_no_text():
    """Messages with empty or image-only content are omitted from the result."""
    empty_message = {"role": "user", "content": ""}
    image_only_message = {
        "role": "user",
        "content": [{"type": "image_url", "image_url": {"url": "..."}}],
    }
    text_message = {"role": "user", "content": "kept"}

    data = {"messages": [empty_message, image_only_message, text_message]}
    inspected = build_inspection_messages(data)
    assert inspected == [{"role": "user", "content": "kept"}]
|
||||
|
||||
|
||||
def test_build_inspection_messages_empty_data():
    """Empty dict, empty ``messages`` and empty-string ``input`` give an empty list."""
    for empty_payload in ({}, {"messages": []}, {"input": ""}):
        assert build_inspection_messages(empty_payload) == []
|
||||
|
||||
|
||||
# ── has_non_string_content ────────────────────────────────────────────────────
|
||||
|
||||
|
||||
def test_has_non_string_content_string_messages():
    """A message whose content is a plain string is reported as ``False``."""
    string_only = {"messages": [{"role": "user", "content": "hello"}]}
    result = has_non_string_content(string_only)
    assert result is False
|
||||
|
||||
|
||||
def test_has_non_string_content_multimodal_messages():
    """A message whose content is a list of parts is reported as ``True``."""
    text_part = {"type": "text", "text": "hi"}
    multimodal = {"messages": [{"role": "user", "content": [text_part]}]}
    result = has_non_string_content(multimodal)
    assert result is True
|
||||
|
||||
|
||||
def test_has_non_string_content_responses_api_string_input():
    """A plain-string ``input`` is reported as ``False``."""
    result = has_non_string_content({"input": "plain string"})
    assert result is False
|
||||
|
||||
|
||||
def test_has_non_string_content_responses_api_list_input():
    """A list ``input`` is reported as ``True`` even when its items are strings."""
    result = has_non_string_content({"input": ["a", "b"]})
    assert result is True
|
||||
|
||||
|
||||
def test_has_non_string_content_empty_data():
    """Empty dict, empty ``messages`` and empty-string ``input`` are all ``False``."""
    for empty_payload in ({}, {"messages": []}, {"input": ""}):
        assert has_non_string_content(empty_payload) is False
|
||||
|
||||
|
||||
# ── apply_redacted_messages_back ──────────────────────────────────────────────
|
||||
|
||||
|
||||
def test_apply_redacted_messages_back_chat_completion():
    """For a messages-only payload, ``messages`` is replaced and no ``input`` key appears."""
    payload = {"messages": [{"role": "user", "content": "secret"}]}
    redacted = [{"role": "user", "content": "[REDACTED]"}]

    apply_redacted_messages_back(payload, redacted)

    assert payload["messages"] == [{"role": "user", "content": "[REDACTED]"}]
    assert "input" not in payload
|
||||
|
||||
|
||||
def test_apply_redacted_messages_back_responses_api_string_input():
    """The redacted text must land in ``data["input"]`` for a Responses-API request.

    Such a request reads ``data["input"]``, so updating only ``messages``
    would let the unredacted text reach the LLM.
    """
    payload = {"input": "secret payload"}
    redacted = [{"role": "user", "content": "[REDACTED]"}]

    apply_redacted_messages_back(payload, redacted)

    assert payload["input"] == "[REDACTED]"
|
||||
|
||||
|
||||
def test_apply_redacted_messages_back_both_fields():
    """Defensive case: with both ``messages`` and ``input`` present, both are rewritten."""
    payload = {"messages": [{"role": "user", "content": "old"}], "input": "old"}
    redacted = [{"role": "user", "content": "[REDACTED]"}]

    apply_redacted_messages_back(payload, redacted)

    assert payload["messages"] == [{"role": "user", "content": "[REDACTED]"}]
    assert payload["input"] == "[REDACTED]"
|
||||
|
||||
|
||||
def test_apply_redacted_messages_back_skips_input_when_not_string():
    """A list-valued ``input`` (multimodal Responses-API) must stay untouched.

    The multimodal-degrades-to-block guard is expected to run upstream.
    """
    payload = {"input": [{"type": "text", "text": "leak"}]}
    redacted = [{"role": "user", "content": "[REDACTED]"}]

    apply_redacted_messages_back(payload, redacted)

    assert payload["input"] == [{"type": "text", "text": "leak"}]
|
||||
811
tests/test_litellm/proxy/guardrails/test_guardrail_coverage.py
Normal file
811
tests/test_litellm/proxy/guardrails/test_guardrail_coverage.py
Normal file
@ -0,0 +1,811 @@
|
||||
"""
|
||||
Regression tests for guardrail-coverage gaps.
|
||||
|
||||
Each test confirms that a previously-bypassable input shape now triggers
|
||||
inspection by the relevant guardrail hook:
|
||||
|
||||
- VERIA-11: multimodal list-format ``content`` is inspected (no longer
|
||||
silently skipped because of an ``isinstance(content, str)`` check).
|
||||
- fniVO9-F: Responses-API ``data["input"]`` is inspected (no longer
|
||||
silently skipped because the hook only looked at ``data["messages"]``).
|
||||
- yVS0wMDO: Aim's post-call hook inspects every choice when ``n>1``,
|
||||
not just ``choices[0]``.
|
||||
"""
|
||||
|
||||
from typing import Any, Dict
|
||||
from unittest.mock import AsyncMock, MagicMock, patch
|
||||
|
||||
import pytest
|
||||
from httpx import Request, Response
|
||||
|
||||
from litellm import DualCache
|
||||
from litellm.proxy._types import UserAPIKeyAuth
|
||||
from litellm.types.utils import Choices, Message, ModelResponse
|
||||
|
||||
|
||||
@pytest.fixture
def user_api_key():
    """Provide a minimal ``UserAPIKeyAuth`` (hashed key, user ``u``, no alias)."""
    auth = UserAPIKeyAuth(api_key="hashed", user_id="u", key_alias=None)
    return auth
|
||||
|
||||
|
||||
# ── Aim ───────────────────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
def _aim_no_action_response() -> Response:
    """Build a 200 response for the Aim analyze URL whose body requires no action."""
    analyze_request = Request("POST", "https://api.aim.security/fw/v1/analyze")
    body = {"required_action": None}
    return Response(status_code=200, json=body, request=analyze_request)
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_aim_inspects_multimodal_list_content(user_api_key, monkeypatch):
    """The text part of a list-format message must appear in the payload posted to Aim."""
    monkeypatch.setenv("AIM_API_KEY", "hs-aim-key")
    from litellm.proxy.guardrails.guardrail_hooks.aim.aim import AimGuardrail

    aim = AimGuardrail()
    posted: Dict[str, Any] = {}

    async def record_post(url, headers, json):
        # Keep whatever JSON body the hook sends so it can be asserted on.
        posted.update(json)
        return _aim_no_action_response()

    multimodal_message = {
        "role": "user",
        "content": [
            {"type": "text", "text": "secret payload"},
            {"type": "image_url", "image_url": {"url": "..."}},
        ],
    }

    with patch.object(aim.async_handler, "post", side_effect=record_post):
        await aim.async_pre_call_hook(
            user_api_key_dict=user_api_key,
            cache=DualCache(),
            data={"messages": [multimodal_message]},
            call_type="acompletion",
        )

    # The multimodal text part must be visible to Aim.
    assert posted["messages"] == [{"role": "user", "content": "secret payload"}]
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_aim_inspects_responses_api_input(user_api_key, monkeypatch):
    """A string ``data["input"]`` must be posted to Aim as a single user message."""
    monkeypatch.setenv("AIM_API_KEY", "hs-aim-key")
    from litellm.proxy.guardrails.guardrail_hooks.aim.aim import AimGuardrail

    aim = AimGuardrail()
    posted: Dict[str, Any] = {}

    async def record_post(url, headers, json):
        # Keep whatever JSON body the hook sends so it can be asserted on.
        posted.update(json)
        return _aim_no_action_response()

    with patch.object(aim.async_handler, "post", side_effect=record_post):
        await aim.async_pre_call_hook(
            user_api_key_dict=user_api_key,
            cache=DualCache(),
            data={"input": "responses-api content"},
            call_type="acompletion",
        )

    expected_messages = [{"role": "user", "content": "responses-api content"}]
    assert posted["messages"] == expected_messages
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_aim_post_call_inspects_all_choices(user_api_key, monkeypatch):
    """yVS0wMDO: with ``n>1`` every choice is inspected, not only ``choices[0]``."""
    monkeypatch.setenv("AIM_API_KEY", "hs-aim-key")
    from litellm.proxy.guardrails.guardrail_hooks.aim.aim import AimGuardrail

    aim = AimGuardrail()
    outputs_seen = []

    async def record_output(request_data, output, hook, key_alias):
        outputs_seen.append(output)
        return {"redacted_output": output}

    contents = ("first", "second", "third")
    multi_choice_response = ModelResponse(
        choices=[
            Choices(index=position, message=Message(role="assistant", content=text))
            for position, text in enumerate(contents)
        ]
    )

    with patch.object(aim, "call_aim_guardrail_on_output", side_effect=record_output):
        await aim.async_post_call_success_hook(
            data={"messages": [{"role": "user", "content": "hi"}]},
            user_api_key_dict=user_api_key,
            response=multi_choice_response,
        )

    # ``asyncio.gather`` is used for parallelism, so order of inspection is
    # not guaranteed.
    assert sorted(outputs_seen) == ["first", "second", "third"]
|
||||
|
||||
|
||||
# ── Lakera v2 ─────────────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_lakera_v2_inspects_responses_api_input(user_api_key, monkeypatch):
    """A string ``data["input"]`` reaches ``call_v2_guard`` as one user message."""
    monkeypatch.setenv("LAKERA_API_KEY", "lk-test")
    from litellm.proxy.guardrails.guardrail_hooks.lakera_ai_v2 import (
        LakeraAIGuardrail,
    )

    lakera = LakeraAIGuardrail(api_key="lk-test", on_flagged="monitor")
    inspected_batches = []

    async def record_guard_call(messages, request_data, event_type):
        inspected_batches.append(messages)
        return {"flagged": False}, {}

    with patch.object(lakera, "call_v2_guard", side_effect=record_guard_call):
        await lakera.async_pre_call_hook(
            user_api_key_dict=user_api_key,
            cache=DualCache(),
            data={"input": "responses-api content"},
            call_type="responses",
        )

    expected_batch = [{"role": "user", "content": "responses-api content"}]
    assert inspected_batches == [expected_batch]
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_lakera_v2_responses_api_input_redacted_writeback(
    user_api_key, monkeypatch
):
    """Greptile P1: masked content must be written back to ``data["input"]``.

    When a Responses-API request arrives with a string ``input`` and Lakera
    flags PII, the Responses-API backend still reads from ``input``; writing
    the redaction only to ``messages`` would let unredacted PII reach the LLM.
    """
    monkeypatch.setenv("LAKERA_API_KEY", "lk-test")
    from litellm.proxy.guardrails.guardrail_hooks.lakera_ai_v2 import (
        LakeraAIGuardrail,
    )

    lakera = LakeraAIGuardrail(api_key="lk-test", on_flagged="block")

    async def flagged_guard_call(messages, request_data, event_type):
        return ({"flagged": True, "payload": []}, {"EMAIL": 1})

    def masked_messages(messages, lakera_response, masked_entity_count):
        return [{"role": "user", "content": "[REDACTED EMAIL]"}]

    request_body = {"input": "user@example.com leaked"}

    with (
        patch.object(lakera, "call_v2_guard", side_effect=flagged_guard_call),
        patch.object(lakera, "_is_only_pii_violation", return_value=True),
        patch.object(lakera, "_mask_pii_in_messages", side_effect=masked_messages),
    ):
        await lakera.async_pre_call_hook(
            user_api_key_dict=user_api_key,
            cache=DualCache(),
            data=request_body,
            call_type="responses",
        )

    assert request_body["input"] == "[REDACTED EMAIL]"
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_aim_responses_api_input_anonymize_writeback(user_api_key, monkeypatch):
    """Greptile P1: Aim's anonymize action must rewrite ``data["input"]``.

    For Responses-API requests the redaction has to reach ``input``, not
    just ``data["messages"]``.
    """
    monkeypatch.setenv("AIM_API_KEY", "hs-aim-key")
    from litellm.proxy.guardrails.guardrail_hooks.aim.aim import AimGuardrail

    aim = AimGuardrail()

    redacted_message = {"role": "user", "content": "[REDACTED] anonymised"}
    anonymize_body = {
        "required_action": {"action_type": "anonymize_action"},
        "redacted_chat": {"all_redacted_messages": [redacted_message]},
    }

    async def respond_with_anonymize(url, headers, json):
        analyze_request = Request("POST", "https://api.aim.security/fw/v1/analyze")
        return Response(status_code=200, json=anonymize_body, request=analyze_request)

    request_body = {"input": "user@example.com leaked"}

    with patch.object(aim.async_handler, "post", side_effect=respond_with_anonymize):
        await aim.async_pre_call_hook(
            user_api_key_dict=user_api_key,
            cache=DualCache(),
            data=request_body,
            call_type="responses",
        )

    assert request_body["input"] == "[REDACTED] anonymised"
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_lakera_v2_multimodal_pii_degrades_to_block(user_api_key, monkeypatch):
    """PII found on a multimodal request must raise the block exception.

    Mask-in-place relies on Lakera offsets and cannot preserve image/audio
    parts, so the hook must not silently flatten ``data["messages"]`` to
    text-only.
    """
    monkeypatch.setenv("LAKERA_API_KEY", "lk-test")
    from fastapi import HTTPException

    from litellm.proxy.guardrails.guardrail_hooks.lakera_ai_v2 import (
        LakeraAIGuardrail,
    )

    lakera = LakeraAIGuardrail(api_key="lk-test", on_flagged="block")

    async def flagged_guard_call(messages, request_data, event_type):
        detection = {"detector_type": "pii/email", "start": 0, "end": 5}
        return ({"flagged": True, "payload": [detection]}, {"EMAIL": 1})

    multimodal_message = {
        "role": "user",
        "content": [
            {"type": "text", "text": "leak"},
            {"type": "image_url", "image_url": {"url": "..."}},
        ],
    }

    with (
        patch.object(lakera, "call_v2_guard", side_effect=flagged_guard_call),
        patch.object(lakera, "_is_only_pii_violation", return_value=True),
        patch.object(
            lakera,
            "_get_http_exception_for_blocked_guardrail",
            return_value=HTTPException(status_code=400, detail="blocked"),
        ),
    ):
        with pytest.raises(HTTPException):
            await lakera.async_pre_call_hook(
                user_api_key_dict=user_api_key,
                cache=DualCache(),
                data={"messages": [multimodal_message]},
                call_type="acompletion",
            )
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_lakera_v2_inspects_multimodal_list_content(user_api_key, monkeypatch):
    """The text part of list-format content reaches ``call_v2_guard`` as plain text."""
    monkeypatch.setenv("LAKERA_API_KEY", "lk-test")
    from litellm.proxy.guardrails.guardrail_hooks.lakera_ai_v2 import (
        LakeraAIGuardrail,
    )

    lakera = LakeraAIGuardrail(api_key="lk-test", on_flagged="monitor")
    inspected_batches = []

    async def record_guard_call(messages, request_data, event_type):
        inspected_batches.append(messages)
        return {"flagged": False}, {}

    multimodal_message = {
        "role": "user",
        "content": [
            {"type": "text", "text": "AKIAEXAMPLE"},
            {"type": "image_url", "image_url": {"url": "..."}},
        ],
    }

    with patch.object(lakera, "call_v2_guard", side_effect=record_guard_call):
        await lakera.async_pre_call_hook(
            user_api_key_dict=user_api_key,
            cache=DualCache(),
            data={"messages": [multimodal_message]},
            call_type="acompletion",
        )

    assert inspected_batches == [[{"role": "user", "content": "AKIAEXAMPLE"}]]
|
||||
|
||||
|
||||
# ── Lasso ─────────────────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_lasso_multimodal_falls_back_to_classify(user_api_key, monkeypatch):
    """With ``mask=True`` and multimodal input, classification runs instead of masking.

    Lasso's classifix (mask) endpoint returns text that overwrites
    ``data["messages"]``, which would silently strip image parts; the hook
    must use the classify endpoint and leave the original payload intact.
    """
    monkeypatch.setenv("LASSO_API_KEY", "ls-test")
    from litellm.proxy.guardrails.guardrail_hooks.lasso.lasso import LassoGuardrail

    lasso = LassoGuardrail(lasso_api_key="ls-test", mask=True)

    # Flags flipped by the stand-in handlers below.
    called = {"masking": False, "classification": False}

    async def stub_masking(data, cache, message_type, messages):
        called["masking"] = True
        return data

    async def stub_classification(data, cache, message_type, messages):
        called["classification"] = True
        return data

    multimodal_message = {
        "role": "user",
        "content": [
            {"type": "text", "text": "hello"},
            {"type": "image_url", "image_url": {"url": "..."}},
        ],
    }

    with (
        patch.object(lasso, "_handle_masking", side_effect=stub_masking),
        patch.object(lasso, "_handle_classification", side_effect=stub_classification),
    ):
        await lasso._run_lasso_guardrail(
            data={"messages": [multimodal_message]},
            cache=DualCache(),
            message_type="PROMPT",
        )

    assert called["classification"] is True
    assert called["masking"] is False
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_lasso_inspects_responses_api_input(user_api_key, monkeypatch):
    """A string ``data["input"]`` is passed to classification as one user message."""
    monkeypatch.setenv("LASSO_API_KEY", "ls-test")
    from litellm.proxy.guardrails.guardrail_hooks.lasso.lasso import LassoGuardrail

    lasso = LassoGuardrail(lasso_api_key="ls-test")
    inspected_batches = []

    async def record_classification(data, cache, message_type, messages):
        inspected_batches.append(messages)
        return data

    with patch.object(
        lasso, "_handle_classification", side_effect=record_classification
    ):
        await lasso._run_lasso_guardrail(
            data={"input": "responses-api content"},
            cache=DualCache(),
            message_type="PROMPT",
        )

    expected_batch = [{"role": "user", "content": "responses-api content"}]
    assert inspected_batches == [expected_batch]
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_lasso_masking_writes_back_responses_api_input(user_api_key, monkeypatch):
    """Krrish blocker: classifix masking must rewrite ``data["input"]``.

    For Responses-API requests, updating only ``data["messages"]`` is not
    enough.
    """
    monkeypatch.setenv("LASSO_API_KEY", "ls-test")
    from litellm.proxy.guardrails.guardrail_hooks.lasso.lasso import LassoGuardrail

    lasso = LassoGuardrail(lasso_api_key="ls-test", mask=True)
    masked_api_result = {
        "violations_detected": True,
        "deputies": {"pii": True},
        "findings": {"pii": [{"action": "AUTO_MASKING"}]},
        "messages": [{"role": "user", "content": "[REDACTED]"}],
    }

    async def stub_lasso_api(headers, payload, api_url=None):
        return masked_api_result

    request_body = {"input": "user@example.com leaked"}

    with patch.object(lasso, "_call_lasso_api", side_effect=stub_lasso_api):
        await lasso._run_lasso_guardrail(
            data=request_body,
            cache=DualCache(),
            message_type="PROMPT",
        )

    assert request_body["input"] == "[REDACTED]"
|
||||
|
||||
|
||||
# ── Banned Keywords ───────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
def test_banned_keywords_blocks_multimodal_content(monkeypatch):
    """VERIA-11: a banned word inside a multimodal text part is rejected.

    The call type is ``acompletion``, the value the proxy ingress passes for
    ``/v1/chat/completions``. Testing with the sync literal ``"completion"``
    would pass even if the hook's call-type gate were misaligned with the
    runtime, so it would not catch regressions.
    """
    monkeypatch.setattr("litellm.banned_keywords_list", ["forbidden"], raising=False)
    from enterprise.enterprise_hooks.banned_keywords import _ENTERPRISE_BannedKeywords
    from fastapi import HTTPException

    hook = _ENTERPRISE_BannedKeywords()

    multimodal_message = {
        "role": "user",
        "content": [
            {"type": "text", "text": "forbidden word here"},
            {"type": "image_url", "image_url": {"url": "..."}},
        ],
    }

    async def invoke_hook():
        await hook.async_pre_call_hook(
            user_api_key_dict=UserAPIKeyAuth(api_key="hashed", user_id="u"),
            cache=DualCache(),
            data={"messages": [multimodal_message]},
            call_type="acompletion",
        )

    import asyncio

    with pytest.raises(HTTPException) as raised:
        asyncio.run(invoke_hook())
    assert "forbidden" in str(raised.value.detail).lower()
|
||||
|
||||
|
||||
def test_banned_keywords_blocks_responses_api_input(monkeypatch):
    """A banned word in a string ``data["input"]`` is rejected under ``aresponses``."""
    monkeypatch.setattr("litellm.banned_keywords_list", ["forbidden"], raising=False)
    from enterprise.enterprise_hooks.banned_keywords import _ENTERPRISE_BannedKeywords
    from fastapi import HTTPException

    hook = _ENTERPRISE_BannedKeywords()

    async def invoke_hook():
        await hook.async_pre_call_hook(
            user_api_key_dict=UserAPIKeyAuth(api_key="hashed", user_id="u"),
            cache=DualCache(),
            data={"input": "this contains forbidden content"},
            call_type="aresponses",
        )

    import asyncio

    with pytest.raises(HTTPException):
        asyncio.run(invoke_hook())
|
||||
|
||||
|
||||
@pytest.mark.parametrize("call_type", ["completion", "acompletion", "aresponses"])
def test_banned_keywords_fires_on_text_content_call_types(monkeypatch, call_type):
    """The hook rejects banned text for each text-content call type.

    Pins a regression where the gate was ``call_type == "completion"`` and
    silently no-op'd both ``acompletion`` (chat completions) and
    ``aresponses`` (Responses API), the ``route_type`` values the proxy
    actually emits.
    """
    monkeypatch.setattr("litellm.banned_keywords_list", ["forbidden"], raising=False)
    from enterprise.enterprise_hooks.banned_keywords import _ENTERPRISE_BannedKeywords
    from fastapi import HTTPException

    hook = _ENTERPRISE_BannedKeywords()

    import asyncio

    # The banned word is present in both request shapes.
    request_body = {
        "messages": [{"role": "user", "content": "forbidden text"}],
        "input": "forbidden text",
    }

    with pytest.raises(HTTPException):
        asyncio.run(
            hook.async_pre_call_hook(
                user_api_key_dict=UserAPIKeyAuth(api_key="hashed", user_id="u"),
                cache=DualCache(),
                data=request_body,
                call_type=call_type,
            )
        )
|
||||
|
||||
|
||||
def test_banned_keywords_skips_non_text_call_types(monkeypatch):
    """Embedding, moderation and audio call types must not trigger the hook.

    Those paths don't carry chat text and are outside the text-guardrail
    scope, even when the request body otherwise looks like a chat payload.
    """
    monkeypatch.setattr("litellm.banned_keywords_list", ["forbidden"], raising=False)
    from enterprise.enterprise_hooks.banned_keywords import _ENTERPRISE_BannedKeywords

    hook = _ENTERPRISE_BannedKeywords()

    import asyncio

    non_text_call_types = ("aembedding", "amoderation", "aspeech", "atranscription")
    for call_type in non_text_call_types:
        # Should return without raising, even though the data carries the banned word.
        pending_call = hook.async_pre_call_hook(
            user_api_key_dict=UserAPIKeyAuth(api_key="hashed", user_id="u"),
            cache=DualCache(),
            data={"input": "forbidden text"},
            call_type=call_type,
        )
        asyncio.run(pending_call)
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_banned_keywords_post_call_checks_all_choices(monkeypatch, user_api_key):
    """Krrish blocker: banned text in ``choices[1+]`` must still fail the post-call check."""
    monkeypatch.setattr("litellm.banned_keywords_list", ["forbidden"], raising=False)
    from enterprise.enterprise_hooks.banned_keywords import _ENTERPRISE_BannedKeywords
    from fastapi import HTTPException

    hook = _ENTERPRISE_BannedKeywords()

    clean_choice = Choices(index=0, message=Message(role="assistant", content="clean"))
    banned_choice = Choices(
        index=1, message=Message(role="assistant", content="forbidden")
    )
    model_response = ModelResponse(choices=[clean_choice, banned_choice])

    with pytest.raises(HTTPException) as raised:
        await hook.async_post_call_success_hook(
            data={},
            user_api_key_dict=user_api_key,
            response=model_response,
        )

    assert "forbidden" in str(raised.value.detail).lower()
|
||||
|
||||
|
||||
# ── Azure Content Safety ──────────────────────────────────────────────────────
|
||||
|
||||
|
||||
@pytest.mark.asyncio
@pytest.mark.parametrize(
    "call_type, data",
    [
        (
            "acompletion",
            {
                "messages": [
                    {
                        "role": "user",
                        "content": [
                            {"type": "text", "text": "scan me"},
                            {"type": "image_url", "image_url": {"url": "..."}},
                        ],
                    }
                ]
            },
        ),
        ("aresponses", {"input": "scan me"}),
    ],
)
async def test_azure_content_safety_pre_call_fires_on_runtime_call_types(
    user_api_key, call_type, data
):
    """The pre-call hook inspects text under ``acompletion`` and ``aresponses``.

    The proxy ingress passes ``route_type`` straight through as
    ``call_type``, so the hook must not gate only on the literal
    ``"completion"`` string used by some SDK callers.
    """
    from litellm.proxy.hooks.azure_content_safety import _PROXY_AzureContentSafety

    # ``__new__`` skips ``__init__``; only ``test_violation`` is stubbed.
    hook = _PROXY_AzureContentSafety.__new__(_PROXY_AzureContentSafety)
    violation_checks = []

    async def record_violation_check(content, source=None):
        violation_checks.append((content, source))

    hook.test_violation = record_violation_check
    await hook.async_pre_call_hook(
        user_api_key_dict=user_api_key,
        cache=DualCache(),
        data=data,
        call_type=call_type,
    )
    assert ("scan me", "input") in violation_checks
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_azure_content_safety_post_call_checks_all_choices(user_api_key):
    """Krrish blocker: unsafe text in ``choices[1+]`` must still be caught.

    The recorded calls show choices checked in order up to the one that raises.
    """
    from fastapi import HTTPException
    from litellm.proxy.hooks.azure_content_safety import _PROXY_AzureContentSafety

    # ``__new__`` skips ``__init__``; only ``test_violation`` is stubbed.
    hook = _PROXY_AzureContentSafety.__new__(_PROXY_AzureContentSafety)
    violation_checks = []

    async def record_and_maybe_block(content, source=None):
        violation_checks.append((content, source))
        if "unsafe" in content:
            raise HTTPException(status_code=400, detail={"error": "unsafe"})

    hook.test_violation = record_and_maybe_block

    contents = ("clean", "unsafe", "later")
    model_response = ModelResponse(
        choices=[
            Choices(index=position, message=Message(role="assistant", content=text))
            for position, text in enumerate(contents)
        ]
    )

    with pytest.raises(HTTPException):
        await hook.async_post_call_success_hook(
            data={},
            user_api_key_dict=user_api_key,
            response=model_response,
        )

    assert violation_checks == [("clean", "output"), ("unsafe", "output")]
|
||||
|
||||
|
||||
# ── Secret Detection ──────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_secret_detection_redacts_multimodal_text_parts(user_api_key):
    """A secret in a multimodal text part is redacted in place; the image part is kept."""
    from enterprise.litellm_enterprise.enterprise_callbacks.secret_detection import (
        _ENTERPRISE_SecretDetection,
    )

    detector = _ENTERPRISE_SecretDetection()
    text_part = {"type": "text", "text": "AKIAIOSFODNN7EXAMPLE is the key"}
    image_part = {"type": "image_url", "image_url": {"url": "..."}}
    request_body = {"messages": [{"role": "user", "content": [text_part, image_part]}]}

    await detector.async_pre_call_hook(
        user_api_key_dict=user_api_key,
        cache=DualCache(),
        data=request_body,
        call_type="completion",
    )

    content_parts = request_body["messages"][0]["content"]
    redacted_text = content_parts[0]["text"]
    assert "AKIAIOSFODNN7EXAMPLE" not in redacted_text
    assert "[REDACTED]" in redacted_text
    # Non-text part is preserved untouched.
    assert content_parts[1] == {"type": "image_url", "image_url": {"url": "..."}}
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_secret_detection_redacts_responses_api_input(user_api_key):
    """A secret in a string ``data["input"]`` is replaced with a redaction marker."""
    from enterprise.litellm_enterprise.enterprise_callbacks.secret_detection import (
        _ENTERPRISE_SecretDetection,
    )

    detector = _ENTERPRISE_SecretDetection()
    request_body = {"input": "leak: AKIAIOSFODNN7EXAMPLE"}

    await detector.async_pre_call_hook(
        user_api_key_dict=user_api_key,
        cache=DualCache(),
        data=request_body,
        call_type="moderation",
    )

    redacted_input = request_body["input"]
    assert "AKIAIOSFODNN7EXAMPLE" not in redacted_input
    assert "[REDACTED]" in redacted_input
|
||||
|
||||
|
||||
# ── OpenAI Moderation ─────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_openai_moderation_inspects_multimodal_content(monkeypatch, user_api_key):
    """The text sent to ``llm_router.amoderation`` includes list-format text parts.

    Without this, multimodal content silently passed moderation. The two
    text parts around an image are expected to arrive joined as one string.
    """
    from enterprise.enterprise_hooks.openai_moderation import (
        _ENTERPRISE_OpenAI_Moderation,
    )

    hook = _ENTERPRISE_OpenAI_Moderation()
    moderated_inputs = []

    class UnflaggedResult:
        flagged = False

    class FakeModeration:
        results = [UnflaggedResult()]

    async def record_amoderation(model, input):
        moderated_inputs.append(input)
        return FakeModeration()

    router_stub = MagicMock()
    router_stub.amoderation = AsyncMock(side_effect=record_amoderation)
    monkeypatch.setattr(
        "litellm.proxy.proxy_server.llm_router", router_stub, raising=False
    )

    multimodal_message = {
        "role": "user",
        "content": [
            {"type": "text", "text": "alpha "},
            {"type": "image_url", "image_url": {"url": "..."}},
            {"type": "text", "text": "beta"},
        ],
    }

    await hook.async_moderation_hook(
        data={"messages": [multimodal_message]},
        user_api_key_dict=user_api_key,
        call_type="acompletion",
    )

    assert moderated_inputs == ["alpha beta"]
|
||||
|
||||
|
||||
# ── Google Text Moderation ────────────────────────────────────────────────────
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_google_text_moderation_inspects_multimodal_content(user_api_key):
    """The document built for Google's moderation client includes list-format text parts."""
    from enterprise.enterprise_hooks.google_text_moderation import (
        _ENTERPRISE_GoogleTextModeration,
    )

    # ``__new__`` skips ``__init__``; the attributes the hook uses are stubbed below.
    hook = _ENTERPRISE_GoogleTextModeration.__new__(_ENTERPRISE_GoogleTextModeration)
    documents_built = []

    def record_document(content, type_):
        documents_built.append(content)
        return MagicMock()

    moderation_result = MagicMock()
    moderation_result.moderation_categories = []

    hook.language_document = record_document
    hook.moderate_text_request = MagicMock(return_value=MagicMock())
    hook.document_type = MagicMock()
    hook.client = MagicMock()
    hook.client.moderate_text = MagicMock(return_value=moderation_result)

    multimodal_message = {
        "role": "user",
        "content": [
            {"type": "text", "text": "hello "},
            {"type": "image_url", "image_url": {"url": "..."}},
            {"type": "text", "text": "world"},
        ],
    }

    await hook.async_moderation_hook(
        data={"messages": [multimodal_message]},
        user_api_key_dict=user_api_key,
        call_type="acompletion",
    )

    assert documents_built == ["hello world"]
|
||||
@ -11,10 +11,14 @@ sys.path.insert(
|
||||
|
||||
import httpx
|
||||
import pytest
|
||||
from fastapi import FastAPI
|
||||
from fastapi.testclient import TestClient
|
||||
from prisma.errors import ClientNotConnectedError, HTTPClientClosedError, PrismaError
|
||||
|
||||
import litellm.proxy.health_endpoints._health_endpoints as _health_endpoints_module
|
||||
|
||||
from litellm.proxy._types import LitellmUserRoles, UserAPIKeyAuth
|
||||
from litellm.proxy.auth.user_api_key_auth import user_api_key_auth
|
||||
from litellm.proxy.health_endpoints._health_endpoints import (
|
||||
_db_health_readiness_check,
|
||||
get_callback_identifier,
|
||||
@ -512,7 +516,7 @@ def proxy_client(monkeypatch):
|
||||
|
||||
Redis cache:
|
||||
- If REDIS_HOST is set in environment, Redis cache will be automatically configured
|
||||
- Cache configuration is included in /health/readiness endpoint response
|
||||
- Cache diagnostics are included in the authenticated /health/readiness/details response
|
||||
"""
|
||||
client = create_proxy_test_client(monkeypatch)
|
||||
with client:
|
||||
@ -588,11 +592,7 @@ def test_health_liveness_endpoint(proxy_client):
|
||||
def test_health_readiness(proxy_client):
|
||||
"""
|
||||
Test /health/readiness endpoint.
|
||||
Database and Redis are optional - the endpoint should work whether they're available or not.
|
||||
|
||||
If DATABASE_URL is set, the endpoint will check database connectivity.
|
||||
If REDIS_HOST is set, the endpoint will report cache status.
|
||||
If neither is set, the endpoint should still return a valid health status.
|
||||
Database and Redis are optional - the public endpoint should work whether they're available or not.
|
||||
"""
|
||||
# Measure the time taken for the health check call
|
||||
start_time = time.perf_counter()
|
||||
@ -614,40 +614,57 @@ def test_health_readiness(proxy_client):
|
||||
duration_ms < 500
|
||||
), f"Health check took {duration_ms:.2f}ms, expected < 500ms for readiness endpoint"
|
||||
|
||||
# Assert response contains expected fields
|
||||
# Assert response contains only low-detail public probe fields
|
||||
response_data = response.json()
|
||||
assert "status" in response_data, "Response should contain 'status' field"
|
||||
assert (
|
||||
"litellm_version" in response_data
|
||||
), "Response should contain 'litellm_version' field"
|
||||
|
||||
# Display all health endpoint response fields (matches what /health/readiness returns)
|
||||
print("\n" + "-" * 60)
|
||||
print("HEALTH ENDPOINT RESPONSE")
|
||||
print("-" * 60)
|
||||
print(f"Status: {response_data.get('status', 'unknown')}")
|
||||
print(f"Database: {response_data.get('db', 'not reported')}")
|
||||
print(f"LiteLLM Version: {response_data.get('litellm_version', 'unknown')}")
|
||||
print(f"Success Callbacks: {response_data.get('success_callbacks', [])}")
|
||||
print(f"Cache: {response_data.get('cache', 'none')}")
|
||||
print(
|
||||
f"Use AioHTTP Transport: {response_data.get('use_aiohttp_transport', 'unknown')}"
|
||||
)
|
||||
assert response_data == {"status": "healthy"}
|
||||
print(f"Response time: {duration_ms:.2f}ms")
|
||||
|
||||
# If database status is reported, verify it's a valid status
|
||||
# Database may be "connected", "disconnected", "unknown", or "Not connected" (when prisma_client is None)
|
||||
if "db" in response_data:
|
||||
db_status = response_data["db"]
|
||||
# Database status can be any of these valid states
|
||||
assert db_status in [
|
||||
"connected",
|
||||
"disconnected",
|
||||
"unknown",
|
||||
"Not connected",
|
||||
], f"Unexpected db status: {db_status}"
|
||||
|
||||
print("=" * 60 + "\n")
|
||||
def test_health_readiness_details_returns_diagnostic_fields(monkeypatch):
    """Authenticated ``/health/readiness/details`` still carries diagnostics.

    The health router is mounted on a throwaway app whose auth dependency is
    overridden with a proxy-admin identity, and ``prisma_client`` is patched
    to ``None`` before the request is issued.
    """

    def _admin_identity():
        return UserAPIKeyAuth(user_role=LitellmUserRoles.PROXY_ADMIN)

    health_app = FastAPI()
    health_app.include_router(_health_endpoints_module.router)
    health_app.dependency_overrides[user_api_key_auth] = _admin_identity
    http = TestClient(health_app)

    monkeypatch.setattr("litellm.proxy.proxy_server.prisma_client", None)

    resp = http.get("/health/readiness/details")
    assert resp.status_code == 200, resp.text

    body = resp.json()
    assert body["status"] == "healthy"
    # The detailed payload must expose each diagnostic key.
    for diagnostic_key in ("litellm_version", "success_callbacks", "cache"):
        assert diagnostic_key in body
|
||||
|
||||
|
||||
def test_health_readiness_allows_explicit_legacy_public_details(monkeypatch):
    """The opt-in setting restores the detailed public readiness payload.

    No auth override is installed here; ``general_settings`` is patched to
    ``{"allow_public_health_readiness_details": True}`` and the plain
    ``/health/readiness`` response is expected to include the diagnostic keys.
    """
    legacy_app = FastAPI()
    legacy_app.include_router(_health_endpoints_module.router)
    http = TestClient(legacy_app)

    patched_globals = (
        ("litellm.proxy.proxy_server.prisma_client", None),
        (
            "litellm.proxy.proxy_server.general_settings",
            {"allow_public_health_readiness_details": True},
        ),
    )
    for target, replacement in patched_globals:
        monkeypatch.setattr(target, replacement)

    resp = http.get("/health/readiness")
    assert resp.status_code == 200, resp.text

    body = resp.json()
    assert body["status"] == "healthy"
    for diagnostic_key in ("litellm_version", "success_callbacks", "cache"):
        assert diagnostic_key in body
|
||||
|
||||
|
||||
def test_get_callback_identifier_string_and_object_with_callback_name():
|
||||
@ -1503,8 +1520,7 @@ async def test_health_readiness_returns_503_when_db_disconnected():
|
||||
result = await health_readiness(response=response)
|
||||
|
||||
assert response.status_code == 503
|
||||
assert result["db"] == "disconnected"
|
||||
assert result["status"] == "healthy" # body shape unchanged for back-compat
|
||||
assert result == {"status": "healthy"}
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@ -1527,7 +1543,7 @@ async def test_health_readiness_returns_200_when_db_connected():
|
||||
result = await health_readiness(response=response)
|
||||
|
||||
assert response.status_code == 200
|
||||
assert result["db"] == "connected"
|
||||
assert result == {"status": "healthy"}
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@ -1546,7 +1562,7 @@ async def test_health_readiness_returns_200_when_no_db_configured():
|
||||
result = await health_readiness(response=response)
|
||||
|
||||
assert response.status_code == 200
|
||||
assert result["db"] == "Not connected"
|
||||
assert result == {"status": "healthy"}
|
||||
|
||||
|
||||
def test_clean_endpoint_data_strips_credentials_keeps_routing_fields():
|
||||
|
||||
@ -13,7 +13,13 @@ from fastapi.testclient import TestClient
|
||||
|
||||
sys.path.insert(0, os.path.abspath("../../../.."))
|
||||
|
||||
from litellm.proxy import proxy_server
|
||||
from litellm.proxy._types import LitellmUserRoles, UserAPIKeyAuth
|
||||
from litellm.proxy.management_endpoints.router_settings_endpoints import (
|
||||
get_router_settings,
|
||||
)
|
||||
from litellm.proxy.proxy_server import app
|
||||
from litellm.router import Router
|
||||
|
||||
client = TestClient(app)
|
||||
|
||||
@ -71,3 +77,48 @@ class TestRouterSettingsEndpoints:
|
||||
assert "options" in routing_strategy_field
|
||||
assert isinstance(routing_strategy_field["options"], list)
|
||||
assert len(routing_strategy_field["options"]) > 0
|
||||
|
||||
@pytest.mark.asyncio
async def test_get_router_settings_includes_routing_groups_from_live_router(
    self, monkeypatch
):
    """GET /router/settings returns routing_groups from the live router.

    A real ``Router`` built with one deployment and one routing group is
    installed as ``proxy_server.llm_router``; ``ProxyConfig.get_config`` is
    stubbed to return an empty config so the only source of
    ``routing_groups`` in this test is the router itself.
    """
    expected_groups = [
        {
            "group_name": "test-group",
            "models": ["latency-model"],
            "routing_strategy": "latency-based-routing",
            "routing_strategy_args": {},
        }
    ]
    deployment = {
        "model_name": "latency-model",
        "litellm_params": {
            "model": "openai/gpt-4o",
            "api_key": "sk-x",
        },
    }
    live_router = Router(model_list=[deployment], routing_groups=expected_groups)
    monkeypatch.setattr(proxy_server, "llm_router", live_router)

    async def _empty_config(self, config_file_path=None):
        return {}

    monkeypatch.setattr(
        proxy_server.ProxyConfig, "get_config", _empty_config, raising=True
    )

    caller = UserAPIKeyAuth(user_role=LitellmUserRoles.PROXY_ADMIN, api_key="sk-x")
    settings = await get_router_settings(user_api_key_dict=caller)

    assert settings.current_values.get("routing_groups") == expected_groups

    # First field descriptor named "routing_groups"; StopIteration if absent.
    groups_field = next(
        filter(lambda field: field.field_name == "routing_groups", settings.fields)
    )
    assert groups_field.field_value == expected_groups
|
||||
|
||||
@ -1,18 +1,5 @@
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
|
||||
import pytest
|
||||
from fastapi.testclient import TestClient
|
||||
|
||||
sys.path.insert(
|
||||
0, os.path.abspath("../../..")
|
||||
) # Adds the parent directory to the system path
|
||||
|
||||
|
||||
import pytest
|
||||
from fastapi import FastAPI
|
||||
from fastapi.responses import JSONResponse
|
||||
from fastapi.testclient import TestClient
|
||||
|
||||
import litellm
|
||||
@ -21,7 +8,7 @@ from litellm.proxy.middleware.prometheus_auth_middleware import PrometheusAuthMi
|
||||
|
||||
|
||||
# Fake auth functions to simulate valid and invalid auth behavior.
|
||||
async def fake_valid_auth(request, api_key):
|
||||
async def fake_valid_auth(request, api_key, **kwargs):
|
||||
# Simulate valid authentication: do nothing (i.e. pass)
|
||||
return
|
||||
|
||||
@ -35,15 +22,11 @@ async def fake_valid_auth_reads_body(request, api_key, **kwargs):
|
||||
return
|
||||
|
||||
|
||||
async def fake_invalid_auth(request, api_key):
|
||||
print("running fake invalid auth", request, api_key)
|
||||
async def fake_invalid_auth(request, api_key, **kwargs):
|
||||
# Simulate invalid auth by raising an exception.
|
||||
raise Exception("Invalid API key")
|
||||
|
||||
|
||||
from litellm.proxy.auth.user_api_key_auth import user_api_key_auth
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def app_with_middleware():
|
||||
"""Create a FastAPI app with the PrometheusAuthMiddleware and dummy endpoints."""
|
||||
@ -98,7 +81,7 @@ def test_valid_auth_metrics(app_with_middleware, monkeypatch):
|
||||
Test that a request to /metrics (and /metrics/) with valid auth headers passes.
|
||||
"""
|
||||
# Enable auth on metrics endpoints.
|
||||
litellm.require_auth_for_metrics_endpoint = True
|
||||
monkeypatch.setattr(litellm, "require_auth_for_metrics_endpoint", True)
|
||||
# Patch the auth function to simulate a valid authentication.
|
||||
monkeypatch.setattr(
|
||||
"litellm.proxy.middleware.prometheus_auth_middleware.user_api_key_auth",
|
||||
@ -123,7 +106,7 @@ def test_invalid_auth_metrics(app_with_middleware, monkeypatch):
|
||||
"""
|
||||
Test that a request to /metrics with invalid auth headers fails with a 401.
|
||||
"""
|
||||
litellm.require_auth_for_metrics_endpoint = True
|
||||
monkeypatch.setattr(litellm, "require_auth_for_metrics_endpoint", True)
|
||||
# Patch the auth function to simulate a failed authentication.
|
||||
monkeypatch.setattr(
|
||||
"litellm.proxy.middleware.prometheus_auth_middleware.user_api_key_auth",
|
||||
@ -138,12 +121,48 @@ def test_invalid_auth_metrics(app_with_middleware, monkeypatch):
|
||||
assert "Unauthorized access to metrics endpoint" in response.text
|
||||
|
||||
|
||||
def test_metrics_auth_uses_real_auth_when_route_is_public(
    app_with_middleware, monkeypatch
):
    """Regression: the metrics auth flag must force the real auth path.

    ``/metrics`` is statically public, but with
    ``require_auth_for_metrics_endpoint`` enabled an unauthenticated request
    (no auth function is patched in this test) must be rejected with a 401.
    """
    monkeypatch.setattr(litellm, "require_auth_for_metrics_endpoint", True)
    proxy_server_overrides = {
        "litellm.proxy.proxy_server.master_key": "sk-master",
        "litellm.proxy.proxy_server.general_settings": {},
    }
    for target, replacement in proxy_server_overrides.items():
        monkeypatch.setattr(target, replacement)

    resp = TestClient(app_with_middleware).get("/metrics")

    assert resp.status_code == 401, resp.text
    assert "Unauthorized access to metrics endpoint" in resp.text
|
||||
|
||||
|
||||
def test_metrics_auth_is_required_by_default(app_with_middleware, monkeypatch):
    """Metrics require auth unless explicitly configured as public.

    ``require_auth_for_metrics_endpoint`` is deliberately left untouched; only
    the middleware's auth function is swapped for one that always raises.
    """
    auth_target = "litellm.proxy.middleware.prometheus_auth_middleware.user_api_key_auth"
    monkeypatch.setattr(auth_target, fake_invalid_auth)

    resp = TestClient(app_with_middleware).get("/metrics")

    assert resp.status_code == 401, resp.text
    assert "Unauthorized access to metrics endpoint" in resp.text
|
||||
|
||||
|
||||
def test_no_auth_metrics_when_disabled(app_with_middleware, monkeypatch):
|
||||
"""
|
||||
Test that when require_auth_for_metrics_endpoint is False, requests to /metrics
|
||||
bypass the auth check.
|
||||
"""
|
||||
litellm.require_auth_for_metrics_endpoint = False
|
||||
monkeypatch.setattr(litellm, "require_auth_for_metrics_endpoint", False)
|
||||
|
||||
# To ensure auth is not run, patch the auth function with one that will raise if called.
|
||||
def should_not_be_called(*args, **kwargs):
|
||||
@ -160,11 +179,11 @@ def test_no_auth_metrics_when_disabled(app_with_middleware, monkeypatch):
|
||||
assert response.json() == {"msg": "metrics OK"}
|
||||
|
||||
|
||||
def test_non_metrics_requests_pass_through(app_with_middleware):
|
||||
def test_non_metrics_requests_pass_through(app_with_middleware, monkeypatch):
|
||||
"""
|
||||
Test that non-metrics endpoints pass through the middleware unaffected.
|
||||
"""
|
||||
litellm.require_auth_for_metrics_endpoint = True
|
||||
monkeypatch.setattr(litellm, "require_auth_for_metrics_endpoint", True)
|
||||
|
||||
client = TestClient(app_with_middleware)
|
||||
|
||||
@ -182,7 +201,7 @@ def test_non_metrics_requests_dont_trigger_auth(app_with_middleware, monkeypatch
|
||||
Test that non-metrics requests never trigger auth, even when auth is enabled
|
||||
and the auth function would reject the request.
|
||||
"""
|
||||
litellm.require_auth_for_metrics_endpoint = True
|
||||
monkeypatch.setattr(litellm, "require_auth_for_metrics_endpoint", True)
|
||||
|
||||
def should_not_be_called(*args, **kwargs):
|
||||
raise Exception("Auth should not be called for non-metrics requests")
|
||||
|
||||
@ -91,6 +91,19 @@ def test_get_litellm_model_cost_map_returns_cost_map():
|
||||
)
|
||||
|
||||
|
||||
def test_public_ai_hub_info_is_public_by_default(monkeypatch):
    """``/public/model_hub/info`` answers 200 with empty ``general_settings``.

    A master key is configured and no credentials are sent with the request.
    """
    hub_app = FastAPI()
    hub_app.include_router(router)
    http = TestClient(hub_app)

    for target, replacement in (
        ("litellm.proxy.proxy_server.general_settings", {}),
        ("litellm.proxy.proxy_server.master_key", "sk-master"),
    ):
        monkeypatch.setattr(target, replacement)

    resp = http.get("/public/model_hub/info")

    assert resp.status_code == 200, resp.text
|
||||
|
||||
|
||||
def test_watsonx_provider_fields():
|
||||
"""Test that Watsonx provider has all required credential fields including multiple auth options."""
|
||||
app = FastAPI()
|
||||
@ -166,9 +179,9 @@ def test_anthropic_provider_fields_support_byok():
|
||||
"Anthropic api_key must be optional so admins can configure BYOK models "
|
||||
"without entering a key. See BYOK tutorial."
|
||||
)
|
||||
assert fields_by_key["api_key"].get("tooltip"), (
|
||||
"Anthropic api_key must have a tooltip explaining the BYOK use case."
|
||||
)
|
||||
assert fields_by_key["api_key"].get(
|
||||
"tooltip"
|
||||
), "Anthropic api_key must have a tooltip explaining the BYOK use case."
|
||||
assert "api_base" in fields_by_key, (
|
||||
"Anthropic provider form must expose api_base so cloud customers "
|
||||
"can override the upstream URL without env var access."
|
||||
@ -176,16 +189,16 @@ def test_anthropic_provider_fields_support_byok():
|
||||
api_base_field = fields_by_key["api_base"]
|
||||
assert api_base_field["required"] is False
|
||||
assert api_base_field["field_type"] == "text"
|
||||
assert api_base_field.get("tooltip"), (
|
||||
"api_base should have a tooltip explaining it is optional."
|
||||
)
|
||||
assert api_base_field.get(
|
||||
"tooltip"
|
||||
), "api_base should have a tooltip explaining it is optional."
|
||||
|
||||
# UI forms render fields in credential_fields order; api_base should come first
|
||||
# so an admin sees the URL override before the key field.
|
||||
field_order = [f["key"] for f in anthropic["credential_fields"]]
|
||||
assert field_order.index("api_base") < field_order.index("api_key"), (
|
||||
"api_base must appear before api_key in credential_fields (matches AI21 and ANTHROPIC_TEXT convention)."
|
||||
)
|
||||
assert field_order.index("api_base") < field_order.index(
|
||||
"api_key"
|
||||
), "api_base must appear before api_key in credential_fields (matches AI21 and ANTHROPIC_TEXT convention)."
|
||||
|
||||
|
||||
def test_public_model_hub_with_healthy_model():
|
||||
|
||||
312
tests/test_litellm/proxy/test_pricing_field_strip.py
Normal file
312
tests/test_litellm/proxy/test_pricing_field_strip.py
Normal file
@ -0,0 +1,312 @@
|
||||
"""Proxy strips client-supplied pricing parameters from request bodies.
|
||||
|
||||
`litellm.completion` accepts pricing fields (`input_cost_per_token`,
|
||||
`output_cost_per_token`, the rest of `CustomPricingLiteLLMParams`,
|
||||
`metadata.model_info`) as part of its kwarg surface. On direct SDK use that
|
||||
is intentional. On the proxy, those same fields would let any caller rewrite
|
||||
their own per-request cost and — via `litellm.register_model` — mutate
|
||||
`litellm.model_cost` for every subsequent caller in the worker. The proxy
|
||||
strips them at the boundary; an opt-in key/team flag preserves the override
|
||||
for operators who actually want it.
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
from unittest.mock import MagicMock
|
||||
|
||||
import pytest
|
||||
from fastapi import Request
|
||||
|
||||
import litellm
|
||||
from litellm.proxy._types import UserAPIKeyAuth
|
||||
from litellm.proxy.litellm_pre_call_utils import (
|
||||
_CLIENT_PRICING_CONTROL_FIELDS,
|
||||
_CLIENT_PRICING_METADATA_FIELDS,
|
||||
_strip_client_pricing_overrides,
|
||||
add_litellm_data_to_request,
|
||||
)
|
||||
from litellm.types.utils import CustomPricingLiteLLMParams
|
||||
|
||||
sys.path.insert(0, os.path.abspath("../../.."))
|
||||
|
||||
|
||||
def _make_request_mock() -> Request:
    """Build a ``MagicMock`` standing in for a chat-completions POST request.

    Returns:
        A mock spec'd against ``Request`` with ``url`` (including ``url.path``
        and ``str(url)``), ``method``, ``query_params``, ``headers`` and
        ``client.host`` populated.
    """
    request_mock = MagicMock(spec=Request)
    # Install the replacement URL mock *before* configuring it. Setting
    # ``url.path`` first and then reassigning ``url`` would put the path on a
    # child mock that is immediately thrown away, leaving ``url.path`` as an
    # auto-created MagicMock instead of the intended string.
    request_mock.url = MagicMock()
    request_mock.url.path = "/v1/chat/completions"
    request_mock.url.__str__.return_value = "http://localhost/v1/chat/completions"
    request_mock.method = "POST"
    request_mock.query_params = {}
    request_mock.headers = {"Content-Type": "application/json"}
    request_mock.client = MagicMock()
    request_mock.client.host = "127.0.0.1"
    return request_mock
|
||||
|
||||
|
||||
def _user_api_key_auth(metadata=None, team_metadata=None) -> UserAPIKeyAuth:
    """Create a ``UserAPIKeyAuth`` with fixed budgets and optional metadata.

    Args:
        metadata: Key-level metadata; any falsy value becomes ``{}``.
        team_metadata: Team-level metadata; any falsy value becomes ``{}``.
    """
    auth_fields = {
        "api_key": "hashed-key",
        "metadata": metadata or {},
        "team_metadata": team_metadata or {},
        "spend": 0.0,
        "max_budget": 100.0,
        "model_max_budget": {},
        "team_spend": 0.0,
        "team_max_budget": 200.0,
    }
    return UserAPIKeyAuth(**auth_fields)
|
||||
|
||||
|
||||
class TestStripClientPricingOverrides:
    """Unit tests for ``_strip_client_pricing_overrides`` and its field sets.

    Each test hands the helper a plain request-body dict and then inspects
    that same dict, i.e. the helper is exercised as an in-place mutation.
    """

    def test_pricing_field_set_tracks_pydantic_model(self):
        """The root-level strip set equals the ``CustomPricingLiteLLMParams`` fields."""
        # The strip set is built from the model so additions are picked up
        # automatically — this test guards against the model and the strip
        # set drifting apart if someone replaces the auto-derivation later.
        assert _CLIENT_PRICING_CONTROL_FIELDS == frozenset(
            CustomPricingLiteLLMParams.model_fields.keys()
        )
        # Sanity: the obvious top-level pricing fields are in the set.
        for field in (
            "input_cost_per_token",
            "output_cost_per_token",
            "input_cost_per_second",
            "cache_creation_input_token_cost",
        ):
            assert field in _CLIENT_PRICING_CONTROL_FIELDS

    def test_root_pricing_fields_dropped(self):
        """Top-level pricing keys are removed; other keys are left in place."""
        data = {
            "model": "gpt-4",
            "messages": [{"role": "user", "content": "hi"}],
            "input_cost_per_token": 0.0,
            "output_cost_per_token": 0.0,
            "cache_creation_input_token_cost": 0.0,
        }
        _strip_client_pricing_overrides(data)
        assert data == {
            "model": "gpt-4",
            "messages": [{"role": "user", "content": "hi"}],
        }

    def test_metadata_model_info_dropped(self):
        """``model_info`` is removed from both ``metadata`` and ``litellm_metadata``."""
        data = {
            "model": "gpt-4",
            "metadata": {
                "user_session": "keep-me",
                "model_info": {"input_cost_per_token": 0.0},
            },
            "litellm_metadata": {
                "model_info": {"output_cost_per_token": 0.0},
            },
        }
        _strip_client_pricing_overrides(data)
        assert data["metadata"] == {"user_session": "keep-me"}
        assert data["litellm_metadata"] == {}

    def test_non_pricing_fields_untouched(self):
        """A body without pricing fields comes back equal to its original form."""
        data = {
            "model": "gpt-4",
            "temperature": 0.7,
            "max_tokens": 100,
            "tools": [{"type": "function"}],
            "metadata": {"trace_id": "abc"},
        }
        # Written out independently (not copied from ``data``) so the
        # comparison cannot be satisfied by shared nested objects.
        snapshot = {
            "model": "gpt-4",
            "temperature": 0.7,
            "max_tokens": 100,
            "tools": [{"type": "function"}],
            "metadata": {"trace_id": "abc"},
        }
        _strip_client_pricing_overrides(data)
        assert data == snapshot

    def test_metadata_strip_handles_non_dict_metadata(self):
        """Non-dict metadata values must not make the strip raise."""
        # Defensive — Pydantic validation would normally reject non-dict
        # metadata, but the strip mustn't crash if a malformed body sneaks in.
        _strip_client_pricing_overrides({"metadata": "not-a-dict"})
        _strip_client_pricing_overrides({"metadata": None})
        _strip_client_pricing_overrides({"litellm_metadata": ["a", "b"]})

    def test_metadata_field_set_contains_model_info(self):
        """``model_info`` is one of the metadata keys scheduled for stripping."""
        assert "model_info" in _CLIENT_PRICING_METADATA_FIELDS

    def test_strip_emits_debug_log_listing_dropped_fields(self, caplog):
        """Stripping logs the dropped field names and the opt-in flag name."""
        # Operators need a paper trail so they can diagnose why a previously
        # working override stopped applying after the strip landed.
        import logging

        from litellm._logging import verbose_proxy_logger

        # ``caplog.at_level`` sets the named logger to DEBUG for the duration
        # of the block and restores the previous level afterwards. A separate
        # ``setLevel(DEBUG)`` beforehand is unnecessary and would leave the
        # shared proxy logger at DEBUG for every test that runs later.
        with caplog.at_level(logging.DEBUG, logger=verbose_proxy_logger.name):
            _strip_client_pricing_overrides(
                {
                    "model": "gpt-4",
                    "input_cost_per_token": 0.0,
                    "metadata": {"model_info": {"output_cost_per_token": 0.0}},
                }
            )
        log_text = " ".join(record.getMessage() for record in caplog.records)
        assert "input_cost_per_token" in log_text
        assert "metadata.model_info" in log_text
        assert "allow_client_pricing_override" in log_text

    def test_strip_does_not_log_when_no_fields_present(self, caplog):
        """A no-op strip produces no log record mentioning pricing."""
        # No-op strips must stay silent so the log isn't filled with noise on
        # every legitimate request.
        import logging

        from litellm._logging import verbose_proxy_logger

        # As above: rely on ``caplog.at_level`` alone so the logger level is
        # restored when the block exits.
        with caplog.at_level(logging.DEBUG, logger=verbose_proxy_logger.name):
            _strip_client_pricing_overrides({"model": "gpt-4", "temperature": 0.7})
        assert not any(
            "pricing" in record.getMessage().lower() for record in caplog.records
        )
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_add_litellm_data_to_request_strips_root_pricing_fields():
    """Root-level pricing keys are absent from the processed request body."""
    pricing_keys = ("input_cost_per_token", "output_cost_per_token")
    request_body = {
        "model": "gpt-4",
        "messages": [{"role": "user", "content": "hi"}],
    }
    # Append both pricing overrides with a zero cost.
    request_body.update(dict.fromkeys(pricing_keys, 0.0))

    processed = await add_litellm_data_to_request(
        data=request_body,
        request=_make_request_mock(),
        user_api_key_dict=_user_api_key_auth(),
        proxy_config=MagicMock(),
        general_settings={},
        version="test-version",
    )

    for pricing_key in pricing_keys:
        assert pricing_key not in processed
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_add_litellm_data_to_request_strips_metadata_model_info():
    """``metadata.model_info`` does not survive request pre-processing."""
    client_metadata = {"model_info": {"input_cost_per_token": 0.0}}
    request_body = {
        "model": "gpt-4",
        "messages": [{"role": "user", "content": "hi"}],
        "metadata": client_metadata,
    }

    processed = await add_litellm_data_to_request(
        data=request_body,
        request=_make_request_mock(),
        user_api_key_dict=_user_api_key_auth(),
        proxy_config=MagicMock(),
        general_settings={},
        version="test-version",
    )

    # A missing ``metadata`` key is treated the same as an empty one.
    surviving_metadata = processed.get("metadata", {})
    assert "model_info" not in surviving_metadata
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_add_litellm_data_to_request_skips_strip_with_key_opt_in():
    """Key metadata ``allow_client_pricing_override`` keeps client pricing intact."""
    request_body = {
        "model": "gpt-4",
        "messages": [{"role": "user", "content": "hi"}],
        "input_cost_per_token": 0.0001,
        "metadata": {"model_info": {"output_cost_per_token": 0.0002}},
    }
    opted_in_key = _user_api_key_auth(
        metadata={"allow_client_pricing_override": True}
    )

    call_kwargs = dict(
        data=request_body,
        request=_make_request_mock(),
        user_api_key_dict=opted_in_key,
        proxy_config=MagicMock(),
        general_settings={},
        version="test-version",
    )
    processed = await add_litellm_data_to_request(**call_kwargs)

    # Both the root-level field and the nested model_info must pass through.
    assert processed["input_cost_per_token"] == 0.0001
    assert processed["metadata"]["model_info"] == {"output_cost_per_token": 0.0002}
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_add_litellm_data_to_request_strips_json_string_litellm_metadata():
    """``model_info`` is stripped even when ``litellm_metadata`` is a JSON string.

    ``litellm_metadata`` may arrive JSON-encoded (multipart/form-data or
    ``extra_body``). The strip has to run after the proxy parses it into a
    dict; otherwise the ``isinstance(dict)`` guard skips the field and
    ``model_info`` survives the strip via the string path.
    """
    import json

    encoded_metadata = json.dumps({"model_info": {"input_cost_per_token": 0.0}})
    request_body = {
        "model": "gpt-4",
        "messages": [{"role": "user", "content": "hi"}],
        "litellm_metadata": encoded_metadata,
    }

    processed = await add_litellm_data_to_request(
        data=request_body,
        request=_make_request_mock(),
        user_api_key_dict=_user_api_key_auth(),
        proxy_config=MagicMock(),
        general_settings={},
        version="test-version",
    )

    decoded_metadata = processed.get("litellm_metadata")
    # The string must have been parsed into a dict, and then stripped.
    assert isinstance(decoded_metadata, dict)
    assert "model_info" not in decoded_metadata
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_add_litellm_data_to_request_skips_strip_with_team_opt_in():
    """Team metadata ``allow_client_pricing_override`` keeps client pricing intact."""
    request_body = {
        "model": "gpt-4",
        "messages": [{"role": "user", "content": "hi"}],
        "input_cost_per_token": 0.0001,
    }
    opted_in_team = _user_api_key_auth(
        team_metadata={"allow_client_pricing_override": True}
    )

    call_kwargs = dict(
        data=request_body,
        request=_make_request_mock(),
        user_api_key_dict=opted_in_team,
        proxy_config=MagicMock(),
        general_settings={},
        version="test-version",
    )
    processed = await add_litellm_data_to_request(**call_kwargs)

    assert processed["input_cost_per_token"] == 0.0001
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_global_model_cost_unmutated_after_stripped_request(monkeypatch):
    """After a stripped request, ``litellm.model_cost`` must not carry the
    caller's submitted pricing for the model. The mutation only happens when
    the pricing fields reach ``litellm.completion``; the strip prevents that.

    The ``monkeypatch`` fixture is accepted but not used by this test.
    """
    import copy

    # Deep copy on purpose: a shallow ``dict(...)`` would share the nested
    # per-model dicts with the live map, so an in-place edit of an existing
    # entry would change the snapshot as well and slip past the equality
    # check at the end of the test.
    snapshot = copy.deepcopy(litellm.model_cost)
    data = {
        "model": "test-pricing-canary-model",
        "messages": [{"role": "user", "content": "hi"}],
        "input_cost_per_token": 0.0,
        "output_cost_per_token": 0.0,
    }

    await add_litellm_data_to_request(
        data=data,
        request=_make_request_mock(),
        user_api_key_dict=_user_api_key_auth(),
        proxy_config=MagicMock(),
        general_settings={},
        version="test-version",
    )

    # The strip prevents the pricing fields from ever reaching the path that
    # would mutate the global model_cost map.
    assert "test-pricing-canary-model" not in litellm.model_cost
    # And no other entries were mutated as a side effect.
    assert litellm.model_cost == snapshot
|
||||
34
tests/test_litellm/proxy/test_sensitive_route_auth.py
Normal file
34
tests/test_litellm/proxy/test_sensitive_route_auth.py
Normal file
@ -0,0 +1,34 @@
|
||||
from fastapi.routing import APIRoute
|
||||
|
||||
from litellm.proxy.auth.user_api_key_auth import user_api_key_auth
|
||||
from litellm.proxy.common_utils.debug_utils import router as debug_router
|
||||
from litellm.proxy.spend_tracking.spend_management_endpoints import (
|
||||
router as spend_router,
|
||||
)
|
||||
|
||||
|
||||
def _get_route_dependency_calls(router, path: str, method: str):
    """Return the dependency callables of the first matching ``APIRoute``.

    Args:
        router: Object whose ``routes`` attribute is iterated.
        path: Exact route path to match.
        method: HTTP method that must be in the route's ``methods``.

    Returns:
        The ``call`` attribute of each entry in the matching route's
        ``dependant.dependencies``.

    Raises:
        AssertionError: If no ``APIRoute`` matches ``path`` and ``method``.
    """
    for candidate in router.routes:
        if not isinstance(candidate, APIRoute):
            continue
        if candidate.path != path or method not in candidate.methods:
            continue
        return [dep.call for dep in candidate.dependant.dependencies]
    raise AssertionError(f"Route {method} {path} not found")
|
||||
|
||||
|
||||
def test_sensitive_debug_routes_require_auth_dependency():
    """Each listed debug route declares ``user_api_key_auth`` as a dependency."""
    sensitive_routes = [
        ("/debug/asyncio-tasks", "GET"),
        ("/otel-spans", "GET"),
    ]
    for route_path, http_method in sensitive_routes:
        dependency_calls = _get_route_dependency_calls(
            debug_router, route_path, http_method
        )
        assert user_api_key_auth in dependency_calls
|
||||
|
||||
|
||||
def test_provider_budgets_requires_auth_dependency():
    """``GET /provider/budgets`` declares ``user_api_key_auth`` as a dependency."""
    dependency_calls = _get_route_dependency_calls(
        spend_router, "/provider/budgets", "GET"
    )
    assert user_api_key_auth in dependency_calls
|
||||
@ -868,6 +868,7 @@ class TestProxySettingEndpoints:
|
||||
mock_db_record = MagicMock()
|
||||
mock_db_record.ui_settings = {
|
||||
"disable_model_add_for_internal_users": True,
|
||||
"require_auth_for_public_ai_hub": True,
|
||||
"unexpected_flag": True,
|
||||
}
|
||||
mock_prisma.db.litellm_uisettings.find_unique = AsyncMock(
|
||||
@ -880,10 +881,12 @@ class TestProxySettingEndpoints:
|
||||
assert response.status_code == 200
|
||||
data = response.json()
|
||||
assert data["values"]["disable_model_add_for_internal_users"] is True
|
||||
assert data["values"]["require_auth_for_public_ai_hub"] is True
|
||||
assert "unexpected_flag" not in data["values"]
|
||||
assert (
|
||||
"disable_model_add_for_internal_users" in data["field_schema"]["properties"]
|
||||
)
|
||||
assert "require_auth_for_public_ai_hub" in data["field_schema"]["properties"]
|
||||
mock_prisma.db.litellm_uisettings.find_unique.assert_called_once_with(
|
||||
where={"id": "ui_settings"}
|
||||
)
|
||||
@ -1070,6 +1073,43 @@ class TestProxySettingEndpoints:
|
||||
assert "unsupported_flag" not in stored_settings
|
||||
assert stored_settings["disable_model_add_for_internal_users"] is False
|
||||
|
||||
def test_update_ui_settings_preserves_public_ai_hub_auth_flag(
    self, mock_auth, monkeypatch
):
    """Public AI Hub auth is an existing UI setting and must remain writable.

    PATCHes ``require_auth_for_public_ai_hub`` as a proxy admin against a
    mocked Prisma client, then checks both the response and the JSON handed
    to ``upsert``.
    """
    from unittest.mock import AsyncMock, MagicMock

    from litellm.proxy._types import UserAPIKeyAuth
    from litellm.proxy.auth.user_api_key_auth import user_api_key_auth

    # Mocked DB layer: no existing settings row, upsert is recorded.
    prisma_stub = MagicMock()
    ui_settings_table = prisma_stub.db.litellm_uisettings
    ui_settings_table.upsert = AsyncMock()
    ui_settings_table.find_unique = AsyncMock(return_value=None)
    monkeypatch.setattr("litellm.proxy.proxy_server.store_model_in_db", True)
    monkeypatch.setattr("litellm.proxy.proxy_server.prisma_client", prisma_stub)

    admin_identity = UserAPIKeyAuth(
        user_id="test-user-123",
        user_role=LitellmUserRoles.PROXY_ADMIN,
    )
    app.dependency_overrides[user_api_key_auth] = lambda: admin_identity
    try:
        resp = client.patch(
            "/update/ui_settings", json={"require_auth_for_public_ai_hub": True}
        )
    finally:
        # Always drop the override so other tests see the real dependency.
        app.dependency_overrides.clear()

    assert resp.status_code == 200
    body = resp.json()
    assert body["status"] == "success"
    assert body["settings"]["require_auth_for_public_ai_hub"] is True

    upsert_kwargs = ui_settings_table.upsert.call_args.kwargs
    persisted = json.loads(upsert_kwargs["data"]["create"]["ui_settings"])
    assert persisted["require_auth_for_public_ai_hub"] is True
|
||||
|
||||
def test_update_ui_settings_persists_forward_llm_provider_auth_headers(
|
||||
self, mock_auth, monkeypatch
|
||||
):
|
||||
@ -1147,6 +1187,43 @@ class TestProxySettingEndpoints:
|
||||
assert response.status_code == 200
|
||||
assert general_settings.get("forward_llm_provider_auth_headers") is True
|
||||
|
||||
def test_update_ui_settings_syncs_public_health_readiness_details_to_general_settings(
    self, mock_auth, monkeypatch
):
    """Public readiness details flag must be synced so the health route sees it.

    PATCHes ``allow_public_health_readiness_details`` as a proxy admin and
    checks that the patched-in ``general_settings`` dict received the flag.
    """
    from unittest.mock import AsyncMock, MagicMock

    from litellm.proxy._types import UserAPIKeyAuth
    from litellm.proxy.auth.user_api_key_auth import user_api_key_auth

    # Fresh dict installed as the proxy's general_settings so the sync can be
    # observed on this object after the request.
    synced_settings: dict = {}
    monkeypatch.setattr("litellm.proxy.proxy_server.store_model_in_db", True)
    monkeypatch.setattr(
        "litellm.proxy.proxy_server.general_settings", synced_settings
    )

    prisma_stub = MagicMock()
    ui_settings_table = prisma_stub.db.litellm_uisettings
    ui_settings_table.upsert = AsyncMock()
    ui_settings_table.find_unique = AsyncMock(return_value=None)
    monkeypatch.setattr("litellm.proxy.proxy_server.prisma_client", prisma_stub)

    admin_identity = UserAPIKeyAuth(
        user_id="test-user-123",
        user_role=LitellmUserRoles.PROXY_ADMIN,
    )
    app.dependency_overrides[user_api_key_auth] = lambda: admin_identity
    try:
        resp = client.patch(
            "/update/ui_settings",
            json={"allow_public_health_readiness_details": True},
        )
    finally:
        # Always drop the override so other tests see the real dependency.
        app.dependency_overrides.clear()

    assert resp.status_code == 200
    assert synced_settings.get("allow_public_health_readiness_details") is True
|
||||
|
||||
def test_update_ui_settings_persists_and_syncs_disable_key_generate_for_org_admin(
|
||||
self, mock_auth, monkeypatch
|
||||
):
|
||||
|
||||
208
ui/litellm-dashboard/package-lock.json
generated
208
ui/litellm-dashboard/package-lock.json
generated
@ -8,12 +8,12 @@
|
||||
"name": "litellm-dashboard",
|
||||
"version": "0.1.0",
|
||||
"dependencies": {
|
||||
"@anthropic-ai/sdk": "0.54.0",
|
||||
"@anthropic-ai/sdk": "0.92.0",
|
||||
"@headlessui/tailwindcss": "0.2.2",
|
||||
"@heroicons/react": "1.0.6",
|
||||
"@remixicon/react": "4.9.0",
|
||||
"@tanstack/react-pacer": "0.2.0",
|
||||
"@tanstack/react-query": "5.90.20",
|
||||
"@tanstack/react-query": "5.100.7",
|
||||
"@tanstack/react-table": "8.21.3",
|
||||
"@tremor/react": "3.18.7",
|
||||
"@types/papaparse": "5.5.2",
|
||||
@ -23,18 +23,18 @@
|
||||
"jwt-decode": "4.0.0",
|
||||
"lucide-react": "0.513.0",
|
||||
"moment": "2.30.1",
|
||||
"next": "16.1.7",
|
||||
"next": "16.2.4",
|
||||
"openai": "4.104.0",
|
||||
"papaparse": "5.5.3",
|
||||
"react": "18.3.1",
|
||||
"react-copy-to-clipboard": "5.1.0",
|
||||
"react-copy-to-clipboard": "5.1.1",
|
||||
"react-dom": "18.3.1",
|
||||
"react-json-view-lite": "2.5.0",
|
||||
"react-markdown": "9.1.0",
|
||||
"react-syntax-highlighter": "15.6.6",
|
||||
"remark-gfm": "4.0.1",
|
||||
"tailwind-merge": "3.4.0",
|
||||
"uuid": "11.1.0"
|
||||
"uuid": "14.0.0"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@playwright/test": "1.58.1",
|
||||
@ -61,7 +61,7 @@
|
||||
"eslint-plugin-unused-imports": "4.3.0",
|
||||
"jsdom": "27.4.0",
|
||||
"knip": "5.83.1",
|
||||
"postcss": "8.5.6",
|
||||
"postcss": "8.5.13",
|
||||
"prettier": "3.2.5",
|
||||
"tailwindcss": "3.4.19",
|
||||
"typescript": "5.9.3",
|
||||
@ -69,7 +69,7 @@
|
||||
"vitest": "3.2.4"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=18.17.0",
|
||||
"node": ">=20.9.0",
|
||||
"npm": ">=8.3.0"
|
||||
}
|
||||
},
|
||||
@ -211,12 +211,23 @@
|
||||
}
|
||||
},
|
||||
"node_modules/@anthropic-ai/sdk": {
|
||||
"version": "0.54.0",
|
||||
"resolved": "https://registry.npmjs.org/@anthropic-ai/sdk/-/sdk-0.54.0.tgz",
|
||||
"integrity": "sha512-xyoCtHJnt/qg5GG6IgK+UJEndz8h8ljzt/caKXmq3LfBF81nC/BW6E4x2rOWCZcvsLyVW+e8U5mtIr6UCE/kJw==",
|
||||
"version": "0.92.0",
|
||||
"resolved": "https://registry.npmjs.org/@anthropic-ai/sdk/-/sdk-0.92.0.tgz",
|
||||
"integrity": "sha512-l653JFC83wCglH8H83t1xpgDurCyPyslYW1maPRdCsfuNuGbLvQjQ81sWd3Go3LWRm0jNspzAhuqAYV8r9joSw==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"json-schema-to-ts": "^3.1.1"
|
||||
},
|
||||
"bin": {
|
||||
"anthropic-ai-sdk": "bin/cli"
|
||||
},
|
||||
"peerDependencies": {
|
||||
"zod": "^3.25.0 || ^4.0.0"
|
||||
},
|
||||
"peerDependenciesMeta": {
|
||||
"zod": {
|
||||
"optional": true
|
||||
}
|
||||
}
|
||||
},
|
||||
"node_modules/@asamuzakjp/css-color": {
|
||||
@ -1817,9 +1828,9 @@
|
||||
}
|
||||
},
|
||||
"node_modules/@next/env": {
|
||||
"version": "16.1.7",
|
||||
"resolved": "https://registry.npmjs.org/@next/env/-/env-16.1.7.tgz",
|
||||
"integrity": "sha512-rJJbIdJB/RQr2F1nylZr/PJzamvNNhfr3brdKP6s/GW850jbtR70QlSfFselvIBbcPUOlQwBakexjFzqLzF6pg==",
|
||||
"version": "16.2.4",
|
||||
"resolved": "https://registry.npmjs.org/@next/env/-/env-16.2.4.tgz",
|
||||
"integrity": "sha512-dKkkOzOSwFYe5RX6y26fZgkSpVAlIOJKQHIiydQcrWH6y/97+RceSOAdjZ14Qa3zLduVUy0TXcn+EiM6t4rPgw==",
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/@next/eslint-plugin-next": {
|
||||
@ -1833,9 +1844,9 @@
|
||||
}
|
||||
},
|
||||
"node_modules/@next/swc-darwin-arm64": {
|
||||
"version": "16.1.7",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-darwin-arm64/-/swc-darwin-arm64-16.1.7.tgz",
|
||||
"integrity": "sha512-b2wWIE8sABdyafc4IM8r5Y/dS6kD80JRtOGrUiKTsACFQfWWgUQ2NwoUX1yjFMXVsAwcQeNpnucF2ZrujsBBPg==",
|
||||
"version": "16.2.4",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-darwin-arm64/-/swc-darwin-arm64-16.2.4.tgz",
|
||||
"integrity": "sha512-OXTFFox5EKN1Ym08vfrz+OXxmCcEjT4SFMbNRsWZE99dMqt2Kcusl5MqPXcW232RYkMLQTy0hqgAMEsfEd/l2A==",
|
||||
"cpu": [
|
||||
"arm64"
|
||||
],
|
||||
@ -1849,9 +1860,9 @@
|
||||
}
|
||||
},
|
||||
"node_modules/@next/swc-darwin-x64": {
|
||||
"version": "16.1.7",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-darwin-x64/-/swc-darwin-x64-16.1.7.tgz",
|
||||
"integrity": "sha512-zcnVaaZulS1WL0Ss38R5Q6D2gz7MtBu8GZLPfK+73D/hp4GFMrC2sudLky1QibfV7h6RJBJs/gOFvYP0X7UVlQ==",
|
||||
"version": "16.2.4",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-darwin-x64/-/swc-darwin-x64-16.2.4.tgz",
|
||||
"integrity": "sha512-XhpVnUfmYWvD3YrXu55XdcAkQtOnvaI6wtQa8fuF5fGoKoxIUZ0kWPtcOfqJEWngFF/lOS9l3+O9CcownhiQxQ==",
|
||||
"cpu": [
|
||||
"x64"
|
||||
],
|
||||
@ -1865,12 +1876,15 @@
|
||||
}
|
||||
},
|
||||
"node_modules/@next/swc-linux-arm64-gnu": {
|
||||
"version": "16.1.7",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-linux-arm64-gnu/-/swc-linux-arm64-gnu-16.1.7.tgz",
|
||||
"integrity": "sha512-2ant89Lux/Q3VyC8vNVg7uBaFVP9SwoK2jJOOR0L8TQnX8CAYnh4uctAScy2Hwj2dgjVHqHLORQZJ2wH6VxhSQ==",
|
||||
"version": "16.2.4",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-linux-arm64-gnu/-/swc-linux-arm64-gnu-16.2.4.tgz",
|
||||
"integrity": "sha512-Mx/tjlNA3G8kg14QvuGAJ4xBwPk1tUHq56JxZ8CXnZwz1Etz714soCEzGQQzVMz4bEnGPowzkV6Xrp6wAkEWOQ==",
|
||||
"cpu": [
|
||||
"arm64"
|
||||
],
|
||||
"libc": [
|
||||
"glibc"
|
||||
],
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
@ -1881,12 +1895,15 @@
|
||||
}
|
||||
},
|
||||
"node_modules/@next/swc-linux-arm64-musl": {
|
||||
"version": "16.1.7",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-linux-arm64-musl/-/swc-linux-arm64-musl-16.1.7.tgz",
|
||||
"integrity": "sha512-uufcze7LYv0FQg9GnNeZ3/whYfo+1Q3HnQpm16o6Uyi0OVzLlk2ZWoY7j07KADZFY8qwDbsmFnMQP3p3+Ftprw==",
|
||||
"version": "16.2.4",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-linux-arm64-musl/-/swc-linux-arm64-musl-16.2.4.tgz",
|
||||
"integrity": "sha512-iVMMp14514u7Nup2umQS03nT/bN9HurK8ufylC3FZNykrwjtx7V1A7+4kvhbDSCeonTVqV3Txnv0Lu+m2oDXNg==",
|
||||
"cpu": [
|
||||
"arm64"
|
||||
],
|
||||
"libc": [
|
||||
"musl"
|
||||
],
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
@ -1897,12 +1914,15 @@
|
||||
}
|
||||
},
|
||||
"node_modules/@next/swc-linux-x64-gnu": {
|
||||
"version": "16.1.7",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-linux-x64-gnu/-/swc-linux-x64-gnu-16.1.7.tgz",
|
||||
"integrity": "sha512-KWVf2gxYvHtvuT+c4MBOGxuse5TD7DsMFYSxVxRBnOzok/xryNeQSjXgxSv9QpIVlaGzEn/pIuI6Koosx8CGWA==",
|
||||
"version": "16.2.4",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-linux-x64-gnu/-/swc-linux-x64-gnu-16.2.4.tgz",
|
||||
"integrity": "sha512-EZOvm1aQWgnI/N/xcWOlnS3RQBk0VtVav5Zo7n4p0A7UKyTDx047k8opDbXgBpHl4CulRqRfbw3QrX2w5UOXMQ==",
|
||||
"cpu": [
|
||||
"x64"
|
||||
],
|
||||
"libc": [
|
||||
"glibc"
|
||||
],
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
@ -1913,12 +1933,15 @@
|
||||
}
|
||||
},
|
||||
"node_modules/@next/swc-linux-x64-musl": {
|
||||
"version": "16.1.7",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-linux-x64-musl/-/swc-linux-x64-musl-16.1.7.tgz",
|
||||
"integrity": "sha512-HguhaGwsGr1YAGs68uRKc4aGWxLET+NevJskOcCAwXbwj0fYX0RgZW2gsOCzr9S11CSQPIkxmoSbuVaBp4Z3dA==",
|
||||
"version": "16.2.4",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-linux-x64-musl/-/swc-linux-x64-musl-16.2.4.tgz",
|
||||
"integrity": "sha512-h9FxsngCm9cTBf71AR4fGznDEDx1hS7+kSEiIRjq5kO1oXWm07DxVGZjCvk0SGx7TSjlUqhI8oOyz7NfwAdPoA==",
|
||||
"cpu": [
|
||||
"x64"
|
||||
],
|
||||
"libc": [
|
||||
"musl"
|
||||
],
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
@ -1929,9 +1952,9 @@
|
||||
}
|
||||
},
|
||||
"node_modules/@next/swc-win32-arm64-msvc": {
|
||||
"version": "16.1.7",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-win32-arm64-msvc/-/swc-win32-arm64-msvc-16.1.7.tgz",
|
||||
"integrity": "sha512-S0n3KrDJokKTeFyM/vGGGR8+pCmXYrjNTk2ZozOL1C/JFdfUIL9O1ATaJOl5r2POe56iRChbsszrjMAdWSv7kQ==",
|
||||
"version": "16.2.4",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-win32-arm64-msvc/-/swc-win32-arm64-msvc-16.2.4.tgz",
|
||||
"integrity": "sha512-3NdJV5OXMSOeJYijX+bjaLge3mJBlh4ybydbT4GFoB/2hAojWHtMhl3CYlYoMrjPuodp0nzFVi4Tj2+WaMg+Ow==",
|
||||
"cpu": [
|
||||
"arm64"
|
||||
],
|
||||
@ -1945,9 +1968,9 @@
|
||||
}
|
||||
},
|
||||
"node_modules/@next/swc-win32-x64-msvc": {
|
||||
"version": "16.1.7",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-win32-x64-msvc/-/swc-win32-x64-msvc-16.1.7.tgz",
|
||||
"integrity": "sha512-mwgtg8CNZGYm06LeEd+bNnOUfwOyNem/rOiP14Lsz+AnUY92Zq/LXwtebtUiaeVkhbroRCQ0c8GlR4UT1U+0yg==",
|
||||
"version": "16.2.4",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-win32-x64-msvc/-/swc-win32-x64-msvc-16.2.4.tgz",
|
||||
"integrity": "sha512-kMVGgsqhO5YTYODD9IPGGhA6iprWidQckK3LmPeW08PIFENRmgfb4MjXHO+p//d+ts2rpjvK5gXWzXSMrPl9cw==",
|
||||
"cpu": [
|
||||
"x64"
|
||||
],
|
||||
@ -2981,9 +3004,9 @@
|
||||
}
|
||||
},
|
||||
"node_modules/@tanstack/query-core": {
|
||||
"version": "5.90.20",
|
||||
"resolved": "https://registry.npmjs.org/@tanstack/query-core/-/query-core-5.90.20.tgz",
|
||||
"integrity": "sha512-OMD2HLpNouXEfZJWcKeVKUgQ5n+n3A2JFmBaScpNDUqSrQSjiveC7dKMe53uJUg1nDG16ttFPz2xfilz6i2uVg==",
|
||||
"version": "5.100.7",
|
||||
"resolved": "https://registry.npmjs.org/@tanstack/query-core/-/query-core-5.100.7.tgz",
|
||||
"integrity": "sha512-5R7i6ENJLhVeeJrrUz7jKBXUXv/BJrxf9FQJSkR13bPrb3zOcE8A0Z0PxYCcsKPOsiIlTibrBL/zZbtUO1TFyQ==",
|
||||
"license": "MIT",
|
||||
"funding": {
|
||||
"type": "github",
|
||||
@ -3011,12 +3034,12 @@
|
||||
}
|
||||
},
|
||||
"node_modules/@tanstack/react-query": {
|
||||
"version": "5.90.20",
|
||||
"resolved": "https://registry.npmjs.org/@tanstack/react-query/-/react-query-5.90.20.tgz",
|
||||
"integrity": "sha512-vXBxa+qeyveVO7OA0jX1z+DeyCA4JKnThKv411jd5SORpBKgkcVnYKCiBgECvADvniBX7tobwBmg01qq9JmMJw==",
|
||||
"version": "5.100.7",
|
||||
"resolved": "https://registry.npmjs.org/@tanstack/react-query/-/react-query-5.100.7.tgz",
|
||||
"integrity": "sha512-LoISYWz8dOOuQbeIctF8K6yi42TWtR1WPGpwGuRUpF3u79JVVIg/PVR0MQdIA0VSHqD/ydf/b7PhKTkg3I4fLQ==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"@tanstack/query-core": "5.90.20"
|
||||
"@tanstack/query-core": "5.100.7"
|
||||
},
|
||||
"funding": {
|
||||
"type": "github",
|
||||
@ -7872,6 +7895,19 @@
|
||||
"dev": true,
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/json-schema-to-ts": {
|
||||
"version": "3.1.1",
|
||||
"resolved": "https://registry.npmjs.org/json-schema-to-ts/-/json-schema-to-ts-3.1.1.tgz",
|
||||
"integrity": "sha512-+DWg8jCJG2TEnpy7kOm/7/AxaYoaRbjVB4LFZLySZlWn8exGs3A4OLJR966cVvU26N7X9TWxl+Jsw7dzAqKT6g==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"@babel/runtime": "^7.18.3",
|
||||
"ts-algebra": "^2.0.0"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=16"
|
||||
}
|
||||
},
|
||||
"node_modules/json-schema-traverse": {
|
||||
"version": "0.4.1",
|
||||
"resolved": "https://registry.npmjs.org/json-schema-traverse/-/json-schema-traverse-0.4.1.tgz",
|
||||
@ -9299,12 +9335,12 @@
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/next": {
|
||||
"version": "16.1.7",
|
||||
"resolved": "https://registry.npmjs.org/next/-/next-16.1.7.tgz",
|
||||
"integrity": "sha512-WM0L7WrSvKwoLegLYr6V+mz+RIofqQgVAfHhMp9a88ms0cFX8iX9ew+snpWlSBwpkURJOUdvCEt3uLl3NNzvWg==",
|
||||
"version": "16.2.4",
|
||||
"resolved": "https://registry.npmjs.org/next/-/next-16.2.4.tgz",
|
||||
"integrity": "sha512-kPvz56wF5frc+FxlHI5qnklCzbq53HTwORaWBGdT0vNoKh1Aya9XC8aPauH4NJxqtzbWsS5mAbctm4cr+EkQ2Q==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"@next/env": "16.1.7",
|
||||
"@next/env": "16.2.4",
|
||||
"@swc/helpers": "0.5.15",
|
||||
"baseline-browser-mapping": "^2.9.19",
|
||||
"caniuse-lite": "^1.0.30001579",
|
||||
@ -9318,15 +9354,15 @@
|
||||
"node": ">=20.9.0"
|
||||
},
|
||||
"optionalDependencies": {
|
||||
"@next/swc-darwin-arm64": "16.1.7",
|
||||
"@next/swc-darwin-x64": "16.1.7",
|
||||
"@next/swc-linux-arm64-gnu": "16.1.7",
|
||||
"@next/swc-linux-arm64-musl": "16.1.7",
|
||||
"@next/swc-linux-x64-gnu": "16.1.7",
|
||||
"@next/swc-linux-x64-musl": "16.1.7",
|
||||
"@next/swc-win32-arm64-msvc": "16.1.7",
|
||||
"@next/swc-win32-x64-msvc": "16.1.7",
|
||||
"sharp": "^0.34.4"
|
||||
"@next/swc-darwin-arm64": "16.2.4",
|
||||
"@next/swc-darwin-x64": "16.2.4",
|
||||
"@next/swc-linux-arm64-gnu": "16.2.4",
|
||||
"@next/swc-linux-arm64-musl": "16.2.4",
|
||||
"@next/swc-linux-x64-gnu": "16.2.4",
|
||||
"@next/swc-linux-x64-musl": "16.2.4",
|
||||
"@next/swc-win32-arm64-msvc": "16.2.4",
|
||||
"@next/swc-win32-x64-msvc": "16.2.4",
|
||||
"sharp": "^0.34.5"
|
||||
},
|
||||
"peerDependencies": {
|
||||
"@opentelemetry/api": "^1.1.0",
|
||||
@ -9360,34 +9396,6 @@
|
||||
"tslib": "^2.8.0"
|
||||
}
|
||||
},
|
||||
"node_modules/next/node_modules/postcss": {
|
||||
"version": "8.4.31",
|
||||
"resolved": "https://registry.npmjs.org/postcss/-/postcss-8.4.31.tgz",
|
||||
"integrity": "sha512-PS08Iboia9mts/2ygV3eLpY5ghnUcfLV/EXTOW1E2qYxJKGGBUtNjN76FYHnMs36RmARn41bC0AZmn+rR0OVpQ==",
|
||||
"funding": [
|
||||
{
|
||||
"type": "opencollective",
|
||||
"url": "https://opencollective.com/postcss/"
|
||||
},
|
||||
{
|
||||
"type": "tidelift",
|
||||
"url": "https://tidelift.com/funding/github/npm/postcss"
|
||||
},
|
||||
{
|
||||
"type": "github",
|
||||
"url": "https://github.com/sponsors/ai"
|
||||
}
|
||||
],
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"nanoid": "^3.3.6",
|
||||
"picocolors": "^1.0.0",
|
||||
"source-map-js": "^1.0.2"
|
||||
},
|
||||
"engines": {
|
||||
"node": "^10 || ^12 || >=14"
|
||||
}
|
||||
},
|
||||
"node_modules/node-domexception": {
|
||||
"version": "1.0.0",
|
||||
"resolved": "https://registry.npmjs.org/node-domexception/-/node-domexception-1.0.0.tgz",
|
||||
@ -9938,9 +9946,9 @@
|
||||
}
|
||||
},
|
||||
"node_modules/postcss": {
|
||||
"version": "8.5.6",
|
||||
"resolved": "https://registry.npmjs.org/postcss/-/postcss-8.5.6.tgz",
|
||||
"integrity": "sha512-3Ybi1tAuwAP9s0r1UQ2J4n5Y0G05bJkpUIO0/bI9MhwmD70S5aTWbXGBwxHrelT+XM1k6dM0pk+SwNkpTRN7Pg==",
|
||||
"version": "8.5.13",
|
||||
"resolved": "https://registry.npmjs.org/postcss/-/postcss-8.5.13.tgz",
|
||||
"integrity": "sha512-qif0+jGGZoLWdHey3UFHHWP0H7Gbmsk8T5VEqyYFbWqPr1XqvLGBbk/sl8V5exGmcYJklJOhOQq1pV9IcsiFag==",
|
||||
"funding": [
|
||||
{
|
||||
"type": "opencollective",
|
||||
@ -10838,16 +10846,16 @@
|
||||
}
|
||||
},
|
||||
"node_modules/react-copy-to-clipboard": {
|
||||
"version": "5.1.0",
|
||||
"resolved": "https://registry.npmjs.org/react-copy-to-clipboard/-/react-copy-to-clipboard-5.1.0.tgz",
|
||||
"integrity": "sha512-k61RsNgAayIJNoy9yDsYzDe/yAZAzEbEgcz3DZMhF686LEyukcE1hzurxe85JandPUG+yTfGVFzuEw3xt8WP/A==",
|
||||
"version": "5.1.1",
|
||||
"resolved": "https://registry.npmjs.org/react-copy-to-clipboard/-/react-copy-to-clipboard-5.1.1.tgz",
|
||||
"integrity": "sha512-s+HrzLyJBxrpGTYXF15dTgMjAJpEPZT/Yp6NytAtZMRngejxt6Pt5WrfFxLAcsqUDU6sY1Jz6tyHwIicE1U2Xg==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"copy-to-clipboard": "^3.3.1",
|
||||
"copy-to-clipboard": "^3.3.3",
|
||||
"prop-types": "^15.8.1"
|
||||
},
|
||||
"peerDependencies": {
|
||||
"react": "^15.3.0 || 16 || 17 || 18"
|
||||
"react": ">=15.3.0"
|
||||
}
|
||||
},
|
||||
"node_modules/react-day-picker": {
|
||||
@ -12374,6 +12382,12 @@
|
||||
"url": "https://github.com/sponsors/wooorm"
|
||||
}
|
||||
},
|
||||
"node_modules/ts-algebra": {
|
||||
"version": "2.0.0",
|
||||
"resolved": "https://registry.npmjs.org/ts-algebra/-/ts-algebra-2.0.0.tgz",
|
||||
"integrity": "sha512-FPAhNPFMrkwz76P7cdjdmiShwMynZYN6SgOujD1urY4oNm80Ou9oMdmbR45LotcKOXoy7wSmHkRFE6Mxbrhefw==",
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/ts-api-utils": {
|
||||
"version": "2.4.0",
|
||||
"resolved": "https://registry.npmjs.org/ts-api-utils/-/ts-api-utils-2.4.0.tgz",
|
||||
@ -12712,16 +12726,16 @@
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/uuid": {
|
||||
"version": "11.1.0",
|
||||
"resolved": "https://registry.npmjs.org/uuid/-/uuid-11.1.0.tgz",
|
||||
"integrity": "sha512-0/A9rDy9P7cJ+8w1c9WD9V//9Wj15Ce2MPz8Ri6032usz+NfePxx5AcN3bN+r6ZL6jEo066/yNYB3tn4pQEx+A==",
|
||||
"version": "14.0.0",
|
||||
"resolved": "https://registry.npmjs.org/uuid/-/uuid-14.0.0.tgz",
|
||||
"integrity": "sha512-Qo+uWgilfSmAhXCMav1uYFynlQO7fMFiMVZsQqZRMIXp0O7rR7qjkj+cPvBHLgBqi960QCoo/PH2/6ZtVqKvrg==",
|
||||
"funding": [
|
||||
"https://github.com/sponsors/broofa",
|
||||
"https://github.com/sponsors/ctavan"
|
||||
],
|
||||
"license": "MIT",
|
||||
"bin": {
|
||||
"uuid": "dist/esm/bin/uuid"
|
||||
"uuid": "dist-node/bin/uuid"
|
||||
}
|
||||
},
|
||||
"node_modules/vfile": {
|
||||
|
||||
@ -20,12 +20,12 @@
|
||||
"knip:fix": "knip --fix"
|
||||
},
|
||||
"dependencies": {
|
||||
"@anthropic-ai/sdk": "0.54.0",
|
||||
"@anthropic-ai/sdk": "0.92.0",
|
||||
"@headlessui/tailwindcss": "0.2.2",
|
||||
"@heroicons/react": "1.0.6",
|
||||
"@remixicon/react": "4.9.0",
|
||||
"@tanstack/react-pacer": "0.2.0",
|
||||
"@tanstack/react-query": "5.90.20",
|
||||
"@tanstack/react-query": "5.100.7",
|
||||
"@tanstack/react-table": "8.21.3",
|
||||
"@tremor/react": "3.18.7",
|
||||
"@types/papaparse": "5.5.2",
|
||||
@ -35,18 +35,18 @@
|
||||
"jwt-decode": "4.0.0",
|
||||
"lucide-react": "0.513.0",
|
||||
"moment": "2.30.1",
|
||||
"next": "16.1.7",
|
||||
"next": "16.2.4",
|
||||
"openai": "4.104.0",
|
||||
"papaparse": "5.5.3",
|
||||
"react": "18.3.1",
|
||||
"react-copy-to-clipboard": "5.1.0",
|
||||
"react-copy-to-clipboard": "5.1.1",
|
||||
"react-dom": "18.3.1",
|
||||
"react-json-view-lite": "2.5.0",
|
||||
"react-markdown": "9.1.0",
|
||||
"react-syntax-highlighter": "15.6.6",
|
||||
"remark-gfm": "4.0.1",
|
||||
"tailwind-merge": "3.4.0",
|
||||
"uuid": "11.1.0"
|
||||
"uuid": "14.0.0"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@playwright/test": "1.58.1",
|
||||
@ -73,7 +73,7 @@
|
||||
"eslint-plugin-unused-imports": "4.3.0",
|
||||
"jsdom": "27.4.0",
|
||||
"knip": "5.83.1",
|
||||
"postcss": "8.5.6",
|
||||
"postcss": "8.5.13",
|
||||
"prettier": "3.2.5",
|
||||
"tailwindcss": "3.4.19",
|
||||
"typescript": "5.9.3",
|
||||
@ -88,10 +88,11 @@
|
||||
"lodash": "4.18.1",
|
||||
"ws": "8.19.0",
|
||||
"braces": "3.0.3",
|
||||
"axios": "1.13.6"
|
||||
"axios": "1.13.6",
|
||||
"postcss": "8.5.13"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=18.17.0",
|
||||
"node": ">=20.9.0",
|
||||
"npm": ">=8.3.0"
|
||||
}
|
||||
}
|
||||
|
||||
@ -0,0 +1,51 @@
|
||||
"use client";
|
||||
|
||||
import useAuthorized from "@/app/(dashboard)/hooks/useAuthorized";
|
||||
import { useMutation, useQuery, useQueryClient, UseMutationResult, UseQueryResult } from "@tanstack/react-query";
|
||||
import { getRouterSettingsCall, setCallbacksCall } from "@/components/networking";
|
||||
import { createQueryKeys } from "../common/queryKeysFactory";
|
||||
import type { RoutingGroup } from "@/components/routing_groups/types";
|
||||
|
||||
const routingGroupsKeys = createQueryKeys("routingGroups");
|
||||
|
||||
/** Normalized result of the routing-groups query. */
interface RoutingGroupsQueryData {
  /** Value of `current_values.routing_groups`, or an empty array when it is not an array. */
  routingGroups: Array<RoutingGroup>;
  /** Value of `current_values.routing_strategy`, or `null` when it is missing. */
  routingStrategy: string | null;
  /** `options` of the `routing_strategy` field, or an empty array when it is not an array. */
  availableStrategies: Array<string>;
}
|
||||
|
||||
/**
 * Load the router settings and reduce them to the routing-group view model.
 *
 * Missing or malformed parts of the response fall back to empty values
 * (`[]` for lists, `null` for the strategy) instead of throwing.
 */
const fetchRoutingGroups = async (accessToken: string): Promise<RoutingGroupsQueryData> => {
  const response = await getRouterSettingsCall(accessToken);

  const current = response?.current_values ?? {};
  const fieldList = Array.isArray(response?.fields) ? response.fields : [];

  // The available strategies are the `options` of the `routing_strategy` field descriptor.
  const strategyField = fieldList.find((field: any) => field?.field_name === "routing_strategy");

  const groups = current.routing_groups;
  const strategyOptions = strategyField?.options;

  return {
    routingGroups: Array.isArray(groups) ? groups : [],
    routingStrategy: current.routing_strategy ?? null,
    availableStrategies: Array.isArray(strategyOptions) ? strategyOptions : [],
  };
};
|
||||
|
||||
/**
 * Query hook that returns the routing groups, the current routing strategy
 * and the list of available strategies.
 *
 * The query stays disabled until `useAuthorized` provides an access token,
 * a user id and a user role.
 */
export const useRoutingGroups = (): UseQueryResult<RoutingGroupsQueryData> => {
  const { accessToken, userId, userRole } = useAuthorized();
  const isReady = Boolean(accessToken && userId && userRole);

  return useQuery<RoutingGroupsQueryData>({
    queryKey: routingGroupsKeys.lists(),
    // The non-null assertion is covered by `enabled`: the query function
    // does not run while the token is missing.
    queryFn: () => fetchRoutingGroups(accessToken!),
    enabled: isReady,
  });
};
|
||||
|
||||
/**
 * Mutation hook that saves the full list of routing groups.
 *
 * The list is sent as `router_settings.routing_groups` through
 * `setCallbacksCall`. On success the routing-groups list query is
 * invalidated so consumers of `useRoutingGroups` pick up the saved state.
 *
 * The mutation rejects with an `Error` when no access token is available,
 * instead of issuing a request with a missing token.
 */
export const useSaveRoutingGroups = (): UseMutationResult<unknown, Error, RoutingGroup[]> => {
  const { accessToken } = useAuthorized();
  const queryClient = useQueryClient();

  return useMutation({
    mutationFn: async (routingGroups: RoutingGroup[]) => {
      // Unlike the query hook there is no `enabled` gate here, so the token
      // has to be checked explicitly before it is used.
      if (!accessToken) {
        throw new Error("Access token is required");
      }
      return setCallbacksCall(accessToken, {
        router_settings: { routing_groups: routingGroups },
      });
    },
    onSuccess: () => {
      queryClient.invalidateQueries({ queryKey: routingGroupsKeys.lists() });
    },
  });
};
|
||||
@ -24,6 +24,7 @@ import { TrashIcon, CheckCircleIcon } from "@heroicons/react/outline";
|
||||
|
||||
import RouterSettings from "./router_settings";
|
||||
import Fallbacks from "./Settings/RouterSettings/Fallbacks/Fallbacks";
|
||||
import RoutingGroups from "./routing_groups";
|
||||
interface GeneralSettingsPageProps {
|
||||
accessToken: string | null;
|
||||
userRole: string | null;
|
||||
@ -110,8 +111,9 @@ const GeneralSettings: React.FC<GeneralSettingsPageProps> = ({ accessToken, user
|
||||
<TabGroup className="h-[75vh] w-full">
|
||||
<TabList variant="line" defaultValue="1" className="px-8 pt-4">
|
||||
<Tab value="1">Loadbalancing</Tab>
|
||||
<Tab value="2">Fallbacks</Tab>
|
||||
<Tab value="3">General</Tab>
|
||||
<Tab value="2">Routing Groups</Tab>
|
||||
<Tab value="3">Fallbacks</Tab>
|
||||
<Tab value="4">General</Tab>
|
||||
</TabList>
|
||||
<TabPanels className="px-8 py-6">
|
||||
<TabPanel>
|
||||
@ -122,6 +124,9 @@ const GeneralSettings: React.FC<GeneralSettingsPageProps> = ({ accessToken, user
|
||||
modelData={modelData}
|
||||
/>
|
||||
</TabPanel>
|
||||
<TabPanel>
|
||||
<RoutingGroups />
|
||||
</TabPanel>
|
||||
<TabPanel>
|
||||
<Fallbacks
|
||||
accessToken={accessToken}
|
||||
|
||||
@ -0,0 +1,194 @@
|
||||
"use client";
|
||||
|
||||
import React, { useMemo } from "react";
|
||||
import { Form, Input, Modal, Select, Space, Typography } from "antd";
|
||||
import type { RoutingGroup, RoutingStrategy } from "./types";
|
||||
|
||||
const { Text, Paragraph } = Typography;
|
||||
|
||||
/** Props accepted by `RoutingGroupModal`. */
interface RoutingGroupModalProps {
  /** Whether the modal is shown. */
  open: boolean;
  /** Selects the title and OK-button text; in `"edit"` mode the group name input is disabled. */
  mode: "create" | "edit";
  /** Group used to pre-fill the form, or `null` to start from defaults. */
  initialValue: RoutingGroup | null;
  /** Strategy names offered in the strategy select; the first one is the default selection. */
  availableStrategies: Array<string>;
  /** Optional help text per strategy name, shown below the select for the chosen strategy. */
  strategyDescriptions: Record<string, string>;
  /** Model names offered in the models multi-select. */
  modelOptions: Array<string>;
  /** Names already in use; compared case-insensitively, ignoring the name of `initialValue`. */
  existingGroupNames: Array<string>;
  /** Called when the modal is cancelled. */
  onClose: () => void;
  /** Called with the validated group when the OK button is pressed. */
  onSubmit: (group: RoutingGroup) => void | Promise<void>;
  /** Shows the OK button in its loading state while true. */
  saving?: boolean;
}
|
||||
|
||||
/** Values held by the routing-group form. */
interface FormValues {
  /** Name of the group as typed by the user. */
  group_name: string;
  /** Selected model names. */
  models: Array<string>;
  /** Selected strategy name. */
  routing_strategy: RoutingStrategy | string;
  /** Strategy arguments as raw JSON text; parsed on submit. */
  routing_strategy_args?: string;
}
|
||||
|
||||
/** Maximum number of characters allowed in a group name. */
const GROUP_NAME_MAX_LENGTH = 64;

/** A group name may contain only letters, digits, dot, underscore and dash. */
const GROUP_NAME_PATTERN = /^[A-Za-z0-9._-]+$/;

/** Strategies for which the JSON arguments field is shown and its value submitted. */
const STRATEGIES_WITH_ARGS = new Set<string>(["latency-based-routing", "usage-based-routing"]);
|
||||
|
||||
/**
 * Modal form for creating or editing a routing group.
 *
 * The form collects a group name, a list of models, a routing strategy and,
 * for the strategies in `STRATEGIES_WITH_ARGS`, optional strategy arguments
 * entered as JSON. On OK the form is validated, the arguments are parsed and
 * `onSubmit` receives the resulting group. Invalid arguments are reported as
 * a field error and `onSubmit` is not called.
 */
const RoutingGroupModal: React.FC<RoutingGroupModalProps> = ({
  open,
  mode,
  initialValue,
  availableStrategies,
  strategyDescriptions,
  modelOptions,
  existingGroupNames,
  onClose,
  onSubmit,
  saving,
}) => {
  const [form] = Form.useForm<FormValues>();
  const selectedStrategy = Form.useWatch("routing_strategy", form);

  const initialValues: FormValues = {
    group_name: initialValue?.group_name ?? "",
    models: initialValue?.models ?? [],
    routing_strategy: initialValue?.routing_strategy ?? availableStrategies[0] ?? "simple-shuffle",
    // The arguments are edited as text, so an existing object is pretty-printed.
    routing_strategy_args: initialValue?.routing_strategy_args
      ? JSON.stringify(initialValue.routing_strategy_args, null, 2)
      : "",
  };

  // Lower-cased names that are taken by other groups. The name of the group
  // being edited is excluded so it does not collide with itself.
  const reservedNames = useMemo(() => {
    const others = existingGroupNames.filter((n) => n !== initialValue?.group_name);
    return new Set(others.map((n) => n.toLowerCase()));
  }, [existingGroupNames, initialValue]);

  /** Mark the strategy-arguments field as invalid with the given message. */
  const rejectStrategyArgs = (message: string) => {
    form.setFields([
      {
        name: "routing_strategy_args",
        errors: [message],
      },
    ]);
  };

  const handleSubmit = async () => {
    const values = await form.validateFields();
    const strategySupportsArgs = STRATEGIES_WITH_ARGS.has(String(values.routing_strategy));
    const rawArgs = values.routing_strategy_args?.trim() ?? "";

    let parsedArgs: Record<string, unknown> | null = null;
    if (strategySupportsArgs && rawArgs) {
      let candidate: unknown;
      try {
        candidate = JSON.parse(rawArgs);
      } catch {
        rejectStrategyArgs("Must be valid JSON");
        return;
      }

      // JSON.parse also accepts arrays, numbers, strings and booleans. Only a
      // plain object fits the declared argument type; a literal `null` is
      // kept as "no arguments".
      if (candidate !== null) {
        if (typeof candidate !== "object" || Array.isArray(candidate)) {
          rejectStrategyArgs("Must be a JSON object");
          return;
        }
        parsedArgs = candidate as Record<string, unknown>;
      }
    }

    await onSubmit({
      group_name: values.group_name.trim(),
      models: values.models,
      routing_strategy: values.routing_strategy,
      routing_strategy_args: parsedArgs,
    });
  };

  return (
    <Modal
      title={mode === "create" ? "Create Routing Group" : `Edit ${initialValue?.group_name ?? ""}`}
      open={open}
      onCancel={onClose}
      onOk={handleSubmit}
      okText={mode === "create" ? "Create Group" : "Save Changes"}
      cancelText="Cancel"
      confirmLoading={saving}
      destroyOnClose
      width={560}
    >
      {/* The key changes with the mode and the edited group, which remounts the form with fresh initial values. */}
      <Form<FormValues>
        key={mode === "edit" ? `edit-${initialValue?.group_name ?? ""}` : "create"}
        form={form}
        layout="vertical"
        preserve={false}
        initialValues={initialValues}
      >
        <Form.Item
          label="Group Name"
          name="group_name"
          rules={[
            { required: true, message: "Group name is required" },
            { max: GROUP_NAME_MAX_LENGTH, message: `Must be ${GROUP_NAME_MAX_LENGTH} characters or fewer` },
            {
              pattern: GROUP_NAME_PATTERN,
              message: "Only letters, numbers, dot, underscore, and dash are allowed",
            },
            {
              // Case-insensitive uniqueness check against the other groups.
              validator: (_, value: string) => {
                if (!value) return Promise.resolve();
                if (reservedNames.has(value.trim().toLowerCase())) {
                  return Promise.reject(new Error("A group with this name already exists"));
                }
                return Promise.resolve();
              },
            },
          ]}
          extra="Use this name as the model in API calls — LiteLLM routes the request to one of the group's models."
        >
          <Input placeholder="fast-chat" disabled={mode === "edit"} />
        </Form.Item>

        <Form.Item
          label="Models"
          name="models"
          rules={[{ required: true, message: "Select at least one model" }]}
          extra="Models from your model list that this group routes between."
        >
          <Select
            mode="multiple"
            allowClear
            placeholder="Select models"
            options={modelOptions.map((m) => ({ label: m, value: m }))}
            optionFilterProp="label"
          />
        </Form.Item>

        <Form.Item
          label="Routing Strategy"
          name="routing_strategy"
          rules={[{ required: true, message: "Strategy is required" }]}
        >
          <Select
            options={availableStrategies.map((s) => ({ label: s, value: s }))}
            placeholder="Select strategy"
          />
        </Form.Item>

        {selectedStrategy && strategyDescriptions[selectedStrategy] && (
          <Paragraph className="text-xs text-gray-500 -mt-2 mb-4">
            {strategyDescriptions[selectedStrategy]}
          </Paragraph>
        )}

        {STRATEGIES_WITH_ARGS.has(String(selectedStrategy)) && (
          <Form.Item
            label="Strategy Arguments (JSON)"
            name="routing_strategy_args"
            extra={
              selectedStrategy === "latency-based-routing"
                ? "Example: { \"ttl\": 3600, \"lowest_latency_buffer\": 0 }"
                : "Example: { \"ttl\": 60 }"
            }
          >
            <Input.TextArea
              rows={4}
              placeholder='{ "ttl": 3600 }'
              className="font-mono text-xs"
            />
          </Form.Item>
        )}

        <Space direction="vertical" className="w-full mt-2">
          <Text type="secondary" className="text-xs">
            Models not claimed by an explicit group fall through to the proxy's top-level routing
            strategy.
          </Text>
        </Space>
      </Form>
    </Modal>
  );
};
|
||||
|
||||
export default RoutingGroupModal;
|
||||
@ -0,0 +1,241 @@
|
||||
"use client";
|
||||
|
||||
import React, { useState } from "react";
|
||||
import { Flex, Table, Tabs, Tag, Tooltip, Typography, Button } from "antd";
|
||||
import type { ColumnsType } from "antd/es/table";
|
||||
import { BranchesOutlined, DeleteOutlined, EditOutlined, CodeOutlined } from "@ant-design/icons";
|
||||
import type { RoutingGroup } from "./types";
|
||||
|
||||
const { Text, Paragraph } = Typography;
|
||||
|
||||
/**
 * Props of the routing-groups table.
 *
 * NOTE(review): the table component is outside this view, so the per-prop
 * notes below are inferred from names and types. Confirm them against the
 * component.
 */
interface RoutingGroupsTableProps {
  /** Routing groups to list. */
  groups: Array<RoutingGroup>;
  /** Optional loading flag. */
  loading?: boolean;
  /** Callback that receives the group to edit. */
  onEdit: (group: RoutingGroup) => void;
  /** Callback that receives the group to delete. */
  onDelete: (group: RoutingGroup) => void;
  /** Optional proxy base URL; presumably used for the usage snippets. */
  proxyBaseUrl?: string;
}
|
||||
|
||||
/** Display labels for the routing strategies that have a dedicated label. */
const STRATEGY_LABELS: ReadonlyMap<string, string> = new Map([
  ["simple-shuffle", "Simple Shuffle"],
  ["least-busy", "Least Busy"],
  ["usage-based-routing", "Usage Based"],
  ["latency-based-routing", "Latency Based"],
]);

/**
 * Return the display label for a routing strategy.
 *
 * Strategies without a dedicated label are returned unchanged.
 */
const formatStrategyLabel = (strategy: string): string => STRATEGY_LABELS.get(strategy) ?? strategy;
|
||||
|
||||
/**
 * Pick the base URL that the usage snippets should point at.
 *
 * Order of preference:
 *   1. `proxyBaseUrl`, with surrounding whitespace and trailing slashes
 *      removed so that appending a path such as `/v1/chat/completions`
 *      never produces a double slash;
 *   2. the origin of the current page, when running in a browser;
 *   3. the literal placeholder `<your_proxy_base_url>`.
 *
 * A `proxyBaseUrl` that is empty after normalization is treated as absent.
 */
const resolveBaseUrl = (proxyBaseUrl?: string): string => {
  const configured = proxyBaseUrl?.trim().replace(/\/+$/, "");
  if (configured) return configured;
  // `window` does not exist during server-side rendering.
  if (typeof window !== "undefined" && window.location?.origin) return window.location.origin;
  return "<your_proxy_base_url>";
};
|
||||
|
||||
/** Return the first model of the group, or the `<your-model>` placeholder when the group has none. */
const exampleModel = (group: RoutingGroup): string => {
  const firstModel = group.models[0];
  return firstModel ?? "<your-model>";
};
|
||||
|
||||
const buildCurlSnippet = (group: RoutingGroup, baseUrl: string): string =>
|
||||
`curl -X POST '${baseUrl}/v1/chat/completions' \\
|
||||
-H 'Content-Type: application/json' \\
|
||||
-H 'Authorization: Bearer $LITELLM_API_KEY' \\
|
||||
-d '{
|
||||
"model": "${exampleModel(group)}",
|
||||
"messages": [{"role": "user", "content": "Hello!"}]
|
||||
}'`;
|
||||
|
||||
const buildPythonSnippet = (group: RoutingGroup, baseUrl: string): string =>
|
||||
`from openai import OpenAI
|
||||
|
||||
client = OpenAI(
|
||||
api_key="$LITELLM_API_KEY",
|
||||
base_url="${baseUrl}",
|
||||
)
|
||||
|
||||
response = client.chat.completions.create(
|
||||
model="${exampleModel(group)}",
|
||||
messages=[{"role": "user", "content": "Hello!"}],
|
||||
)
|
||||
|
||||
print(response)`;
|
||||
|
||||
/**
 * Build the JavaScript (OpenAI SDK) example for a routing group.
 *
 * The snippet constructs an `OpenAI` client with `baseURL` set to `baseUrl`, reads
 * the API key from `process.env.LITELLM_API_KEY`, and requests a chat completion
 * for the model returned by `exampleModel(group)`.
 *
 * Everything between the backticks, including line breaks and any leading
 * whitespace, is emitted verbatim as the snippet text.
 */
const buildJsSnippet = (group: RoutingGroup, baseUrl: string): string =>
  `import OpenAI from "openai";

const client = new OpenAI({
apiKey: process.env.LITELLM_API_KEY,
baseURL: "${baseUrl}",
});

const response = await client.chat.completions.create({
model: "${exampleModel(group)}",
messages: [{ role: "user", content: "Hello!" }],
});

console.log(response);`;
|
||||
|
||||
/** Props accepted by the `RoutingGroupSnippet` component. */
interface RoutingGroupSnippetProps {
  /** Group whose first model is used as the example model in every snippet. */
  group: RoutingGroup;
  /** Base URL interpolated into every snippet. */
  baseUrl: string;
}
|
||||
|
||||
/**
 * Inline style applied to each rendered snippet: light text on a dark background.
 * `whiteSpace: "pre"` keeps the snippet's own line breaks and spacing, and
 * `overflowX: "auto"` lets long lines scroll horizontally instead of wrapping.
 */
const SNIPPET_BLOCK_STYLE: React.CSSProperties = {
  backgroundColor: "#111827",
  color: "#f3f4f6",
  borderRadius: 6,
  padding: 16,
  fontSize: 12,
  whiteSpace: "pre",
  overflowX: "auto",
};
|
||||
|
||||
/**
 * Tabbed viewer for a routing group's example requests (cURL, Python, JavaScript).
 *
 * Tracks which tab is selected and shows a copy control in the tab bar whose
 * clipboard text is the snippet of the selected tab.
 */
const RoutingGroupSnippet: React.FC<RoutingGroupSnippetProps> = ({ group, baseUrl }) => {
  const snippets = {
    curl: buildCurlSnippet(group, baseUrl),
    python: buildPythonSnippet(group, baseUrl),
    javascript: buildJsSnippet(group, baseUrl),
  } as const;
  type SnippetKey = keyof typeof snippets;

  const [activeKey, setActiveKey] = useState<SnippetKey>("curl");

  // Tab order and labels; each entry becomes one item of the Tabs component.
  const tabLabels: Array<[SnippetKey, string]> = [
    ["curl", "cURL"],
    ["python", "Python (OpenAI SDK)"],
    ["javascript", "JavaScript (OpenAI SDK)"],
  ];

  const items = tabLabels.map(([key, label]) => ({
    key,
    label,
    children: (
      <Paragraph code className="!mb-0" style={SNIPPET_BLOCK_STYLE}>
        {snippets[key]}
      </Paragraph>
    ),
  }));

  // Tabs reports the selected key as a plain string; narrow it back to SnippetKey.
  const handleTabChange = (nextKey: string) => setActiveKey(nextKey as SnippetKey);

  // An empty Paragraph is used only for its built-in copy-to-clipboard button.
  const copyControl = (
    <Paragraph className="!mb-0" copyable={{ text: snippets[activeKey], tooltips: ["Copy", "Copied"] }} />
  );

  return (
    <Tabs
      size="small"
      activeKey={activeKey}
      onChange={handleTabChange}
      items={items}
      tabBarExtraContent={copyControl}
    />
  );
};
|
||||
|
||||
/**
 * Table listing routing groups with their models, strategy, and edit/delete actions.
 *
 * Each row can be expanded to show a short explanation of the group's routing
 * strategy together with example request snippets that target the resolved base URL.
 */
const RoutingGroupsTable: React.FC<RoutingGroupsTableProps> = (props) => {
  const { groups, loading, onEdit, onDelete, proxyBaseUrl } = props;

  const [expandedRowKeys, setExpandedRowKeys] = useState<React.Key[]>([]);
  const baseUrl = resolveBaseUrl(proxyBaseUrl);

  // Build a click handler that stops the event from propagating to ancestor
  // handlers and then runs the given row action with the row's group.
  const rowAction =
    (action: (group: RoutingGroup) => void, group: RoutingGroup) =>
    (event: React.MouseEvent) => {
      event.stopPropagation();
      action(group);
    };

  const renderGroupName = (name: string) => (
    <Text strong className="text-blue-600">
      {name}
    </Text>
  );

  const renderModelTags = (models: string[]) => (
    <Flex wrap="wrap" gap={4}>
      {models.map((model) => (
        <Tag key={model}>{model}</Tag>
      ))}
    </Flex>
  );

  const renderStrategy = (strategy: string) => (
    <span className="inline-flex items-center gap-1.5">
      <BranchesOutlined className="text-gray-400" />
      <Text>{formatStrategyLabel(strategy)}</Text>
    </span>
  );

  const renderActions = (group: RoutingGroup) => (
    <Flex justify="flex-end" align="center" gap={8}>
      <Tooltip title="Edit">
        <Button type="text" icon={<EditOutlined />} onClick={rowAction(onEdit, group)} />
      </Tooltip>
      <Tooltip title="Delete">
        <Button type="text" danger icon={<DeleteOutlined />} onClick={rowAction(onDelete, group)} />
      </Tooltip>
    </Flex>
  );

  // Content of an expanded row: strategy explanation plus example snippets.
  const renderRoutingDetails = (group: RoutingGroup) => (
    <div className="bg-gray-50 border border-gray-200 rounded-md p-4 my-2">
      <Flex align="center" gap={8} className="mb-2">
        <CodeOutlined className="text-blue-500" />
        <Text strong>How routing works for this group</Text>
      </Flex>
      <Paragraph className="text-sm text-gray-600 mb-3">
        Callers request any model in the group by name — LiteLLM picks a deployment behind the
        scenes using the{" "}
        <Text strong>{formatStrategyLabel(group.routing_strategy)}</Text> strategy.
      </Paragraph>
      <RoutingGroupSnippet group={group} baseUrl={baseUrl} />
    </div>
  );

  const columns: ColumnsType<RoutingGroup> = [
    {
      title: "GROUP NAME",
      dataIndex: "group_name",
      key: "group_name",
      render: renderGroupName,
    },
    {
      title: "MODELS",
      dataIndex: "models",
      key: "models",
      render: renderModelTags,
    },
    {
      title: "STRATEGY",
      dataIndex: "routing_strategy",
      key: "routing_strategy",
      render: renderStrategy,
    },
    {
      title: "ACTIONS",
      key: "actions",
      width: 120,
      align: "right",
      render: (_, group) => renderActions(group),
    },
  ];

  return (
    <Table<RoutingGroup>
      rowKey="group_name"
      columns={columns}
      dataSource={groups}
      loading={loading}
      pagination={false}
      expandable={{
        expandedRowKeys,
        // Copy into a fresh mutable array before storing it in state.
        onExpandedRowsChange: (keys) => setExpandedRowKeys(Array.from(keys)),
        expandedRowRender: renderRoutingDetails,
      }}
    />
  );
};
|
||||
|
||||
export default RoutingGroupsTable;
|
||||
177
ui/litellm-dashboard/src/components/routing_groups/index.tsx
Normal file
177
ui/litellm-dashboard/src/components/routing_groups/index.tsx
Normal file
@ -0,0 +1,177 @@
|
||||
"use client";
|
||||
|
||||
import React, { useMemo, useState } from "react";
|
||||
import { Button, Card, Flex, Input, Modal, Space, Typography } from "antd";
|
||||
import { PlusOutlined, ReloadOutlined, SearchOutlined } from "@ant-design/icons";
|
||||
import { useRoutingGroups, useSaveRoutingGroups } from "@/app/(dashboard)/hooks/routingGroups/useRoutingGroups";
|
||||
import { useRouterFields } from "@/app/(dashboard)/hooks/router/useRouterFields";
|
||||
import { useModelHub } from "@/app/(dashboard)/hooks/models/useModels";
|
||||
import useProxySettings from "@/app/(dashboard)/hooks/proxySettings/useProxySettings";
|
||||
import RoutingGroupsTable from "./RoutingGroupsTable";
|
||||
import RoutingGroupModal from "./RoutingGroupModal";
|
||||
import NotificationsManager from "../molecules/notifications_manager";
|
||||
import type { RoutingGroup } from "./types";
|
||||
|
||||
const { Text } = Typography;
|
||||
|
||||
/**
 * Routing groups page: searchable table of groups plus create/edit and delete dialogs.
 *
 * Every mutation (create, edit, delete) computes the complete next list of groups
 * and submits it through `useSaveRoutingGroups`; success and failure are reported
 * via `NotificationsManager`.
 */
const RoutingGroups: React.FC = () => {
  const { data, isLoading, refetch, isFetching } = useRoutingGroups();
  const { data: routerFields } = useRouterFields();
  const { data: modelHub } = useModelHub();
  const proxySettings = useProxySettings();
  const saveMutation = useSaveRoutingGroups();

  const [searchQuery, setSearchQuery] = useState("");
  const [drawerOpen, setDrawerOpen] = useState(false);
  const [drawerMode, setDrawerMode] = useState<"create" | "edit">("create");
  const [editingGroup, setEditingGroup] = useState<RoutingGroup | null>(null);
  const [deletingGroup, setDeletingGroup] = useState<RoutingGroup | null>(null);

  // Memoised so the `?? []` fallback does not yield a new array on every render;
  // an unstable reference here would invalidate `filteredGroups` each render
  // for as long as `data` is undefined.
  const groups = useMemo(() => data?.routingGroups ?? [], [data?.routingGroups]);

  // Case-insensitive match on group name, strategy identifier, or any model name.
  const filteredGroups = useMemo(() => {
    const q = searchQuery.trim().toLowerCase();
    if (!q) return groups;
    return groups.filter(
      (g) =>
        g.group_name.toLowerCase().includes(q) ||
        g.routing_strategy.toLowerCase().includes(q) ||
        g.models.some((m) => m.toLowerCase().includes(q)),
    );
  }, [groups, searchQuery]);

  // Prefer the strategies returned with the routing groups; otherwise fall back to
  // the options of the router's "routing_strategy" field, and finally to none.
  const availableStrategies = useMemo(() => {
    if (data?.availableStrategies?.length) return data.availableStrategies;
    const fromFields = routerFields?.fields?.find((f) => f.field_name === "routing_strategy")?.options;
    return fromFields ?? [];
  }, [data?.availableStrategies, routerFields]);

  const strategyDescriptions = routerFields?.routing_strategy_descriptions ?? {};

  // Distinct, non-empty `model_group` names from the model hub response.
  const modelOptions = useMemo<string[]>(() => {
    const records = (modelHub?.data ?? []) as Array<{ model_group?: string }>;
    const names = records.map((r) => r.model_group).filter((n): n is string => Boolean(n));
    return Array.from(new Set(names));
  }, [modelHub]);

  const openCreate = () => {
    setDrawerMode("create");
    setEditingGroup(null);
    setDrawerOpen(true);
  };

  const openEdit = (group: RoutingGroup) => {
    setDrawerMode("edit");
    setEditingGroup(group);
    setDrawerOpen(true);
  };

  // Create appends the incoming group; edit replaces the entry whose name matches
  // the group being edited (matching on the old name allows renames).
  const handleSubmit = async (incoming: RoutingGroup) => {
    const next: RoutingGroup[] =
      drawerMode === "create"
        ? [...groups, incoming]
        : groups.map((g) => (g.group_name === editingGroup?.group_name ? incoming : g));

    try {
      await saveMutation.mutateAsync(next);
      NotificationsManager.success(
        drawerMode === "create"
          ? `Created routing group "${incoming.group_name}"`
          : `Updated routing group "${incoming.group_name}"`,
      );
      // Close only on success so the form stays open when saving fails.
      setDrawerOpen(false);
    } catch (err) {
      NotificationsManager.error(
        err instanceof Error ? err.message : "Failed to save routing group",
      );
    }
  };

  const confirmDelete = async () => {
    if (!deletingGroup) return;
    const next = groups.filter((g) => g.group_name !== deletingGroup.group_name);
    try {
      await saveMutation.mutateAsync(next);
      NotificationsManager.success(`Deleted routing group "${deletingGroup.group_name}"`);
      setDeletingGroup(null);
    } catch (err) {
      NotificationsManager.error(
        err instanceof Error ? err.message : "Failed to delete routing group",
      );
    }
  };

  return (
    <Space direction="vertical" size={16} className="w-full">
      <Card bodyStyle={{ padding: 16 }}>
        <Flex justify="space-between" align="center" gap={12} className="mb-4">
          <Input
            allowClear
            prefix={<SearchOutlined className="text-gray-400" />}
            placeholder="Search groups..."
            value={searchQuery}
            onChange={(e) => setSearchQuery(e.target.value)}
            className="max-w-sm"
          />
          <Flex align="center" gap={12}>
            <Button
              icon={<ReloadOutlined />}
              onClick={() => refetch()}
              // Spinner only for background refetches; the initial load is shown by the table.
              loading={isFetching && !isLoading}
            >
              Refresh
            </Button>
            <Button type="primary" icon={<PlusOutlined />} onClick={openCreate}>
              Create Group
            </Button>
            <Text type="secondary" className="text-sm whitespace-nowrap">
              Showing {filteredGroups.length} {filteredGroups.length === 1 ? "result" : "results"}
            </Text>
          </Flex>
        </Flex>

        <RoutingGroupsTable
          groups={filteredGroups}
          loading={isLoading}
          onEdit={openEdit}
          onDelete={(g) => setDeletingGroup(g)}
          proxyBaseUrl={
            proxySettings.LITELLM_UI_API_DOC_BASE_URL?.trim() ||
            proxySettings.PROXY_BASE_URL ||
            ""
          }
        />
      </Card>

      <RoutingGroupModal
        open={drawerOpen}
        mode={drawerMode}
        initialValue={editingGroup}
        availableStrategies={availableStrategies}
        strategyDescriptions={strategyDescriptions}
        modelOptions={modelOptions}
        existingGroupNames={groups.map((g) => g.group_name)}
        onClose={() => setDrawerOpen(false)}
        onSubmit={handleSubmit}
        saving={saveMutation.isPending}
      />

      <Modal
        open={Boolean(deletingGroup)}
        title="Delete routing group?"
        okText="Delete"
        okButtonProps={{ danger: true, loading: saveMutation.isPending }}
        cancelText="Cancel"
        onOk={confirmDelete}
        onCancel={() => setDeletingGroup(null)}
      >
        <Text>
          Models in <Text strong>{deletingGroup?.group_name}</Text> will fall back to the proxy's
          top-level routing strategy. This cannot be undone.
        </Text>
      </Modal>
    </Space>
  );
};
|
||||
|
||||
export default RoutingGroups;
|
||||
12
ui/litellm-dashboard/src/components/routing_groups/types.ts
Normal file
12
ui/litellm-dashboard/src/components/routing_groups/types.ts
Normal file
@ -0,0 +1,12 @@
|
||||
/** Routing-strategy identifiers the dashboard knows by name. */
export type RoutingStrategy =
  | "simple-shuffle"
  | "least-busy"
  | "usage-based-routing"
  | "latency-based-routing";
|
||||
|
||||
/** A named set of models that share one routing strategy. */
export interface RoutingGroup {
  /** Name of the group. */
  group_name: string;
  /** Names of the models that belong to the group. */
  models: string[];
  /**
   * Strategy identifier. The union with `string` means any string is accepted by
   * the type checker; `RoutingStrategy` documents the known values.
   */
  routing_strategy: RoutingStrategy | string;
  /** Optional extra arguments; presumably strategy-specific options — TODO confirm against the backend. */
  routing_strategy_args?: Record<string, unknown> | null;
}
|
||||
@ -3,6 +3,91 @@ import { cleanup } from "@testing-library/react";
|
||||
import React from "react";
|
||||
import { afterEach, vi } from "vitest";
|
||||
|
||||
/**
 * Install an in-memory `localStorage` for tests when the environment lacks a usable one.
 *
 * Does nothing when there is no `window` / `window.Storage`, or when
 * `window.localStorage` already exposes `getItem` and `clear` functions. Otherwise it
 * replaces `getItem`, `setItem`, `removeItem`, `clear`, and `key` on
 * `Storage.prototype` with Map-backed versions (one Map per Storage instance) and
 * defines a new Storage-prototyped object as `localStorage` on both `window` and
 * `globalThis`.
 */
const ensureTestLocalStorage = () => {
  const hasWindow = typeof window !== "undefined";
  if (!hasWindow || typeof window.Storage === "undefined") {
    return;
  }

  const alreadyUsable =
    typeof window.localStorage?.getItem === "function" && typeof window.localStorage?.clear === "function";
  if (alreadyUsable) {
    return;
  }

  // One backing Map per Storage instance; the WeakMap does not keep instances alive.
  const backing = new WeakMap<Storage, Map<string, string>>();
  const proto = window.Storage.prototype;

  // Return the instance's backing Map, creating it on first use.
  const entriesFor = (storage: Storage): Map<string, string> => {
    const known = backing.get(storage);
    if (known !== undefined) {
      return known;
    }
    const created = new Map<string, string>();
    backing.set(storage, created);
    return created;
  };

  // Wrap an implementation in the descriptor shared by every patched method.
  const patched = <Args extends unknown[], Result>(impl: (this: Storage, ...args: Args) => Result) => ({
    configurable: true,
    writable: true,
    value: impl,
  });

  Object.defineProperties(proto, {
    getItem: patched(function (this: Storage, key: string) {
      const entries = entriesFor(this);
      const name = String(key);
      return entries.has(name) ? entries.get(name)! : null;
    }),
    setItem: patched(function (this: Storage, key: string, value: string) {
      const entries = entriesFor(this);
      // Keys and values are coerced to strings before being stored.
      entries.set(String(key), String(value));
    }),
    removeItem: patched(function (this: Storage, key: string) {
      const entries = entriesFor(this);
      entries.delete(String(key));
    }),
    clear: patched(function (this: Storage) {
      const entries = entriesFor(this);
      entries.clear();
    }),
    key: patched(function (this: Storage, index: number) {
      const entries = entriesFor(this);
      const names = Array.from(entries.keys());
      return names[index] ?? null;
    }),
  });

  const shimmedStorage = Object.create(proto);
  backing.set(shimmedStorage, new Map<string, string>());
  // `length` is defined on the instance as a getter over the backing Map's size.
  Object.defineProperty(shimmedStorage, "length", {
    configurable: true,
    get() {
      return entriesFor(shimmedStorage).size;
    },
  });

  for (const target of [window, globalThis]) {
    Object.defineProperty(target, "localStorage", {
      configurable: true,
      value: shimmedStorage,
    });
  }
};
|
||||
|
||||
ensureTestLocalStorage();
|
||||
|
||||
// Global mock for NotificationManager to prevent React rendering issues in tests
|
||||
// This avoids "window is not defined" errors when notifications try to render
|
||||
// after test environment is torn down
|
||||
@ -31,7 +116,15 @@ vi.mock("@tremor/react", async (importOriginal) => {
|
||||
return React.createElement(React.Fragment, null, children);
|
||||
},
|
||||
// Render as a plain checkbox so toggle interactions are testable without Tremor internals
|
||||
Switch: ({ checked, onChange, className }: { checked?: boolean; onChange?: (v: boolean) => void; className?: string }) =>
|
||||
Switch: ({
|
||||
checked,
|
||||
onChange,
|
||||
className,
|
||||
}: {
|
||||
checked?: boolean;
|
||||
onChange?: (v: boolean) => void;
|
||||
className?: string;
|
||||
}) =>
|
||||
React.createElement("input", {
|
||||
type: "checkbox",
|
||||
role: "switch",
|
||||
|
||||
@ -14,7 +14,7 @@
|
||||
"moduleResolution": "bundler",
|
||||
"resolveJsonModule": true,
|
||||
"isolatedModules": true,
|
||||
"jsx": "preserve",
|
||||
"jsx": "react-jsx",
|
||||
"incremental": true,
|
||||
"plugins": [
|
||||
{
|
||||
|
||||
File diff suppressed because one or more lines are too long
Loading…
Reference in New Issue
Block a user