Merge remote-tracking branch 'upstream/litellm_internal_staging' into codex/skills-containers-tenant-guard

# Conflicts:
#	litellm/proxy/auth/auth_utils.py
This commit is contained in:
user 2026-05-05 01:41:25 +00:00
commit 3dcb6bd3f9
No known key found for this signature in database
80 changed files with 6747 additions and 1183 deletions

View File

@ -1 +1 @@
litellm==1.83.5
litellm==1.83.14

View File

@ -11,6 +11,10 @@ from typing import Literal
import litellm
from litellm.caching.caching import DualCache
from litellm.proxy._types import UserAPIKeyAuth
from litellm.proxy.guardrails._content_utils import (
is_text_content_call_type,
iter_message_text,
)
from litellm.integrations.custom_logger import CustomLogger
from litellm._logging import verbose_proxy_logger
from fastapi import HTTPException
@ -73,10 +77,9 @@ class _ENTERPRISE_BannedKeywords(CustomLogger):
- check if user id part of blocked list
"""
self.print_verbose("Inside Banned Keyword List Pre-Call Hook")
if call_type == "completion" and "messages" in data:
for m in data["messages"]:
if "content" in m and isinstance(m["content"], str):
self.test_violation(test_str=m["content"])
if is_text_content_call_type(call_type):
for text in iter_message_text(data):
self.test_violation(test_str=text)
except HTTPException as e:
raise e
@ -93,11 +96,16 @@ class _ENTERPRISE_BannedKeywords(CustomLogger):
user_api_key_dict: UserAPIKeyAuth,
response,
):
if isinstance(response, litellm.ModelResponse) and isinstance(
response.choices[0], litellm.utils.Choices
):
for word in self.banned_keywords_list:
self.test_violation(test_str=response.choices[0].message.content or "")
if not isinstance(response, litellm.ModelResponse):
return
for choice in response.choices:
if not isinstance(choice, litellm.utils.Choices):
continue
message = getattr(choice, "message", None)
content = getattr(message, "content", None)
if isinstance(content, str):
self.test_violation(test_str=content)
async def async_post_call_streaming_hook(
self,

View File

@ -12,6 +12,7 @@ import litellm
from litellm._logging import verbose_proxy_logger
from litellm.integrations.custom_logger import CustomLogger
from litellm.proxy._types import UserAPIKeyAuth
from litellm.proxy.guardrails._content_utils import iter_message_text
from litellm.types.utils import CallTypesLiteral
@ -94,11 +95,9 @@ class _ENTERPRISE_GoogleTextModeration(CustomLogger):
- Calls Google's Text Moderation API
- Rejects request if it fails safety check
"""
if "messages" in data and isinstance(data["messages"], list):
text = ""
for m in data["messages"]: # assume messages is a list
if "content" in m and isinstance(m["content"], str):
text += m["content"]
# Covers multimodal list content + Responses-API input.
text = "".join(iter_message_text(data))
if text:
document = self.language_document(content=text, type_=self.document_type)
request = self.moderate_text_request(

View File

@ -19,6 +19,7 @@ import litellm
from litellm._logging import verbose_proxy_logger
from litellm.integrations.custom_logger import CustomLogger
from litellm.proxy._types import UserAPIKeyAuth
from litellm.proxy.guardrails._content_utils import iter_message_text
from litellm.types.utils import CallTypesLiteral
@ -37,11 +38,8 @@ class _ENTERPRISE_OpenAI_Moderation(CustomLogger):
user_api_key_dict: UserAPIKeyAuth,
call_type: CallTypesLiteral,
):
text = ""
if "messages" in data and isinstance(data["messages"], list):
for m in data["messages"]: # assume messages is a list
if "content" in m and isinstance(m["content"], str):
text += m["content"]
# Covers multimodal list content + Responses-API input.
text = "".join(iter_message_text(data))
from litellm.proxy.proxy_server import llm_router

View File

@ -18,6 +18,7 @@ from litellm._logging import verbose_proxy_logger
from litellm.caching.caching import DualCache
from litellm.integrations.custom_guardrail import CustomGuardrail
from litellm.proxy._types import UserAPIKeyAuth
from litellm.proxy.guardrails._content_utils import walk_user_text
GUARDRAIL_NAME = "hide_secrets"
@ -473,23 +474,19 @@ class _ENTERPRISE_SecretDetection(CustomGuardrail):
if await self.should_run_check(user_api_key_dict) is False:
return
if "messages" in data and isinstance(data["messages"], list):
for message in data["messages"]:
if "content" in message and isinstance(message["content"], str):
detected_secrets = self.scan_message_for_secrets(message["content"])
# Covers multimodal list content + Responses-API input.
def _redact_message_text(text: str) -> str:
detected_secrets = self.scan_message_for_secrets(text)
for secret in detected_secrets:
text = text.replace(secret["value"], "[REDACTED]")
if detected_secrets:
secret_types = [secret["type"] for secret in detected_secrets]
verbose_proxy_logger.warning(
f"Detected and redacted secrets in message: {secret_types}"
)
return text
for secret in detected_secrets:
message["content"] = message["content"].replace(
secret["value"], "[REDACTED]"
)
if len(detected_secrets) > 0:
secret_types = [secret["type"] for secret in detected_secrets]
verbose_proxy_logger.warning(
f"Detected and redacted secrets in message: {secret_types}"
)
else:
verbose_proxy_logger.debug("No secrets detected on input.")
walk_user_text(data, _redact_message_text)
if "prompt" in data:
if isinstance(data["prompt"], str):
@ -504,11 +501,15 @@ class _ENTERPRISE_SecretDetection(CustomGuardrail):
f"Detected and redacted secrets in prompt: {secret_types}"
)
elif isinstance(data["prompt"], list):
for item in data["prompt"]:
# Index back into the list — assigning to ``item`` would only
# rebind the loop variable and leave ``data["prompt"]``
# carrying the unredacted secret.
for idx, item in enumerate(data["prompt"]):
if isinstance(item, str):
detected_secrets = self.scan_message_for_secrets(item)
for secret in detected_secrets:
item = item.replace(secret["value"], "[REDACTED]")
data["prompt"][idx] = item
if len(detected_secrets) > 0:
secret_types = [
secret["type"] for secret in detected_secrets
@ -517,31 +518,6 @@ class _ENTERPRISE_SecretDetection(CustomGuardrail):
f"Detected and redacted secrets in prompt: {secret_types}"
)
if "input" in data:
if isinstance(data["input"], str):
detected_secrets = self.scan_message_for_secrets(data["input"])
for secret in detected_secrets:
data["input"] = data["input"].replace(secret["value"], "[REDACTED]")
if len(detected_secrets) > 0:
secret_types = [secret["type"] for secret in detected_secrets]
verbose_proxy_logger.warning(
f"Detected and redacted secrets in input: {secret_types}"
)
elif isinstance(data["input"], list):
_input_in_request = data["input"]
for idx, item in enumerate(_input_in_request):
if isinstance(item, str):
detected_secrets = self.scan_message_for_secrets(item)
for secret in detected_secrets:
_input_in_request[idx] = item.replace(
secret["value"], "[REDACTED]"
)
if len(detected_secrets) > 0:
secret_types = [
secret["type"] for secret in detected_secrets
]
verbose_proxy_logger.warning(
f"Detected and redacted secrets in input: {secret_types}"
)
verbose_proxy_logger.debug("Data after redacting input %s", data)
# ``data["input"]`` (Responses API and embeddings/moderation) is
# already covered by ``walk_user_text`` above.
return

View File

@ -16,7 +16,7 @@ Repository = "https://github.com/BerriAI/litellm"
Documentation = "https://docs.litellm.ai"
[build-system]
requires = ["uv_build==0.10.7"]
requires = ["uv_build==0.11.8"]
build-backend = "uv_build"
[tool.uv]

2054
litellm-js/proxy/package-lock.json generated Normal file

File diff suppressed because it is too large Load Diff

View File

@ -4,11 +4,11 @@
"deploy": "wrangler deploy --minify src/index.ts"
},
"dependencies": {
"hono": "4.12.12",
"hono": "4.12.16",
"openai": "4.29.2"
},
"devDependencies": {
"@cloudflare/workers-types": "4.20240208.0",
"wrangler": "3.32.0"
"@cloudflare/workers-types": "4.20260501.1",
"wrangler": "4.87.0"
}
}

View File

@ -6,7 +6,7 @@
"": {
"dependencies": {
"@hono/node-server": "1.19.13",
"hono": "4.12.12"
"hono": "4.12.16"
},
"devDependencies": {
"@types/node": "20.19.25",
@ -548,9 +548,9 @@
}
},
"node_modules/hono": {
"version": "4.12.12",
"resolved": "https://registry.npmjs.org/hono/-/hono-4.12.12.tgz",
"integrity": "sha512-p1JfQMKaceuCbpJKAPKVqyqviZdS0eUxH9v82oWo1kb9xjQ5wA6iP3FNVAPDFlz5/p7d45lO+BpSk1tuSZMF4Q==",
"version": "4.12.16",
"resolved": "https://registry.npmjs.org/hono/-/hono-4.12.16.tgz",
"integrity": "sha512-jN0ZewiNAWSe5khM3EyCmBb250+b40wWbwNILNfEvq84VREWwOIkuUsFONk/3i3nqkz7Oe1PcpM2mwQEK2L9Kg==",
"license": "MIT",
"engines": {
"node": ">=16.9.0"

View File

@ -4,7 +4,7 @@
},
"dependencies": {
"@hono/node-server": "1.19.13",
"hono": "4.12.12"
"hono": "4.12.16"
},
"devDependencies": {
"@types/node": "20.19.25",

View File

@ -16,7 +16,7 @@ Repository = "https://github.com/BerriAI/litellm"
Documentation = "https://docs.litellm.ai"
[build-system]
requires = ["uv_build==0.10.7"]
requires = ["uv_build==0.11.8"]
build-backend = "uv_build"
[tool.uv]

View File

@ -166,7 +166,7 @@ langfuse_default_tags: Optional[List[str]] = None
langsmith_batch_size: Optional[int] = None
prometheus_initialize_budget_metrics: Optional[bool] = False
prometheus_latency_buckets: Optional[List[float]] = None
require_auth_for_metrics_endpoint: Optional[bool] = False
require_auth_for_metrics_endpoint: Optional[bool] = True
argilla_batch_size: Optional[int] = None
datadog_use_v1: Optional[bool] = False # if you want to use v1 datadog logged payload.
gcs_pub_sub_use_v1: Optional[bool] = (

View File

@ -5,7 +5,8 @@ Fetches prompt versions from Arize Phoenix and provides workspace-based access c
from typing import Any, Dict, List, Optional, Tuple, Union
from jinja2 import DictLoader, Environment, select_autoescape
from jinja2 import DictLoader, select_autoescape
from jinja2.sandbox import ImmutableSandboxedEnvironment
from litellm.integrations.custom_prompt_management import CustomPromptManagement
from litellm.integrations.prompt_management_base import (
@ -74,7 +75,13 @@ class ArizePhoenixTemplateManager:
api_key=self.api_key, api_base=self.api_base
)
self.jinja_env = Environment(
# Templates fetched from Arize Phoenix come from external workspace
# users; in a plain `Environment()` a malicious template could reach
# `__class__.__init__.__globals__` and execute arbitrary code on the
# proxy host. The sandbox blocks that attribute traversal while
# leaving normal `{{ var }}` substitution intact. Matches the
# dotprompt manager's hardening.
self.jinja_env = ImmutableSandboxedEnvironment(
loader=DictLoader({}),
autoescape=select_autoescape(["html", "xml"]),
# Use Mustache/Handlebars-style delimiters

View File

@ -5,7 +5,8 @@ Fetches .prompt files from BitBucket repositories and provides team-based access
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Union
from jinja2 import DictLoader, Environment, select_autoescape
from jinja2 import DictLoader, select_autoescape
from jinja2.sandbox import ImmutableSandboxedEnvironment
from litellm.integrations.custom_prompt_management import CustomPromptManagement
@ -74,7 +75,13 @@ class BitBucketTemplateManager:
self.prompts: Dict[str, BitBucketPromptTemplate] = {}
self.bitbucket_client = BitBucketClient(bitbucket_config)
self.jinja_env = Environment(
# Templates fetched from a BitBucket repo are not trustworthy:
# anyone with repo write access can ship Jinja syntax that, in a
# plain `Environment()`, would reach `__class__.__init__.__globals__`
# and pivot into RCE on the proxy host. The sandbox blocks that
# attribute traversal while leaving normal `{{ var }}` substitution
# intact. Matches the dotprompt manager's hardening.
self.jinja_env = ImmutableSandboxedEnvironment(
loader=DictLoader({}),
autoescape=select_autoescape(["html", "xml"]),
# Use Handlebars-style delimiters to match Dotprompt spec

View File

@ -4,7 +4,8 @@ GitLab prompt manager with configurable prompts folder.
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Union
from jinja2 import DictLoader, Environment, select_autoescape
from jinja2 import DictLoader, select_autoescape
from jinja2.sandbox import ImmutableSandboxedEnvironment
from litellm.integrations.custom_prompt_management import CustomPromptManagement
@ -90,7 +91,13 @@ class GitLabTemplateManager:
or ""
).strip("/")
self.jinja_env = Environment(
# Templates fetched from a GitLab repo are not trustworthy:
# anyone with repo write access can ship Jinja syntax that, in a
# plain `Environment()`, would reach `__class__.__init__.__globals__`
# and pivot into RCE on the proxy host. The sandbox blocks that
# attribute traversal while leaving normal `{{ var }}` substitution
# intact. Matches the dotprompt manager's hardening.
self.jinja_env = ImmutableSandboxedEnvironment(
loader=DictLoader({}),
autoescape=select_autoescape(["html", "xml"]),
variable_start_string="{{",

View File

@ -617,13 +617,12 @@ class LiteLLMRoutes(enum.Enum):
"/",
"/health/liveliness",
"/health/liveness",
"/health/readiness",
"/test",
"/config/yaml",
"/metrics",
"/litellm/.well-known/litellm-ui-config",
"/.well-known/litellm-ui-config",
"/public/model_hub",
"/public/model_hub/info",
"/public/agent_hub",
"/public/mcp_hub",
"/public/skill_hub",

View File

@ -216,20 +216,15 @@ _EXTRA_BANNED_OBSERVABILITY_PARAMS: FrozenSet[str] = frozenset(
def _build_banned_observability_params() -> FrozenSet[str]:
"""Derive the observability ban list from the canonical allowlist.
``_supported_callback_params`` in
``_supported_callback_params`` and ``_request_blocked_callback_params`` in
``litellm/litellm_core_utils/initialize_dynamic_callback_params.py`` is
the single place that enumerates every observability field
integrations resolve from kwargs/metadata. Subtract the small set of
informational fields (``_SAFE_CLIENT_CALLBACK_PARAMS``) and union with
the extras the canonical allowlist hasn't caught up to yet. New
integrations added to the canonical allowlist are banned by default,
which is the safe failure mode.
``_request_blocked_callback_params`` (e.g. ``gcs_bucket_name``,
``gcs_path_service_account``) is the GCS-logging-specific deny list
that lives alongside the allowlist; fold it in here so a single
declaration of "this field must not be caller-supplied" covers both
the request-body bouncer and the dynamic callback initializer.
the single place that enumerates every observability field integrations
resolve from kwargs/metadata, plus fields that integration code explicitly
blocks from request-supplied callback params. Subtract the small set of
informational fields (``_SAFE_CLIENT_CALLBACK_PARAMS``) and union with the
extras the canonical allowlist hasn't caught up to yet. New integrations
added to the canonical allowlist are banned by default, which is the safe
failure mode.
"""
from litellm.litellm_core_utils.initialize_dynamic_callback_params import (
_request_blocked_callback_params,
@ -238,8 +233,8 @@ def _build_banned_observability_params() -> FrozenSet[str]:
return (
(frozenset(_supported_callback_params) - _SAFE_CLIENT_CALLBACK_PARAMS)
| _EXTRA_BANNED_OBSERVABILITY_PARAMS
| frozenset(_request_blocked_callback_params)
| _EXTRA_BANNED_OBSERVABILITY_PARAMS
)

View File

@ -87,6 +87,23 @@ except ImportError as e:
user_api_key_service_logger_obj = ServiceLogging() # used for tracking latency on OTEL
def _normalize_public_auth_route(route: str) -> str:
if route != "/" and route.endswith("/"):
return route.rstrip("/")
return route
def _route_requires_auth_despite_public(
    route: str, general_settings: Optional[dict]
) -> bool:
    """Tell whether ``route`` must be authenticated even if listed as public.

    Only the normalised ``/metrics`` route is special-cased: it requires auth
    unless ``litellm.require_auth_for_metrics_endpoint`` is exactly ``False``.
    Every other route returns ``False``.

    ``general_settings`` is accepted but not read by this function.
    """
    if _normalize_public_auth_route(route) != "/metrics":
        return False
    # ``is not False`` keeps auth on when the flag is True or None.
    return litellm.require_auth_for_metrics_endpoint is not False
custom_litellm_key_header = APIKeyHeader(
name=SpecialHeaders.custom_litellm_api_key.value,
auto_error=False,
@ -714,7 +731,9 @@ async def _user_api_key_auth_builder( # noqa: PLR0915
"""
######## Route Checks Before Reading DB / Cache for "token" ################
if (
if not _route_requires_auth_despite_public(
route=route, general_settings=general_settings
) and (
route in LiteLLMRoutes.public_routes.value # type: ignore
or route_in_additonal_public_routes(current_route=route)
):
@ -1698,7 +1717,7 @@ async def _run_centralized_common_checks(
user_custom_auth,
)
# Public routes (e.g. /health/readiness, /metrics) are exempt from
# Public routes (e.g. /health/liveness) are exempt from
# auth in the builder — the wrapper must not retroactively apply
# authz on top, or k8s readiness probes and other unauthenticated
# callers get 401.

View File

@ -50,7 +50,10 @@ def configure_gc_thresholds():
configure_gc_thresholds()
@router.get("/debug/asyncio-tasks")
@router.get(
"/debug/asyncio-tasks",
dependencies=[Depends(user_api_key_auth)],
)
async def get_active_tasks_stats():
"""
Returns:
@ -103,7 +106,11 @@ if os.environ.get("LITELLM_PROFILE", "false").lower() == "true":
tracemalloc.start(10)
@router.get("/memory-usage", include_in_schema=False)
@router.get(
"/memory-usage",
dependencies=[Depends(user_api_key_auth)],
include_in_schema=False,
)
async def memory_usage():
# Take a snapshot of the current memory usage
snapshot = tracemalloc.take_snapshot()
@ -711,7 +718,11 @@ async def configure_gc_thresholds_endpoint(
}
@router.get("/otel-spans", include_in_schema=False)
@router.get(
"/otel-spans",
dependencies=[Depends(user_api_key_auth)],
include_in_schema=False,
)
async def get_otel_spans():
from litellm.proxy.proxy_server import open_telemetry_logger

View File

@ -0,0 +1,236 @@
"""
Shared helpers for guardrail hooks: extract text from a request body
regardless of whether it uses Chat Completions ``messages``, Responses-API
``input``, or multimodal list-format ``content`` parts.
Hooks that only check ``data["messages"]`` for string content silently
skip the other shapes; these helpers normalise that so every hook sees
every text fragment.
"""
from typing import Any, Callable, Dict, FrozenSet, Iterator, List
# Call types whose request body holds free-form chat / prompt text that
# text-content guardrails (banned keywords, content moderation, secret
# detection, ...) should inspect. The proxy ingress forwards ``route_type``
# unchanged as ``call_type``, so these literals are exactly what the
# guardrail dispatcher receives:
#
#   /v1/chat/completions  -> "acompletion"
#   /v1/responses         -> "aresponses"
#
# ``"completion"`` is listed for SDK / internal callers that invoke
# ``pre_call_hook`` directly with the sync name. Embedding, moderation,
# audio, and transcription endpoints are deliberately left out: text
# guardrails on those paths are a separate scope.
TEXT_CONTENT_CALL_TYPES: FrozenSet[str] = frozenset(
    ("completion", "acompletion", "aresponses")
)
def is_text_content_call_type(call_type: str) -> bool:
    """Report whether ``call_type`` is one that text guardrails inspect.

    True exactly when ``call_type`` is a member of
    ``TEXT_CONTENT_CALL_TYPES`` (the Chat Completions and Responses API
    call types).
    """
    return call_type in TEXT_CONTENT_CALL_TYPES
def _iter_text_parts_in_content(content: Any) -> Iterator[str]:
"""Yield text fragments from a ``message.content`` value (string or
multimodal list). Non-text parts (images, audio, ) are skipped."""
if isinstance(content, str):
if content:
yield content
elif isinstance(content, list):
for part in content:
if isinstance(part, str):
# A bare string in a content/input list is itself a text
# fragment (Responses-API mixed-list shape).
if part:
yield part
continue
if not isinstance(part, dict):
continue
if part.get("type") == "text":
text = part.get("text")
if isinstance(text, str) and text:
yield text
def _coerce_input_to_messages(input_value: Any) -> List[Dict[str, Any]]:
"""Coerce a Responses-API ``data["input"]`` value into chat-style messages."""
if isinstance(input_value, str):
return [{"role": "user", "content": input_value}]
if isinstance(input_value, list):
if input_value and all(
isinstance(item, dict) and "role" in item for item in input_value
):
return list(input_value)
# Mixed lists (content-part dicts + bare strings) and pure
# string/dict lists all become a single user message; the content
# iterator below handles each element type uniformly.
return [{"role": "user", "content": input_value}]
return []
def _iter_inspection_messages(data: Dict[str, Any]) -> Iterator[Dict[str, Any]]:
    """Yield the entries of ``data["messages"]`` followed by ``data["input"]``.

    ``messages`` entries are yielded as-is when the field is a list.
    ``input`` is first converted with ``_coerce_input_to_messages``.
    """
    chat_messages = data.get("messages")
    if isinstance(chat_messages, list):
        for entry in chat_messages:
            yield entry
    for entry in _coerce_input_to_messages(data.get("input")):
        yield entry
def iter_message_text(data: Dict[str, Any]) -> Iterator[str]:
    """Yield every text fragment from ``messages`` AND ``input``.

    Walks every role (user, assistant, system, ...): guardrails inspect the
    entire conversation, not just user turns. Entries that are not dicts are
    ignored.
    """
    for entry in _iter_inspection_messages(data):
        if isinstance(entry, dict):
            yield from _iter_text_parts_in_content(entry.get("content"))
def walk_user_text(data: Dict[str, Any], visit: Callable[[str], str]) -> int:
    """Rewrite every text fragment in place via ``visit``.

    Mutates ``data["messages"]`` and ``data["input"]``. Handles plain-string
    content, multimodal content lists (bare strings and text-part dicts), and
    the three ``input`` shapes: a string, a list of role-carrying messages,
    or a flat list of content parts / bare strings. Non-text parts are left
    untouched.

    Args:
        data: Request body to rewrite.
        visit: Called once per non-empty text fragment; its return value
            replaces the fragment.

    Returns:
        The number of fragments visited, so callers can short-circuit when
        nothing was inspected.
    """
    # Chat Completions labels text parts "text"; the Responses API uses
    # "input_text" / "output_text". Matching only "text" would leave
    # Responses-API parts unvisited (e.g. secrets never redacted).
    text_part_types = ("text", "input_text", "output_text")
    visited = 0

    def _rewrite_content(content: Any) -> Any:
        """Return ``content`` with each text fragment passed through ``visit``."""
        nonlocal visited
        if isinstance(content, str):
            if content:
                visited += 1
                return visit(content)
            return content
        if not isinstance(content, list):
            return content
        new_parts: List[Any] = []
        for part in content:
            if isinstance(part, str) and part:
                visited += 1
                new_parts.append(visit(part))
            elif (
                isinstance(part, dict)
                and part.get("type") in text_part_types
                and isinstance(part.get("text"), str)
                and part["text"]
            ):
                visited += 1
                # Copy the part so its other keys survive the rewrite.
                new_parts.append({**part, "text": visit(part["text"])})
            else:
                new_parts.append(part)
        return new_parts

    messages = data.get("messages")
    if isinstance(messages, list):
        for message in messages:
            if isinstance(message, dict) and "content" in message:
                message["content"] = _rewrite_content(message["content"])

    input_value = data.get("input")
    if isinstance(input_value, str):
        if input_value:
            visited += 1
            data["input"] = visit(input_value)
    elif isinstance(input_value, list):
        if input_value and all(
            isinstance(item, dict) and "role" in item for item in input_value
        ):
            # List of full messages: rewrite each message's content.
            for item in input_value:
                if "content" in item:
                    item["content"] = _rewrite_content(item["content"])
        else:
            # Flat list of content parts and/or bare strings. Slice-assign so
            # the caller's list object itself is updated in place.
            input_value[:] = _rewrite_content(input_value)
    return visited
def apply_redacted_messages_back(
    data: Dict[str, Any], redacted_messages: List[Dict[str, Any]]
) -> None:
    """Write redacted messages back to whichever field(s) the caller used.

    Mask/anonymize paths send a synthesised messages list to a third-party
    guardrail and get a redacted list back. Writing that only to
    ``data["messages"]`` would leave a Responses-API ``data["input"]``
    untouched, so unredacted text would still reach the LLM.

    * If ``data`` has a ``"messages"`` key, it is replaced by
      ``redacted_messages``.
    * If ``data["input"]`` is a string, it is replaced by the newline-joined
      text fragments of the redacted messages. Non-string ``input`` is left
      as it is.
    """
    if "messages" in data:
        data["messages"] = redacted_messages

    if not isinstance(data.get("input"), str):
        return

    fragments: List[str] = [
        fragment
        for entry in redacted_messages
        if isinstance(entry, dict)
        for fragment in _iter_text_parts_in_content(entry.get("content"))
    ]
    data["input"] = "\n".join(fragments)
def has_non_string_content(data: Dict[str, Any]) -> bool:
    """Return True if any inspected content is not a plain string.

    Hooks whose mask/redact path works on string offsets cannot preserve
    multimodal non-text parts. They should degrade to block-on-detect when
    this returns True, so image/audio parts are not silently stripped.

    True when a dict entry of ``data["messages"]`` has a ``content`` that is
    neither ``None`` nor a string, or when ``data["input"]`` is present and
    is not a string.
    """
    chat_messages = data.get("messages")
    if isinstance(chat_messages, list):
        for entry in chat_messages:
            if not isinstance(entry, dict):
                continue
            body = entry.get("content")
            if body is not None and not isinstance(body, str):
                return True

    raw_input = data.get("input")
    return not (raw_input is None or isinstance(raw_input, str))
def build_inspection_messages(data: Dict[str, Any]) -> List[Dict[str, str]]:
    """Synthesize a chat-style messages list for posting to a guardrail API.

    Each returned message has a plain-string ``content``: multimodal text
    parts are joined with newlines, and Responses-API ``input`` is lifted
    into synthetic messages. Messages with no inspectable text are dropped,
    and a missing or falsy ``role`` becomes ``"user"``.

    Hooks that POST ``{"messages": [...]}`` to an external service should
    call this instead of ``data.get("messages", [])`` so the Responses API
    and multimodal content are covered.
    """
    result: List[Dict[str, str]] = []
    for entry in _iter_inspection_messages(data):
        if not isinstance(entry, dict):
            continue
        joined = "\n".join(_iter_text_parts_in_content(entry.get("content")))
        if joined:
            result.append({"role": entry.get("role") or "user", "content": joined})
    return result

View File

@ -22,6 +22,11 @@ from litellm.llms.custom_httpx.http_handler import (
httpxSpecialProvider,
)
from litellm.proxy._types import UserAPIKeyAuth
from litellm.proxy.guardrails._content_utils import (
apply_redacted_messages_back,
build_inspection_messages,
has_non_string_content,
)
from litellm.types.utils import (
CallTypesLiteral,
Choices,
@ -101,10 +106,11 @@ class AimGuardrail(CustomGuardrail):
user_email=user_email,
litellm_call_id=call_id,
)
# Covers multimodal list content + Responses-API input.
response = await self.async_handler.post(
f"{self.api_base}/fw/v1/analyze",
headers=headers,
json={"messages": data.get("messages", [])},
json={"messages": build_inspection_messages(data)},
)
response.raise_for_status()
res = response.json()
@ -137,13 +143,31 @@ class AimGuardrail(CustomGuardrail):
redacted_chat = res.get("redacted_chat")
if not redacted_chat:
return data
data["messages"] = [
# Aim returns text-only redacted messages. Overwriting
# ``data["messages"]`` with that would silently strip image/audio
# parts from a multimodal request — degrade to block so the
# multimodal payload is never silently rewritten.
if has_non_string_content(data):
raise HTTPException(
status_code=400,
detail=(
"Aim: anonymize action requested for multimodal input "
"but mask-in-place would drop non-text parts. Send the "
"request with plain string content to use anonymize, "
"or rely on block-mode policies."
),
)
redacted_messages = [
{
"role": message["role"],
"content": message["content"],
}
for message in redacted_chat["all_redacted_messages"]
]
# Write back to ``messages`` AND ``input``. The Responses-API
# backend reads ``input``; writing only to ``messages`` would let
# unredacted text reach the LLM for ``/v1/responses`` calls.
apply_redacted_messages_back(data, redacted_messages)
return data
async def call_aim_guardrail_on_output(
@ -162,7 +186,7 @@ class AimGuardrail(CustomGuardrail):
litellm_call_id=call_id,
),
json={
"messages": request_data.get("messages", [])
"messages": build_inspection_messages(request_data)
+ [{"role": "assistant", "content": output}]
},
)
@ -233,15 +257,33 @@ class AimGuardrail(CustomGuardrail):
user_api_key_dict: UserAPIKeyAuth,
response: Union[Any, ModelResponse, EmbeddingResponse, ImageResponse],
) -> Any:
if (
isinstance(response, ModelResponse)
and response.choices
and isinstance(response.choices[0], Choices)
):
content = response.choices[0].message.content or ""
aim_output_guardrail_result = await self.call_aim_guardrail_on_output(
data, content, hook="output", key_alias=user_api_key_dict.key_alias
)
if not (isinstance(response, ModelResponse) and response.choices):
return response
# Inspect every choice — when ``n>1`` the additional completions
# used to bypass Aim entirely because the hook only inspected
# ``choices[0]``. Run inspections concurrently so multi-completion
# responses don't pay an n× latency penalty.
choices_to_inspect = [c for c in response.choices if isinstance(c, Choices)]
if not choices_to_inspect:
return response
# ``return_exceptions=True`` lets every inspection finish even if
# one fails — without it, the first exception would propagate and
# leave the remaining tasks running in the background.
results = await asyncio.gather(
*(
self.call_aim_guardrail_on_output(
data,
choice.message.content or "",
hook="output",
key_alias=user_api_key_dict.key_alias,
)
for choice in choices_to_inspect
),
return_exceptions=True,
)
for choice, aim_output_guardrail_result in zip(choices_to_inspect, results):
if isinstance(aim_output_guardrail_result, BaseException):
raise aim_output_guardrail_result
if aim_output_guardrail_result and aim_output_guardrail_result.get(
"detection_message"
):
@ -252,7 +294,7 @@ class AimGuardrail(CustomGuardrail):
if aim_output_guardrail_result and aim_output_guardrail_result.get(
"redacted_output"
):
response.choices[0].message.content = aim_output_guardrail_result.get(
choice.message.content = aim_output_guardrail_result.get(
"redacted_output"
)
return response

View File

@ -254,15 +254,16 @@ class AzureContentSafetyTextModerationGuardrail(AzureGuardrailBase, CustomGuardr
) -> Any:
from litellm.types.utils import Choices, ModelResponse
if (
isinstance(response, ModelResponse)
and response.choices
and isinstance(response.choices[0], Choices)
):
content = response.choices[0].message.content or ""
await self.async_make_request(
text=content,
)
if isinstance(response, ModelResponse) and response.choices:
for choice in response.choices:
if not isinstance(choice, Choices):
continue
content = _message_content_to_text(choice.message.content)
if not content:
continue
await self.async_make_request(
text=content,
)
return response
async def async_post_call_streaming_hook(
@ -279,3 +280,16 @@ class AzureContentSafetyTextModerationGuardrail(AzureGuardrailBase, CustomGuardr
error_returned = json.dumps({"error": e.detail})
return f"data: {error_returned}\n\n"
def _message_content_to_text(content: Any) -> str:
if isinstance(content, str):
return content
if isinstance(content, list):
text_parts = [
item.get("text")
for item in content
if isinstance(item, dict) and isinstance(item.get("text"), str)
]
return "\n".join(part for part in text_parts if part)
return ""

View File

@ -20,6 +20,7 @@ from litellm.llms.custom_httpx.http_handler import (
httpxSpecialProvider,
)
from litellm.proxy._types import UserAPIKeyAuth
from litellm.proxy.guardrails._content_utils import iter_message_text
from litellm.types.guardrails import GuardrailEventHooks
from litellm.types.proxy.guardrails.guardrail_hooks.ibm import (
IBMDetectorDetection,
@ -463,65 +464,53 @@ class IBMGuardrailDetector(CustomGuardrail):
if self.should_run_guardrail(data=data, event_type=event_type) is not True:
return data
_messages = data.get("messages")
if _messages:
contents_to_check: List[str] = []
for message in _messages:
_content = message.get("content")
if isinstance(_content, str):
contents_to_check.append(_content)
# Covers multimodal list content + Responses-API input.
contents_to_check: List[str] = list(iter_message_text(data))
if contents_to_check:
if self.is_detector_server:
# Call detector server with all contents at once
result = await self._call_detector_server(
contents=contents_to_check,
request_data=data,
event_type=GuardrailEventHooks.pre_call,
)
if contents_to_check:
if self.is_detector_server:
# Call detector server with all contents at once
result = await self._call_detector_server(
contents=contents_to_check,
verbose_proxy_logger.debug(
"IBM Detector Server async_pre_call_hook result: %s", result
)
# Check if any detections were found
has_violations = False
for message_detections in result:
filtered = self._filter_detections_by_threshold(message_detections)
if filtered:
has_violations = True
break
if has_violations and self.block_on_detection:
error_message = self._create_error_message_detector_server(result)
raise ValueError(error_message)
else:
# Call orchestrator for each content separately
for content in contents_to_check:
orchestrator_result = await self._call_orchestrator(
content=content,
request_data=data,
event_type=GuardrailEventHooks.pre_call,
)
verbose_proxy_logger.debug(
"IBM Detector Server async_pre_call_hook result: %s", result
"IBM Orchestrator async_pre_call_hook result: %s",
orchestrator_result,
)
# Check if any detections were found
has_violations = False
for message_detections in result:
filtered = self._filter_detections_by_threshold(
message_detections
)
if filtered:
has_violations = True
break
if has_violations and self.block_on_detection:
error_message = self._create_error_message_detector_server(
result
)
raise ValueError(error_message)
else:
# Call orchestrator for each content separately
for content in contents_to_check:
orchestrator_result = await self._call_orchestrator(
content=content,
request_data=data,
event_type=GuardrailEventHooks.pre_call,
)
verbose_proxy_logger.debug(
"IBM Orchestrator async_pre_call_hook result: %s",
orchestrator_result,
)
filtered = self._filter_detections_by_threshold(
filtered = self._filter_detections_by_threshold(orchestrator_result)
if filtered and self.block_on_detection:
error_message = self._create_error_message_orchestrator(
orchestrator_result
)
if filtered and self.block_on_detection:
error_message = self._create_error_message_orchestrator(
orchestrator_result
)
raise ValueError(error_message)
raise ValueError(error_message)
# Add guardrail to applied guardrails header
add_guardrail_to_applied_guardrails_header(
@ -550,65 +539,53 @@ class IBMGuardrailDetector(CustomGuardrail):
if self.should_run_guardrail(data=data, event_type=event_type) is not True:
return
_messages = data.get("messages")
if _messages:
contents_to_check: List[str] = []
for message in _messages:
_content = message.get("content")
if isinstance(_content, str):
contents_to_check.append(_content)
# Covers multimodal list content + Responses-API input.
contents_to_check: List[str] = list(iter_message_text(data))
if contents_to_check:
if self.is_detector_server:
# Call detector server with all contents at once
result = await self._call_detector_server(
contents=contents_to_check,
request_data=data,
event_type=GuardrailEventHooks.during_call,
)
if contents_to_check:
if self.is_detector_server:
# Call detector server with all contents at once
result = await self._call_detector_server(
contents=contents_to_check,
verbose_proxy_logger.debug(
"IBM Detector Server async_moderation_hook result: %s", result
)
# Check if any detections were found
has_violations = False
for message_detections in result:
filtered = self._filter_detections_by_threshold(message_detections)
if filtered:
has_violations = True
break
if has_violations and self.block_on_detection:
error_message = self._create_error_message_detector_server(result)
raise ValueError(error_message)
else:
# Call orchestrator for each content separately
for content in contents_to_check:
orchestrator_result = await self._call_orchestrator(
content=content,
request_data=data,
event_type=GuardrailEventHooks.during_call,
)
verbose_proxy_logger.debug(
"IBM Detector Server async_moderation_hook result: %s", result
"IBM Orchestrator async_moderation_hook result: %s",
orchestrator_result,
)
# Check if any detections were found
has_violations = False
for message_detections in result:
filtered = self._filter_detections_by_threshold(
message_detections
)
if filtered:
has_violations = True
break
if has_violations and self.block_on_detection:
error_message = self._create_error_message_detector_server(
result
)
raise ValueError(error_message)
else:
# Call orchestrator for each content separately
for content in contents_to_check:
orchestrator_result = await self._call_orchestrator(
content=content,
request_data=data,
event_type=GuardrailEventHooks.during_call,
)
verbose_proxy_logger.debug(
"IBM Orchestrator async_moderation_hook result: %s",
orchestrator_result,
)
filtered = self._filter_detections_by_threshold(
filtered = self._filter_detections_by_threshold(orchestrator_result)
if filtered and self.block_on_detection:
error_message = self._create_error_message_orchestrator(
orchestrator_result
)
if filtered and self.block_on_detection:
error_message = self._create_error_message_orchestrator(
orchestrator_result
)
raise ValueError(error_message)
raise ValueError(error_message)
# Add guardrail to applied guardrails header
add_guardrail_to_applied_guardrails_header(

View File

@ -13,6 +13,11 @@ from litellm.llms.custom_httpx.http_handler import (
httpxSpecialProvider,
)
from litellm.proxy._types import UserAPIKeyAuth
from litellm.proxy.guardrails._content_utils import (
apply_redacted_messages_back,
build_inspection_messages,
has_non_string_content,
)
from litellm.secret_managers.main import get_secret_str
from litellm.types.guardrails import GuardrailEventHooks
from litellm.types.llms.openai import AllMessageValues
@ -214,18 +219,26 @@ class LakeraAIGuardrail(CustomGuardrail):
)
return data
new_messages: Optional[List[AllMessageValues]] = data.get("messages")
if new_messages is None:
# Covers multimodal list content + Responses-API input.
new_messages = build_inspection_messages(data)
if not new_messages:
verbose_proxy_logger.warning(
"Lakera AI: not running guardrail. No messages in data"
"Lakera AI: not running guardrail. No inspectable text in data"
)
return data
# Mask-in-place uses offsets returned by Lakera and can only
# preserve non-text parts (images, audio, …) when the original
# content is a plain string. For multimodal/Responses-API input
# we degrade to block-on-detect so we never silently strip image
# parts while attempting to redact text.
is_multimodal_input = has_non_string_content(data)
#########################################################
########## 1. Make the Lakera AI v2 guard API request ##########
#########################################################
lakera_guardrail_response, masked_entity_count = await self.call_v2_guard(
messages=new_messages,
messages=new_messages, # type: ignore[arg-type]
request_data=data,
event_type=GuardrailEventHooks.pre_call,
)
@ -234,13 +247,20 @@ class LakeraAIGuardrail(CustomGuardrail):
########## 2. Handle flagged content ##########
#########################################################
if lakera_guardrail_response.get("flagged") is True:
# If only PII violations exist, mask the PII
if self._is_only_pii_violation(lakera_guardrail_response):
data["messages"] = self._mask_pii_in_messages(
messages=new_messages,
# If only PII violations exist, mask the PII (string input only).
if (
self._is_only_pii_violation(lakera_guardrail_response)
and not is_multimodal_input
):
redacted_messages = self._mask_pii_in_messages(
messages=new_messages, # type: ignore[arg-type]
lakera_response=lakera_guardrail_response,
masked_entity_count=masked_entity_count,
)
# Write back to ``messages`` AND ``input``. The Responses-API
# backend reads ``input``; writing only to ``messages``
# would let unredacted PII reach the LLM for /v1/responses.
apply_redacted_messages_back(data, list(redacted_messages)) # type: ignore[arg-type]
verbose_proxy_logger.debug(
"Lakera AI: Masked PII in messages instead of blocking request"
)
@ -252,7 +272,9 @@ class LakeraAIGuardrail(CustomGuardrail):
)
# Log violation but continue
elif self.on_flagged == "block":
# If there are other violations or not set to mask PII, raise exception
# Either non-PII violations, or PII on multimodal input
# (which cannot be masked in place without dropping
# image/audio parts) — raise the standard block error.
raise self._get_http_exception_for_blocked_guardrail(
lakera_guardrail_response
)
@ -280,18 +302,22 @@ class LakeraAIGuardrail(CustomGuardrail):
if self.should_run_guardrail(data=data, event_type=event_type) is not True:
return
new_messages: Optional[List[AllMessageValues]] = data.get("messages")
if new_messages is None:
new_messages = build_inspection_messages(data)
if not new_messages:
verbose_proxy_logger.warning(
"Lakera AI: not running guardrail. No messages in data"
"Lakera AI: not running guardrail. No inspectable text in data"
)
return
# See ``async_pre_call_hook`` — multimodal input degrades to
# block-on-detect because mask-in-place would drop image parts.
is_multimodal_input = has_non_string_content(data)
#########################################################
########## 1. Make the Lakera AI v2 guard API request ##########
#########################################################
lakera_guardrail_response, masked_entity_count = await self.call_v2_guard(
messages=new_messages,
messages=new_messages, # type: ignore[arg-type]
request_data=data,
event_type=GuardrailEventHooks.during_call,
)
@ -300,25 +326,28 @@ class LakeraAIGuardrail(CustomGuardrail):
########## 2. Handle flagged content ##########
#########################################################
if lakera_guardrail_response.get("flagged") is True:
# If only PII violations exist, mask the PII
if self._is_only_pii_violation(lakera_guardrail_response):
data["messages"] = self._mask_pii_in_messages(
messages=new_messages,
if (
self._is_only_pii_violation(lakera_guardrail_response)
and not is_multimodal_input
):
redacted_messages = self._mask_pii_in_messages(
messages=new_messages, # type: ignore[arg-type]
lakera_response=lakera_guardrail_response,
masked_entity_count=masked_entity_count,
)
# Write back to ``messages`` AND ``input``. The Responses-API
# backend reads ``input``; writing only to ``messages``
# would let unredacted PII reach the LLM for /v1/responses.
apply_redacted_messages_back(data, list(redacted_messages)) # type: ignore[arg-type]
verbose_proxy_logger.debug(
"Lakera AI: Masked PII in messages instead of blocking request"
)
else:
# Check on_flagged setting
if self.on_flagged == "monitor":
verbose_proxy_logger.warning(
"Lakera Guardrail: Monitoring mode - violation detected but allowing request"
)
# Log violation but continue
elif self.on_flagged == "block":
# If there are other violations or not set to mask PII, raise exception
raise self._get_http_exception_for_blocked_guardrail(
lakera_guardrail_response
)

View File

@ -50,6 +50,11 @@ from litellm.llms.custom_httpx.http_handler import (
httpxSpecialProvider,
)
from litellm.proxy._types import UserAPIKeyAuth
from litellm.proxy.guardrails._content_utils import (
apply_redacted_messages_back,
build_inspection_messages,
has_non_string_content,
)
from litellm.types.guardrails import GuardrailEventHooks
import litellm
@ -366,16 +371,19 @@ class LassoGuardrail(CustomGuardrail):
LassoGuardrailAPIError: If the Lasso API call fails
HTTPException: If blocking violations are detected
"""
messages: List[Dict[str, str]] = data.get("messages", [])
# Covers multimodal list content + Responses-API input.
messages: List[Dict[str, str]] = build_inspection_messages(data)
if not messages:
return data
if self.mask:
# Lasso's classifix endpoint returns masked text that we copy back
# into ``data["messages"]``. For multimodal/Responses-API input we
# would silently strip image/audio parts, so fall back to the
# classify endpoint (which still raises on BLOCK actions) and
# leave the original payload intact.
if self.mask and not has_non_string_content(data):
return await self._handle_masking(data, cache, message_type, messages)
else:
return await self._handle_classification(
data, cache, message_type, messages
)
return await self._handle_classification(data, cache, message_type, messages)
async def _handle_classification(
self,
@ -413,8 +421,9 @@ class LassoGuardrail(CustomGuardrail):
self._process_lasso_response(response)
# Apply masking to messages if violations detected and masked messages are available
if response.get("violations_detected") and response.get("messages"):
data["messages"] = response["messages"]
redacted_messages = response.get("messages")
if response.get("violations_detected") and redacted_messages:
apply_redacted_messages_back(data, list(redacted_messages))
self._log_masking_applied(message_type, dict(response))
return data

View File

@ -1873,8 +1873,9 @@ class ContentFilterGuardrail(CustomGuardrail):
and the UI Request Lifecycle panel. Mirrors apply_guardrail's finally-block
contract.
"""
accumulated_full_text = ""
yielded_masked_text_len = 0
accumulated_text_by_choice: Dict[int, str] = {}
yielded_masked_text_len_by_choice: Dict[int, int] = {}
latest_detections_by_choice: Dict[int, List[ContentFilterDetection]] = {}
buffer_size = 50 # Increased buffer to catch patterns split across many chunks
start_time = datetime.now()
@ -1890,79 +1891,90 @@ class ContentFilterGuardrail(CustomGuardrail):
try:
async for item in response:
if isinstance(item, ModelResponseStream) and item.choices:
delta_content = ""
is_final = False
for choice in item.choices:
if hasattr(choice, "delta") and choice.delta:
content = getattr(choice.delta, "content", None)
if content and isinstance(content, str):
delta_content += content
if getattr(choice, "finish_reason", None):
is_final = True
if not (hasattr(choice, "delta") and choice.delta):
continue
accumulated_full_text += delta_content
choice_index = getattr(choice, "index", 0)
if not isinstance(choice_index, int):
choice_index = 0
# Check for blocking or apply masking
# Add a space at the end if it's the final chunk to trigger word boundaries (\b)
text_to_check = accumulated_full_text
if is_final:
text_to_check += " "
content = getattr(choice.delta, "content", None)
is_final = bool(getattr(choice, "finish_reason", None))
if isinstance(content, str) and content:
accumulated_text_by_choice[choice_index] = (
accumulated_text_by_choice.get(choice_index, "")
+ content
)
elif not is_final:
continue
try:
# Reset before each scan: _filter_single_text scans the
# whole accumulated buffer every chunk, so previous-chunk
# matches are guaranteed to be re-found. Keeping only the
# latest scan's detections avoids N× duplication in the
# final log row. BLOCK still records correctly because
# handlers append to detections before raising.
detections.clear()
masked_text = self._filter_single_text(
text_to_check, detections=detections
text_to_check = accumulated_text_by_choice.get(choice_index, "")
if not text_to_check:
continue
# Add a space at the end if it's the final chunk to trigger word boundaries (\b)
text_to_scan = text_to_check + (" " if is_final else "")
choice_detections: List[ContentFilterDetection] = []
try:
# _filter_single_text scans the whole accumulated
# choice buffer every chunk, so previous-chunk
# matches are guaranteed to be re-found. Keeping
# only each choice's latest scan avoids duplicate
# detections in the final log row.
masked_text = self._filter_single_text(
text_to_scan, detections=choice_detections
)
if is_final and masked_text.endswith(" "):
masked_text = masked_text[:-1]
latest_detections_by_choice[choice_index] = (
choice_detections
)
except HTTPException:
latest_detections_by_choice[choice_index] = (
choice_detections
)
raise
except Exception as e:
verbose_proxy_logger.error(
f"ContentFilterGuardrail: Error in masking: {e}"
)
masked_text = text_to_scan # Fallback to current text
# Determine how much can be safely yielded
if is_final:
safe_to_yield_len = len(masked_text)
else:
safe_to_yield_len = max(0, len(masked_text) - buffer_size)
yielded_masked_text_len = yielded_masked_text_len_by_choice.get(
choice_index, 0
)
if is_final and masked_text.endswith(" "):
masked_text = masked_text[:-1]
except HTTPException:
raise
except Exception as e:
verbose_proxy_logger.error(
f"ContentFilterGuardrail: Error in masking: {e}"
)
masked_text = text_to_check # Fallback to current text
if safe_to_yield_len > yielded_masked_text_len:
new_masked_content = masked_text[
yielded_masked_text_len:safe_to_yield_len
]
choice.delta.content = new_masked_content
yielded_masked_text_len_by_choice[choice_index] = (
safe_to_yield_len
)
else:
# Hold content by yielding empty content on this choice
# while preserving chunk metadata and other choices.
choice.delta.content = ""
# Determine how much can be safely yielded
if is_final:
safe_to_yield_len = len(masked_text)
else:
safe_to_yield_len = max(0, len(masked_text) - buffer_size)
if safe_to_yield_len > yielded_masked_text_len:
new_masked_content = masked_text[
yielded_masked_text_len:safe_to_yield_len
]
# Modify the chunk to contain only the new masked content
if (
item.choices
and hasattr(item.choices[0], "delta")
and item.choices[0].delta
):
item.choices[0].delta.content = new_masked_content
yielded_masked_text_len = safe_to_yield_len
yield item
else:
# Hold content by yielding empty content chunk (keeps metadata/structure)
if (
item.choices
and hasattr(item.choices[0], "delta")
and item.choices[0].delta
):
item.choices[0].delta.content = ""
yield item
yield item
else:
# Not a ModelResponseStream or no choices - yield as is
yield item
# Any remaining content (should have been handled by is_final, but just in case)
if yielded_masked_text_len < len(accumulated_full_text):
if any(
yielded_masked_text_len_by_choice.get(choice_index, 0)
< len(accumulated_text)
for choice_index, accumulated_text in accumulated_text_by_choice.items()
):
# We already reached the end of the generator
pass
except HTTPException:
@ -1973,6 +1985,11 @@ class ContentFilterGuardrail(CustomGuardrail):
exception_str = str(e)
raise e
finally:
detections = [
detection
for choice_detections in latest_detections_by_choice.values()
for detection in choice_detections
]
self._count_masked_entities(detections, masked_entity_count)
self._log_guardrail_information(
request_data=request_data,

View File

@ -187,11 +187,28 @@ def _extract_user_text(messages: List) -> str:
def _extract_response_text(response: Any) -> str:
"""Extract text from LLM response object."""
"""Extract text from every LLM response choice."""
if hasattr(response, "choices") and response.choices:
choice = response.choices[0]
if hasattr(choice, "message") and choice.message:
return choice.message.content or ""
text_parts: List[str] = []
for choice in response.choices:
if hasattr(choice, "message") and choice.message:
text = _content_to_text(choice.message.content)
if text:
text_parts.append(text)
return "\n".join(text_parts)
return ""
def _content_to_text(content: Any) -> str:
if isinstance(content, str):
return content
if isinstance(content, list):
text_parts = [
block.get("text")
for block in content
if isinstance(block, dict) and isinstance(block.get("text"), str)
]
return " ".join(part for part in text_parts if part)
return ""

View File

@ -480,21 +480,32 @@ class XecGuardGuardrail(CustomGuardrail):
choices = response.get("choices")
if not choices:
return None
first = choices[0]
if hasattr(first, "message"):
message = first.message
elif isinstance(first, dict):
message = first.get("message")
text_parts: List[str] = []
for choice in choices:
content = XecGuardGuardrail._extract_choice_content(choice)
text = XecGuardGuardrail._content_to_text(content)
if text:
text_parts.append(text)
return "\n".join(text_parts) or None
@staticmethod
def _extract_choice_content(choice: Any) -> Any:
if hasattr(choice, "message"):
message = choice.message
elif isinstance(choice, dict):
message = choice.get("message")
else:
return None
if message is None:
return None
if hasattr(message, "content"):
content = message.content
elif isinstance(message, dict):
content = message.get("content")
else:
return None
return message.content
if isinstance(message, dict):
return message.get("content")
return None
@staticmethod
def _content_to_text(content: Any) -> Optional[str]:
if isinstance(content, str) and content:
return content
if isinstance(content, list):

View File

@ -1447,14 +1447,11 @@ def callback_name(callback):
return str(callback)
@router.get(
"/health/readiness",
tags=["health"],
dependencies=[Depends(user_api_key_auth)],
)
async def health_readiness(response: Response):
async def _get_health_readiness_details(
response: Optional[Response] = None,
) -> Dict[str, Any]:
"""
Unprotected endpoint for checking if worker can receive requests
Detailed health payload for authenticated diagnostics.
"""
from litellm.proxy.proxy_server import prisma_client, version
@ -1473,7 +1470,7 @@ async def health_readiness(response: Response):
success_callback_names = litellm.success_callback
# check Cache
cache_type = None
cache_type: Any = None
if litellm.cache is not None:
from litellm.caching.caching import RedisSemanticCache
@ -1482,6 +1479,7 @@ async def health_readiness(response: Response):
if isinstance(litellm.cache.cache, RedisSemanticCache):
# ping the cache
# TODO: @ishaan-jaff - we should probably not ping the cache on every /health/readiness check
index_info: Any
try:
index_info = await litellm.cache.cache._index_info()
except Exception as e:
@ -1499,7 +1497,7 @@ async def health_readiness(response: Response):
# serve requests that depend on persisted state (keys, budgets,
# spend logs). Return 503 so orchestrators take this pod out of
# rotation; "Not connected" (no DB configured at all) stays 200.
if db_health_status["status"] != "connected":
if response is not None and db_health_status["status"] != "connected":
response.status_code = status.HTTP_503_SERVICE_UNAVAILABLE
return {
"status": "healthy",
@ -1526,6 +1524,52 @@ async def health_readiness(response: Response):
raise HTTPException(status_code=503, detail=f"Service Unhealthy ({str(e)})")
def _allow_public_health_readiness_details() -> bool:
    """Report whether the public readiness probe may return the detailed payload.

    True only when ``general_settings["allow_public_health_readiness_details"]``
    is exactly ``True``; a missing key or any other value gives ``False``.
    """
    # Function-scope import, as the other readiness helpers in this module do.
    from litellm.proxy.proxy_server import general_settings

    flag = general_settings.get("allow_public_health_readiness_details")
    return flag is True
async def _set_public_readiness_status(response: Response) -> None:
    """Set HTTP 503 on ``response`` when a configured DB is not connected.

    Leaves ``response`` untouched when ``prisma_client`` is ``None`` or when
    the DB readiness check reports ``"connected"``.
    """
    from litellm.proxy.proxy_server import prisma_client

    if prisma_client is not None:
        db_state = await _db_health_readiness_check()
        if db_state["status"] != "connected":
            response.status_code = status.HTTP_503_SERVICE_UNAVAILABLE
@router.get(
    "/health/readiness",
    tags=["health"],
)
async def health_readiness(response: Response):
    """
    Public readiness probe. Keep this low-detail for unauthenticated load
    balancers by default. Admins can opt into the legacy detailed public
    payload with general_settings.allow_public_health_readiness_details.
    """
    # Default path: only the status code carries DB state; the body stays minimal.
    if not _allow_public_health_readiness_details():
        await _set_public_readiness_status(response=response)
        return {"status": "healthy"}

    # Opt-in path: return the same detailed payload as the authenticated route.
    return await _get_health_readiness_details(response=response)
@router.get(
    "/health/readiness/details",
    tags=["health"],
    dependencies=[Depends(user_api_key_auth)],
)
async def health_readiness_details(response: Response):
    """
    Authenticated readiness diagnostics with DB/cache/callback metadata.
    """
    # Pass the response through so the helper can adjust its status code.
    details = await _get_health_readiness_details(response=response)
    return details
@router.get(
"/health/backlog",
tags=["health"],
@ -1561,7 +1605,6 @@ async def health_liveliness():
@router.options(
"/health/readiness",
tags=["health"],
dependencies=[Depends(user_api_key_auth)],
)
async def health_readiness_options():
"""

View File

@ -8,6 +8,10 @@ from litellm._logging import verbose_proxy_logger
from litellm.caching.caching import DualCache
from litellm.integrations.custom_logger import CustomLogger
from litellm.proxy._types import UserAPIKeyAuth
from litellm.proxy.guardrails._content_utils import (
is_text_content_call_type,
iter_message_text,
)
class _PROXY_AzureContentSafety(
@ -118,10 +122,9 @@ class _PROXY_AzureContentSafety(
):
verbose_proxy_logger.debug("Inside Azure Content-Safety Pre-Call Hook")
try:
if call_type == "completion" and "messages" in data:
for m in data["messages"]:
if "content" in m and isinstance(m["content"], str):
await self.test_violation(content=m["content"], source="input")
if is_text_content_call_type(call_type):
for text in iter_message_text(data):
await self.test_violation(content=text, source="input")
except HTTPException as e:
raise e
@ -140,12 +143,16 @@ class _PROXY_AzureContentSafety(
response,
):
verbose_proxy_logger.debug("Inside Azure Content-Safety Post-Call Hook")
if isinstance(response, litellm.ModelResponse) and isinstance(
response.choices[0], litellm.utils.Choices
):
await self.test_violation(
content=response.choices[0].message.content or "", source="output"
)
if not isinstance(response, litellm.ModelResponse):
return
for choice in response.choices:
if not isinstance(choice, litellm.utils.Choices):
continue
message = getattr(choice, "message", None)
content = getattr(message, "content", None)
if isinstance(content, str):
await self.test_violation(content=content, source="output")
# async def async_post_call_streaming_hook(
# self,

View File

@ -61,6 +61,7 @@ from litellm.secret_managers.main import get_secret_bool
from litellm.types.llms.anthropic import ANTHROPIC_API_HEADERS
from litellm.types.services import ServiceTypes
from litellm.types.utils import (
CustomPricingLiteLLMParams,
LlmProviders,
ProviderSpecificHeader,
StandardLoggingUserAPIKeyMetadata,
@ -168,6 +169,20 @@ _ALLOW_CLIENT_MESSAGE_REDACTION_OPT_OUT_METADATA_KEY = (
"allow_client_message_redaction_opt_out"
)
# Per-request pricing parameters mutate cost-tracking output and (via
# ``litellm.completion`` → ``register_model``) the process-wide
# ``litellm.model_cost`` map. Both effects belong to deployment configuration,
# not to user-supplied request bodies, so the proxy strips them before they
# reach the call path. Built from the Pydantic model so newly-added pricing
# fields are covered automatically.
_CLIENT_PRICING_CONTROL_FIELDS = frozenset(
CustomPricingLiteLLMParams.model_fields.keys()
)
# ``model_info`` carries the same pricing fields when read by
# ``use_custom_pricing_for_model``; strip from metadata for the same reason.
_CLIENT_PRICING_METADATA_FIELDS = frozenset({"model_info"})
_ALLOW_CLIENT_PRICING_OVERRIDE_METADATA_KEY = "allow_client_pricing_override"
# Request fields whose value, when URL-valued, becomes the outbound destination
# for a provider call. Letting a proxy caller pin the destination is an SSRF
# primitive (HuggingFace/Oobabooga `model`, Gemini files `file_id`); guard
@ -265,6 +280,46 @@ def _key_or_team_allows_client_message_redaction_opt_out(
)
def _key_or_team_allows_client_pricing_override(
    user_api_key_dict: UserAPIKeyAuth,
) -> bool:
    """Return the key/team ``allow_client_pricing_override`` metadata flag.

    Delegates to ``_key_or_team_metadata_flag_is_true`` with the
    pricing-override metadata key.
    """
    allowed = _key_or_team_metadata_flag_is_true(
        metadata_key=_ALLOW_CLIENT_PRICING_OVERRIDE_METADATA_KEY,
        user_api_key_dict=user_api_key_dict,
    )
    return allowed
def _strip_client_pricing_overrides(data: Dict[str, Any]) -> None:
    """Drop pricing overrides from the request body and any metadata variant.

    Removes every key in ``_CLIENT_PRICING_CONTROL_FIELDS`` from ``data`` and
    every key in ``_CLIENT_PRICING_METADATA_FIELDS`` from the ``metadata`` and
    ``litellm_metadata`` dicts, mutating ``data`` in place. Metadata values
    that are not dicts are left alone.

    Emits a ``debug``-level log line naming the dropped fields so operators
    can trace why a client-supplied pricing override stopped being applied
    (otherwise the strip is invisible from the caller's perspective).

    Args:
        data: Parsed request body; modified in place.
    """
    # Iterate in sorted order: the field sets are frozensets of strings, whose
    # iteration order can differ between processes, and a stable order keeps
    # the debug line comparable across workers and restarts.
    stripped: List[str] = []
    for field in sorted(_CLIENT_PRICING_CONTROL_FIELDS):
        if field in data:
            stripped.append(field)
            data.pop(field, None)

    for metadata_key in ("metadata", "litellm_metadata"):
        metadata = data.get(metadata_key)
        if not isinstance(metadata, dict):
            continue
        for field in sorted(_CLIENT_PRICING_METADATA_FIELDS):
            if field in metadata:
                stripped.append(f"{metadata_key}.{field}")
                metadata.pop(field, None)

    if stripped:
        verbose_proxy_logger.debug(
            "Stripped client-supplied pricing fields from request body: %s. "
            "Set `allow_client_pricing_override: true` on the key or team "
            "metadata to keep these values.",
            ", ".join(stripped),
        )
def _get_metadata_variable_name(request: Request) -> str:
"""
Helper to return what the "metadata" field should be called in the request data
@ -1364,6 +1419,14 @@ async def add_litellm_data_to_request( # noqa: PLR0915
]:
_user_meta.pop(_k, None)
# Strip pricing overrides AFTER the litellm_metadata string-to-dict parse
# above, for the same reason as the user_api_key_* strip — JSON-string
# metadata (sent via multipart/form-data or extra_body) wouldn't be a
# dict yet at the earlier strip point and the isinstance(dict) guard
# would silently skip the field.
if not _key_or_team_allows_client_pricing_override(user_api_key_dict):
_strip_client_pricing_overrides(data)
# Strip caller-supplied routing/budget tags unless the admin has opted
# this key or team in via metadata.allow_client_tags=True. Tags drive
# tag-based routing and tag budget attribution — accepting them from

View File

@ -104,11 +104,16 @@ async def get_router_settings(
config = await proxy_config.get_config()
router_settings_from_config = config.get("router_settings", {})
# Get current values from llm_router if initialized
current_values = {}
current_values: Dict[str, Any] = {}
if llm_router is not None:
# Check all field names from the fields list
# Router exposes routing groups as private `_routing_groups`; the
# generic `hasattr` loop below would miss them.
current_values["routing_groups"] = [
group.model_dump() for group in llm_router._routing_groups.values()
]
for field in router_fields:
if field.field_name == "routing_groups":
continue
if hasattr(llm_router, field.field_name):
value = getattr(llm_router, field.field_name)
current_values[field.field_name] = value

View File

@ -20,13 +20,13 @@ class PrometheusAuthMiddleware:
"""
Middleware to authenticate requests to the metrics endpoint.
By default, auth is not run on the metrics endpoint.
By default, auth is run on the metrics endpoint.
Enabled by setting the following in proxy_config.yaml:
To allow unauthenticated metrics in proxy_config.yaml:
```yaml
litellm_settings:
require_auth_for_metrics_endpoint: true
require_auth_for_metrics_endpoint: false
```
"""
@ -39,8 +39,8 @@ class PrometheusAuthMiddleware:
await self.app(scope, receive, send)
return
# Only run auth if configured to do so
if litellm.require_auth_for_metrics_endpoint is True:
# Run auth by default; allow legacy public metrics only when explicitly disabled.
if litellm.require_auth_for_metrics_endpoint is not False:
# user_api_key_auth reads the request body, which consumes ASGI `receive`.
# Buffer those messages and replay them for the inner app; otherwise a
# successful auth would forward an exhausted receive and /metrics hangs.
@ -52,10 +52,29 @@ class PrometheusAuthMiddleware:
return message
request = Request(scope, receive_for_auth)
api_key = request.headers.get(_AUTHORIZATION_HEADER) or ""
try:
await user_api_key_auth(request=request, api_key=api_key)
await user_api_key_auth(
request=request,
api_key=request.headers.get(_AUTHORIZATION_HEADER) or "",
azure_api_key_header=request.headers.get(
SpecialHeaders.azure_authorization.value
)
or "",
anthropic_api_key_header=request.headers.get(
SpecialHeaders.anthropic_authorization.value
),
google_ai_studio_api_key_header=request.headers.get(
SpecialHeaders.google_ai_studio_authorization.value
),
azure_apim_header=request.headers.get(
SpecialHeaders.azure_apim_authorization.value
)
or "",
custom_litellm_key_header=request.headers.get(
SpecialHeaders.custom_litellm_api_key.value
),
)
except Exception as e:
# Send 401 response directly via ASGI protocol
error_message = getattr(e, "message", str(e))

View File

@ -5,7 +5,7 @@ from importlib.resources import files
from typing import Any, Dict, List, Optional
import litellm
from fastapi import APIRouter, Depends, HTTPException
from fastapi import APIRouter, HTTPException
from litellm._logging import verbose_logger
from litellm.litellm_core_utils.get_blog_posts import (
@ -14,8 +14,9 @@ from litellm.litellm_core_utils.get_blog_posts import (
GetBlogPosts,
get_blog_posts,
)
from litellm.proxy._types import CommonProxyErrors
from litellm.proxy.auth.user_api_key_auth import user_api_key_auth
from litellm.proxy._types import (
CommonProxyErrors,
)
from litellm.types.agents import AgentCard
from litellm.types.mcp import MCPPublicServer
from litellm.types.proxy.management_endpoints.model_management_endpoints import (
@ -31,6 +32,7 @@ from litellm.types.utils import LlmProviders
router = APIRouter()
# ---------------------------------------------------------------------------
# /public/endpoints — helpers
# ---------------------------------------------------------------------------
@ -153,7 +155,6 @@ def _load_endpoints() -> List[Dict[str, Any]]:
@router.get(
"/public/model_hub",
tags=["public", "model management"],
dependencies=[Depends(user_api_key_auth)],
response_model=List[ModelGroupInfoProxy],
)
async def public_model_hub():
@ -208,7 +209,6 @@ async def public_model_hub():
@router.get(
"/public/agent_hub",
tags=["[beta] Agents", "public"],
dependencies=[Depends(user_api_key_auth)],
response_model=List[AgentCard],
)
async def get_agents():
@ -230,7 +230,6 @@ async def get_agents():
@router.get(
"/public/mcp_hub",
tags=["[beta] MCP", "public"],
dependencies=[Depends(user_api_key_auth)],
response_model=List[MCPPublicServer],
)
async def get_mcp_servers():

View File

@ -3079,7 +3079,11 @@ async def global_spend_models(
return response
@router.get("/provider/budgets", response_model=ProviderBudgetResponse)
@router.get(
"/provider/budgets",
dependencies=[Depends(user_api_key_auth)],
response_model=ProviderBudgetResponse,
)
async def provider_budgets() -> ProviderBudgetResponse:
"""
Provider Budget Routing - Get Budget, Spend Details https://docs.litellm.ai/docs/proxy/provider_budget_routing

View File

@ -99,6 +99,11 @@ class UISettings(BaseModel):
description="If true, requires authentication for accessing the public AI Hub.",
)
allow_public_health_readiness_details: bool = Field(
default=False,
description="If true, returns the legacy detailed payload from the unauthenticated /health/readiness endpoint.",
)
forward_client_headers_to_llm_api: bool = Field(
default=False,
description=(
@ -169,6 +174,7 @@ ALLOWED_UI_SETTINGS_FIELDS = {
"disable_team_admin_delete_team_user",
"enabled_ui_pages_internal_users",
"require_auth_for_public_ai_hub",
"allow_public_health_readiness_details",
"forward_client_headers_to_llm_api",
"forward_llm_provider_auth_headers",
"disable_agents_for_internal_users",
@ -183,6 +189,7 @@ ALLOWED_UI_SETTINGS_FIELDS = {
# Flags that must be synced from the persisted UISettings into
# general_settings at runtime (on both read and write).
_RUNTIME_GENERAL_SETTINGS_FLAGS = [
"allow_public_health_readiness_details",
"forward_client_headers_to_llm_api",
"forward_llm_provider_auth_headers",
"disable_agents_for_internal_users",

View File

@ -1052,11 +1052,17 @@ class Router:
strategy = self._normalize_strategy(self.routing_strategy)
attr = self._DEFAULT_SELECTOR_ATTR_BY_STRATEGY.get(strategy or "")
selector = getattr(self, attr, None) if attr is not None else None
verbose_router_logger.debug(
"routing_group=default model=%s strategy=%s", model, strategy
)
return strategy, selector
group = self._routing_groups[group_name]
strategy = self._normalize_strategy(group.routing_strategy)
selector = self._group_selectors.get(group_name, {}).get(strategy or "")
verbose_router_logger.debug(
"routing_group=%s model=%s strategy=%s", group_name, model, strategy
)
return strategy, selector
async def _select_deployment_async(

View File

@ -112,6 +112,14 @@ ROUTER_SETTINGS_FIELDS: List[RouterSettingsField] = [
field_default={},
ui_field_name="Routing Strategy Args",
),
RouterSettingsField(
field_name="routing_groups",
field_type="List",
field_value=None,
field_description="Named subsets of model_names that share a routing strategy. Models not claimed by an explicit group fall through to the top-level routing_strategy.",
field_default=[],
ui_field_name="Routing Groups",
),
RouterSettingsField(
field_name="num_retries",
field_type="Integer",

View File

@ -2243,12 +2243,52 @@ def encode(model="", text="", custom_tokenizer: Optional[dict] = None):
return enc
def decode(model="", tokens: List[int] = [], custom_tokenizer: Optional[dict] = None):
def decode(
    model="",
    tokens: List[int] = [],
    custom_tokenizer: Optional[dict] = None,
    skip_special_tokens: bool = True,
):
    """
    Decodes token ids using the selected tokenizer.

    Args:
        model: Model name passed to `_select_tokenizer` when no
            `custom_tokenizer` is supplied.
        tokens: Token ids to decode. The default list is only read or
            rebound here, never mutated in place.
        custom_tokenizer: Optional dict with a "tokenizer" entry and,
            optionally, a "type" entry. Dicts without "type" are decoded
            through the plain `tokenizer.decode(tokens)` path.
        skip_special_tokens: For HuggingFace tokenizers, keep the historical
            LiteLLM round-trip behavior by omitting special tokens by default.
            Set to False to inspect decoded BOS/EOS tokens.

    Returns:
        Whatever the underlying tokenizer's `decode` returns.
    """
    tokenizer_json = custom_tokenizer or _select_tokenizer(model=model)
    tokenizer = tokenizer_json["tokenizer"]

    # Use .get() so custom tokenizer dicts that only provide "tokenizer"
    # (accepted before the HuggingFace branch existed) keep working instead
    # of raising KeyError on the missing "type" key.
    if tokenizer_json.get("type") != "huggingface_tokenizer":
        return tokenizer.decode(tokens)

    if skip_special_tokens:
        # Drop ids flagged as special up front, in addition to asking the
        # tokenizer itself to skip special tokens below.
        tokens = _strip_huggingface_special_token_ids(tokenizer, tokens)
    return tokenizer.decode(tokens, skip_special_tokens=skip_special_tokens)
def _strip_huggingface_special_token_ids(
tokenizer: Tokenizer, tokens: List[int]
) -> List[int]:
try:
added_tokens_decoder = tokenizer.get_added_tokens_decoder()
except Exception:
return tokens
special_token_ids = {
token_id
for token_id, added_token in added_tokens_decoder.items()
if getattr(added_token, "special", False)
}
if not special_token_ids:
return tokens
return [token for token in tokens if token not in special_token_ids]
def create_pretrained_tokenizer(
identifier: str, revision="main", auth_token: Optional[str] = None
):

172
package-lock.json generated
View File

@ -1,19 +1,19 @@
{
"name": "litellm",
"name": "litellm-dependency-refresh",
"lockfileVersion": 3,
"requires": true,
"packages": {
"": {
"dependencies": {
"prism-react-renderer": "^2.4.1",
"prisma": "^5.17.0",
"react-copy-to-clipboard": "^5.1.0"
"prism-react-renderer": "2.4.1",
"prisma": "5.17.0",
"react-copy-to-clipboard": "5.1.1"
},
"devDependencies": {
"@testing-library/jest-dom": "^6.8.0",
"@testing-library/react": "^14.3.1",
"@types/react-copy-to-clipboard": "^5.0.7",
"jest": "^29.7.0"
"@testing-library/jest-dom": "6.8.0",
"@testing-library/react": "14.3.1",
"@types/react-copy-to-clipboard": "5.0.7",
"jest": "29.7.0"
}
},
"node_modules/@adobe/css-tools": {
@ -529,29 +529,6 @@
"dev": true,
"license": "MIT"
},
"node_modules/@isaacs/balanced-match": {
"version": "4.0.1",
"resolved": "https://registry.npmjs.org/@isaacs/balanced-match/-/balanced-match-4.0.1.tgz",
"integrity": "sha512-yzMTt9lEb8Gv7zRioUilSglI0c0smZ9k5D65677DLWLtWJaXIS3CqcGyUFByYKlnUj6TkjLVs54fBl6+TiGQDQ==",
"dev": true,
"license": "MIT",
"engines": {
"node": "20 || >=22"
}
},
"node_modules/@isaacs/brace-expansion": {
"version": "5.0.0",
"resolved": "https://registry.npmjs.org/@isaacs/brace-expansion/-/brace-expansion-5.0.0.tgz",
"integrity": "sha512-ZT55BDLV0yv0RBm2czMiZ+SqCGO7AvmOM3G/w2xhVPH+te0aKgFjmBvGlL1dH+ql2tgGO3MVrbb3jCKyvpgnxA==",
"dev": true,
"license": "MIT",
"dependencies": {
"@isaacs/balanced-match": "^4.0.1"
},
"engines": {
"node": "20 || >=22"
}
},
"node_modules/@istanbuljs/load-nyc-config": {
"version": "1.1.0",
"resolved": "https://registry.npmjs.org/@istanbuljs/load-nyc-config/-/load-nyc-config-1.1.0.tgz",
@ -957,48 +934,48 @@
}
},
"node_modules/@prisma/debug": {
"version": "5.22.0",
"resolved": "https://registry.npmjs.org/@prisma/debug/-/debug-5.22.0.tgz",
"integrity": "sha512-AUt44v3YJeggO2ZU5BkXI7M4hu9BF2zzH2iF2V5pyXT/lRTyWiElZ7It+bRH1EshoMRxHgpYg4VB6rCM+mG5jQ==",
"version": "5.17.0",
"resolved": "https://registry.npmjs.org/@prisma/debug/-/debug-5.17.0.tgz",
"integrity": "sha512-l7+AteR3P8FXiYyo496zkuoiJ5r9jLQEdUuxIxNCN1ud8rdbH3GTxm+f+dCyaSv9l9WY+29L9czaVRXz9mULfg==",
"license": "Apache-2.0"
},
"node_modules/@prisma/engines": {
"version": "5.22.0",
"resolved": "https://registry.npmjs.org/@prisma/engines/-/engines-5.22.0.tgz",
"integrity": "sha512-UNjfslWhAt06kVL3CjkuYpHAWSO6L4kDCVPegV6itt7nD1kSJavd3vhgAEhjglLJJKEdJ7oIqDJ+yHk6qO8gPA==",
"version": "5.17.0",
"resolved": "https://registry.npmjs.org/@prisma/engines/-/engines-5.17.0.tgz",
"integrity": "sha512-+r+Nf+JP210Jur+/X8SIPLtz+uW9YA4QO5IXA+KcSOBe/shT47bCcRMTYCbOESw3FFYFTwe7vU6KTWHKPiwvtg==",
"hasInstallScript": true,
"license": "Apache-2.0",
"dependencies": {
"@prisma/debug": "5.22.0",
"@prisma/engines-version": "5.22.0-44.605197351a3c8bdd595af2d2a9bc3025bca48ea2",
"@prisma/fetch-engine": "5.22.0",
"@prisma/get-platform": "5.22.0"
"@prisma/debug": "5.17.0",
"@prisma/engines-version": "5.17.0-31.393aa359c9ad4a4bb28630fb5613f9c281cde053",
"@prisma/fetch-engine": "5.17.0",
"@prisma/get-platform": "5.17.0"
}
},
"node_modules/@prisma/engines-version": {
"version": "5.22.0-44.605197351a3c8bdd595af2d2a9bc3025bca48ea2",
"resolved": "https://registry.npmjs.org/@prisma/engines-version/-/engines-version-5.22.0-44.605197351a3c8bdd595af2d2a9bc3025bca48ea2.tgz",
"integrity": "sha512-2PTmxFR2yHW/eB3uqWtcgRcgAbG1rwG9ZriSvQw+nnb7c4uCr3RAcGMb6/zfE88SKlC1Nj2ziUvc96Z379mHgQ==",
"version": "5.17.0-31.393aa359c9ad4a4bb28630fb5613f9c281cde053",
"resolved": "https://registry.npmjs.org/@prisma/engines-version/-/engines-version-5.17.0-31.393aa359c9ad4a4bb28630fb5613f9c281cde053.tgz",
"integrity": "sha512-tUuxZZysZDcrk5oaNOdrBnnkoTtmNQPkzINFDjz7eG6vcs9AVDmA/F6K5Plsb2aQc/l5M2EnFqn3htng9FA4hg==",
"license": "Apache-2.0"
},
"node_modules/@prisma/fetch-engine": {
"version": "5.22.0",
"resolved": "https://registry.npmjs.org/@prisma/fetch-engine/-/fetch-engine-5.22.0.tgz",
"integrity": "sha512-bkrD/Mc2fSvkQBV5EpoFcZ87AvOgDxbG99488a5cexp5Ccny+UM6MAe/UFkUC0wLYD9+9befNOqGiIJhhq+HbA==",
"version": "5.17.0",
"resolved": "https://registry.npmjs.org/@prisma/fetch-engine/-/fetch-engine-5.17.0.tgz",
"integrity": "sha512-ESxiOaHuC488ilLPnrv/tM2KrPhQB5TRris/IeIV4ZvUuKeaicCl4Xj/JCQeG9IlxqOgf1cCg5h5vAzlewN91Q==",
"license": "Apache-2.0",
"dependencies": {
"@prisma/debug": "5.22.0",
"@prisma/engines-version": "5.22.0-44.605197351a3c8bdd595af2d2a9bc3025bca48ea2",
"@prisma/get-platform": "5.22.0"
"@prisma/debug": "5.17.0",
"@prisma/engines-version": "5.17.0-31.393aa359c9ad4a4bb28630fb5613f9c281cde053",
"@prisma/get-platform": "5.17.0"
}
},
"node_modules/@prisma/get-platform": {
"version": "5.22.0",
"resolved": "https://registry.npmjs.org/@prisma/get-platform/-/get-platform-5.22.0.tgz",
"integrity": "sha512-pHhpQdr1UPFpt+zFfnPazhulaZYCUqeIcPpJViYoq9R+D/yw4fjE+CtnsnKzPYm0ddUbeXUzjGVGIRVgPDCk4Q==",
"version": "5.17.0",
"resolved": "https://registry.npmjs.org/@prisma/get-platform/-/get-platform-5.17.0.tgz",
"integrity": "sha512-UlDgbRozCP1rfJ5Tlkf3Cnftb6srGrEQ4Nm3og+1Se2gWmCZ0hmPIi+tQikGDUVLlvOWx3Gyi9LzgRP+HTXV9w==",
"license": "Apache-2.0",
"dependencies": {
"@prisma/debug": "5.22.0"
"@prisma/debug": "5.17.0"
}
},
"node_modules/@sinclair/typebox": {
@ -1066,9 +1043,9 @@
"license": "MIT"
},
"node_modules/@testing-library/jest-dom": {
"version": "6.9.1",
"resolved": "https://registry.npmjs.org/@testing-library/jest-dom/-/jest-dom-6.9.1.tgz",
"integrity": "sha512-zIcONa+hVtVSSep9UT3jZ5rizo2BsxgyDYU7WFD5eICBE7no3881HGeb/QkGfsJs6JTkY1aQhT7rIPC7e+0nnA==",
"version": "6.8.0",
"resolved": "https://registry.npmjs.org/@testing-library/jest-dom/-/jest-dom-6.8.0.tgz",
"integrity": "sha512-WgXcWzVM6idy5JaftTVC8Vs83NKRmGJz4Hqs4oyOuO2J4r/y79vvKZsb+CaGyCSEbUPI6OsewfPd0G1A0/TUZQ==",
"dev": true,
"license": "MIT",
"dependencies": {
@ -1497,11 +1474,14 @@
}
},
"node_modules/balanced-match": {
"version": "1.0.2",
"resolved": "https://registry.npmjs.org/balanced-match/-/balanced-match-1.0.2.tgz",
"integrity": "sha512-3oSeUO0TMV67hN1AmbXsK4yaqU7tjiHlbxRDZOpH0KW9+CeX4bRAaX0Anxt0tx2MrpRpWwQaPwIlISEJhYU5Pw==",
"version": "4.0.4",
"resolved": "https://registry.npmjs.org/balanced-match/-/balanced-match-4.0.4.tgz",
"integrity": "sha512-BLrgEcRTwX2o6gGxGOCNyMvGSp35YofuYzw9h1IMTRmKqttAZZVU67bdb9Pr2vUHA8+j3i2tJfjO6C6+4myGTA==",
"dev": true,
"license": "MIT"
"license": "MIT",
"engines": {
"node": "18 || 20 || >=22"
}
},
"node_modules/baseline-browser-mapping": {
"version": "2.8.30",
@ -1514,14 +1494,16 @@
}
},
"node_modules/brace-expansion": {
"version": "1.1.12",
"resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.12.tgz",
"integrity": "sha512-9T9UjW3r0UW5c1Q7GTwllptXwhvYmEzFhzMfZ9H7FQWt+uZePjZPjBP/W1ZEyZ1twGWom5/56TF4lPcqjnDHcg==",
"version": "5.0.5",
"resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-5.0.5.tgz",
"integrity": "sha512-VZznLgtwhn+Mact9tfiwx64fA9erHH/MCXEUfB/0bX/6Fz6ny5EGTXYltMocqg4xFAQZtnO3DHWWXi8RiuN7cQ==",
"dev": true,
"license": "MIT",
"dependencies": {
"balanced-match": "^1.0.0",
"concat-map": "0.0.1"
"balanced-match": "^4.0.2"
},
"engines": {
"node": "18 || 20 || >=22"
}
},
"node_modules/braces": {
@ -1791,13 +1773,6 @@
"dev": true,
"license": "MIT"
},
"node_modules/concat-map": {
"version": "0.0.1",
"resolved": "https://registry.npmjs.org/concat-map/-/concat-map-0.0.1.tgz",
"integrity": "sha512-/Srv4dswyQNBfohGpz9o6Yb3Gz3SrUDqBH5rTuhGR7ahtlbYKnVxw2bCFMRljaA7EXHaXZ8wsHdodFvbkhKmqg==",
"dev": true,
"license": "MIT"
},
"node_modules/convert-source-map": {
"version": "2.0.0",
"resolved": "https://registry.npmjs.org/convert-source-map/-/convert-source-map-2.0.0.tgz",
@ -2258,6 +2233,7 @@
"version": "2.3.3",
"resolved": "https://registry.npmjs.org/fsevents/-/fsevents-2.3.3.tgz",
"integrity": "sha512-5xoDfX+fL7faATnagmWPpbFtwh/R77WmMMqqHGS65C3vvB0YHrgF+B1YmZ3441tMj5n63k0212XNoJwzlhffQw==",
"dev": true,
"hasInstallScript": true,
"license": "MIT",
"optional": true,
@ -4090,16 +4066,16 @@
}
},
"node_modules/minimatch": {
"version": "10.1.1",
"resolved": "https://registry.npmjs.org/minimatch/-/minimatch-10.1.1.tgz",
"integrity": "sha512-enIvLvRAFZYXJzkCYG5RKmPfrFArdLv+R+lbQ53BmIMLIry74bjKzX6iHAm8WYamJkhSSEabrWN5D97XnKObjQ==",
"version": "10.2.5",
"resolved": "https://registry.npmjs.org/minimatch/-/minimatch-10.2.5.tgz",
"integrity": "sha512-MULkVLfKGYDFYejP07QOurDLLQpcjk7Fw+7jXS2R2czRQzR56yHRveU5NDJEOviH+hETZKSkIk5c+T23GjFUMg==",
"dev": true,
"license": "BlueOak-1.0.0",
"dependencies": {
"@isaacs/brace-expansion": "^5.0.0"
"brace-expansion": "^5.0.5"
},
"engines": {
"node": "20 || >=22"
"node": "18 || 20 || >=22"
},
"funding": {
"url": "https://github.com/sponsors/isaacs"
@ -4388,9 +4364,9 @@
"license": "ISC"
},
"node_modules/picomatch": {
"version": "2.3.1",
"resolved": "https://registry.npmjs.org/picomatch/-/picomatch-2.3.1.tgz",
"integrity": "sha512-JU3teHTNjmE2VCGFzuY8EXzCDVwEqB2a8fsIvwaStHhAWJEeVd1o1QD80CU6+ZdEXXSLbSsuLwJjkCBWqRQUVA==",
"version": "2.3.2",
"resolved": "https://registry.npmjs.org/picomatch/-/picomatch-2.3.2.tgz",
"integrity": "sha512-V7+vQEJ06Z+c5tSye8S+nHUfI51xoXIXjHQ99cQtKUkQqqO1kO/KCJUfZXuB47h/YBlDhah2H3hdUGXn8ie0oA==",
"dev": true,
"license": "MIT",
"engines": {
@ -4475,22 +4451,19 @@
}
},
"node_modules/prisma": {
"version": "5.22.0",
"resolved": "https://registry.npmjs.org/prisma/-/prisma-5.22.0.tgz",
"integrity": "sha512-vtpjW3XuYCSnMsNVBjLMNkTj6OZbudcPPTPYHqX0CJfpcdWciI1dM8uHETwmDxxiqEwCIE6WvXucWUetJgfu/A==",
"version": "5.17.0",
"resolved": "https://registry.npmjs.org/prisma/-/prisma-5.17.0.tgz",
"integrity": "sha512-m4UWkN5lBE6yevqeOxEvmepnL5cNPEjzMw2IqDB59AcEV6w7D8vGljDLd1gPFH+W6gUxw9x7/RmN5dCS/WTPxA==",
"hasInstallScript": true,
"license": "Apache-2.0",
"dependencies": {
"@prisma/engines": "5.22.0"
"@prisma/engines": "5.17.0"
},
"bin": {
"prisma": "build/index.js"
},
"engines": {
"node": ">=16.13"
},
"optionalDependencies": {
"fsevents": "2.3.3"
}
},
"node_modules/prompts": {
@ -4555,16 +4528,16 @@
}
},
"node_modules/react-copy-to-clipboard": {
"version": "5.1.0",
"resolved": "https://registry.npmjs.org/react-copy-to-clipboard/-/react-copy-to-clipboard-5.1.0.tgz",
"integrity": "sha512-k61RsNgAayIJNoy9yDsYzDe/yAZAzEbEgcz3DZMhF686LEyukcE1hzurxe85JandPUG+yTfGVFzuEw3xt8WP/A==",
"version": "5.1.1",
"resolved": "https://registry.npmjs.org/react-copy-to-clipboard/-/react-copy-to-clipboard-5.1.1.tgz",
"integrity": "sha512-s+HrzLyJBxrpGTYXF15dTgMjAJpEPZT/Yp6NytAtZMRngejxt6Pt5WrfFxLAcsqUDU6sY1Jz6tyHwIicE1U2Xg==",
"license": "MIT",
"dependencies": {
"copy-to-clipboard": "^3.3.1",
"copy-to-clipboard": "^3.3.3",
"prop-types": "^15.8.1"
},
"peerDependencies": {
"react": "^15.3.0 || 16 || 17 || 18"
"react": ">=15.3.0"
}
},
"node_modules/react-dom": {
@ -5068,19 +5041,6 @@
"node": ">=8"
}
},
"node_modules/test-exclude/node_modules/minimatch": {
"version": "3.1.2",
"resolved": "https://registry.npmjs.org/minimatch/-/minimatch-3.1.2.tgz",
"integrity": "sha512-J7p63hRiAjw1NDEww1W7i37+ByIrOWO5XQQAzZ3VOcL0PNybwpfmV/N05zFAzwQ9USyEcX6t3UO+K5aqBQOIHw==",
"dev": true,
"license": "ISC",
"dependencies": {
"brace-expansion": "^1.1.7"
},
"engines": {
"node": "*"
}
},
"node_modules/tmpl": {
"version": "1.0.5",
"resolved": "https://registry.npmjs.org/tmpl/-/tmpl-1.0.5.tgz",

View File

@ -2,7 +2,7 @@
"dependencies": {
"prism-react-renderer": "2.4.1",
"prisma": "5.17.0",
"react-copy-to-clipboard": "5.1.0"
"react-copy-to-clipboard": "5.1.1"
},
"devDependencies": {
"@testing-library/jest-dom": "6.8.0",
@ -12,7 +12,8 @@
},
"overrides": {
"glob": "13.0.0",
"minimatch": "10.1.1",
"minimatch": "10.2.5",
"picomatch": "2.3.2",
"@isaacs/brace-expansion": "5.0.0",
"@babel/traverse": "7.28.5",
"braces": "3.0.3"

View File

@ -12,11 +12,11 @@ authors = [
dependencies = [
"fastuuid==0.14.0",
"httpx==0.28.1",
"openai==2.24.0",
"openai==2.33.0",
"python-dotenv==1.2.2",
"tiktoken==0.12.0",
"importlib-metadata==8.5.0",
"tokenizers==0.22.2",
"tokenizers==0.23.1",
"click==8.1.8",
"jinja2==3.1.6",
"aiohttp==3.13.4",
@ -44,11 +44,11 @@ proxy = [
"apscheduler==3.11.2",
"fastapi-sso==0.19.0",
"PyJWT==2.12.0",
"python-multipart==0.0.26",
"python-multipart==0.0.27",
"cryptography==46.0.7",
"pynacl==1.6.2",
"websockets==15.0.1",
"boto3==1.42.59",
"boto3==1.43.1",
"azure-identity==1.25.2",
"azure-storage-blob==12.28.0",
"mcp==1.26.0",
@ -120,9 +120,9 @@ dev = [
"flake8==7.3.0",
"black==24.10.0",
"mypy==1.19.0",
"pytest==8.3.5",
"pytest==9.0.3",
"pytest-mock==3.15.1",
"pytest-asyncio==1.2.0",
"pytest-asyncio==1.3.0",
"pytest-postgresql==7.0.2",
# pytest-postgresql imports psycopg v3 during pytest startup. Keep the base
# package and the binary wheel in the default dev environment so local
@ -191,7 +191,7 @@ ci = [
"pylint==4.0.5",
"pyright==1.1.408",
"langchain-mcp-adapters==0.2.1",
"langchain-openai==1.1.10",
"langchain-openai==1.1.14",
"langgraph==1.0.10",
# langgraph-prebuilt 1.0.9 imports ExecutionInfo/ServerInfo from
# langgraph.runtime, which is not exported until langgraph 1.1.0.
@ -205,7 +205,7 @@ healthcheck = [
]
[build-system]
requires = ["uv_build==0.10.7"]
requires = ["uv_build==0.11.8"]
build-backend = "uv_build"
[tool.uv]

View File

@ -150,6 +150,7 @@ jaraco.context: >=6.1.0 # Unknown license
pypdf: >=6.6.2 # BSD-3-Clause license - https://github.com/py-pdf/pypdf/blob/main/LICENSE
hf-xet: >=1.4.2 # Apache 2.0 License - https://github.com/huggingface/xet-tools/blob/main/LICENSE
pytest-asyncio: >=1.2.0 # Apache 2.0 license
pytest: >=9.0.3 # MIT license
pytest-postgresql: >=7.0.2 # LGPLv3+ license
pytest-xdist: >=3.8.0 # MIT License
ruff: >=0.15.3 # MIT License

View File

@ -171,6 +171,25 @@ class TestHelperFunctions:
mock_response.choices[0].message.content = "Hello from LLM"
assert _extract_response_text(mock_response) == "Hello from LLM"
def test_extract_response_text_combines_all_choices(self):
    """Check the text extracted from a response that carries two choices.

    One choice has plain string content and the other has a list of text
    parts; the expected result below joins the two choices with a newline
    and the list parts with a space.
    """
    from litellm.proxy.guardrails.guardrail_hooks.semantic_guard.semantic_guard import (
        _extract_response_text,
    )

    # Choice whose message content is a plain string.
    plain_choice = MagicMock()
    plain_choice.message.content = "first response"

    # Choice whose message content is a list of text parts.
    parts_choice = MagicMock()
    parts_choice.message.content = [
        {"type": "text", "text": part} for part in ("second", "response")
    ]

    response = MagicMock()
    response.choices = [plain_choice, parts_choice]

    expected = "first response\nsecond response"
    assert _extract_response_text(response) == expected
def test_extract_response_text_empty(self):
from litellm.proxy.guardrails.guardrail_hooks.semantic_guard.semantic_guard import (
_extract_response_text,

View File

@ -18,6 +18,7 @@ from litellm import Router
# this tests debug logs from litellm router and litellm proxy server
from litellm._logging import verbose_logger, verbose_proxy_logger, verbose_router_logger
from litellm.llms.custom_httpx.async_client_cleanup import close_litellm_async_clients
# this tests debug logs from litellm router and litellm proxy server
@ -74,6 +75,9 @@ def test_async_fallbacks(caplog):
pytest.fail(f"An exception occurred: {e}")
finally:
router.reset()
# Close cached aiohttp/httpx clients before the event loop ends
# to prevent "Unclosed client session" / "Unclosed connector" warnings.
await close_litellm_async_clients()
asyncio.run(_make_request())
captured_logs = [rec.message for rec in caplog.records]

View File

@ -3640,7 +3640,7 @@ def test_mock_response_iterator_tool_use():
[
# "deepseek/deepseek-reasoner",
# "anthropic/claude-3-7-sonnet-20250219",
"openrouter/anthropic/claude-3.7-sonnet",
"openrouter/anthropic/claude-sonnet-4.5",
],
)
def test_reasoning_content_completion(model):

View File

@ -8,8 +8,8 @@
"name": "litellm-pass-through-tests",
"version": "0.0.0",
"dependencies": {
"@google-cloud/vertexai": "1.9.3",
"@google/generative-ai": "0.21.0"
"@google-cloud/vertexai": "1.12.0",
"@google/generative-ai": "0.24.1"
},
"devDependencies": {
"jest": "29.7.0"
@ -512,21 +512,46 @@
"license": "MIT"
},
"node_modules/@google-cloud/vertexai": {
"version": "1.9.3",
"resolved": "https://registry.npmjs.org/@google-cloud/vertexai/-/vertexai-1.9.3.tgz",
"integrity": "sha512-35o5tIEMLW3JeFJOaaMNR2e5sq+6rpnhrF97PuAxeOm0GlqVTESKhkGj7a5B5mmJSSSU3hUfIhcQCRRsw4Ipzg==",
"version": "1.12.0",
"resolved": "https://registry.npmjs.org/@google-cloud/vertexai/-/vertexai-1.12.0.tgz",
"integrity": "sha512-XMJIk7GIeavFLP5A3YEUlowKa5Y5PZRrnnuTJcqR0k+lFKkv7+IWpdRp+Xbqb8xNDrvQaE2hP2RYPUylyD5EdA==",
"license": "Apache-2.0",
"dependencies": {
"@google/genai": "^1.45.0",
"google-auth-library": "^9.1.0"
},
"engines": {
"node": ">=18.0.0"
}
},
"node_modules/@google/genai": {
"version": "1.51.0",
"resolved": "https://registry.npmjs.org/@google/genai/-/genai-1.51.0.tgz",
"integrity": "sha512-vTZZF3CSimN7cn2zsLpW2p5WF0eZa5Gz69ITMPCNHpPrDlAstOfGifSfi0p/s9Z9400f7xJRkgvkQNrcM7pJ6w==",
"hasInstallScript": true,
"license": "Apache-2.0",
"dependencies": {
"google-auth-library": "^10.3.0",
"p-retry": "^4.6.2",
"protobufjs": "^7.5.4",
"ws": "^8.18.0"
},
"engines": {
"node": ">=20.0.0"
},
"peerDependencies": {
"@modelcontextprotocol/sdk": "^1.25.2"
},
"peerDependenciesMeta": {
"@modelcontextprotocol/sdk": {
"optional": true
}
}
},
"node_modules/@google/generative-ai": {
"version": "0.21.0",
"resolved": "https://registry.npmjs.org/@google/generative-ai/-/generative-ai-0.21.0.tgz",
"integrity": "sha512-7XhUbtnlkSEZK15kN3t+tzIMxsbKm/dSkKBFalj+20NvPKe1kBY7mR2P7vuijEn+f06z5+A8bVGKO0v39cr6Wg==",
"version": "0.24.1",
"resolved": "https://registry.npmjs.org/@google/generative-ai/-/generative-ai-0.24.1.tgz",
"integrity": "sha512-MqO+MLfM6kjxcKoy0p1wRzG3b4ZZXtPI+z2IE26UogS2Cm/XHO+7gGRBh6gcJsOiIVoH93UwKvW4HdgiOZCy9Q==",
"license": "Apache-2.0",
"engines": {
"node": ">=18.0.0"
@ -901,6 +926,70 @@
"@jridgewell/sourcemap-codec": "^1.4.14"
}
},
"node_modules/@protobufjs/aspromise": {
"version": "1.1.2",
"resolved": "https://registry.npmjs.org/@protobufjs/aspromise/-/aspromise-1.1.2.tgz",
"integrity": "sha512-j+gKExEuLmKwvz3OgROXtrJ2UG2x8Ch2YZUxahh+s1F2HZ+wAceUNLkvy6zKCPVRkU++ZWQrdxsUeQXmcg4uoQ==",
"license": "BSD-3-Clause"
},
"node_modules/@protobufjs/base64": {
"version": "1.1.2",
"resolved": "https://registry.npmjs.org/@protobufjs/base64/-/base64-1.1.2.tgz",
"integrity": "sha512-AZkcAA5vnN/v4PDqKyMR5lx7hZttPDgClv83E//FMNhR2TMcLUhfRUBHCmSl0oi9zMgDDqRUJkSxO3wm85+XLg==",
"license": "BSD-3-Clause"
},
"node_modules/@protobufjs/codegen": {
"version": "2.0.5",
"resolved": "https://registry.npmjs.org/@protobufjs/codegen/-/codegen-2.0.5.tgz",
"integrity": "sha512-zgXFLzW3Ap33e6d0Wlj4MGIm6Ce8O89n/apUaGNB/jx+hw+ruWEp7EwGUshdLKVRCxZW12fp9r40E1mQrf/34g==",
"license": "BSD-3-Clause"
},
"node_modules/@protobufjs/eventemitter": {
"version": "1.1.0",
"resolved": "https://registry.npmjs.org/@protobufjs/eventemitter/-/eventemitter-1.1.0.tgz",
"integrity": "sha512-j9ednRT81vYJ9OfVuXG6ERSTdEL1xVsNgqpkxMsbIabzSo3goCjDIveeGv5d03om39ML71RdmrGNjG5SReBP/Q==",
"license": "BSD-3-Clause"
},
"node_modules/@protobufjs/fetch": {
"version": "1.1.0",
"resolved": "https://registry.npmjs.org/@protobufjs/fetch/-/fetch-1.1.0.tgz",
"integrity": "sha512-lljVXpqXebpsijW71PZaCYeIcE5on1w5DlQy5WH6GLbFryLUrBD4932W/E2BSpfRJWseIL4v/KPgBFxDOIdKpQ==",
"license": "BSD-3-Clause",
"dependencies": {
"@protobufjs/aspromise": "^1.1.1",
"@protobufjs/inquire": "^1.1.0"
}
},
"node_modules/@protobufjs/float": {
"version": "1.0.2",
"resolved": "https://registry.npmjs.org/@protobufjs/float/-/float-1.0.2.tgz",
"integrity": "sha512-Ddb+kVXlXst9d+R9PfTIxh1EdNkgoRe5tOX6t01f1lYWOvJnSPDBlG241QLzcyPdoNTsblLUdujGSE4RzrTZGQ==",
"license": "BSD-3-Clause"
},
"node_modules/@protobufjs/inquire": {
"version": "1.1.1",
"resolved": "https://registry.npmjs.org/@protobufjs/inquire/-/inquire-1.1.1.tgz",
"integrity": "sha512-mnzgDV26ueAvk7rsbt9L7bE0SuAoqyuys/sMMrmVcN5x9VsxpcG3rqAUSgDyLp0UZlmNfIbQ4fHfCtreVBk8Ew==",
"license": "BSD-3-Clause"
},
"node_modules/@protobufjs/path": {
"version": "1.1.2",
"resolved": "https://registry.npmjs.org/@protobufjs/path/-/path-1.1.2.tgz",
"integrity": "sha512-6JOcJ5Tm08dOHAbdR3GrvP+yUUfkjG5ePsHYczMFLq3ZmMkAD98cDgcT2iA1lJ9NVwFd4tH/iSSoe44YWkltEA==",
"license": "BSD-3-Clause"
},
"node_modules/@protobufjs/pool": {
"version": "1.1.0",
"resolved": "https://registry.npmjs.org/@protobufjs/pool/-/pool-1.1.0.tgz",
"integrity": "sha512-0kELaGSIDBKvcgS4zkjz1PeddatrjYcmMWOlAuAPwAeccUrPHdUqo/J6LiymHHEiJT5NrF1UVwxY14f+fy4WQw==",
"license": "BSD-3-Clause"
},
"node_modules/@protobufjs/utf8": {
"version": "1.1.1",
"resolved": "https://registry.npmjs.org/@protobufjs/utf8/-/utf8-1.1.1.tgz",
"integrity": "sha512-oOAWABowe8EAbMyWKM0tYDKi8Yaox52D+HWZhAIJqQXbqe0xI/GV7FhLWqlEKreMkfDjshR5FKgi3mnle0h6Eg==",
"license": "BSD-3-Clause"
},
"node_modules/@sinclair/typebox": {
"version": "0.27.10",
"resolved": "https://registry.npmjs.org/@sinclair/typebox/-/typebox-0.27.10.tgz",
@ -1014,12 +1103,17 @@
"version": "25.6.0",
"resolved": "https://registry.npmjs.org/@types/node/-/node-25.6.0.tgz",
"integrity": "sha512-+qIYRKdNYJwY3vRCZMdJbPLJAtGjQBudzZzdzwQYkEPQd+PJGixUL5QfvCLDaULoLv+RhT3LDkwEfKaAkgSmNQ==",
"dev": true,
"license": "MIT",
"dependencies": {
"undici-types": "~7.19.0"
}
},
"node_modules/@types/retry": {
"version": "0.12.0",
"resolved": "https://registry.npmjs.org/@types/retry/-/retry-0.12.0.tgz",
"integrity": "sha512-wWKOClTTiizcZhXnPY4wikVAwmdYHp8q6DmC+EJUzAMsycb7HB32Kh9RN4+0gExjmPmZSAQjgURXIGATPegAvA==",
"license": "MIT"
},
"node_modules/@types/stack-utils": {
"version": "2.0.3",
"resolved": "https://registry.npmjs.org/@types/stack-utils/-/stack-utils-2.0.3.tgz",
@ -1560,6 +1654,15 @@
"node": ">= 8"
}
},
"node_modules/data-uri-to-buffer": {
"version": "4.0.1",
"resolved": "https://registry.npmjs.org/data-uri-to-buffer/-/data-uri-to-buffer-4.0.1.tgz",
"integrity": "sha512-0R9ikRb668HB7QDxT1vkpuUBtqc53YyAwMwGeUFKRojY/NWKvdZ+9UYtRfGmhqNbRkTSVpMbmyhXipFFv2cb/A==",
"license": "MIT",
"engines": {
"node": ">= 12"
}
},
"node_modules/debug": {
"version": "4.4.3",
"resolved": "https://registry.npmjs.org/debug/-/debug-4.4.3.tgz",
@ -1785,6 +1888,29 @@
"bser": "2.1.1"
}
},
"node_modules/fetch-blob": {
"version": "3.2.0",
"resolved": "https://registry.npmjs.org/fetch-blob/-/fetch-blob-3.2.0.tgz",
"integrity": "sha512-7yAQpD2UMJzLi1Dqv7qFYnPbaPx7ZfFK6PiIxQ4PfkGPyNyl2Ugx+a/umUonmKqjhM4DnfbMvdX6otXq83soQQ==",
"funding": [
{
"type": "github",
"url": "https://github.com/sponsors/jimmywarting"
},
{
"type": "paypal",
"url": "https://paypal.me/jimmywarting"
}
],
"license": "MIT",
"dependencies": {
"node-domexception": "^1.0.0",
"web-streams-polyfill": "^3.0.3"
},
"engines": {
"node": "^12.20 || >= 14.13"
}
},
"node_modules/fill-range": {
"version": "7.1.1",
"resolved": "https://registry.npmjs.org/fill-range/-/fill-range-7.1.1.tgz",
@ -1812,6 +1938,18 @@
"node": ">=8"
}
},
"node_modules/formdata-polyfill": {
"version": "4.0.10",
"resolved": "https://registry.npmjs.org/formdata-polyfill/-/formdata-polyfill-4.0.10.tgz",
"integrity": "sha512-buewHzMvYL29jdeQTVILecSaZKnt/RJWjoZCF5OW60Z67/GmSLBkOFM7qh1PI3zFNtJbaZL5eQu1vLfazOwj4g==",
"license": "MIT",
"dependencies": {
"fetch-blob": "^3.1.2"
},
"engines": {
"node": ">=12.20.0"
}
},
"node_modules/fs.realpath": {
"version": "1.0.0",
"resolved": "https://registry.npmjs.org/fs.realpath/-/fs.realpath-1.0.0.tgz",
@ -1845,33 +1983,31 @@
}
},
"node_modules/gaxios": {
"version": "6.7.1",
"resolved": "https://registry.npmjs.org/gaxios/-/gaxios-6.7.1.tgz",
"integrity": "sha512-LDODD4TMYx7XXdpwxAVRAIAuB0bzv0s+ywFonY46k126qzQHT9ygyoa9tncmOiQmmDrik65UYsEkv3lbfqQ3yQ==",
"version": "7.1.4",
"resolved": "https://registry.npmjs.org/gaxios/-/gaxios-7.1.4.tgz",
"integrity": "sha512-bTIgTsM2bWn3XklZISBTQX7ZSddGW+IO3bMdGaemHZ3tbqExMENHLx6kKZ/KlejgrMtj8q7wBItt51yegqalrA==",
"license": "Apache-2.0",
"dependencies": {
"extend": "^3.0.2",
"https-proxy-agent": "^7.0.1",
"is-stream": "^2.0.0",
"node-fetch": "^2.6.9",
"uuid": "^9.0.1"
"node-fetch": "^3.3.2"
},
"engines": {
"node": ">=14"
"node": ">=18"
}
},
"node_modules/gcp-metadata": {
"version": "6.1.1",
"resolved": "https://registry.npmjs.org/gcp-metadata/-/gcp-metadata-6.1.1.tgz",
"integrity": "sha512-a4tiq7E0/5fTjxPAaH4jpjkSv/uCaU2p5KC6HVGrvl0cDjA8iBZv4vv1gyzlmK0ZUKqwpOyQMKzZQe3lTit77A==",
"version": "8.1.2",
"resolved": "https://registry.npmjs.org/gcp-metadata/-/gcp-metadata-8.1.2.tgz",
"integrity": "sha512-zV/5HKTfCeKWnxG0Dmrw51hEWFGfcF2xiXqcA3+J90WDuP0SvoiSO5ORvcBsifmx/FoIjgQN3oNOGaQ5PhLFkg==",
"license": "Apache-2.0",
"dependencies": {
"gaxios": "^6.1.1",
"google-logging-utils": "^0.0.2",
"gaxios": "^7.0.0",
"google-logging-utils": "^1.0.0",
"json-bigint": "^1.0.0"
},
"engines": {
"node": ">=14"
"node": ">=18"
}
},
"node_modules/gensync": {
@ -1940,26 +2076,26 @@
}
},
"node_modules/google-auth-library": {
"version": "9.15.1",
"resolved": "https://registry.npmjs.org/google-auth-library/-/google-auth-library-9.15.1.tgz",
"integrity": "sha512-Jb6Z0+nvECVz+2lzSMt9u98UsoakXxA2HGHMCxh+so3n90XgYWkq5dur19JAJV7ONiJY22yBTyJB1TSkvPq9Ng==",
"version": "10.6.2",
"resolved": "https://registry.npmjs.org/google-auth-library/-/google-auth-library-10.6.2.tgz",
"integrity": "sha512-e27Z6EThmVNNvtYASwQxose/G57rkRuaRbQyxM2bvYLLX/GqWZ5chWq2EBoUchJbCc57eC9ArzO5wMsEmWftCw==",
"license": "Apache-2.0",
"dependencies": {
"base64-js": "^1.3.0",
"ecdsa-sig-formatter": "^1.0.11",
"gaxios": "^6.1.1",
"gcp-metadata": "^6.1.0",
"gtoken": "^7.0.0",
"gaxios": "^7.1.4",
"gcp-metadata": "8.1.2",
"google-logging-utils": "1.1.3",
"jws": "^4.0.0"
},
"engines": {
"node": ">=14"
"node": ">=18"
}
},
"node_modules/google-logging-utils": {
"version": "0.0.2",
"resolved": "https://registry.npmjs.org/google-logging-utils/-/google-logging-utils-0.0.2.tgz",
"integrity": "sha512-NEgUnEcBiP5HrPzufUkBzJOD/Sxsco3rLNo1F1TNf7ieU8ryUzBhqba8r756CjLX7rn3fHl6iLEwPYuqpoKgQQ==",
"version": "1.1.3",
"resolved": "https://registry.npmjs.org/google-logging-utils/-/google-logging-utils-1.1.3.tgz",
"integrity": "sha512-eAmLkjDjAFCVXg7A1unxHsLf961m6y17QFqXqAXGj/gVkKFrEICfStRfwUlGNfeCEjNRa32JEWOUTlYXPyyKvA==",
"license": "Apache-2.0",
"engines": {
"node": ">=14"
@ -1972,19 +2108,6 @@
"dev": true,
"license": "ISC"
},
"node_modules/gtoken": {
"version": "7.1.0",
"resolved": "https://registry.npmjs.org/gtoken/-/gtoken-7.1.0.tgz",
"integrity": "sha512-pCcEwRi+TKpMlxAQObHDQ56KawURgyAf6jtIY046fJ5tIv3zDe/LEIubckAO8fj6JnAxLdmWkUfNyulQ2iKdEw==",
"license": "MIT",
"dependencies": {
"gaxios": "^6.0.0",
"jws": "^4.0.0"
},
"engines": {
"node": ">=14.0.0"
}
},
"node_modules/has-flag": {
"version": "4.0.0",
"resolved": "https://registry.npmjs.org/has-flag/-/has-flag-4.0.0.tgz",
@ -2144,6 +2267,7 @@
"version": "2.0.1",
"resolved": "https://registry.npmjs.org/is-stream/-/is-stream-2.0.1.tgz",
"integrity": "sha512-hFoiJiTl63nn+kstHGBtewWSKnQLpyb155KHheA1l39uvtO9nWIop1p3udqPcUd/xbF1VLMO4n7OI6p7RbngDg==",
"dev": true,
"license": "MIT",
"engines": {
"node": ">=8"
@ -2963,6 +3087,12 @@
"node": ">=8"
}
},
"node_modules/long": {
"version": "5.3.2",
"resolved": "https://registry.npmjs.org/long/-/long-5.3.2.tgz",
"integrity": "sha512-mNAgZ1GmyNhD7AuqnTG3/VQ26o760+ZYBPKjPvugO8+nLbYfX6TVpJPseBvopbdY+qpZ/lKUnmEc1LeZYS3QAA==",
"license": "Apache-2.0"
},
"node_modules/lru-cache": {
"version": "5.1.1",
"resolved": "https://registry.npmjs.org/lru-cache/-/lru-cache-5.1.1.tgz",
@ -3069,24 +3199,42 @@
"dev": true,
"license": "MIT"
},
"node_modules/node-domexception": {
"version": "1.0.0",
"resolved": "https://registry.npmjs.org/node-domexception/-/node-domexception-1.0.0.tgz",
"integrity": "sha512-/jKZoMpw0F8GRwl4/eLROPA3cfcXtLApP0QzLmUT/HuPCZWyB7IY9ZrMeKw2O/nFIqPQB3PVM9aYm0F312AXDQ==",
"deprecated": "Use your platform's native DOMException instead",
"funding": [
{
"type": "github",
"url": "https://github.com/sponsors/jimmywarting"
},
{
"type": "github",
"url": "https://paypal.me/jimmywarting"
}
],
"license": "MIT",
"engines": {
"node": ">=10.5.0"
}
},
"node_modules/node-fetch": {
"version": "2.7.0",
"resolved": "https://registry.npmjs.org/node-fetch/-/node-fetch-2.7.0.tgz",
"integrity": "sha512-c4FRfUm/dbcWZ7U+1Wq0AwCyFL+3nt2bEw05wfxSz+DWpWsitgmSgYmy2dQdWyKC1694ELPqMs/YzUSNozLt8A==",
"version": "3.3.2",
"resolved": "https://registry.npmjs.org/node-fetch/-/node-fetch-3.3.2.tgz",
"integrity": "sha512-dRB78srN/l6gqWulah9SrxeYnxeddIG30+GOqK/9OlLVyLg3HPnr6SqOWTWOXKRwC2eGYCkZ59NNuSgvSrpgOA==",
"license": "MIT",
"dependencies": {
"whatwg-url": "^5.0.0"
"data-uri-to-buffer": "^4.0.0",
"fetch-blob": "^3.1.4",
"formdata-polyfill": "^4.0.10"
},
"engines": {
"node": "4.x || >=6.0.0"
"node": "^12.20.0 || ^14.13.1 || >=16.0.0"
},
"peerDependencies": {
"encoding": "^0.1.0"
},
"peerDependenciesMeta": {
"encoding": {
"optional": true
}
"funding": {
"type": "opencollective",
"url": "https://opencollective.com/node-fetch"
}
},
"node_modules/node-int64": {
@ -3197,6 +3345,19 @@
"url": "https://github.com/sponsors/sindresorhus"
}
},
"node_modules/p-retry": {
"version": "4.6.2",
"resolved": "https://registry.npmjs.org/p-retry/-/p-retry-4.6.2.tgz",
"integrity": "sha512-312Id396EbJdvRONlngUx0NydfrIQ5lsYu0znKVUzVvArzEIt08V1qhtyESbGVd1FGX7UKtiFp5uwKZdM8wIuQ==",
"license": "MIT",
"dependencies": {
"@types/retry": "0.12.0",
"retry": "^0.13.1"
},
"engines": {
"node": ">=8"
}
},
"node_modules/p-try": {
"version": "2.2.0",
"resolved": "https://registry.npmjs.org/p-try/-/p-try-2.2.0.tgz",
@ -3348,6 +3509,30 @@
"node": ">= 6"
}
},
"node_modules/protobufjs": {
"version": "7.5.6",
"resolved": "https://registry.npmjs.org/protobufjs/-/protobufjs-7.5.6.tgz",
"integrity": "sha512-M71sTMB146U3u0di3yup8iM+zv8yPRNQVr1KK4tyBitl3qFvEGucq/rGDRShD2rsJhtN02RJaJ7j5X5hmy8SJg==",
"hasInstallScript": true,
"license": "BSD-3-Clause",
"dependencies": {
"@protobufjs/aspromise": "^1.1.2",
"@protobufjs/base64": "^1.1.2",
"@protobufjs/codegen": "^2.0.5",
"@protobufjs/eventemitter": "^1.1.0",
"@protobufjs/fetch": "^1.1.0",
"@protobufjs/float": "^1.0.2",
"@protobufjs/inquire": "^1.1.1",
"@protobufjs/path": "^1.1.2",
"@protobufjs/pool": "^1.1.0",
"@protobufjs/utf8": "^1.1.1",
"@types/node": ">=13.7.0",
"long": "^5.0.0"
},
"engines": {
"node": ">=12.0.0"
}
},
"node_modules/pure-rand": {
"version": "6.1.0",
"resolved": "https://registry.npmjs.org/pure-rand/-/pure-rand-6.1.0.tgz",
@ -3437,6 +3622,15 @@
"node": ">=10"
}
},
"node_modules/retry": {
"version": "0.13.1",
"resolved": "https://registry.npmjs.org/retry/-/retry-0.13.1.tgz",
"integrity": "sha512-XQBQ3I8W1Cge0Seh+6gjj03LbmRFWuoszgK9ooCpwYIrhhoO80pfq4cUkU5DkknwfOfFteRwlZ56PYOGYyFWdg==",
"license": "MIT",
"engines": {
"node": ">= 4"
}
},
"node_modules/safe-buffer": {
"version": "5.2.1",
"resolved": "https://registry.npmjs.org/safe-buffer/-/safe-buffer-5.2.1.tgz",
@ -3691,12 +3885,6 @@
"node": ">=8.0"
}
},
"node_modules/tr46": {
"version": "0.0.3",
"resolved": "https://registry.npmjs.org/tr46/-/tr46-0.0.3.tgz",
"integrity": "sha512-N3WMsuqV66lT30CrXNbEjx4GEwlow3v6rr4mCcv6prnfwhS01rkgyFdjPNBYd9br7LpXV1+Emh01fHnq2Gdgrw==",
"license": "MIT"
},
"node_modules/type-detect": {
"version": "4.0.8",
"resolved": "https://registry.npmjs.org/type-detect/-/type-detect-4.0.8.tgz",
@ -3724,7 +3912,6 @@
"version": "7.19.2",
"resolved": "https://registry.npmjs.org/undici-types/-/undici-types-7.19.2.tgz",
"integrity": "sha512-qYVnV5OEm2AW8cJMCpdV20CDyaN3g0AjDlOGf1OW4iaDEx8MwdtChUp4zu4H0VP3nDRF/8RKWH+IPp9uW0YGZg==",
"dev": true,
"license": "MIT"
},
"node_modules/update-browserslist-db": {
@ -3758,19 +3945,6 @@
"browserslist": ">= 4.21.0"
}
},
"node_modules/uuid": {
"version": "9.0.1",
"resolved": "https://registry.npmjs.org/uuid/-/uuid-9.0.1.tgz",
"integrity": "sha512-b+1eJOlsR9K8HJpow9Ok3fiWOWSIcIzXodvv0rQjVoOVNpWMpxf1wZNpt4y9h10odCNrqnYp1OBzRktckBe3sA==",
"funding": [
"https://github.com/sponsors/broofa",
"https://github.com/sponsors/ctavan"
],
"license": "MIT",
"bin": {
"uuid": "dist/bin/uuid"
}
},
"node_modules/v8-to-istanbul": {
"version": "9.3.0",
"resolved": "https://registry.npmjs.org/v8-to-istanbul/-/v8-to-istanbul-9.3.0.tgz",
@ -3796,20 +3970,13 @@
"makeerror": "1.0.12"
}
},
"node_modules/webidl-conversions": {
"version": "3.0.1",
"resolved": "https://registry.npmjs.org/webidl-conversions/-/webidl-conversions-3.0.1.tgz",
"integrity": "sha512-2JAn3z8AR6rjK8Sm8orRC0h/bcl/DqL7tRPdGZ4I1CjdF+EaMLmYxBHyXuKL849eucPFhvBoxMsflfOb8kxaeQ==",
"license": "BSD-2-Clause"
},
"node_modules/whatwg-url": {
"version": "5.0.0",
"resolved": "https://registry.npmjs.org/whatwg-url/-/whatwg-url-5.0.0.tgz",
"integrity": "sha512-saE57nupxk6v3HY35+jzBwYa0rKSy0XR8JSxZPwgLr7ys0IBzhGviA1/TUGJLmSVqs8pb9AnvICXEuOHLprYTw==",
"node_modules/web-streams-polyfill": {
"version": "3.3.3",
"resolved": "https://registry.npmjs.org/web-streams-polyfill/-/web-streams-polyfill-3.3.3.tgz",
"integrity": "sha512-d2JWLCivmZYTSIoge9MsgFCZrt571BikcWGYkjC1khllbTeDlGqZ2D8vD8E/lJa8WGWbb7Plm8/XJYV7IJHZZw==",
"license": "MIT",
"dependencies": {
"tr46": "~0.0.3",
"webidl-conversions": "^3.0.0"
"engines": {
"node": ">= 8"
}
},
"node_modules/which": {
@ -3867,6 +4034,27 @@
"node": "^12.13.0 || ^14.15.0 || >=16.0.0"
}
},
"node_modules/ws": {
"version": "8.20.0",
"resolved": "https://registry.npmjs.org/ws/-/ws-8.20.0.tgz",
"integrity": "sha512-sAt8BhgNbzCtgGbt2OxmpuryO63ZoDk/sqaB/znQm94T4fCEsy/yV+7CdC1kJhOU9lboAEU7R3kquuycDoibVA==",
"license": "MIT",
"engines": {
"node": ">=10.0.0"
},
"peerDependencies": {
"bufferutil": "^4.0.1",
"utf-8-validate": ">=5.0.2"
},
"peerDependenciesMeta": {
"bufferutil": {
"optional": true
},
"utf-8-validate": {
"optional": true
}
}
},
"node_modules/y18n": {
"version": "5.0.8",
"resolved": "https://registry.npmjs.org/y18n/-/y18n-5.0.8.tgz",

View File

@ -4,10 +4,15 @@
"private": true,
"description": "JS pass-through tests for Vertex AI / Google AI Studio routes. CI-only; not published.",
"dependencies": {
"@google-cloud/vertexai": "1.9.3",
"@google/generative-ai": "0.21.0"
"@google-cloud/vertexai": "1.12.0",
"@google/generative-ai": "0.24.1"
},
"devDependencies": {
"jest": "29.7.0"
},
"overrides": {
"@google-cloud/vertexai": {
"google-auth-library": "10.6.2"
}
}
}

View File

@ -8,22 +8,22 @@
"name": "ui-unit-tests",
"version": "1.0.0",
"dependencies": {
"@ant-design/icons": "^5.0.0",
"antd": "^5.12.5",
"react": "^18.2.0",
"react-dom": "^18.2.0"
"@ant-design/icons": "5.6.1",
"antd": "5.29.1",
"react": "18.3.1",
"react-dom": "18.3.1"
},
"devDependencies": {
"@testing-library/jest-dom": "^6.0.0",
"@testing-library/react": "^14.0.0",
"@types/jest": "^29.5.0",
"@types/react": "^18.2.0",
"@types/react-dom": "^18.2.0",
"identity-obj-proxy": "^3.0.0",
"jest": "^29.5.0",
"jest-environment-jsdom": "^29.5.0",
"ts-jest": "^29.1.0",
"typescript": "^5.0.0"
"@testing-library/jest-dom": "6.9.1",
"@testing-library/react": "14.3.1",
"@types/jest": "29.5.14",
"@types/react": "18.3.27",
"@types/react-dom": "18.3.7",
"identity-obj-proxy": "3.0.0",
"jest": "29.7.0",
"jest-environment-jsdom": "29.7.0",
"ts-jest": "29.4.5",
"typescript": "5.9.3"
}
},
"node_modules/@adobe/css-tools": {
@ -647,29 +647,6 @@
"integrity": "sha512-OWORNpfjMsSSUBVrRBVGECkhWcULOAJz9ZW8uK9qgxD+87M7jHRcvh/A96XXNhXTLmKcoYSQtBEX7lHMO7YRwg==",
"license": "MIT"
},
"node_modules/@isaacs/balanced-match": {
"version": "4.0.1",
"resolved": "https://registry.npmjs.org/@isaacs/balanced-match/-/balanced-match-4.0.1.tgz",
"integrity": "sha512-yzMTt9lEb8Gv7zRioUilSglI0c0smZ9k5D65677DLWLtWJaXIS3CqcGyUFByYKlnUj6TkjLVs54fBl6+TiGQDQ==",
"dev": true,
"license": "MIT",
"engines": {
"node": "20 || >=22"
}
},
"node_modules/@isaacs/brace-expansion": {
"version": "5.0.0",
"resolved": "https://registry.npmjs.org/@isaacs/brace-expansion/-/brace-expansion-5.0.0.tgz",
"integrity": "sha512-ZT55BDLV0yv0RBm2czMiZ+SqCGO7AvmOM3G/w2xhVPH+te0aKgFjmBvGlL1dH+ql2tgGO3MVrbb3jCKyvpgnxA==",
"dev": true,
"license": "MIT",
"dependencies": {
"@isaacs/balanced-match": "^4.0.1"
},
"engines": {
"node": "20 || >=22"
}
},
"node_modules/@istanbuljs/load-nyc-config": {
"version": "1.1.0",
"resolved": "https://registry.npmjs.org/@istanbuljs/load-nyc-config/-/load-nyc-config-1.1.0.tgz",
@ -1326,9 +1303,9 @@
}
},
"node_modules/@tootallnate/once": {
"version": "2.0.0",
"resolved": "https://registry.npmjs.org/@tootallnate/once/-/once-2.0.0.tgz",
"integrity": "sha512-XCuKFP5PS55gnMVu3dty8KPatLqUoy/ZYzDzAGCQ8JNFCkLXzmI7vNHCR+XpbZaMWQK/vQubr7PkYq8g470J/A==",
"version": "3.0.1",
"resolved": "https://registry.npmjs.org/@tootallnate/once/-/once-3.0.1.tgz",
"integrity": "sha512-VyMVKRrpHTT8PnotUeV8L/mDaMwD5DaAKCFLP73zAqAtvF0FCqky+Ki7BYbFCYQmqFyTe9316Ed5zS70QUR9eg==",
"dev": true,
"license": "MIT",
"engines": {
@ -1907,11 +1884,14 @@
}
},
"node_modules/balanced-match": {
"version": "1.0.2",
"resolved": "https://registry.npmjs.org/balanced-match/-/balanced-match-1.0.2.tgz",
"integrity": "sha512-3oSeUO0TMV67hN1AmbXsK4yaqU7tjiHlbxRDZOpH0KW9+CeX4bRAaX0Anxt0tx2MrpRpWwQaPwIlISEJhYU5Pw==",
"version": "4.0.4",
"resolved": "https://registry.npmjs.org/balanced-match/-/balanced-match-4.0.4.tgz",
"integrity": "sha512-BLrgEcRTwX2o6gGxGOCNyMvGSp35YofuYzw9h1IMTRmKqttAZZVU67bdb9Pr2vUHA8+j3i2tJfjO6C6+4myGTA==",
"dev": true,
"license": "MIT"
"license": "MIT",
"engines": {
"node": "18 || 20 || >=22"
}
},
"node_modules/baseline-browser-mapping": {
"version": "2.8.30",
@ -1924,14 +1904,16 @@
}
},
"node_modules/brace-expansion": {
"version": "1.1.12",
"resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.12.tgz",
"integrity": "sha512-9T9UjW3r0UW5c1Q7GTwllptXwhvYmEzFhzMfZ9H7FQWt+uZePjZPjBP/W1ZEyZ1twGWom5/56TF4lPcqjnDHcg==",
"version": "5.0.5",
"resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-5.0.5.tgz",
"integrity": "sha512-VZznLgtwhn+Mact9tfiwx64fA9erHH/MCXEUfB/0bX/6Fz6ny5EGTXYltMocqg4xFAQZtnO3DHWWXi8RiuN7cQ==",
"dev": true,
"license": "MIT",
"dependencies": {
"balanced-match": "^1.0.0",
"concat-map": "0.0.1"
"balanced-match": "^4.0.2"
},
"engines": {
"node": "18 || 20 || >=22"
}
},
"node_modules/braces": {
@ -2230,13 +2212,6 @@
"integrity": "sha512-VRhuHOLoKYOy4UbilLbUzbYg93XLjv2PncJC50EuTWPA3gaja1UjBsUP/D/9/juV3vQFr6XBEzn9KCAHdUvOHw==",
"license": "MIT"
},
"node_modules/concat-map": {
"version": "0.0.1",
"resolved": "https://registry.npmjs.org/concat-map/-/concat-map-0.0.1.tgz",
"integrity": "sha512-/Srv4dswyQNBfohGpz9o6Yb3Gz3SrUDqBH5rTuhGR7ahtlbYKnVxw2bCFMRljaA7EXHaXZ8wsHdodFvbkhKmqg==",
"dev": true,
"license": "MIT"
},
"node_modules/convert-source-map": {
"version": "2.0.0",
"resolved": "https://registry.npmjs.org/convert-source-map/-/convert-source-map-2.0.0.tgz",
@ -3015,9 +2990,9 @@
"license": "ISC"
},
"node_modules/handlebars": {
"version": "4.7.8",
"resolved": "https://registry.npmjs.org/handlebars/-/handlebars-4.7.8.tgz",
"integrity": "sha512-vafaFqs8MZkRrSX7sFVUdo3ap/eNiLnb4IakshzvP56X5Nr1iGKAIqdX6tMlm6HcNRIkr6AxO5jFEoJzzpT8aQ==",
"version": "4.7.9",
"resolved": "https://registry.npmjs.org/handlebars/-/handlebars-4.7.9.tgz",
"integrity": "sha512-4E71E0rpOaQuJR2A3xDZ+GM1HyWYv1clR58tC8emQNeQe3RH7MAzSbat+V0wG78LQBo6m6bzSG/L4pBuCsgnUQ==",
"dev": true,
"license": "MIT",
"dependencies": {
@ -4920,16 +4895,16 @@
}
},
"node_modules/minimatch": {
"version": "10.1.1",
"resolved": "https://registry.npmjs.org/minimatch/-/minimatch-10.1.1.tgz",
"integrity": "sha512-enIvLvRAFZYXJzkCYG5RKmPfrFArdLv+R+lbQ53BmIMLIry74bjKzX6iHAm8WYamJkhSSEabrWN5D97XnKObjQ==",
"version": "10.2.5",
"resolved": "https://registry.npmjs.org/minimatch/-/minimatch-10.2.5.tgz",
"integrity": "sha512-MULkVLfKGYDFYejP07QOurDLLQpcjk7Fw+7jXS2R2czRQzR56yHRveU5NDJEOviH+hETZKSkIk5c+T23GjFUMg==",
"dev": true,
"license": "BlueOak-1.0.0",
"dependencies": {
"@isaacs/brace-expansion": "^5.0.0"
"brace-expansion": "^5.0.5"
},
"engines": {
"node": "20 || >=22"
"node": "18 || 20 || >=22"
},
"funding": {
"url": "https://github.com/sponsors/isaacs"
@ -5246,9 +5221,9 @@
"license": "ISC"
},
"node_modules/picomatch": {
"version": "2.3.1",
"resolved": "https://registry.npmjs.org/picomatch/-/picomatch-2.3.1.tgz",
"integrity": "sha512-JU3teHTNjmE2VCGFzuY8EXzCDVwEqB2a8fsIvwaStHhAWJEeVd1o1QD80CU6+ZdEXXSLbSsuLwJjkCBWqRQUVA==",
"version": "2.3.2",
"resolved": "https://registry.npmjs.org/picomatch/-/picomatch-2.3.2.tgz",
"integrity": "sha512-V7+vQEJ06Z+c5tSye8S+nHUfI51xoXIXjHQ99cQtKUkQqqO1kO/KCJUfZXuB47h/YBlDhah2H3hdUGXn8ie0oA==",
"dev": true,
"license": "MIT",
"engines": {
@ -6562,19 +6537,6 @@
"node": ">=8"
}
},
"node_modules/test-exclude/node_modules/minimatch": {
"version": "3.1.2",
"resolved": "https://registry.npmjs.org/minimatch/-/minimatch-3.1.2.tgz",
"integrity": "sha512-J7p63hRiAjw1NDEww1W7i37+ByIrOWO5XQQAzZ3VOcL0PNybwpfmV/N05zFAzwQ9USyEcX6t3UO+K5aqBQOIHw==",
"dev": true,
"license": "ISC",
"dependencies": {
"brace-expansion": "^1.1.7"
},
"engines": {
"node": "*"
}
},
"node_modules/throttle-debounce": {
"version": "5.0.2",
"resolved": "https://registry.npmjs.org/throttle-debounce/-/throttle-debounce-5.0.2.tgz",

View File

@ -25,10 +25,13 @@
},
"overrides": {
"glob": "13.0.0",
"minimatch": "10.1.1",
"minimatch": "10.2.5",
"picomatch": "2.3.2",
"handlebars": "4.7.9",
"@tootallnate/once": "3.0.1",
"@isaacs/brace-expansion": "5.0.0",
"@babel/traverse": "7.28.5",
"ws": "8.18.3",
"braces": "3.0.3"
}
}
}

View File

@ -128,8 +128,8 @@ async def get_spend_info(session, entity_type: str, entity_id: str):
async def get_proxy_readiness(session):
    """Fetch authenticated readiness details from the local proxy.

    Used both as a fail-fast gate and as a diagnostic on poll timeout.

    Args:
        session: HTTP client session whose ``get(url, headers=...)`` returns an
            async context manager yielding a response with ``status`` and an
            awaitable ``json()`` (presumably an ``aiohttp.ClientSession`` —
            confirm against callers).

    Returns:
        A ``(status, body)`` tuple: the HTTP status code and the decoded JSON
        body of the readiness response.
    """
    # The details endpoint is called with the bearer key below; the plain
    # /health/readiness URL is no longer used by this helper.
    url = "http://0.0.0.0:4000/health/readiness/details"
    headers = {"Authorization": "Bearer sk-1234"}
    async with session.get(url, headers=headers) as response:
        return response.status, await response.json()
@ -140,7 +140,7 @@ async def assert_proxy_healthy(session):
status, body = await get_proxy_readiness(session)
if status != 200 or body.get("db") != "connected":
pytest.fail(
f"Proxy /health/readiness unhealthy (status={status}). "
f"Proxy /health/readiness/details unhealthy (status={status}). "
f"Cannot run spend accuracy test. Response: {body}"
)
print(f"Proxy readiness OK: {body}")

View File

@ -73,13 +73,32 @@ async def test_health_readiness():
response_json = await response.json()
print(response_json)
assert "litellm_version" in response_json
assert "status" in response_json
if status != 200:
raise Exception(f"Request did not return a 200 status code: {status}")
@pytest.mark.asyncio
async def test_health_readiness_details():
    """
    Check if authenticated readiness diagnostics expose version metadata.

    Sends an authenticated GET to ``/health/readiness/details`` and expects a
    200 response whose JSON body contains ``status`` and ``litellm_version``.
    """
    url = "http://0.0.0.0:4000/health/readiness/details"
    headers = {"Authorization": "Bearer sk-1234"}
    async with aiohttp.ClientSession() as session:
        async with session.get(url, headers=headers) as response:
            status = response.status
            # Check the status code before touching the body: an error reply
            # should fail with the status message, not with a JSON decode error
            # or a missing-key assertion.
            if status != 200:
                raise Exception(f"Request did not return a 200 status code: {status}")
            response_json = await response.json()
            print(response_json)
            assert "status" in response_json
            assert "litellm_version" in response_json
@pytest.mark.asyncio
async def test_health_liveliness():
"""

View File

@ -0,0 +1,125 @@
"""SSTI regression coverage for non-dotprompt prompt managers.
DotpromptManager was hardened to render through
``ImmutableSandboxedEnvironment``. The sibling managers (gitlab, arize,
bitbucket) ship the exact same attacker-controlled-template surface:
repository write access or workspace edit access turns into RCE on the
proxy host if the renderer is unsandboxed. This suite locks in the sandbox
so the regression can't recur.
"""
from unittest.mock import MagicMock
import pytest
from jinja2.exceptions import SecurityError
from jinja2.sandbox import ImmutableSandboxedEnvironment
from litellm.integrations.arize.arize_phoenix_prompt_manager import (
ArizePhoenixTemplateManager,
)
from litellm.integrations.bitbucket.bitbucket_prompt_manager import (
BitBucketTemplateManager,
)
from litellm.integrations.gitlab.gitlab_prompt_manager import GitLabTemplateManager
# Classic Jinja2 SSTI payloads. Each template walks dunder attributes
# (``__class__``, ``__mro__``, ``__bases__``, ``__globals__``) to try to reach
# arbitrary Python objects. The tests in this module render every payload
# through a manager's ``jinja_env`` and expect ``SecurityError``; a payload
# that renders without raising means the sandbox isn't engaged.
_SSTI_PAYLOADS = [
"{{ ''.__class__.__mro__[1].__subclasses__() }}",
"{{ config.__class__.__init__.__globals__['os'].popen('id').read() }}",
"{{ cycler.__init__.__globals__.os.popen('id').read() }}",
"{{ ().__class__.__bases__[0].__subclasses__() }}",
]
def _build_gitlab_manager() -> GitLabTemplateManager:
    """Return a ``GitLabTemplateManager`` wired to a ``MagicMock`` client.

    ``prompt_id`` is passed as ``None``; per the original note, a non-None
    prompt id makes the constructor call into the GitLab client, whereas here
    we only need ``jinja_env`` to be constructed so tests can assert on it.
    """
    config = {"project": "p", "access_token": "t", "branch": "main"}
    client_stub = MagicMock()
    manager = GitLabTemplateManager(
        gitlab_config=config,
        prompt_id=None,
        gitlab_client=client_stub,
    )
    return manager
def _build_bitbucket_manager(monkeypatch) -> BitBucketTemplateManager:
    """Return a ``BitBucketTemplateManager`` with its client class stubbed out.

    ``BitBucketClient`` in the manager's module is replaced by a factory that
    returns a ``MagicMock``, so no network access or real config is needed.
    """
    from litellm.integrations.bitbucket import (
        bitbucket_prompt_manager as bitbucket_module,
    )

    def _fake_client(*a, **kw):
        return MagicMock()

    monkeypatch.setattr(bitbucket_module, "BitBucketClient", _fake_client)
    config = {"workspace": "w", "repository": "r", "access_token": "t"}
    return BitBucketTemplateManager(bitbucket_config=config, prompt_id=None)
def _build_arize_manager(monkeypatch) -> ArizePhoenixTemplateManager:
    """Return an ``ArizePhoenixTemplateManager`` with its client class stubbed.

    ``ArizePhoenixClient`` in the manager's module is replaced by a factory
    that returns a ``MagicMock`` before the manager is constructed.
    """
    from litellm.integrations.arize import (
        arize_phoenix_prompt_manager as arize_module,
    )

    def _fake_client(*a, **kw):
        return MagicMock()

    monkeypatch.setattr(arize_module, "ArizePhoenixClient", _fake_client)
    manager_kwargs = {
        "api_key": "k",
        "api_base": "https://example.test",
        "prompt_id": None,
    }
    return ArizePhoenixTemplateManager(**manager_kwargs)
@pytest.mark.parametrize(
    "manager_factory",
    [
        ("gitlab", lambda mp: _build_gitlab_manager()),
        ("bitbucket", _build_bitbucket_manager),
        ("arize", _build_arize_manager),
    ],
    ids=lambda v: v[0] if isinstance(v, tuple) else v,
)
def test_jinja_env_is_sandboxed(manager_factory, monkeypatch):
    """Every manager's ``jinja_env`` is an ``ImmutableSandboxedEnvironment``."""
    _label, build = manager_factory
    env = build(monkeypatch).jinja_env
    assert isinstance(env, ImmutableSandboxedEnvironment)
@pytest.mark.parametrize(
    "manager_factory",
    [
        ("gitlab", lambda mp: _build_gitlab_manager()),
        ("bitbucket", _build_bitbucket_manager),
        ("arize", _build_arize_manager),
    ],
    ids=lambda v: v[0] if isinstance(v, tuple) else v,
)
@pytest.mark.parametrize("payload", _SSTI_PAYLOADS)
def test_jinja_env_blocks_ssti_payloads(manager_factory, payload, monkeypatch):
    """Rendering an attribute-traversal payload must raise ``SecurityError``.

    Each payload is compiled with the manager's own ``jinja_env`` and then
    rendered with no variables; only the render call is expected to raise.
    """
    _label, build = manager_factory
    env = build(monkeypatch).jinja_env
    # Compile outside the ``raises`` block so only rendering is under test.
    compiled = env.from_string(payload)
    with pytest.raises(SecurityError):
        compiled.render()
@pytest.mark.parametrize(
    "manager_factory",
    [
        ("gitlab", lambda mp: _build_gitlab_manager()),
        ("bitbucket", _build_bitbucket_manager),
        ("arize", _build_arize_manager),
    ],
    ids=lambda v: v[0] if isinstance(v, tuple) else v,
)
def test_jinja_env_still_renders_normal_variables(manager_factory, monkeypatch):
    """Plain ``{{ name }}`` substitution keeps working through ``jinja_env``."""
    _label, build = manager_factory
    env = build(monkeypatch).jinja_env
    rendered = env.from_string("Hello {{ name }}!").render(name="world")
    assert rendered == "Hello world!"

View File

@ -0,0 +1,37 @@
from tokenizers import AddedToken, Tokenizer
from tokenizers.models import WordLevel
from tokenizers.pre_tokenizers import Whitespace
from tokenizers.processors import TemplateProcessing
from litellm import decode, encode
def _create_custom_tokenizer():
    """Build a tiny word-level HuggingFace tokenizer that prepends ``[BOS]``.

    Returns:
        A dict with ``"type": "huggingface_tokenizer"`` and the tokenizer
        object under ``"tokenizer"``, the form passed as ``custom_tokenizer``
        to ``encode``/``decode`` in this module.
    """
    vocab = {"[UNK]": 0, "Hello": 1, "World": 2}
    hf_tokenizer = Tokenizer(WordLevel(vocab, unk_token="[UNK]"))
    hf_tokenizer.pre_tokenizer = Whitespace()

    bos = AddedToken("[BOS]", special=True)
    hf_tokenizer.add_special_tokens([bos])
    bos_id = hf_tokenizer.token_to_id("[BOS]")
    assert bos_id is not None

    # Post-processing template: every single-sequence encoding starts with [BOS].
    hf_tokenizer.post_processor = TemplateProcessing(
        single="[BOS] $A",
        special_tokens=[("[BOS]", bos_id)],
    )
    return {"type": "huggingface_tokenizer", "tokenizer": hf_tokenizer}
def test_decode_can_preserve_huggingface_special_tokens():
    """``decode`` drops ``[BOS]`` by default and keeps it when
    ``skip_special_tokens=False`` is passed."""
    tok = _create_custom_tokenizer()
    text = "Hello World"
    token_ids = encode(text=text, custom_tokenizer=tok)

    without_special = decode(tokens=token_ids, custom_tokenizer=tok)
    with_special = decode(
        tokens=token_ids,
        custom_tokenizer=tok,
        skip_special_tokens=False,
    )

    assert without_special == text
    assert with_special == "[BOS] Hello World"

View File

@ -1493,6 +1493,7 @@ def test_observability_ban_covers_canonical_supported_callback_params():
safe is an explicit decision recorded in
``_SAFE_CLIENT_CALLBACK_PARAMS``."""
from litellm.litellm_core_utils.initialize_dynamic_callback_params import (
_request_blocked_callback_params,
_supported_callback_params,
)
from litellm.proxy.auth.auth_utils import (
@ -1508,3 +1509,8 @@ def test_observability_ban_covers_canonical_supported_callback_params():
f"informational per-request field; otherwise the derivation will "
f"ban it automatically."
)
for param in _request_blocked_callback_params:
assert param in banned, (
f"{param} is in _request_blocked_callback_params but is not banned "
"at the proxy request-body boundary."
)

View File

@ -10,9 +10,11 @@ sys.path.insert(
import pytest
import litellm
import litellm.proxy.proxy_server
from litellm.caching.dual_cache import DualCache
from litellm.proxy._types import (
LiteLLMRoutes,
LiteLLM_JWTAuth,
LiteLLM_BudgetTable,
LiteLLM_EndUserTable,
@ -27,6 +29,7 @@ from litellm.proxy.auth.handle_jwt import JWTHandler
from litellm.proxy.auth.auth_checks import get_key_object, _cache_key_object
from litellm.proxy.auth.route_checks import RouteChecks
from litellm.proxy.auth.user_api_key_auth import (
_route_requires_auth_despite_public,
_reserve_budget_after_common_checks,
_run_centralized_common_checks,
_run_post_custom_auth_checks,
@ -59,6 +62,29 @@ def test_get_api_key():
) == (api_key, passed_in_key)
def test_route_requires_auth_despite_public_for_metrics(monkeypatch):
    """``/metrics`` (with or without a trailing slash) requires auth only
    while ``litellm.require_auth_for_metrics_endpoint`` is True."""
    flag_name = "require_auth_for_metrics_endpoint"

    monkeypatch.setattr(litellm, flag_name, True)
    for metrics_route in ("/metrics", "/metrics/"):
        assert _route_requires_auth_despite_public(metrics_route, {}) is True

    monkeypatch.setattr(litellm, flag_name, False)
    assert _route_requires_auth_despite_public("/metrics", {}) is False
def test_public_ai_hub_routes_remain_public():
    """Each AI-hub route is listed in ``LiteLLMRoutes.public_routes`` and is
    not flagged by ``_route_requires_auth_despite_public``."""
    hub_routes = (
        "/public/model_hub",
        "/public/model_hub/info",
        "/public/agent_hub",
        "/public/mcp_hub",
        "/public/skill_hub",
    )
    public_routes = LiteLLMRoutes.public_routes.value
    for hub_route in hub_routes:
        assert hub_route in public_routes
        assert _route_requires_auth_despite_public(hub_route, {}) is False
@pytest.mark.asyncio
async def test_should_clear_stale_budget_reservation_when_budget_checks_skip():
user_api_key_auth_obj = UserAPIKeyAuth(
@ -2352,18 +2378,18 @@ async def test_centralized_common_checks_short_circuits_when_master_key_unset():
@pytest.mark.asyncio
async def test_centralized_common_checks_skips_public_routes():
"""Regression: public routes (e.g. /health/readiness) are exempted
"""Regression: public routes (e.g. /health/liveness) are exempted
by the builder fast-path. The wrapper must not retroactively run
common_checks on top the synthetic INTERNAL_USER_VIEW_ONLY token
has no user_id, so common_checks would reject the request as
admin-only. Breaks k8s readiness probes when master_key is set."""
admin-only."""
import litellm.proxy.proxy_server as _proxy_server_mod
from fastapi import Request
from starlette.datastructures import URL
token = UserAPIKeyAuth(user_role=LitellmUserRoles.INTERNAL_USER_VIEW_ONLY)
request = Request(scope={"type": "http"})
request._url = URL(url="/health/readiness")
request._url = URL(url="/health/liveness")
attrs = _proxy_attrs_for_centralized_checks(user_custom_auth=None)
originals = {a: getattr(_proxy_server_mod, a, None) for a in attrs}
@ -2378,7 +2404,7 @@ async def test_centralized_common_checks_skips_public_routes():
user_api_key_auth_obj=token,
request=request,
request_data={},
route="/health/readiness",
route="/health/liveness",
)
mock_checks.assert_not_awaited()
finally:

View File

@ -232,6 +232,52 @@ async def test_azure_text_moderation_guardrail_post_call_success_hook():
assert mock_async_make_request.call_args.kwargs["text"] == "Hello world"
@pytest.mark.asyncio
async def test_azure_text_moderation_guardrail_post_call_checks_all_choices():
    """The post-call hook sends every choice's text for moderation.

    The patched ``async_make_request`` returns a clean verdict for the first
    call and raises ``HTTPException`` on the second; the hook must propagate
    that exception and must have been called with both choices' content, in
    order.
    """
    guardrail = AzureContentSafetyTextModerationGuardrail(
        guardrail_name="azure_text_moderation",
        api_key="azure_text_moderation_api_key",
        api_base="azure_text_moderation_api_base",
    )
    clean_verdict = {
        "blocklistsMatch": [],
        "categoriesAnalysis": [{"category": "Hate", "severity": 0}],
    }
    block_error = HTTPException(
        status_code=400,
        detail={"error": "blocked second choice"},
    )
    two_choice_response = ModelResponse(
        choices=[
            Choices(index=0, message=Message(content="safe response")),
            Choices(index=1, message=Message(content="unsafe response")),
        ]
    )

    with patch.object(guardrail, "async_make_request") as request_mock:
        request_mock.side_effect = [clean_verdict, block_error]
        with pytest.raises(HTTPException):
            await guardrail.async_post_call_success_hook(
                data={},
                user_api_key_dict=UserAPIKeyAuth(
                    api_key="azure_text_moderation_api_key"
                ),
                response=two_choice_response,
            )

        inspected_texts = [
            recorded.kwargs["text"] for recorded in request_mock.call_args_list
        ]
        assert inspected_texts == ["safe response", "unsafe response"]
@pytest.mark.asyncio
async def test_azure_text_moderation_guardrail_post_call_streaming_hook():

View File

@ -453,6 +453,71 @@ class TestContentFilterGuardrail:
assert "[EMAIL_REDACTED]" in full_content
assert "Contact me at [EMAIL_REDACTED] for info" in full_content
@pytest.mark.asyncio
async def test_streaming_hook_mask_checks_all_choices(self):
    """Email masking in the streaming iterator hook applies to every choice.

    Two chunks each carry two choices; the email address of each choice is
    split across the chunk boundary. After streaming through the guardrail,
    the reassembled text of both choices must contain the redaction marker
    instead of the address.
    """
    from litellm.types.utils import Delta, ModelResponseStream, StreamingChoices

    email_mask = ContentFilterPattern(
        pattern_type="prebuilt",
        pattern_name="email",
        action=ContentFilterAction.MASK,
    )
    guardrail = ContentFilterGuardrail(
        guardrail_name="test-streaming-mask-all-choices",
        patterns=[email_mask],
        event_hook=GuardrailEventHooks.during_call,
    )

    async def _two_choice_stream():
        # First chunk: both choices end in the middle of an email address.
        yield ModelResponseStream(
            id="chunk1",
            choices=[
                StreamingChoices(delta=Delta(content="Contact first@ex"), index=0),
                StreamingChoices(delta=Delta(content="Email second@ex"), index=1),
            ],
            model="gpt-4",
        )
        # Second chunk: completes both addresses and finishes both choices.
        yield ModelResponseStream(
            id="chunk2",
            choices=[
                StreamingChoices(
                    delta=Delta(content="ample.com for help"),
                    index=0,
                    finish_reason="stop",
                ),
                StreamingChoices(
                    delta=Delta(content="ample.com for support"),
                    index=1,
                    finish_reason="stop",
                ),
            ],
            model="gpt-4",
        )

    collected = {0: "", 1: ""}
    filtered_stream = guardrail.async_post_call_streaming_iterator_hook(
        user_api_key_dict=MagicMock(),
        response=_two_choice_stream(),
        request_data={},
    )
    async for piece in filtered_stream:
        for streamed_choice in piece.choices:
            fragment = streamed_choice.delta.content
            if fragment:
                collected[streamed_choice.index] += fragment

    assert "first@example.com" not in collected[0]
    assert "second@example.com" not in collected[1]
    assert collected[0] == "Contact [EMAIL_REDACTED] for help"
    assert collected[1] == "Email [EMAIL_REDACTED] for support"
@pytest.mark.asyncio
async def test_streaming_hook_block(self):
"""

View File

@ -6,7 +6,6 @@ branch coverage. Network calls are always mocked; the companion live
suite lives in ``test_xecguard_live.py``.
"""
import asyncio
import os
from unittest.mock import MagicMock, patch
@ -1196,6 +1195,26 @@ class TestXecGuardMessageAssembly:
is None
)
def test_extract_assistant_text_combines_all_choices(self, xecguard_guardrail):
    """Text from every choice is joined with newlines, whether a choice's
    content is a plain string or a list of text parts."""
    response = {
        "choices": [
            {"message": {"content": "first response"}},
            {
                "message": {
                    "content": [
                        {"type": "text", "text": "second"},
                        {"type": "text", "text": "response"},
                    ]
                }
            },
        ]
    }
    extracted = xecguard_guardrail._extract_assistant_text_from_response(response)
    assert extracted == "first response\nsecond\nresponse"
def test_synthesize_user_inputs_not_dict(self, xecguard_guardrail):
    """A non-dict ``inputs`` value yields ``None``."""
    synthesized = xecguard_guardrail._synthesize_user_from_inputs("not-dict")
    assert synthesized is None

View File

@ -0,0 +1,303 @@
"""Tests for the shared guardrail content extraction helpers."""
from litellm.proxy.guardrails._content_utils import (
apply_redacted_messages_back,
build_inspection_messages,
has_non_string_content,
iter_message_text,
walk_user_text,
)
# ── iter_message_text ────────────────────────────────────────────────────────────
def test_iter_message_text_string_messages():
    """String ``content`` of each chat message is yielded in order."""
    request_body = {
        "messages": [
            {"role": "user", "content": "hello"},
            {"role": "assistant", "content": "hi"},
        ]
    }
    fragments = list(iter_message_text(request_body))
    assert fragments == ["hello", "hi"]
def test_iter_message_text_multimodal_list_content():
    """VERIA-11: text parts inside list-format content are yielded; the
    non-text part in between is left out."""
    user_parts = [
        {"type": "text", "text": "AWS_KEY=AKIA..."},
        {"type": "image_url", "image_url": {"url": "..."}},
        {"type": "text", "text": "more secrets"},
    ]
    request_body = {"messages": [{"role": "user", "content": user_parts}]}
    fragments = list(iter_message_text(request_body))
    assert fragments == ["AWS_KEY=AKIA...", "more secrets"]
def test_iter_message_text_responses_api_string_input():
    """fniVO9-F: a string Responses-API ``input`` is yielded when no
    ``messages`` key is present."""
    request_body = {"input": "tell me a secret"}
    fragments = list(iter_message_text(request_body))
    assert fragments == ["tell me a secret"]
def test_iter_message_text_responses_api_list_input_messages():
    """A list ``input`` of role/content messages yields each content string."""
    input_items = [
        {"role": "user", "content": "first"},
        {"role": "user", "content": "second"},
    ]
    fragments = list(iter_message_text({"input": input_items}))
    assert fragments == ["first", "second"]
def test_iter_message_text_responses_api_list_input_content_parts():
    """A list ``input`` of content parts yields only the text parts."""
    input_items = [
        {"type": "text", "text": "alpha"},
        {"type": "image_url", "image_url": {"url": "..."}},
        {"type": "text", "text": "beta"},
    ]
    fragments = list(iter_message_text({"input": input_items}))
    assert fragments == ["alpha", "beta"]
def test_iter_message_text_responses_api_list_input_mixed_dicts_and_strings():
    """Greptile P2: a list ``input`` mixing content-part dicts with bare
    strings yields every text fragment, bare strings included, in order."""
    input_items = [
        {"type": "text", "text": "from-dict"},
        "from-bare-string",
        {"type": "image_url", "image_url": {"url": "..."}},
        "another-bare-string",
    ]
    expected = ["from-dict", "from-bare-string", "another-bare-string"]
    fragments = list(iter_message_text({"input": input_items}))
    assert fragments == expected
def test_iter_message_text_walks_messages_and_input_independently():
    """When both fields are present (rare), fragments from each one are
    yielded -- a stricter guarantee than "first one wins"."""
    request = {
        "messages": [{"role": "user", "content": "msg-content"}],
        "input": "input-content",
    }
    fragments = list(iter_message_text(request))
    assert fragments == ["msg-content", "input-content"]
def test_iter_message_text_empty_data():
    """Empty dict, empty ``messages`` and empty-string ``input`` yield nothing."""
    no_fragments = []
    assert list(iter_message_text({})) == no_fragments
    assert list(iter_message_text({"messages": []})) == no_fragments
    assert list(iter_message_text({"input": ""})) == no_fragments
# ── walk_user_text ────────────────────────────────────────────────────────────
def test_walk_user_text_redacts_string_messages_in_place():
    """String ``content`` is rewritten in place and both messages are counted."""
    request = {
        "messages": [
            {"role": "user", "content": "leak: AKIAEXAMPLE"},
            {"role": "assistant", "content": "ok"},
        ]
    }
    count = walk_user_text(
        request, lambda text: text.replace("AKIAEXAMPLE", "[REDACTED]")
    )
    assert count == 2
    messages = request["messages"]
    assert messages[0]["content"] == "leak: [REDACTED]"
    assert messages[1]["content"] == "ok"
def test_walk_user_text_redacts_multimodal_text_parts():
    """VERIA-11: text parts inside list content are redacted in place."""
    image_part = {"type": "image_url", "image_url": {"url": "..."}}
    request = {
        "messages": [
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": "AKIAEXAMPLE here"},
                    {"type": "image_url", "image_url": {"url": "..."}},
                    {"type": "text", "text": "no secret"},
                ],
            }
        ]
    }
    count = walk_user_text(
        request, lambda text: text.replace("AKIAEXAMPLE", "[REDACTED]")
    )
    assert count == 2
    first, second, third = (request["messages"][0]["content"][i] for i in range(3))
    assert first == {"type": "text", "text": "[REDACTED] here"}
    # The image part is not text, so it has to come back unchanged.
    assert second == image_part
    assert third == {"type": "text", "text": "no secret"}
def test_walk_user_text_redacts_responses_api_string_input():
    """A string ``input`` is replaced by the transformed string."""
    request = {"input": "leak AKIAEXAMPLE"}
    count = walk_user_text(
        request, lambda text: text.replace("AKIAEXAMPLE", "[REDACTED]")
    )
    assert count == 1
    assert request["input"] == "leak [REDACTED]"
def test_walk_user_text_redacts_responses_api_list_input():
    """Text parts in a list ``input`` are rewritten; image parts are untouched."""
    request = {
        "input": [
            {"type": "text", "text": "AKIAEXAMPLE"},
            {"type": "image_url", "image_url": {"url": "..."}},
        ]
    }
    count = walk_user_text(request, lambda text: f"[redacted]{text}[/]")
    assert count == 1
    text_part, image_part = request["input"][0], request["input"][1]
    assert text_part == {"type": "text", "text": "[redacted]AKIAEXAMPLE[/]"}
    assert image_part == {"type": "image_url", "image_url": {"url": "..."}}
def test_walk_user_text_redacts_mixed_list_input():
    """Read and write helpers must agree on coverage -- bare strings inside
    a mixed ``input`` list are visited by the write helper too."""
    request = {
        "input": [
            {"type": "text", "text": "secret-one"},
            "secret-two",
            {"type": "image_url", "image_url": {"url": "..."}},
        ]
    }
    count = walk_user_text(request, lambda text: f"<{text}>")
    assert count == 2
    items = request["input"]
    assert items[0] == {"type": "text", "text": "<secret-one>"}
    assert items[1] == "<secret-two>"
    assert items[2] == {"type": "image_url", "image_url": {"url": "..."}}
# ── build_inspection_messages ─────────────────────────────────────────────────
def test_build_inspection_messages_chat_completion_passthrough():
    """String-content chat messages come back with the same role and content."""
    request = {
        "messages": [
            {"role": "system", "content": "be helpful"},
            {"role": "user", "content": "hi"},
        ]
    }
    expected = [
        {"role": "system", "content": "be helpful"},
        {"role": "user", "content": "hi"},
    ]
    assert build_inspection_messages(request) == expected
def test_build_inspection_messages_joins_multimodal_text_parts():
    """Text parts of one multimodal message are joined with a newline."""
    content_parts = [
        {"type": "text", "text": "first part"},
        {"type": "image_url", "image_url": {"url": "..."}},
        {"type": "text", "text": "second part"},
    ]
    request = {"messages": [{"role": "user", "content": content_parts}]}
    expected = [{"role": "user", "content": "first part\nsecond part"}]
    assert build_inspection_messages(request) == expected
def test_build_inspection_messages_lifts_responses_api_input():
    """fniVO9-F: ``input`` must be visible to hooks that POST messages to a remote API."""
    request = {"input": "responses-api content"}
    expected = [{"role": "user", "content": "responses-api content"}]
    assert build_inspection_messages(request) == expected
def test_build_inspection_messages_drops_messages_with_no_text():
    """Messages with empty or image-only content are omitted from the result."""
    empty_msg = {"role": "user", "content": ""}
    image_only_msg = {
        "role": "user",
        "content": [{"type": "image_url", "image_url": {"url": "..."}}],
    }
    text_msg = {"role": "user", "content": "kept"}
    request = {"messages": [empty_msg, image_only_msg, text_msg]}
    assert build_inspection_messages(request) == [{"role": "user", "content": "kept"}]
def test_build_inspection_messages_empty_data():
    """Empty dict, empty ``messages`` and empty-string ``input`` give ``[]``."""
    nothing = []
    assert build_inspection_messages({}) == nothing
    assert build_inspection_messages({"messages": []}) == nothing
    assert build_inspection_messages({"input": ""}) == nothing
# ── has_non_string_content ────────────────────────────────────────────────────
def test_has_non_string_content_string_messages():
    """A message with string content is reported as ``False``."""
    request = {"messages": [{"role": "user", "content": "hello"}]}
    result = has_non_string_content(request)
    assert result is False
def test_has_non_string_content_multimodal_messages():
    """A message whose content is a list of parts is reported as ``True``."""
    list_content = [{"type": "text", "text": "hi"}]
    request = {"messages": [{"role": "user", "content": list_content}]}
    assert has_non_string_content(request) is True
def test_has_non_string_content_responses_api_string_input():
    """A plain string ``input`` is reported as ``False``."""
    request = {"input": "plain string"}
    assert has_non_string_content(request) is False
def test_has_non_string_content_responses_api_list_input():
    """A list ``input`` is reported as ``True``."""
    request = {"input": ["a", "b"]}
    assert has_non_string_content(request) is True
def test_has_non_string_content_empty_data():
    """Empty dict, empty ``messages`` and empty-string ``input`` are ``False``."""
    empty = {}
    no_messages = {"messages": []}
    blank_input = {"input": ""}
    assert has_non_string_content(empty) is False
    assert has_non_string_content(no_messages) is False
    assert has_non_string_content(blank_input) is False
# ── apply_redacted_messages_back ──────────────────────────────────────────────
def test_apply_redacted_messages_back_chat_completion():
    """``messages`` is replaced and no ``input`` key is introduced."""
    request = {"messages": [{"role": "user", "content": "secret"}]}
    redacted = [{"role": "user", "content": "[REDACTED]"}]
    apply_redacted_messages_back(request, redacted)
    assert request["messages"] == [{"role": "user", "content": "[REDACTED]"}]
    assert "input" not in request
def test_apply_redacted_messages_back_responses_api_string_input():
    """A Responses-API request reads ``data["input"]``; writing only to
    ``messages`` would let unredacted text reach the LLM."""
    request = {"input": "secret payload"}
    redacted = [{"role": "user", "content": "[REDACTED]"}]
    apply_redacted_messages_back(request, redacted)
    assert request["input"] == "[REDACTED]"
def test_apply_redacted_messages_back_both_fields():
    """Defensive: when both fields are present, both are updated."""
    request = {
        "messages": [{"role": "user", "content": "old"}],
        "input": "old",
    }
    redacted = [{"role": "user", "content": "[REDACTED]"}]
    apply_redacted_messages_back(request, redacted)
    assert request["messages"] == [{"role": "user", "content": "[REDACTED]"}]
    assert request["input"] == "[REDACTED]"
def test_apply_redacted_messages_back_skips_input_when_not_string():
    """List ``input`` (multimodal Responses-API) is left alone -- the
    multimodal-degrades-to-block guard runs upstream."""
    request = {"input": [{"type": "text", "text": "leak"}]}
    redacted = [{"role": "user", "content": "[REDACTED]"}]
    apply_redacted_messages_back(request, redacted)
    assert request["input"] == [{"type": "text", "text": "leak"}]

View File

@ -0,0 +1,811 @@
"""
Regression tests for guardrail-coverage gaps.
Each test confirms that a previously-bypassable input shape now triggers
inspection by the relevant guardrail hook:
- VERIA-11: multimodal list-format ``content`` is inspected (no longer
silently skipped because of an ``isinstance(content, str)`` check).
- fniVO9-F: Responses-API ``data["input"]`` is inspected (no longer
silently skipped because the hook only looked at ``data["messages"]``).
- yVS0wMDO: Aim's post-call hook inspects every choice when ``n>1``,
not just ``choices[0]``.
"""
from typing import Any, Dict
from unittest.mock import AsyncMock, MagicMock, patch
import pytest
from httpx import Request, Response
from litellm import DualCache
from litellm.proxy._types import UserAPIKeyAuth
from litellm.types.utils import Choices, Message, ModelResponse
@pytest.fixture
def user_api_key():
    """Provide the ``UserAPIKeyAuth`` object passed to the hooks under test."""
    auth = UserAPIKeyAuth(api_key="hashed", user_id="u", key_alias=None)
    return auth
# ── Aim ───────────────────────────────────────────────────────────────────────
def _aim_no_action_response() -> Response:
    """Build a 200 response whose JSON body has ``required_action`` set to ``None``."""
    body = {"required_action": None}
    return Response(
        status_code=200,
        json=body,
        request=Request("POST", "https://api.aim.security/fw/v1/analyze"),
    )
@pytest.mark.asyncio
async def test_aim_inspects_multimodal_list_content(user_api_key, monkeypatch):
    """The text part of list-format content appears in the payload posted to Aim."""
    monkeypatch.setenv("AIM_API_KEY", "hs-aim-key")
    from litellm.proxy.guardrails.guardrail_hooks.aim.aim import AimGuardrail

    guard = AimGuardrail()
    posted: Dict[str, Any] = {}

    async def capture(url, headers, json):
        # Record the outgoing body so it can be asserted on afterwards.
        posted.update(json)
        return _aim_no_action_response()

    request = {
        "messages": [
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": "secret payload"},
                    {"type": "image_url", "image_url": {"url": "..."}},
                ],
            }
        ]
    }
    with patch.object(guard.async_handler, "post", side_effect=capture):
        await guard.async_pre_call_hook(
            user_api_key_dict=user_api_key,
            cache=DualCache(),
            data=request,
            call_type="acompletion",
        )

    # The multimodal text part must be visible to Aim.
    assert posted["messages"] == [{"role": "user", "content": "secret payload"}]
@pytest.mark.asyncio
async def test_aim_inspects_responses_api_input(user_api_key, monkeypatch):
    """A string ``input`` is sent to Aim as a single user message."""
    monkeypatch.setenv("AIM_API_KEY", "hs-aim-key")
    from litellm.proxy.guardrails.guardrail_hooks.aim.aim import AimGuardrail

    guard = AimGuardrail()
    posted: Dict[str, Any] = {}

    async def capture(url, headers, json):
        posted.update(json)
        return _aim_no_action_response()

    with patch.object(guard.async_handler, "post", side_effect=capture):
        await guard.async_pre_call_hook(
            user_api_key_dict=user_api_key,
            cache=DualCache(),
            data={"input": "responses-api content"},
            call_type="acompletion",
        )

    expected = [{"role": "user", "content": "responses-api content"}]
    assert posted["messages"] == expected
@pytest.mark.asyncio
async def test_aim_post_call_inspects_all_choices(user_api_key, monkeypatch):
    """yVS0wMDO: ``n>1`` no longer bypasses Aim by hiding violations in
    ``choices[1+]``."""
    monkeypatch.setenv("AIM_API_KEY", "hs-aim-key")
    from litellm.proxy.guardrails.guardrail_hooks.aim.aim import AimGuardrail

    guard = AimGuardrail()
    outputs_seen = []

    async def capture(request_data, output, hook, key_alias):
        outputs_seen.append(output)
        return {"redacted_output": output}

    texts = ["first", "second", "third"]
    response = ModelResponse(
        choices=[
            Choices(index=position, message=Message(role="assistant", content=text))
            for position, text in enumerate(texts)
        ]
    )
    request = {"messages": [{"role": "user", "content": "hi"}]}
    with patch.object(guard, "call_aim_guardrail_on_output", side_effect=capture):
        await guard.async_post_call_success_hook(
            data=request,
            user_api_key_dict=user_api_key,
            response=response,
        )

    # ``asyncio.gather`` is used for parallelism, so order of inspection is
    # not guaranteed.
    assert sorted(outputs_seen) == ["first", "second", "third"]
# ── Lakera v2 ─────────────────────────────────────────────────────────────────
@pytest.mark.asyncio
async def test_lakera_v2_inspects_responses_api_input(user_api_key, monkeypatch):
    """A string ``input`` reaches ``call_v2_guard`` as a single user message."""
    monkeypatch.setenv("LAKERA_API_KEY", "lk-test")
    from litellm.proxy.guardrails.guardrail_hooks.lakera_ai_v2 import (
        LakeraAIGuardrail,
    )

    guard = LakeraAIGuardrail(api_key="lk-test", on_flagged="monitor")
    guard_calls = []

    async def fake_call_v2_guard(messages, request_data, event_type):
        guard_calls.append(messages)
        return {"flagged": False}, {}

    with patch.object(guard, "call_v2_guard", side_effect=fake_call_v2_guard):
        await guard.async_pre_call_hook(
            user_api_key_dict=user_api_key,
            cache=DualCache(),
            data={"input": "responses-api content"},
            call_type="responses",
        )

    expected_messages = [{"role": "user", "content": "responses-api content"}]
    assert guard_calls == [expected_messages]
@pytest.mark.asyncio
async def test_lakera_v2_responses_api_input_redacted_writeback(
    user_api_key, monkeypatch
):
    """Greptile P1: when input arrives via Responses-API ``data["input"]``
    (string) and Lakera flags PII, the redacted content must be written
    back to ``data["input"]`` -- the Responses-API backend reads from
    ``input``, so writing only to ``messages`` would let unredacted PII
    reach the LLM."""
    monkeypatch.setenv("LAKERA_API_KEY", "lk-test")
    from litellm.proxy.guardrails.guardrail_hooks.lakera_ai_v2 import (
        LakeraAIGuardrail,
    )

    guard = LakeraAIGuardrail(api_key="lk-test", on_flagged="block")

    async def fake_call_v2_guard(messages, request_data, event_type):
        flagged_result = {"flagged": True, "payload": []}
        entity_counts = {"EMAIL": 1}
        return (flagged_result, entity_counts)

    def fake_mask(messages, lakera_response, masked_entity_count):
        return [{"role": "user", "content": "[REDACTED EMAIL]"}]

    with (
        patch.object(guard, "call_v2_guard", side_effect=fake_call_v2_guard),
        patch.object(guard, "_is_only_pii_violation", return_value=True),
        patch.object(guard, "_mask_pii_in_messages", side_effect=fake_mask),
    ):
        request = {"input": "user@example.com leaked"}
        await guard.async_pre_call_hook(
            user_api_key_dict=user_api_key,
            cache=DualCache(),
            data=request,
            call_type="responses",
        )

    assert request["input"] == "[REDACTED EMAIL]"
@pytest.mark.asyncio
async def test_aim_responses_api_input_anonymize_writeback(user_api_key, monkeypatch):
    """Greptile P1: Aim's anonymize action must redact ``data["input"]``
    for Responses-API requests, not just ``data["messages"]``."""
    monkeypatch.setenv("AIM_API_KEY", "hs-aim-key")
    from litellm.proxy.guardrails.guardrail_hooks.aim.aim import AimGuardrail

    guard = AimGuardrail()
    redacted_messages = [{"role": "user", "content": "[REDACTED] anonymised"}]
    aim_body = {
        "required_action": {"action_type": "anonymize_action"},
        "redacted_chat": {"all_redacted_messages": redacted_messages},
    }

    async def capture(url, headers, json):
        analyze_request = Request("POST", "https://api.aim.security/fw/v1/analyze")
        return Response(status_code=200, json=aim_body, request=analyze_request)

    with patch.object(guard.async_handler, "post", side_effect=capture):
        request = {"input": "user@example.com leaked"}
        await guard.async_pre_call_hook(
            user_api_key_dict=user_api_key,
            cache=DualCache(),
            data=request,
            call_type="responses",
        )

    assert request["input"] == "[REDACTED] anonymised"
@pytest.mark.asyncio
async def test_lakera_v2_multimodal_pii_degrades_to_block(user_api_key, monkeypatch):
    """Mask-in-place uses Lakera offsets and cannot preserve image/audio
    parts of multimodal input. When PII is detected on a multimodal
    request, the hook must raise the block exception instead of silently
    flattening ``data["messages"]`` to text-only."""
    monkeypatch.setenv("LAKERA_API_KEY", "lk-test")
    from fastapi import HTTPException

    from litellm.proxy.guardrails.guardrail_hooks.lakera_ai_v2 import (
        LakeraAIGuardrail,
    )

    guard = LakeraAIGuardrail(api_key="lk-test", on_flagged="block")

    async def fake_call_v2_guard(messages, request_data, event_type):
        detection = {"detector_type": "pii/email", "start": 0, "end": 5}
        return ({"flagged": True, "payload": [detection]}, {"EMAIL": 1})

    multimodal_request = {
        "messages": [
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": "leak"},
                    {"type": "image_url", "image_url": {"url": "..."}},
                ],
            }
        ]
    }
    with (
        patch.object(guard, "call_v2_guard", side_effect=fake_call_v2_guard),
        patch.object(guard, "_is_only_pii_violation", return_value=True),
        patch.object(
            guard,
            "_get_http_exception_for_blocked_guardrail",
            return_value=HTTPException(status_code=400, detail="blocked"),
        ),
    ):
        with pytest.raises(HTTPException):
            await guard.async_pre_call_hook(
                user_api_key_dict=user_api_key,
                cache=DualCache(),
                data=multimodal_request,
                call_type="acompletion",
            )
@pytest.mark.asyncio
async def test_lakera_v2_inspects_multimodal_list_content(user_api_key, monkeypatch):
    """The text part of list-format content reaches ``call_v2_guard``."""
    monkeypatch.setenv("LAKERA_API_KEY", "lk-test")
    from litellm.proxy.guardrails.guardrail_hooks.lakera_ai_v2 import (
        LakeraAIGuardrail,
    )

    guard = LakeraAIGuardrail(api_key="lk-test", on_flagged="monitor")
    guard_calls = []

    async def fake_call_v2_guard(messages, request_data, event_type):
        guard_calls.append(messages)
        return {"flagged": False}, {}

    multimodal_request = {
        "messages": [
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": "AKIAEXAMPLE"},
                    {"type": "image_url", "image_url": {"url": "..."}},
                ],
            }
        ]
    }
    with patch.object(guard, "call_v2_guard", side_effect=fake_call_v2_guard):
        await guard.async_pre_call_hook(
            user_api_key_dict=user_api_key,
            cache=DualCache(),
            data=multimodal_request,
            call_type="acompletion",
        )

    assert guard_calls == [[{"role": "user", "content": "AKIAEXAMPLE"}]]
# ── Lasso ─────────────────────────────────────────────────────────────────────
@pytest.mark.asyncio
async def test_lasso_multimodal_falls_back_to_classify(user_api_key, monkeypatch):
    """Lasso's classifix (mask) endpoint returns text that overwrites
    ``data["messages"]``. For multimodal input that would silently strip
    image parts -- the hook must use the classify endpoint instead and
    leave the original payload intact."""
    monkeypatch.setenv("LASSO_API_KEY", "ls-test")
    from litellm.proxy.guardrails.guardrail_hooks.lasso.lasso import LassoGuardrail

    guard = LassoGuardrail(lasso_api_key="ls-test", mask=True)
    # Flags flipped by the stand-in handlers below.
    invoked = {"masking": False, "classify": False}

    async def fake_masking(data, cache, message_type, messages):
        invoked["masking"] = True
        return data

    async def fake_classification(data, cache, message_type, messages):
        invoked["classify"] = True
        return data

    multimodal_request = {
        "messages": [
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": "hello"},
                    {"type": "image_url", "image_url": {"url": "..."}},
                ],
            }
        ]
    }
    with (
        patch.object(guard, "_handle_masking", side_effect=fake_masking),
        patch.object(guard, "_handle_classification", side_effect=fake_classification),
    ):
        await guard._run_lasso_guardrail(
            data=multimodal_request,
            cache=DualCache(),
            message_type="PROMPT",
        )

    assert invoked["classify"] is True
    assert invoked["masking"] is False
@pytest.mark.asyncio
async def test_lasso_inspects_responses_api_input(user_api_key, monkeypatch):
    """A string ``input`` is handed to classification as one user message."""
    monkeypatch.setenv("LASSO_API_KEY", "ls-test")
    from litellm.proxy.guardrails.guardrail_hooks.lasso.lasso import LassoGuardrail

    guard = LassoGuardrail(lasso_api_key="ls-test")
    classified = []

    async def fake_handle_classification(data, cache, message_type, messages):
        classified.append(messages)
        return data

    classification_patch = patch.object(
        guard, "_handle_classification", side_effect=fake_handle_classification
    )
    with classification_patch:
        await guard._run_lasso_guardrail(
            data={"input": "responses-api content"},
            cache=DualCache(),
            message_type="PROMPT",
        )

    assert classified == [[{"role": "user", "content": "responses-api content"}]]
@pytest.mark.asyncio
async def test_lasso_masking_writes_back_responses_api_input(user_api_key, monkeypatch):
    """Krrish blocker: Lasso classifix masking must update ``data["input"]``
    for Responses-API requests, not only ``data["messages"]``."""
    monkeypatch.setenv("LASSO_API_KEY", "ls-test")
    from litellm.proxy.guardrails.guardrail_hooks.lasso.lasso import LassoGuardrail

    guard = LassoGuardrail(lasso_api_key="ls-test", mask=True)
    masked_reply = {
        "violations_detected": True,
        "deputies": {"pii": True},
        "findings": {"pii": [{"action": "AUTO_MASKING"}]},
        "messages": [{"role": "user", "content": "[REDACTED]"}],
    }

    async def fake_call_lasso_api(headers, payload, api_url=None):
        return masked_reply

    request = {"input": "user@example.com leaked"}
    with patch.object(guard, "_call_lasso_api", side_effect=fake_call_lasso_api):
        await guard._run_lasso_guardrail(
            data=request,
            cache=DualCache(),
            message_type="PROMPT",
        )

    assert request["input"] == "[REDACTED]"
# ── Banned Keywords ───────────────────────────────────────────────────────────
def test_banned_keywords_blocks_multimodal_content(monkeypatch):
    """VERIA-11: a banned word hidden in a multimodal text part is now caught.

    Uses ``acompletion`` -- the value the proxy ingress actually passes
    for ``/v1/chat/completions``. Asserting against the literal sync
    ``"completion"`` would pass even if the hook's call-type gate were
    misaligned with the runtime, so the test wouldn't catch regressions.
    """
    monkeypatch.setattr("litellm.banned_keywords_list", ["forbidden"], raising=False)
    from enterprise.enterprise_hooks.banned_keywords import _ENTERPRISE_BannedKeywords
    from fastapi import HTTPException

    import asyncio

    guard = _ENTERPRISE_BannedKeywords()
    multimodal_request = {
        "messages": [
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": "forbidden word here"},
                    {"type": "image_url", "image_url": {"url": "..."}},
                ],
            }
        ]
    }

    async def _run():
        await guard.async_pre_call_hook(
            user_api_key_dict=UserAPIKeyAuth(api_key="hashed", user_id="u"),
            cache=DualCache(),
            data=multimodal_request,
            call_type="acompletion",
        )

    with pytest.raises(HTTPException) as exc:
        asyncio.run(_run())

    detail_text = str(exc.value.detail).lower()
    assert "forbidden" in detail_text
def test_banned_keywords_blocks_responses_api_input(monkeypatch):
    """A banned word in a string ``input`` raises under ``aresponses``."""
    monkeypatch.setattr("litellm.banned_keywords_list", ["forbidden"], raising=False)
    from enterprise.enterprise_hooks.banned_keywords import _ENTERPRISE_BannedKeywords
    from fastapi import HTTPException

    import asyncio

    guard = _ENTERPRISE_BannedKeywords()

    async def _run():
        await guard.async_pre_call_hook(
            user_api_key_dict=UserAPIKeyAuth(api_key="hashed", user_id="u"),
            cache=DualCache(),
            data={"input": "this contains forbidden content"},
            call_type="aresponses",
        )

    with pytest.raises(HTTPException):
        asyncio.run(_run())
@pytest.mark.parametrize("call_type", ["completion", "acompletion", "aresponses"])
def test_banned_keywords_fires_on_text_content_call_types(monkeypatch, call_type):
    """Locks the call-type gate to the runtime ``route_type`` values the
    proxy actually emits -- pinning a regression where the hook had
    ``call_type == "completion"`` and silently no-op'd both
    ``acompletion`` (chat completions) and ``aresponses`` (Responses API).
    """
    monkeypatch.setattr("litellm.banned_keywords_list", ["forbidden"], raising=False)
    from enterprise.enterprise_hooks.banned_keywords import _ENTERPRISE_BannedKeywords
    from fastapi import HTTPException

    import asyncio

    guard = _ENTERPRISE_BannedKeywords()
    request = {
        "messages": [{"role": "user", "content": "forbidden text"}],
        "input": "forbidden text",
    }

    with pytest.raises(HTTPException):
        hook_call = guard.async_pre_call_hook(
            user_api_key_dict=UserAPIKeyAuth(api_key="hashed", user_id="u"),
            cache=DualCache(),
            data=request,
            call_type=call_type,
        )
        asyncio.run(hook_call)
def test_banned_keywords_skips_non_text_call_types(monkeypatch):
    """Embedding / moderation / audio paths don't carry chat text and
    aren't in the text-guardrail scope. They must not trigger the hook
    even when the request body otherwise looks like a chat payload.
    """
    monkeypatch.setattr("litellm.banned_keywords_list", ["forbidden"], raising=False)
    from enterprise.enterprise_hooks.banned_keywords import _ENTERPRISE_BannedKeywords

    import asyncio

    guard = _ENTERPRISE_BannedKeywords()
    non_text_call_types = ("aembedding", "amoderation", "aspeech", "atranscription")

    for call_type in non_text_call_types:
        # The hook has to return normally although the body holds the banned word.
        hook_call = guard.async_pre_call_hook(
            user_api_key_dict=UserAPIKeyAuth(api_key="hashed", user_id="u"),
            cache=DualCache(),
            data={"input": "forbidden text"},
            call_type=call_type,
        )
        asyncio.run(hook_call)
@pytest.mark.asyncio
async def test_banned_keywords_post_call_checks_all_choices(monkeypatch, user_api_key):
    """Krrish blocker: ``n>1`` responses must not bypass post-call checks by
    placing the banned text in ``choices[1+]``."""
    monkeypatch.setattr("litellm.banned_keywords_list", ["forbidden"], raising=False)
    from enterprise.enterprise_hooks.banned_keywords import _ENTERPRISE_BannedKeywords
    from fastapi import HTTPException

    guard = _ENTERPRISE_BannedKeywords()
    clean_choice = Choices(index=0, message=Message(role="assistant", content="clean"))
    banned_choice = Choices(
        index=1, message=Message(role="assistant", content="forbidden")
    )
    response = ModelResponse(choices=[clean_choice, banned_choice])

    with pytest.raises(HTTPException) as exc:
        await guard.async_post_call_success_hook(
            data={},
            user_api_key_dict=user_api_key,
            response=response,
        )

    detail_text = str(exc.value.detail).lower()
    assert "forbidden" in detail_text
# ── Azure Content Safety ──────────────────────────────────────────────────────
@pytest.mark.asyncio
@pytest.mark.parametrize(
    "call_type, data",
    [
        (
            "acompletion",
            {
                "messages": [
                    {
                        "role": "user",
                        "content": [
                            {"type": "text", "text": "scan me"},
                            {"type": "image_url", "image_url": {"url": "..."}},
                        ],
                    }
                ]
            },
        ),
        ("aresponses", {"input": "scan me"}),
    ],
)
async def test_azure_content_safety_pre_call_fires_on_runtime_call_types(
    user_api_key, call_type, data
):
    """The proxy ingress passes ``route_type`` straight through as
    ``call_type`` -- ``acompletion`` for chat completions and
    ``aresponses`` for the Responses API. The hook must inspect text
    fragments under both, not only the literal ``"completion"`` string
    used by some SDK callers."""
    from litellm.proxy.hooks.azure_content_safety import _PROXY_AzureContentSafety

    # ``__new__`` gives an instance without running the class's ``__init__``.
    guard = _PROXY_AzureContentSafety.__new__(_PROXY_AzureContentSafety)
    checked = []

    async def fake_test_violation(content, source=None):
        checked.append((content, source))

    guard.test_violation = fake_test_violation

    await guard.async_pre_call_hook(
        user_api_key_dict=user_api_key,
        cache=DualCache(),
        data=data,
        call_type=call_type,
    )

    expected_call = ("scan me", "input")
    assert expected_call in checked
@pytest.mark.asyncio
async def test_azure_content_safety_post_call_checks_all_choices(user_api_key):
    """Krrish blocker: ``n>1`` responses must not bypass Azure Content Safety
    by placing the unsafe text in ``choices[1+]``."""
    from fastapi import HTTPException

    from litellm.proxy.hooks.azure_content_safety import _PROXY_AzureContentSafety

    guard = _PROXY_AzureContentSafety.__new__(_PROXY_AzureContentSafety)
    checked = []

    async def fake_test_violation(content, source=None):
        checked.append((content, source))
        if "unsafe" in content:
            raise HTTPException(status_code=400, detail={"error": "unsafe"})

    guard.test_violation = fake_test_violation

    texts = ["clean", "unsafe", "later"]
    response = ModelResponse(
        choices=[
            Choices(index=position, message=Message(role="assistant", content=text))
            for position, text in enumerate(texts)
        ]
    )

    with pytest.raises(HTTPException):
        await guard.async_post_call_success_hook(
            data={},
            user_api_key_dict=user_api_key,
            response=response,
        )

    # The third choice is never checked: the stand-in raised on the second one.
    assert checked == [("clean", "output"), ("unsafe", "output")]
# ── Secret Detection ──────────────────────────────────────────────────────────
@pytest.mark.asyncio
async def test_secret_detection_redacts_multimodal_text_parts(user_api_key):
    """A secret in a multimodal text part is redacted; the image part is kept."""
    from enterprise.litellm_enterprise.enterprise_callbacks.secret_detection import (
        _ENTERPRISE_SecretDetection,
    )

    guard = _ENTERPRISE_SecretDetection()
    request = {
        "messages": [
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": "AKIAIOSFODNN7EXAMPLE is the key",
                    },
                    {"type": "image_url", "image_url": {"url": "..."}},
                ],
            }
        ]
    }

    await guard.async_pre_call_hook(
        user_api_key_dict=user_api_key,
        cache=DualCache(),
        data=request,
        call_type="completion",
    )

    content_parts = request["messages"][0]["content"]
    redacted_text = content_parts[0]["text"]
    assert "AKIAIOSFODNN7EXAMPLE" not in redacted_text
    assert "[REDACTED]" in redacted_text
    # The image part must come through unchanged.
    assert content_parts[1] == {"type": "image_url", "image_url": {"url": "..."}}
@pytest.mark.asyncio
async def test_secret_detection_redacts_responses_api_input(user_api_key):
    """A secret in a string ``input`` is replaced with a redaction marker."""
    from enterprise.litellm_enterprise.enterprise_callbacks.secret_detection import (
        _ENTERPRISE_SecretDetection,
    )

    guard = _ENTERPRISE_SecretDetection()
    request = {"input": "leak: AKIAIOSFODNN7EXAMPLE"}

    await guard.async_pre_call_hook(
        user_api_key_dict=user_api_key,
        cache=DualCache(),
        data=request,
        call_type="moderation",
    )

    redacted_input = request["input"]
    assert "AKIAIOSFODNN7EXAMPLE" not in redacted_input
    assert "[REDACTED]" in redacted_input
# ── OpenAI Moderation ─────────────────────────────────────────────────────────
@pytest.mark.asyncio
async def test_openai_moderation_inspects_multimodal_content(monkeypatch, user_api_key):
    """The aggregated text passed to ``llm_router.amoderation`` must include
    list-format text parts and Responses-API input -- without this, multimodal
    content silently passed moderation."""
    from enterprise.enterprise_hooks.openai_moderation import (
        _ENTERPRISE_OpenAI_Moderation,
    )

    guard = _ENTERPRISE_OpenAI_Moderation()
    moderated_inputs = []

    class FakeModeration:
        # One result object whose ``flagged`` attribute is False.
        results = [type("R", (), {"flagged": False})()]

    async def fake_amoderation(model, input):
        moderated_inputs.append(input)
        return FakeModeration()

    fake_router = MagicMock()
    fake_router.amoderation = AsyncMock(side_effect=fake_amoderation)
    monkeypatch.setattr(
        "litellm.proxy.proxy_server.llm_router", fake_router, raising=False
    )

    multimodal_request = {
        "messages": [
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": "alpha "},
                    {"type": "image_url", "image_url": {"url": "..."}},
                    {"type": "text", "text": "beta"},
                ],
            }
        ]
    }
    await guard.async_moderation_hook(
        data=multimodal_request,
        user_api_key_dict=user_api_key,
        call_type="acompletion",
    )

    assert moderated_inputs == ["alpha beta"]
# ── Google Text Moderation ────────────────────────────────────────────────────
@pytest.mark.asyncio
async def test_google_text_moderation_inspects_multimodal_content(user_api_key):
    """The text passed to Google's moderation client must include list-format
    text parts."""
    from enterprise.enterprise_hooks.google_text_moderation import (
        _ENTERPRISE_GoogleTextModeration,
    )

    # ``__new__`` gives an instance without running the class's ``__init__``.
    guard = _ENTERPRISE_GoogleTextModeration.__new__(_ENTERPRISE_GoogleTextModeration)
    documents_built = []

    def fake_language_document(content, type_):
        documents_built.append(content)
        return MagicMock()

    moderation_result = MagicMock()
    moderation_result.moderation_categories = []

    guard.language_document = fake_language_document
    guard.moderate_text_request = MagicMock(return_value=MagicMock())
    guard.document_type = MagicMock()
    guard.client = MagicMock()
    guard.client.moderate_text = MagicMock(return_value=moderation_result)

    multimodal_request = {
        "messages": [
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": "hello "},
                    {"type": "image_url", "image_url": {"url": "..."}},
                    {"type": "text", "text": "world"},
                ],
            }
        ]
    }
    await guard.async_moderation_hook(
        data=multimodal_request,
        user_api_key_dict=user_api_key,
        call_type="acompletion",
    )

    assert documents_built == ["hello world"]

View File

@ -11,10 +11,14 @@ sys.path.insert(
import httpx
import pytest
from fastapi import FastAPI
from fastapi.testclient import TestClient
from prisma.errors import ClientNotConnectedError, HTTPClientClosedError, PrismaError
import litellm.proxy.health_endpoints._health_endpoints as _health_endpoints_module
from litellm.proxy._types import LitellmUserRoles, UserAPIKeyAuth
from litellm.proxy.auth.user_api_key_auth import user_api_key_auth
from litellm.proxy.health_endpoints._health_endpoints import (
_db_health_readiness_check,
get_callback_identifier,
@ -512,7 +516,7 @@ def proxy_client(monkeypatch):
Redis cache:
- If REDIS_HOST is set in environment, Redis cache will be automatically configured
- Cache configuration is included in /health/readiness endpoint response
- Cache diagnostics are included in the authenticated /health/readiness/details response
"""
client = create_proxy_test_client(monkeypatch)
with client:
@ -588,11 +592,7 @@ def test_health_liveness_endpoint(proxy_client):
def test_health_readiness(proxy_client):
"""
Test /health/readiness endpoint.
Database and Redis are optional - the endpoint should work whether they're available or not.
If DATABASE_URL is set, the endpoint will check database connectivity.
If REDIS_HOST is set, the endpoint will report cache status.
If neither is set, the endpoint should still return a valid health status.
Database and Redis are optional - the public endpoint should work whether they're available or not.
"""
# Measure the time taken for the health check call
start_time = time.perf_counter()
@ -614,40 +614,57 @@ def test_health_readiness(proxy_client):
duration_ms < 500
), f"Health check took {duration_ms:.2f}ms, expected < 500ms for readiness endpoint"
# Assert response contains expected fields
# Assert response contains only low-detail public probe fields
response_data = response.json()
assert "status" in response_data, "Response should contain 'status' field"
assert (
"litellm_version" in response_data
), "Response should contain 'litellm_version' field"
# Display all health endpoint response fields (matches what /health/readiness returns)
print("\n" + "-" * 60)
print("HEALTH ENDPOINT RESPONSE")
print("-" * 60)
print(f"Status: {response_data.get('status', 'unknown')}")
print(f"Database: {response_data.get('db', 'not reported')}")
print(f"LiteLLM Version: {response_data.get('litellm_version', 'unknown')}")
print(f"Success Callbacks: {response_data.get('success_callbacks', [])}")
print(f"Cache: {response_data.get('cache', 'none')}")
print(
f"Use AioHTTP Transport: {response_data.get('use_aiohttp_transport', 'unknown')}"
)
assert response_data == {"status": "healthy"}
print(f"Response time: {duration_ms:.2f}ms")
# If database status is reported, verify it's a valid status
# Database may be "connected", "disconnected", "unknown", or "Not connected" (when prisma_client is None)
if "db" in response_data:
db_status = response_data["db"]
# Database status can be any of these valid states
assert db_status in [
"connected",
"disconnected",
"unknown",
"Not connected",
], f"Unexpected db status: {db_status}"
print("=" * 60 + "\n")
def test_health_readiness_details_returns_diagnostic_fields(monkeypatch):
    """
    /health/readiness/details still serves the diagnostic payload once the
    auth dependency resolves to a proxy admin.
    """

    def _admin_identity():
        # Stand-in for the real auth dependency: always resolves to an admin.
        return UserAPIKeyAuth(user_role=LitellmUserRoles.PROXY_ADMIN)

    app = FastAPI()
    app.include_router(_health_endpoints_module.router)
    app.dependency_overrides[user_api_key_auth] = _admin_identity
    client = TestClient(app)

    # Run the endpoint with no database client configured.
    monkeypatch.setattr("litellm.proxy.proxy_server.prisma_client", None)

    response = client.get("/health/readiness/details")
    assert response.status_code == 200, response.text

    payload = response.json()
    assert payload["status"] == "healthy"
    for diagnostic_key in ("litellm_version", "success_callbacks", "cache"):
        assert diagnostic_key in payload
def test_health_readiness_allows_explicit_legacy_public_details(monkeypatch):
    """
    With `allow_public_health_readiness_details` enabled in general_settings,
    the unauthenticated /health/readiness route returns the detailed payload.
    """
    app = FastAPI()
    app.include_router(_health_endpoints_module.router)
    client = TestClient(app)

    # No database client, and the explicit opt-in flag switched on.
    opt_in_settings = {"allow_public_health_readiness_details": True}
    monkeypatch.setattr("litellm.proxy.proxy_server.prisma_client", None)
    monkeypatch.setattr(
        "litellm.proxy.proxy_server.general_settings", opt_in_settings
    )

    response = client.get("/health/readiness")
    assert response.status_code == 200, response.text

    payload = response.json()
    assert payload["status"] == "healthy"
    for diagnostic_key in ("litellm_version", "success_callbacks", "cache"):
        assert diagnostic_key in payload
def test_get_callback_identifier_string_and_object_with_callback_name():
@ -1503,8 +1520,7 @@ async def test_health_readiness_returns_503_when_db_disconnected():
result = await health_readiness(response=response)
assert response.status_code == 503
assert result["db"] == "disconnected"
assert result["status"] == "healthy" # body shape unchanged for back-compat
assert result == {"status": "healthy"}
@pytest.mark.asyncio
@ -1527,7 +1543,7 @@ async def test_health_readiness_returns_200_when_db_connected():
result = await health_readiness(response=response)
assert response.status_code == 200
assert result["db"] == "connected"
assert result == {"status": "healthy"}
@pytest.mark.asyncio
@ -1546,7 +1562,7 @@ async def test_health_readiness_returns_200_when_no_db_configured():
result = await health_readiness(response=response)
assert response.status_code == 200
assert result["db"] == "Not connected"
assert result == {"status": "healthy"}
def test_clean_endpoint_data_strips_credentials_keeps_routing_fields():

View File

@ -13,7 +13,13 @@ from fastapi.testclient import TestClient
sys.path.insert(0, os.path.abspath("../../../.."))
from litellm.proxy import proxy_server
from litellm.proxy._types import LitellmUserRoles, UserAPIKeyAuth
from litellm.proxy.management_endpoints.router_settings_endpoints import (
get_router_settings,
)
from litellm.proxy.proxy_server import app
from litellm.router import Router
client = TestClient(app)
@ -71,3 +77,48 @@ class TestRouterSettingsEndpoints:
assert "options" in routing_strategy_field
assert isinstance(routing_strategy_field["options"], list)
assert len(routing_strategy_field["options"]) > 0
@pytest.mark.asyncio
async def test_get_router_settings_includes_routing_groups_from_live_router(
    self, monkeypatch
):
    """get_router_settings reports the routing_groups held by the live router."""
    expected_groups = [
        {
            "group_name": "test-group",
            "models": ["latency-model"],
            "routing_strategy": "latency-based-routing",
            "routing_strategy_args": {},
        }
    ]
    deployment = {
        "model_name": "latency-model",
        "litellm_params": {
            "model": "openai/gpt-4o",
            "api_key": "sk-x",
        },
    }
    live_router = Router(model_list=[deployment], routing_groups=expected_groups)
    monkeypatch.setattr(proxy_server, "llm_router", live_router)

    # Replace config loading with a stub that returns an empty config.
    async def fake_get_config(self, config_file_path=None):
        return {}

    monkeypatch.setattr(
        proxy_server.ProxyConfig, "get_config", fake_get_config, raising=True
    )

    caller = UserAPIKeyAuth(user_role=LitellmUserRoles.PROXY_ADMIN, api_key="sk-x")
    settings = await get_router_settings(user_api_key_dict=caller)

    assert settings.current_values.get("routing_groups") == expected_groups

    routing_groups_field = next(
        field for field in settings.fields if field.field_name == "routing_groups"
    )
    assert routing_groups_field.field_value == expected_groups

View File

@ -1,18 +1,5 @@
import json
import os
import sys
import pytest
from fastapi.testclient import TestClient
sys.path.insert(
0, os.path.abspath("../../..")
) # Adds the parent directory to the system path
import pytest
from fastapi import FastAPI
from fastapi.responses import JSONResponse
from fastapi.testclient import TestClient
import litellm
@ -21,7 +8,7 @@ from litellm.proxy.middleware.prometheus_auth_middleware import PrometheusAuthMi
# Fake auth functions to simulate valid and invalid auth behavior.
async def fake_valid_auth(request, api_key):
async def fake_valid_auth(request, api_key, **kwargs):
# Simulate valid authentication: do nothing (i.e. pass)
return
@ -35,15 +22,11 @@ async def fake_valid_auth_reads_body(request, api_key, **kwargs):
return
async def fake_invalid_auth(request, api_key):
print("running fake invalid auth", request, api_key)
async def fake_invalid_auth(request, api_key, **kwargs):
# Simulate invalid auth by raising an exception.
raise Exception("Invalid API key")
from litellm.proxy.auth.user_api_key_auth import user_api_key_auth
@pytest.fixture
def app_with_middleware():
"""Create a FastAPI app with the PrometheusAuthMiddleware and dummy endpoints."""
@ -98,7 +81,7 @@ def test_valid_auth_metrics(app_with_middleware, monkeypatch):
Test that a request to /metrics (and /metrics/) with valid auth headers passes.
"""
# Enable auth on metrics endpoints.
litellm.require_auth_for_metrics_endpoint = True
monkeypatch.setattr(litellm, "require_auth_for_metrics_endpoint", True)
# Patch the auth function to simulate a valid authentication.
monkeypatch.setattr(
"litellm.proxy.middleware.prometheus_auth_middleware.user_api_key_auth",
@ -123,7 +106,7 @@ def test_invalid_auth_metrics(app_with_middleware, monkeypatch):
"""
Test that a request to /metrics with invalid auth headers fails with a 401.
"""
litellm.require_auth_for_metrics_endpoint = True
monkeypatch.setattr(litellm, "require_auth_for_metrics_endpoint", True)
# Patch the auth function to simulate a failed authentication.
monkeypatch.setattr(
"litellm.proxy.middleware.prometheus_auth_middleware.user_api_key_auth",
@ -138,12 +121,48 @@ def test_invalid_auth_metrics(app_with_middleware, monkeypatch):
assert "Unauthorized access to metrics endpoint" in response.text
def test_metrics_auth_uses_real_auth_when_route_is_public(
    app_with_middleware, monkeypatch
):
    """
    Regression: with require_auth_for_metrics_endpoint enabled and the auth
    function left unpatched, an unauthenticated GET /metrics is rejected
    with 401.
    """
    proxy_server_overrides = {
        "litellm.proxy.proxy_server.master_key": "sk-master",
        "litellm.proxy.proxy_server.general_settings": {},
    }
    monkeypatch.setattr(litellm, "require_auth_for_metrics_endpoint", True)
    for target, value in proxy_server_overrides.items():
        monkeypatch.setattr(target, value)

    response = TestClient(app_with_middleware).get("/metrics")

    assert response.status_code == 401, response.text
    assert "Unauthorized access to metrics endpoint" in response.text
def test_metrics_auth_is_required_by_default(app_with_middleware, monkeypatch):
    """
    Without touching require_auth_for_metrics_endpoint, a request whose auth
    check fails must be answered with 401 on /metrics.
    """
    auth_target = (
        "litellm.proxy.middleware.prometheus_auth_middleware.user_api_key_auth"
    )
    # Any auth attempt raises, simulating an invalid key.
    monkeypatch.setattr(auth_target, fake_invalid_auth)

    response = TestClient(app_with_middleware).get("/metrics")

    assert response.status_code == 401, response.text
    assert "Unauthorized access to metrics endpoint" in response.text
def test_no_auth_metrics_when_disabled(app_with_middleware, monkeypatch):
"""
Test that when require_auth_for_metrics_endpoint is False, requests to /metrics
bypass the auth check.
"""
litellm.require_auth_for_metrics_endpoint = False
monkeypatch.setattr(litellm, "require_auth_for_metrics_endpoint", False)
# To ensure auth is not run, patch the auth function with one that will raise if called.
def should_not_be_called(*args, **kwargs):
@ -160,11 +179,11 @@ def test_no_auth_metrics_when_disabled(app_with_middleware, monkeypatch):
assert response.json() == {"msg": "metrics OK"}
def test_non_metrics_requests_pass_through(app_with_middleware):
def test_non_metrics_requests_pass_through(app_with_middleware, monkeypatch):
"""
Test that non-metrics endpoints pass through the middleware unaffected.
"""
litellm.require_auth_for_metrics_endpoint = True
monkeypatch.setattr(litellm, "require_auth_for_metrics_endpoint", True)
client = TestClient(app_with_middleware)
@ -182,7 +201,7 @@ def test_non_metrics_requests_dont_trigger_auth(app_with_middleware, monkeypatch
Test that non-metrics requests never trigger auth, even when auth is enabled
and the auth function would reject the request.
"""
litellm.require_auth_for_metrics_endpoint = True
monkeypatch.setattr(litellm, "require_auth_for_metrics_endpoint", True)
def should_not_be_called(*args, **kwargs):
raise Exception("Auth should not be called for non-metrics requests")

View File

@ -91,6 +91,19 @@ def test_get_litellm_model_cost_map_returns_cost_map():
)
def test_public_ai_hub_info_is_public_by_default(monkeypatch):
    """GET /public/model_hub/info answers 200 without credentials when
    general_settings is empty, even though a master key is configured."""
    hub_app = FastAPI()
    hub_app.include_router(router)

    monkeypatch.setattr("litellm.proxy.proxy_server.general_settings", {})
    monkeypatch.setattr("litellm.proxy.proxy_server.master_key", "sk-master")

    response = TestClient(hub_app).get("/public/model_hub/info")

    assert response.status_code == 200, response.text
def test_watsonx_provider_fields():
"""Test that Watsonx provider has all required credential fields including multiple auth options."""
app = FastAPI()
@ -166,9 +179,9 @@ def test_anthropic_provider_fields_support_byok():
"Anthropic api_key must be optional so admins can configure BYOK models "
"without entering a key. See BYOK tutorial."
)
assert fields_by_key["api_key"].get("tooltip"), (
"Anthropic api_key must have a tooltip explaining the BYOK use case."
)
assert fields_by_key["api_key"].get(
"tooltip"
), "Anthropic api_key must have a tooltip explaining the BYOK use case."
assert "api_base" in fields_by_key, (
"Anthropic provider form must expose api_base so cloud customers "
"can override the upstream URL without env var access."
@ -176,16 +189,16 @@ def test_anthropic_provider_fields_support_byok():
api_base_field = fields_by_key["api_base"]
assert api_base_field["required"] is False
assert api_base_field["field_type"] == "text"
assert api_base_field.get("tooltip"), (
"api_base should have a tooltip explaining it is optional."
)
assert api_base_field.get(
"tooltip"
), "api_base should have a tooltip explaining it is optional."
# UI forms render fields in credential_fields order; api_base should come first
# so an admin sees the URL override before the key field.
field_order = [f["key"] for f in anthropic["credential_fields"]]
assert field_order.index("api_base") < field_order.index("api_key"), (
"api_base must appear before api_key in credential_fields (matches AI21 and ANTHROPIC_TEXT convention)."
)
assert field_order.index("api_base") < field_order.index(
"api_key"
), "api_base must appear before api_key in credential_fields (matches AI21 and ANTHROPIC_TEXT convention)."
def test_public_model_hub_with_healthy_model():

View File

@ -0,0 +1,312 @@
"""Proxy strips client-supplied pricing parameters from request bodies.
`litellm.completion` accepts pricing fields (`input_cost_per_token`,
`output_cost_per_token`, the rest of `CustomPricingLiteLLMParams`,
`metadata.model_info`) as part of its kwarg surface. On direct SDK use that
is intentional. On the proxy, those same fields would let any caller rewrite
their own per-request cost and via `litellm.register_model` mutate
`litellm.model_cost` for every subsequent caller in the worker. The proxy
strips them at the boundary; an opt-in key/team flag preserves the override
for operators who actually want it.
"""
import os
import sys
from unittest.mock import MagicMock
import pytest
from fastapi import Request
import litellm
from litellm.proxy._types import UserAPIKeyAuth
from litellm.proxy.litellm_pre_call_utils import (
_CLIENT_PRICING_CONTROL_FIELDS,
_CLIENT_PRICING_METADATA_FIELDS,
_strip_client_pricing_overrides,
add_litellm_data_to_request,
)
from litellm.types.utils import CustomPricingLiteLLMParams
sys.path.insert(0, os.path.abspath("../../.."))
def _make_request_mock() -> Request:
    """Return a MagicMock shaped like a POST request to /v1/chat/completions.

    The mock exposes the attributes set below: ``url`` (with ``path`` and a
    ``str()`` value), ``method``, ``query_params``, ``headers`` and
    ``client.host``.
    """
    request_mock = MagicMock(spec=Request)
    # Install the replacement ``url`` mock *before* configuring it. Setting
    # ``url.path`` first and then reassigning ``url`` would throw the path
    # away and leave ``request.url.path`` as an auto-created MagicMock.
    request_mock.url = MagicMock()
    request_mock.url.path = "/v1/chat/completions"
    request_mock.url.__str__.return_value = "http://localhost/v1/chat/completions"
    request_mock.method = "POST"
    request_mock.query_params = {}
    request_mock.headers = {"Content-Type": "application/json"}
    request_mock.client = MagicMock()
    request_mock.client.host = "127.0.0.1"
    return request_mock
def _user_api_key_auth(metadata=None, team_metadata=None) -> UserAPIKeyAuth:
    """Build a UserAPIKeyAuth with fixed budget/spend values.

    ``metadata`` and ``team_metadata`` fall back to an empty dict when a
    falsy value is passed.
    """
    key_metadata = metadata if metadata else {}
    owning_team_metadata = team_metadata if team_metadata else {}
    auth_fields = {
        "api_key": "hashed-key",
        "metadata": key_metadata,
        "team_metadata": owning_team_metadata,
        "spend": 0.0,
        "max_budget": 100.0,
        "model_max_budget": {},
        "team_spend": 0.0,
        "team_max_budget": 200.0,
    }
    return UserAPIKeyAuth(**auth_fields)
class TestStripClientPricingOverrides:
    """Unit tests for ``_strip_client_pricing_overrides`` and its field sets."""

    def test_pricing_field_set_tracks_pydantic_model(self):
        # The strip set is built from the model so additions are picked up
        # automatically — this test guards against the model and the strip
        # set drifting apart if someone replaces the auto-derivation later.
        assert _CLIENT_PRICING_CONTROL_FIELDS == frozenset(
            CustomPricingLiteLLMParams.model_fields.keys()
        )
        # Sanity: the obvious top-level pricing fields are in the set.
        for field in (
            "input_cost_per_token",
            "output_cost_per_token",
            "input_cost_per_second",
            "cache_creation_input_token_cost",
        ):
            assert field in _CLIENT_PRICING_CONTROL_FIELDS

    def test_root_pricing_fields_dropped(self):
        """Top-level pricing keys are removed; everything else is kept."""
        data = {
            "model": "gpt-4",
            "messages": [{"role": "user", "content": "hi"}],
            "input_cost_per_token": 0.0,
            "output_cost_per_token": 0.0,
            "cache_creation_input_token_cost": 0.0,
        }
        _strip_client_pricing_overrides(data)
        assert data == {
            "model": "gpt-4",
            "messages": [{"role": "user", "content": "hi"}],
        }

    def test_metadata_model_info_dropped(self):
        """``model_info`` is removed from both metadata containers."""
        data = {
            "model": "gpt-4",
            "metadata": {
                "user_session": "keep-me",
                "model_info": {"input_cost_per_token": 0.0},
            },
            "litellm_metadata": {
                "model_info": {"output_cost_per_token": 0.0},
            },
        }
        _strip_client_pricing_overrides(data)
        assert data["metadata"] == {"user_session": "keep-me"}
        assert data["litellm_metadata"] == {}

    def test_non_pricing_fields_untouched(self):
        """A body without pricing fields comes back unchanged."""
        data = {
            "model": "gpt-4",
            "temperature": 0.7,
            "max_tokens": 100,
            "tools": [{"type": "function"}],
            "metadata": {"trace_id": "abc"},
        }
        snapshot = {
            "model": "gpt-4",
            "temperature": 0.7,
            "max_tokens": 100,
            "tools": [{"type": "function"}],
            "metadata": {"trace_id": "abc"},
        }
        _strip_client_pricing_overrides(data)
        assert data == snapshot

    def test_metadata_strip_handles_non_dict_metadata(self):
        # Defensive — Pydantic validation would normally reject non-dict
        # metadata, but the strip mustn't crash if a malformed body sneaks in.
        _strip_client_pricing_overrides({"metadata": "not-a-dict"})
        _strip_client_pricing_overrides({"metadata": None})
        _strip_client_pricing_overrides({"litellm_metadata": ["a", "b"]})

    def test_metadata_field_set_contains_model_info(self):
        assert "model_info" in _CLIENT_PRICING_METADATA_FIELDS

    def test_strip_emits_debug_log_listing_dropped_fields(self, caplog):
        # Operators need a paper trail so they can diagnose why a previously
        # working override stopped applying after the strip landed.
        import logging

        from litellm._logging import verbose_proxy_logger

        # caplog.at_level(..., logger=...) sets the named logger to DEBUG and
        # restores its previous level on exit. A separate, permanent
        # setLevel() call would leak DEBUG into every test that runs later.
        with caplog.at_level(logging.DEBUG, logger=verbose_proxy_logger.name):
            _strip_client_pricing_overrides(
                {
                    "model": "gpt-4",
                    "input_cost_per_token": 0.0,
                    "metadata": {"model_info": {"output_cost_per_token": 0.0}},
                }
            )
        log_text = " ".join(record.getMessage() for record in caplog.records)
        assert "input_cost_per_token" in log_text
        assert "metadata.model_info" in log_text
        assert "allow_client_pricing_override" in log_text

    def test_strip_does_not_log_when_no_fields_present(self, caplog):
        # No-op strips must stay silent so the log isn't filled with noise on
        # every legitimate request.
        import logging

        from litellm._logging import verbose_proxy_logger

        # The level is scoped to the with-block; see the sibling test above
        # in this class for why no explicit setLevel() is used.
        with caplog.at_level(logging.DEBUG, logger=verbose_proxy_logger.name):
            _strip_client_pricing_overrides({"model": "gpt-4", "temperature": 0.7})
        assert not any(
            "pricing" in record.getMessage().lower() for record in caplog.records
        )
@pytest.mark.asyncio
async def test_add_litellm_data_to_request_strips_root_pricing_fields():
    """Top-level pricing keys do not survive add_litellm_data_to_request."""
    request_body = {
        "model": "gpt-4",
        "messages": [{"role": "user", "content": "hi"}],
        "input_cost_per_token": 0.0,
        "output_cost_per_token": 0.0,
    }

    result = await add_litellm_data_to_request(
        data=request_body,
        request=_make_request_mock(),
        user_api_key_dict=_user_api_key_auth(),
        proxy_config=MagicMock(),
        general_settings={},
        version="test-version",
    )

    for pricing_key in ("input_cost_per_token", "output_cost_per_token"):
        assert pricing_key not in result
@pytest.mark.asyncio
async def test_add_litellm_data_to_request_strips_metadata_model_info():
    """``metadata.model_info`` is absent from the processed request."""
    request_body = {
        "model": "gpt-4",
        "messages": [{"role": "user", "content": "hi"}],
        "metadata": {"model_info": {"input_cost_per_token": 0.0}},
    }

    result = await add_litellm_data_to_request(
        data=request_body,
        request=_make_request_mock(),
        user_api_key_dict=_user_api_key_auth(),
        proxy_config=MagicMock(),
        general_settings={},
        version="test-version",
    )

    resulting_metadata = result.get("metadata", {})
    assert "model_info" not in resulting_metadata
@pytest.mark.asyncio
async def test_add_litellm_data_to_request_skips_strip_with_key_opt_in():
    """A key whose metadata sets ``allow_client_pricing_override`` keeps its
    client-supplied pricing fields."""
    opted_in_key = _user_api_key_auth(
        metadata={"allow_client_pricing_override": True}
    )
    request_body = {
        "model": "gpt-4",
        "messages": [{"role": "user", "content": "hi"}],
        "input_cost_per_token": 0.0001,
        "metadata": {"model_info": {"output_cost_per_token": 0.0002}},
    }

    result = await add_litellm_data_to_request(
        data=request_body,
        request=_make_request_mock(),
        user_api_key_dict=opted_in_key,
        proxy_config=MagicMock(),
        general_settings={},
        version="test-version",
    )

    assert result["input_cost_per_token"] == 0.0001
    assert result["metadata"]["model_info"] == {"output_cost_per_token": 0.0002}
@pytest.mark.asyncio
async def test_add_litellm_data_to_request_strips_json_string_litellm_metadata():
    """A JSON-encoded ``litellm_metadata`` string must come back as a dict
    with ``model_info`` removed.

    If the strip ran while the value was still a string, its dict-only guard
    would skip the field and ``model_info`` would survive.
    """
    import json

    encoded_metadata = json.dumps({"model_info": {"input_cost_per_token": 0.0}})
    request_body = {
        "model": "gpt-4",
        "messages": [{"role": "user", "content": "hi"}],
        "litellm_metadata": encoded_metadata,
    }

    result = await add_litellm_data_to_request(
        data=request_body,
        request=_make_request_mock(),
        user_api_key_dict=_user_api_key_auth(),
        proxy_config=MagicMock(),
        general_settings={},
        version="test-version",
    )

    decoded_metadata = result.get("litellm_metadata")
    assert isinstance(decoded_metadata, dict)
    assert "model_info" not in decoded_metadata
@pytest.mark.asyncio
async def test_add_litellm_data_to_request_skips_strip_with_team_opt_in():
    """A team whose metadata sets ``allow_client_pricing_override`` keeps its
    client-supplied pricing fields."""
    opted_in_team_key = _user_api_key_auth(
        team_metadata={"allow_client_pricing_override": True}
    )
    request_body = {
        "model": "gpt-4",
        "messages": [{"role": "user", "content": "hi"}],
        "input_cost_per_token": 0.0001,
    }

    result = await add_litellm_data_to_request(
        data=request_body,
        request=_make_request_mock(),
        user_api_key_dict=opted_in_team_key,
        proxy_config=MagicMock(),
        general_settings={},
        version="test-version",
    )

    assert result["input_cost_per_token"] == 0.0001
@pytest.mark.asyncio
async def test_global_model_cost_unmutated_after_stripped_request(monkeypatch):
    """After a stripped request, ``litellm.model_cost`` must not carry the
    caller's submitted pricing for the model. The mutation only happens when
    the pricing fields reach ``litellm.completion``; the strip prevents that."""
    import copy

    # Deep copy on purpose: a shallow ``dict(...)`` copy shares the per-model
    # dicts with the live map, so an in-place change to an existing entry
    # would show up on both sides and the equality check below would pass
    # vacuously.
    snapshot = copy.deepcopy(litellm.model_cost)
    data = {
        "model": "test-pricing-canary-model",
        "messages": [{"role": "user", "content": "hi"}],
        "input_cost_per_token": 0.0,
        "output_cost_per_token": 0.0,
    }
    await add_litellm_data_to_request(
        data=data,
        request=_make_request_mock(),
        user_api_key_dict=_user_api_key_auth(),
        proxy_config=MagicMock(),
        general_settings={},
        version="test-version",
    )
    # The strip prevents the pricing fields from ever reaching the path that
    # would mutate the global model_cost map.
    assert "test-pricing-canary-model" not in litellm.model_cost
    # And no other entries were mutated as a side effect.
    assert litellm.model_cost == snapshot

View File

@ -0,0 +1,34 @@
from fastapi.routing import APIRoute
from litellm.proxy.auth.user_api_key_auth import user_api_key_auth
from litellm.proxy.common_utils.debug_utils import router as debug_router
from litellm.proxy.spend_tracking.spend_management_endpoints import (
router as spend_router,
)
def _get_route_dependency_calls(router, path: str, method: str):
    """Return the dependency callables of the first matching APIRoute.

    A route matches when it is an ``APIRoute`` whose ``path`` equals *path*
    and whose ``methods`` contain *method*.

    Raises:
        AssertionError: if no route on *router* matches.
    """
    for candidate in router.routes:
        # The type check comes first so the attribute lookups below only run
        # on APIRoute instances.
        if not isinstance(candidate, APIRoute):
            continue
        if candidate.path != path:
            continue
        if method not in candidate.methods:
            continue
        return [dep.call for dep in candidate.dependant.dependencies]
    raise AssertionError(f"Route {method} {path} not found")
def test_sensitive_debug_routes_require_auth_dependency():
    """Each listed debug route declares ``user_api_key_auth`` as a dependency."""
    guarded_routes = (
        ("/debug/asyncio-tasks", "GET"),
        ("/otel-spans", "GET"),
    )
    for route_path, http_method in guarded_routes:
        dependency_calls = _get_route_dependency_calls(
            debug_router, route_path, http_method
        )
        assert user_api_key_auth in dependency_calls
def test_provider_budgets_requires_auth_dependency():
    """GET /provider/budgets declares ``user_api_key_auth`` as a dependency."""
    dependency_calls = _get_route_dependency_calls(
        spend_router, "/provider/budgets", "GET"
    )
    assert user_api_key_auth in dependency_calls

View File

@ -868,6 +868,7 @@ class TestProxySettingEndpoints:
mock_db_record = MagicMock()
mock_db_record.ui_settings = {
"disable_model_add_for_internal_users": True,
"require_auth_for_public_ai_hub": True,
"unexpected_flag": True,
}
mock_prisma.db.litellm_uisettings.find_unique = AsyncMock(
@ -880,10 +881,12 @@ class TestProxySettingEndpoints:
assert response.status_code == 200
data = response.json()
assert data["values"]["disable_model_add_for_internal_users"] is True
assert data["values"]["require_auth_for_public_ai_hub"] is True
assert "unexpected_flag" not in data["values"]
assert (
"disable_model_add_for_internal_users" in data["field_schema"]["properties"]
)
assert "require_auth_for_public_ai_hub" in data["field_schema"]["properties"]
mock_prisma.db.litellm_uisettings.find_unique.assert_called_once_with(
where={"id": "ui_settings"}
)
@ -1070,6 +1073,43 @@ class TestProxySettingEndpoints:
assert "unsupported_flag" not in stored_settings
assert stored_settings["disable_model_add_for_internal_users"] is False
def test_update_ui_settings_preserves_public_ai_hub_auth_flag(
    self, mock_auth, monkeypatch
):
    """PATCH /update/ui_settings accepts and stores require_auth_for_public_ai_hub."""
    from unittest.mock import AsyncMock, MagicMock

    from litellm.proxy._types import UserAPIKeyAuth
    from litellm.proxy.auth.user_api_key_auth import user_api_key_auth

    admin_auth = UserAPIKeyAuth(
        user_id="test-user-123",
        user_role=LitellmUserRoles.PROXY_ADMIN,
    )
    app.dependency_overrides[user_api_key_auth] = lambda: admin_auth

    monkeypatch.setattr("litellm.proxy.proxy_server.store_model_in_db", True)

    # Stub the UI-settings table: no existing row, and upsert is recorded.
    prisma_stub = MagicMock()
    ui_settings_table = prisma_stub.db.litellm_uisettings
    ui_settings_table.upsert = AsyncMock()
    ui_settings_table.find_unique = AsyncMock(return_value=None)
    monkeypatch.setattr("litellm.proxy.proxy_server.prisma_client", prisma_stub)

    try:
        response = client.patch(
            "/update/ui_settings",
            json={"require_auth_for_public_ai_hub": True},
        )
    finally:
        app.dependency_overrides.clear()

    assert response.status_code == 200
    body = response.json()
    assert body["status"] == "success"
    assert body["settings"]["require_auth_for_public_ai_hub"] is True

    # The flag must also be present in the JSON blob handed to upsert.
    upsert_call = ui_settings_table.upsert.call_args
    persisted = json.loads(upsert_call.kwargs["data"]["create"]["ui_settings"])
    assert persisted["require_auth_for_public_ai_hub"] is True
def test_update_ui_settings_persists_forward_llm_provider_auth_headers(
self, mock_auth, monkeypatch
):
@ -1147,6 +1187,43 @@ class TestProxySettingEndpoints:
assert response.status_code == 200
assert general_settings.get("forward_llm_provider_auth_headers") is True
def test_update_ui_settings_syncs_public_health_readiness_details_to_general_settings(
    self, mock_auth, monkeypatch
):
    """PATCH /update/ui_settings copies allow_public_health_readiness_details
    into the proxy's general_settings dict."""
    from unittest.mock import AsyncMock, MagicMock

    from litellm.proxy._types import UserAPIKeyAuth
    from litellm.proxy.auth.user_api_key_auth import user_api_key_auth

    admin_auth = UserAPIKeyAuth(
        user_id="test-user-123",
        user_role=LitellmUserRoles.PROXY_ADMIN,
    )
    app.dependency_overrides[user_api_key_auth] = lambda: admin_auth

    monkeypatch.setattr("litellm.proxy.proxy_server.store_model_in_db", True)

    # Start from an empty settings dict so the flag can only come from the
    # endpoint under test.
    live_general_settings: dict = {}
    monkeypatch.setattr(
        "litellm.proxy.proxy_server.general_settings", live_general_settings
    )

    # Stub the UI-settings table: no existing row, and upsert is recorded.
    prisma_stub = MagicMock()
    ui_settings_table = prisma_stub.db.litellm_uisettings
    ui_settings_table.upsert = AsyncMock()
    ui_settings_table.find_unique = AsyncMock(return_value=None)
    monkeypatch.setattr("litellm.proxy.proxy_server.prisma_client", prisma_stub)

    try:
        response = client.patch(
            "/update/ui_settings",
            json={"allow_public_health_readiness_details": True},
        )
    finally:
        app.dependency_overrides.clear()

    assert response.status_code == 200
    assert (
        live_general_settings.get("allow_public_health_readiness_details") is True
    )
def test_update_ui_settings_persists_and_syncs_disable_key_generate_for_org_admin(
self, mock_auth, monkeypatch
):

View File

@ -8,12 +8,12 @@
"name": "litellm-dashboard",
"version": "0.1.0",
"dependencies": {
"@anthropic-ai/sdk": "0.54.0",
"@anthropic-ai/sdk": "0.92.0",
"@headlessui/tailwindcss": "0.2.2",
"@heroicons/react": "1.0.6",
"@remixicon/react": "4.9.0",
"@tanstack/react-pacer": "0.2.0",
"@tanstack/react-query": "5.90.20",
"@tanstack/react-query": "5.100.7",
"@tanstack/react-table": "8.21.3",
"@tremor/react": "3.18.7",
"@types/papaparse": "5.5.2",
@ -23,18 +23,18 @@
"jwt-decode": "4.0.0",
"lucide-react": "0.513.0",
"moment": "2.30.1",
"next": "16.1.7",
"next": "16.2.4",
"openai": "4.104.0",
"papaparse": "5.5.3",
"react": "18.3.1",
"react-copy-to-clipboard": "5.1.0",
"react-copy-to-clipboard": "5.1.1",
"react-dom": "18.3.1",
"react-json-view-lite": "2.5.0",
"react-markdown": "9.1.0",
"react-syntax-highlighter": "15.6.6",
"remark-gfm": "4.0.1",
"tailwind-merge": "3.4.0",
"uuid": "11.1.0"
"uuid": "14.0.0"
},
"devDependencies": {
"@playwright/test": "1.58.1",
@ -61,7 +61,7 @@
"eslint-plugin-unused-imports": "4.3.0",
"jsdom": "27.4.0",
"knip": "5.83.1",
"postcss": "8.5.6",
"postcss": "8.5.13",
"prettier": "3.2.5",
"tailwindcss": "3.4.19",
"typescript": "5.9.3",
@ -69,7 +69,7 @@
"vitest": "3.2.4"
},
"engines": {
"node": ">=18.17.0",
"node": ">=20.9.0",
"npm": ">=8.3.0"
}
},
@ -211,12 +211,23 @@
}
},
"node_modules/@anthropic-ai/sdk": {
"version": "0.54.0",
"resolved": "https://registry.npmjs.org/@anthropic-ai/sdk/-/sdk-0.54.0.tgz",
"integrity": "sha512-xyoCtHJnt/qg5GG6IgK+UJEndz8h8ljzt/caKXmq3LfBF81nC/BW6E4x2rOWCZcvsLyVW+e8U5mtIr6UCE/kJw==",
"version": "0.92.0",
"resolved": "https://registry.npmjs.org/@anthropic-ai/sdk/-/sdk-0.92.0.tgz",
"integrity": "sha512-l653JFC83wCglH8H83t1xpgDurCyPyslYW1maPRdCsfuNuGbLvQjQ81sWd3Go3LWRm0jNspzAhuqAYV8r9joSw==",
"license": "MIT",
"dependencies": {
"json-schema-to-ts": "^3.1.1"
},
"bin": {
"anthropic-ai-sdk": "bin/cli"
},
"peerDependencies": {
"zod": "^3.25.0 || ^4.0.0"
},
"peerDependenciesMeta": {
"zod": {
"optional": true
}
}
},
"node_modules/@asamuzakjp/css-color": {
@ -1817,9 +1828,9 @@
}
},
"node_modules/@next/env": {
"version": "16.1.7",
"resolved": "https://registry.npmjs.org/@next/env/-/env-16.1.7.tgz",
"integrity": "sha512-rJJbIdJB/RQr2F1nylZr/PJzamvNNhfr3brdKP6s/GW850jbtR70QlSfFselvIBbcPUOlQwBakexjFzqLzF6pg==",
"version": "16.2.4",
"resolved": "https://registry.npmjs.org/@next/env/-/env-16.2.4.tgz",
"integrity": "sha512-dKkkOzOSwFYe5RX6y26fZgkSpVAlIOJKQHIiydQcrWH6y/97+RceSOAdjZ14Qa3zLduVUy0TXcn+EiM6t4rPgw==",
"license": "MIT"
},
"node_modules/@next/eslint-plugin-next": {
@ -1833,9 +1844,9 @@
}
},
"node_modules/@next/swc-darwin-arm64": {
"version": "16.1.7",
"resolved": "https://registry.npmjs.org/@next/swc-darwin-arm64/-/swc-darwin-arm64-16.1.7.tgz",
"integrity": "sha512-b2wWIE8sABdyafc4IM8r5Y/dS6kD80JRtOGrUiKTsACFQfWWgUQ2NwoUX1yjFMXVsAwcQeNpnucF2ZrujsBBPg==",
"version": "16.2.4",
"resolved": "https://registry.npmjs.org/@next/swc-darwin-arm64/-/swc-darwin-arm64-16.2.4.tgz",
"integrity": "sha512-OXTFFox5EKN1Ym08vfrz+OXxmCcEjT4SFMbNRsWZE99dMqt2Kcusl5MqPXcW232RYkMLQTy0hqgAMEsfEd/l2A==",
"cpu": [
"arm64"
],
@ -1849,9 +1860,9 @@
}
},
"node_modules/@next/swc-darwin-x64": {
"version": "16.1.7",
"resolved": "https://registry.npmjs.org/@next/swc-darwin-x64/-/swc-darwin-x64-16.1.7.tgz",
"integrity": "sha512-zcnVaaZulS1WL0Ss38R5Q6D2gz7MtBu8GZLPfK+73D/hp4GFMrC2sudLky1QibfV7h6RJBJs/gOFvYP0X7UVlQ==",
"version": "16.2.4",
"resolved": "https://registry.npmjs.org/@next/swc-darwin-x64/-/swc-darwin-x64-16.2.4.tgz",
"integrity": "sha512-XhpVnUfmYWvD3YrXu55XdcAkQtOnvaI6wtQa8fuF5fGoKoxIUZ0kWPtcOfqJEWngFF/lOS9l3+O9CcownhiQxQ==",
"cpu": [
"x64"
],
@ -1865,12 +1876,15 @@
}
},
"node_modules/@next/swc-linux-arm64-gnu": {
"version": "16.1.7",
"resolved": "https://registry.npmjs.org/@next/swc-linux-arm64-gnu/-/swc-linux-arm64-gnu-16.1.7.tgz",
"integrity": "sha512-2ant89Lux/Q3VyC8vNVg7uBaFVP9SwoK2jJOOR0L8TQnX8CAYnh4uctAScy2Hwj2dgjVHqHLORQZJ2wH6VxhSQ==",
"version": "16.2.4",
"resolved": "https://registry.npmjs.org/@next/swc-linux-arm64-gnu/-/swc-linux-arm64-gnu-16.2.4.tgz",
"integrity": "sha512-Mx/tjlNA3G8kg14QvuGAJ4xBwPk1tUHq56JxZ8CXnZwz1Etz714soCEzGQQzVMz4bEnGPowzkV6Xrp6wAkEWOQ==",
"cpu": [
"arm64"
],
"libc": [
"glibc"
],
"license": "MIT",
"optional": true,
"os": [
@ -1881,12 +1895,15 @@
}
},
"node_modules/@next/swc-linux-arm64-musl": {
"version": "16.1.7",
"resolved": "https://registry.npmjs.org/@next/swc-linux-arm64-musl/-/swc-linux-arm64-musl-16.1.7.tgz",
"integrity": "sha512-uufcze7LYv0FQg9GnNeZ3/whYfo+1Q3HnQpm16o6Uyi0OVzLlk2ZWoY7j07KADZFY8qwDbsmFnMQP3p3+Ftprw==",
"version": "16.2.4",
"resolved": "https://registry.npmjs.org/@next/swc-linux-arm64-musl/-/swc-linux-arm64-musl-16.2.4.tgz",
"integrity": "sha512-iVMMp14514u7Nup2umQS03nT/bN9HurK8ufylC3FZNykrwjtx7V1A7+4kvhbDSCeonTVqV3Txnv0Lu+m2oDXNg==",
"cpu": [
"arm64"
],
"libc": [
"musl"
],
"license": "MIT",
"optional": true,
"os": [
@ -1897,12 +1914,15 @@
}
},
"node_modules/@next/swc-linux-x64-gnu": {
"version": "16.1.7",
"resolved": "https://registry.npmjs.org/@next/swc-linux-x64-gnu/-/swc-linux-x64-gnu-16.1.7.tgz",
"integrity": "sha512-KWVf2gxYvHtvuT+c4MBOGxuse5TD7DsMFYSxVxRBnOzok/xryNeQSjXgxSv9QpIVlaGzEn/pIuI6Koosx8CGWA==",
"version": "16.2.4",
"resolved": "https://registry.npmjs.org/@next/swc-linux-x64-gnu/-/swc-linux-x64-gnu-16.2.4.tgz",
"integrity": "sha512-EZOvm1aQWgnI/N/xcWOlnS3RQBk0VtVav5Zo7n4p0A7UKyTDx047k8opDbXgBpHl4CulRqRfbw3QrX2w5UOXMQ==",
"cpu": [
"x64"
],
"libc": [
"glibc"
],
"license": "MIT",
"optional": true,
"os": [
@ -1913,12 +1933,15 @@
}
},
"node_modules/@next/swc-linux-x64-musl": {
"version": "16.1.7",
"resolved": "https://registry.npmjs.org/@next/swc-linux-x64-musl/-/swc-linux-x64-musl-16.1.7.tgz",
"integrity": "sha512-HguhaGwsGr1YAGs68uRKc4aGWxLET+NevJskOcCAwXbwj0fYX0RgZW2gsOCzr9S11CSQPIkxmoSbuVaBp4Z3dA==",
"version": "16.2.4",
"resolved": "https://registry.npmjs.org/@next/swc-linux-x64-musl/-/swc-linux-x64-musl-16.2.4.tgz",
"integrity": "sha512-h9FxsngCm9cTBf71AR4fGznDEDx1hS7+kSEiIRjq5kO1oXWm07DxVGZjCvk0SGx7TSjlUqhI8oOyz7NfwAdPoA==",
"cpu": [
"x64"
],
"libc": [
"musl"
],
"license": "MIT",
"optional": true,
"os": [
@ -1929,9 +1952,9 @@
}
},
"node_modules/@next/swc-win32-arm64-msvc": {
"version": "16.1.7",
"resolved": "https://registry.npmjs.org/@next/swc-win32-arm64-msvc/-/swc-win32-arm64-msvc-16.1.7.tgz",
"integrity": "sha512-S0n3KrDJokKTeFyM/vGGGR8+pCmXYrjNTk2ZozOL1C/JFdfUIL9O1ATaJOl5r2POe56iRChbsszrjMAdWSv7kQ==",
"version": "16.2.4",
"resolved": "https://registry.npmjs.org/@next/swc-win32-arm64-msvc/-/swc-win32-arm64-msvc-16.2.4.tgz",
"integrity": "sha512-3NdJV5OXMSOeJYijX+bjaLge3mJBlh4ybydbT4GFoB/2hAojWHtMhl3CYlYoMrjPuodp0nzFVi4Tj2+WaMg+Ow==",
"cpu": [
"arm64"
],
@ -1945,9 +1968,9 @@
}
},
"node_modules/@next/swc-win32-x64-msvc": {
"version": "16.1.7",
"resolved": "https://registry.npmjs.org/@next/swc-win32-x64-msvc/-/swc-win32-x64-msvc-16.1.7.tgz",
"integrity": "sha512-mwgtg8CNZGYm06LeEd+bNnOUfwOyNem/rOiP14Lsz+AnUY92Zq/LXwtebtUiaeVkhbroRCQ0c8GlR4UT1U+0yg==",
"version": "16.2.4",
"resolved": "https://registry.npmjs.org/@next/swc-win32-x64-msvc/-/swc-win32-x64-msvc-16.2.4.tgz",
"integrity": "sha512-kMVGgsqhO5YTYODD9IPGGhA6iprWidQckK3LmPeW08PIFENRmgfb4MjXHO+p//d+ts2rpjvK5gXWzXSMrPl9cw==",
"cpu": [
"x64"
],
@ -2981,9 +3004,9 @@
}
},
"node_modules/@tanstack/query-core": {
"version": "5.90.20",
"resolved": "https://registry.npmjs.org/@tanstack/query-core/-/query-core-5.90.20.tgz",
"integrity": "sha512-OMD2HLpNouXEfZJWcKeVKUgQ5n+n3A2JFmBaScpNDUqSrQSjiveC7dKMe53uJUg1nDG16ttFPz2xfilz6i2uVg==",
"version": "5.100.7",
"resolved": "https://registry.npmjs.org/@tanstack/query-core/-/query-core-5.100.7.tgz",
"integrity": "sha512-5R7i6ENJLhVeeJrrUz7jKBXUXv/BJrxf9FQJSkR13bPrb3zOcE8A0Z0PxYCcsKPOsiIlTibrBL/zZbtUO1TFyQ==",
"license": "MIT",
"funding": {
"type": "github",
@ -3011,12 +3034,12 @@
}
},
"node_modules/@tanstack/react-query": {
"version": "5.90.20",
"resolved": "https://registry.npmjs.org/@tanstack/react-query/-/react-query-5.90.20.tgz",
"integrity": "sha512-vXBxa+qeyveVO7OA0jX1z+DeyCA4JKnThKv411jd5SORpBKgkcVnYKCiBgECvADvniBX7tobwBmg01qq9JmMJw==",
"version": "5.100.7",
"resolved": "https://registry.npmjs.org/@tanstack/react-query/-/react-query-5.100.7.tgz",
"integrity": "sha512-LoISYWz8dOOuQbeIctF8K6yi42TWtR1WPGpwGuRUpF3u79JVVIg/PVR0MQdIA0VSHqD/ydf/b7PhKTkg3I4fLQ==",
"license": "MIT",
"dependencies": {
"@tanstack/query-core": "5.90.20"
"@tanstack/query-core": "5.100.7"
},
"funding": {
"type": "github",
@ -7872,6 +7895,19 @@
"dev": true,
"license": "MIT"
},
"node_modules/json-schema-to-ts": {
"version": "3.1.1",
"resolved": "https://registry.npmjs.org/json-schema-to-ts/-/json-schema-to-ts-3.1.1.tgz",
"integrity": "sha512-+DWg8jCJG2TEnpy7kOm/7/AxaYoaRbjVB4LFZLySZlWn8exGs3A4OLJR966cVvU26N7X9TWxl+Jsw7dzAqKT6g==",
"license": "MIT",
"dependencies": {
"@babel/runtime": "^7.18.3",
"ts-algebra": "^2.0.0"
},
"engines": {
"node": ">=16"
}
},
"node_modules/json-schema-traverse": {
"version": "0.4.1",
"resolved": "https://registry.npmjs.org/json-schema-traverse/-/json-schema-traverse-0.4.1.tgz",
@ -9299,12 +9335,12 @@
"license": "MIT"
},
"node_modules/next": {
"version": "16.1.7",
"resolved": "https://registry.npmjs.org/next/-/next-16.1.7.tgz",
"integrity": "sha512-WM0L7WrSvKwoLegLYr6V+mz+RIofqQgVAfHhMp9a88ms0cFX8iX9ew+snpWlSBwpkURJOUdvCEt3uLl3NNzvWg==",
"version": "16.2.4",
"resolved": "https://registry.npmjs.org/next/-/next-16.2.4.tgz",
"integrity": "sha512-kPvz56wF5frc+FxlHI5qnklCzbq53HTwORaWBGdT0vNoKh1Aya9XC8aPauH4NJxqtzbWsS5mAbctm4cr+EkQ2Q==",
"license": "MIT",
"dependencies": {
"@next/env": "16.1.7",
"@next/env": "16.2.4",
"@swc/helpers": "0.5.15",
"baseline-browser-mapping": "^2.9.19",
"caniuse-lite": "^1.0.30001579",
@ -9318,15 +9354,15 @@
"node": ">=20.9.0"
},
"optionalDependencies": {
"@next/swc-darwin-arm64": "16.1.7",
"@next/swc-darwin-x64": "16.1.7",
"@next/swc-linux-arm64-gnu": "16.1.7",
"@next/swc-linux-arm64-musl": "16.1.7",
"@next/swc-linux-x64-gnu": "16.1.7",
"@next/swc-linux-x64-musl": "16.1.7",
"@next/swc-win32-arm64-msvc": "16.1.7",
"@next/swc-win32-x64-msvc": "16.1.7",
"sharp": "^0.34.4"
"@next/swc-darwin-arm64": "16.2.4",
"@next/swc-darwin-x64": "16.2.4",
"@next/swc-linux-arm64-gnu": "16.2.4",
"@next/swc-linux-arm64-musl": "16.2.4",
"@next/swc-linux-x64-gnu": "16.2.4",
"@next/swc-linux-x64-musl": "16.2.4",
"@next/swc-win32-arm64-msvc": "16.2.4",
"@next/swc-win32-x64-msvc": "16.2.4",
"sharp": "^0.34.5"
},
"peerDependencies": {
"@opentelemetry/api": "^1.1.0",
@ -9360,34 +9396,6 @@
"tslib": "^2.8.0"
}
},
"node_modules/next/node_modules/postcss": {
"version": "8.4.31",
"resolved": "https://registry.npmjs.org/postcss/-/postcss-8.4.31.tgz",
"integrity": "sha512-PS08Iboia9mts/2ygV3eLpY5ghnUcfLV/EXTOW1E2qYxJKGGBUtNjN76FYHnMs36RmARn41bC0AZmn+rR0OVpQ==",
"funding": [
{
"type": "opencollective",
"url": "https://opencollective.com/postcss/"
},
{
"type": "tidelift",
"url": "https://tidelift.com/funding/github/npm/postcss"
},
{
"type": "github",
"url": "https://github.com/sponsors/ai"
}
],
"license": "MIT",
"dependencies": {
"nanoid": "^3.3.6",
"picocolors": "^1.0.0",
"source-map-js": "^1.0.2"
},
"engines": {
"node": "^10 || ^12 || >=14"
}
},
"node_modules/node-domexception": {
"version": "1.0.0",
"resolved": "https://registry.npmjs.org/node-domexception/-/node-domexception-1.0.0.tgz",
@ -9938,9 +9946,9 @@
}
},
"node_modules/postcss": {
"version": "8.5.6",
"resolved": "https://registry.npmjs.org/postcss/-/postcss-8.5.6.tgz",
"integrity": "sha512-3Ybi1tAuwAP9s0r1UQ2J4n5Y0G05bJkpUIO0/bI9MhwmD70S5aTWbXGBwxHrelT+XM1k6dM0pk+SwNkpTRN7Pg==",
"version": "8.5.13",
"resolved": "https://registry.npmjs.org/postcss/-/postcss-8.5.13.tgz",
"integrity": "sha512-qif0+jGGZoLWdHey3UFHHWP0H7Gbmsk8T5VEqyYFbWqPr1XqvLGBbk/sl8V5exGmcYJklJOhOQq1pV9IcsiFag==",
"funding": [
{
"type": "opencollective",
@ -10838,16 +10846,16 @@
}
},
"node_modules/react-copy-to-clipboard": {
"version": "5.1.0",
"resolved": "https://registry.npmjs.org/react-copy-to-clipboard/-/react-copy-to-clipboard-5.1.0.tgz",
"integrity": "sha512-k61RsNgAayIJNoy9yDsYzDe/yAZAzEbEgcz3DZMhF686LEyukcE1hzurxe85JandPUG+yTfGVFzuEw3xt8WP/A==",
"version": "5.1.1",
"resolved": "https://registry.npmjs.org/react-copy-to-clipboard/-/react-copy-to-clipboard-5.1.1.tgz",
"integrity": "sha512-s+HrzLyJBxrpGTYXF15dTgMjAJpEPZT/Yp6NytAtZMRngejxt6Pt5WrfFxLAcsqUDU6sY1Jz6tyHwIicE1U2Xg==",
"license": "MIT",
"dependencies": {
"copy-to-clipboard": "^3.3.1",
"copy-to-clipboard": "^3.3.3",
"prop-types": "^15.8.1"
},
"peerDependencies": {
"react": "^15.3.0 || 16 || 17 || 18"
"react": ">=15.3.0"
}
},
"node_modules/react-day-picker": {
@ -12374,6 +12382,12 @@
"url": "https://github.com/sponsors/wooorm"
}
},
"node_modules/ts-algebra": {
"version": "2.0.0",
"resolved": "https://registry.npmjs.org/ts-algebra/-/ts-algebra-2.0.0.tgz",
"integrity": "sha512-FPAhNPFMrkwz76P7cdjdmiShwMynZYN6SgOujD1urY4oNm80Ou9oMdmbR45LotcKOXoy7wSmHkRFE6Mxbrhefw==",
"license": "MIT"
},
"node_modules/ts-api-utils": {
"version": "2.4.0",
"resolved": "https://registry.npmjs.org/ts-api-utils/-/ts-api-utils-2.4.0.tgz",
@ -12712,16 +12726,16 @@
"license": "MIT"
},
"node_modules/uuid": {
"version": "11.1.0",
"resolved": "https://registry.npmjs.org/uuid/-/uuid-11.1.0.tgz",
"integrity": "sha512-0/A9rDy9P7cJ+8w1c9WD9V//9Wj15Ce2MPz8Ri6032usz+NfePxx5AcN3bN+r6ZL6jEo066/yNYB3tn4pQEx+A==",
"version": "14.0.0",
"resolved": "https://registry.npmjs.org/uuid/-/uuid-14.0.0.tgz",
"integrity": "sha512-Qo+uWgilfSmAhXCMav1uYFynlQO7fMFiMVZsQqZRMIXp0O7rR7qjkj+cPvBHLgBqi960QCoo/PH2/6ZtVqKvrg==",
"funding": [
"https://github.com/sponsors/broofa",
"https://github.com/sponsors/ctavan"
],
"license": "MIT",
"bin": {
"uuid": "dist/esm/bin/uuid"
"uuid": "dist-node/bin/uuid"
}
},
"node_modules/vfile": {

View File

@ -20,12 +20,12 @@
"knip:fix": "knip --fix"
},
"dependencies": {
"@anthropic-ai/sdk": "0.54.0",
"@anthropic-ai/sdk": "0.92.0",
"@headlessui/tailwindcss": "0.2.2",
"@heroicons/react": "1.0.6",
"@remixicon/react": "4.9.0",
"@tanstack/react-pacer": "0.2.0",
"@tanstack/react-query": "5.90.20",
"@tanstack/react-query": "5.100.7",
"@tanstack/react-table": "8.21.3",
"@tremor/react": "3.18.7",
"@types/papaparse": "5.5.2",
@ -35,18 +35,18 @@
"jwt-decode": "4.0.0",
"lucide-react": "0.513.0",
"moment": "2.30.1",
"next": "16.1.7",
"next": "16.2.4",
"openai": "4.104.0",
"papaparse": "5.5.3",
"react": "18.3.1",
"react-copy-to-clipboard": "5.1.0",
"react-copy-to-clipboard": "5.1.1",
"react-dom": "18.3.1",
"react-json-view-lite": "2.5.0",
"react-markdown": "9.1.0",
"react-syntax-highlighter": "15.6.6",
"remark-gfm": "4.0.1",
"tailwind-merge": "3.4.0",
"uuid": "11.1.0"
"uuid": "14.0.0"
},
"devDependencies": {
"@playwright/test": "1.58.1",
@ -73,7 +73,7 @@
"eslint-plugin-unused-imports": "4.3.0",
"jsdom": "27.4.0",
"knip": "5.83.1",
"postcss": "8.5.6",
"postcss": "8.5.13",
"prettier": "3.2.5",
"tailwindcss": "3.4.19",
"typescript": "5.9.3",
@ -88,10 +88,11 @@
"lodash": "4.18.1",
"ws": "8.19.0",
"braces": "3.0.3",
"axios": "1.13.6"
"axios": "1.13.6",
"postcss": "8.5.13"
},
"engines": {
"node": ">=18.17.0",
"node": ">=20.9.0",
"npm": ">=8.3.0"
}
}

View File

@ -0,0 +1,51 @@
"use client";
import useAuthorized from "@/app/(dashboard)/hooks/useAuthorized";
import { useMutation, useQuery, useQueryClient, UseMutationResult, UseQueryResult } from "@tanstack/react-query";
import { getRouterSettingsCall, setCallbacksCall } from "@/components/networking";
import { createQueryKeys } from "../common/queryKeysFactory";
import type { RoutingGroup } from "@/components/routing_groups/types";
// Query-key namespace for routing-group queries. In this file `lists()` is used both as the
// key of the fetch query and as the key invalidated after a successful save.
const routingGroupsKeys = createQueryKeys("routingGroups");
/** View model that `fetchRoutingGroups` derives from the router-settings response. */
interface RoutingGroupsQueryData {
// Taken from `current_values.routing_groups`; an empty array when that value is not an array.
routingGroups: RoutingGroup[];
// Taken from `current_values.routing_strategy`; null when it is absent.
routingStrategy: string | null;
// `options` of the field named "routing_strategy"; an empty array when missing or not an array.
availableStrategies: string[];
}
/**
 * Fetch the router settings and reduce them to the routing-group view model.
 *
 * Every piece of the response is read defensively: a missing `current_values`
 * object, a non-array `fields` list, a non-array `routing_groups` value or a
 * non-array `options` list each fall back to an empty default instead of throwing.
 *
 * @param accessToken Token forwarded to `getRouterSettingsCall`.
 * @returns The routing groups, the current routing strategy (or null) and the selectable strategies.
 */
const fetchRoutingGroups = async (accessToken: string): Promise<RoutingGroupsQueryData> => {
  const response = await getRouterSettingsCall(accessToken);
  const current = response?.current_values ?? {};
  const fieldList = Array.isArray(response?.fields) ? response.fields : [];
  const strategyField = fieldList.find((field: any) => field?.field_name === "routing_strategy");

  const routingGroups = Array.isArray(current.routing_groups) ? current.routing_groups : [];
  const routingStrategy = current.routing_strategy ?? null;
  const availableStrategies = Array.isArray(strategyField?.options) ? strategyField.options : [];

  return { routingGroups, routingStrategy, availableStrategies };
};
/**
 * Query hook exposing the routing groups and strategy metadata.
 *
 * The query stays disabled until the access token, user id and user role are
 * all truthy, so `fetchRoutingGroups` never runs without a token.
 */
export const useRoutingGroups = (): UseQueryResult<RoutingGroupsQueryData> => {
  const { accessToken: token, userId, userRole } = useAuthorized();
  const sessionReady = Boolean(token && userId && userRole);

  const loadRoutingGroups = () => fetchRoutingGroups(token!);

  return useQuery<RoutingGroupsQueryData>({
    queryKey: routingGroupsKeys.lists(),
    queryFn: loadRoutingGroups,
    enabled: sessionReady,
  });
};
/**
 * Mutation hook that persists the complete list of routing groups.
 *
 * The list passed to `mutate` / `mutateAsync` is sent as
 * `router_settings.routing_groups` through `setCallbacksCall`. On success the
 * routing-groups list query is invalidated so consumers refetch.
 *
 * The mutation rejects with an `Error` when no access token is available,
 * instead of issuing a request with a null credential.
 */
export const useSaveRoutingGroups = (): UseMutationResult<unknown, Error, RoutingGroup[]> => {
  const { accessToken } = useAuthorized();
  const queryClient = useQueryClient();

  return useMutation<unknown, Error, RoutingGroup[]>({
    mutationFn: async (routingGroups: RoutingGroup[]) => {
      // Fail fast with a readable message; callers surface `error.message` to the user.
      if (!accessToken) {
        throw new Error("Cannot save routing groups: missing access token");
      }
      return setCallbacksCall(accessToken, {
        router_settings: { routing_groups: routingGroups },
      });
    },
    onSuccess: () => {
      queryClient.invalidateQueries({ queryKey: routingGroupsKeys.lists() });
    },
  });
};

View File

@ -24,6 +24,7 @@ import { TrashIcon, CheckCircleIcon } from "@heroicons/react/outline";
import RouterSettings from "./router_settings";
import Fallbacks from "./Settings/RouterSettings/Fallbacks/Fallbacks";
import RoutingGroups from "./routing_groups";
interface GeneralSettingsPageProps {
accessToken: string | null;
userRole: string | null;
@ -110,8 +111,9 @@ const GeneralSettings: React.FC<GeneralSettingsPageProps> = ({ accessToken, user
<TabGroup className="h-[75vh] w-full">
<TabList variant="line" defaultValue="1" className="px-8 pt-4">
<Tab value="1">Loadbalancing</Tab>
<Tab value="2">Fallbacks</Tab>
<Tab value="3">General</Tab>
<Tab value="2">Routing Groups</Tab>
<Tab value="3">Fallbacks</Tab>
<Tab value="4">General</Tab>
</TabList>
<TabPanels className="px-8 py-6">
<TabPanel>
@ -122,6 +124,9 @@ const GeneralSettings: React.FC<GeneralSettingsPageProps> = ({ accessToken, user
modelData={modelData}
/>
</TabPanel>
<TabPanel>
<RoutingGroups />
</TabPanel>
<TabPanel>
<Fallbacks
accessToken={accessToken}

View File

@ -0,0 +1,194 @@
"use client";
import React, { useMemo } from "react";
import { Form, Input, Modal, Select, Space, Typography } from "antd";
import type { RoutingGroup, RoutingStrategy } from "./types";
const { Text, Paragraph } = Typography;
/** Props accepted by `RoutingGroupModal`. */
interface RoutingGroupModalProps {
// Forwarded to the antd Modal `open` prop.
open: boolean;
// Selects the title / OK-button text; in "edit" mode the group-name input is disabled.
mode: "create" | "edit";
// Group used to seed the form; null falls back to empty defaults.
initialValue: RoutingGroup | null;
// Options of the strategy select; the first entry is the default strategy for a new group.
availableStrategies: string[];
// Help text shown under the strategy select, keyed by strategy name.
strategyDescriptions: Record<string, string>;
// Options of the models multi-select.
modelOptions: string[];
// Names used for the case-insensitive duplicate check (the group being edited is excluded).
existingGroupNames: string[];
// Invoked when the modal is cancelled.
onClose: () => void;
// Invoked with the assembled group once the form has validated.
onSubmit: (group: RoutingGroup) => Promise<void> | void;
// Forwarded to the Modal `confirmLoading` prop.
saving?: boolean;
}
/** Raw values held by the routing-group form. */
interface FormValues {
group_name: string;
models: string[];
routing_strategy: RoutingStrategy | string;
// JSON text as typed into the textarea; it is parsed only on submit.
routing_strategy_args?: string;
}
// Strategies for which the "Strategy Arguments (JSON)" field is rendered and parsed on submit.
const STRATEGIES_WITH_ARGS = new Set<string>(["latency-based-routing", "usage-based-routing"]);
// Group names may contain only ASCII letters, digits, dot, underscore and dash.
const GROUP_NAME_PATTERN = /^[A-Za-z0-9._-]+$/;
// Maximum group-name length enforced by the form's `max` rule.
const GROUP_NAME_MAX_LENGTH = 64;
/**
 * Modal form for creating or editing a routing group.
 *
 * On OK the form is validated, the optional strategy arguments are parsed from
 * JSON, and the assembled `RoutingGroup` is handed to `onSubmit`. Strategy
 * arguments are only read for strategies listed in `STRATEGIES_WITH_ARGS`;
 * for every other strategy `routing_strategy_args` is submitted as null.
 *
 * Validation failures never reach `onSubmit`: field errors are rendered inline
 * by the form, and malformed or non-object JSON is reported on the arguments
 * field.
 */
const RoutingGroupModal: React.FC<RoutingGroupModalProps> = ({
  open,
  mode,
  initialValue,
  availableStrategies,
  strategyDescriptions,
  modelOptions,
  existingGroupNames,
  onClose,
  onSubmit,
  saving,
}) => {
  const [form] = Form.useForm<FormValues>();
  const selectedStrategy = Form.useWatch("routing_strategy", form);

  const initialValues: FormValues = {
    group_name: initialValue?.group_name ?? "",
    models: initialValue?.models ?? [],
    routing_strategy: initialValue?.routing_strategy ?? availableStrategies[0] ?? "simple-shuffle",
    routing_strategy_args: initialValue?.routing_strategy_args
      ? JSON.stringify(initialValue.routing_strategy_args, null, 2)
      : "",
  };

  // Lower-cased names of all *other* groups, so editing a group does not collide with itself.
  const reservedNames = useMemo(() => {
    const others = existingGroupNames.filter((n) => n !== initialValue?.group_name);
    return new Set(others.map((n) => n.toLowerCase()));
  }, [existingGroupNames, initialValue]);

  // Attach an error message to the strategy-arguments field.
  const setArgsError = (message: string) => {
    form.setFields([
      {
        name: "routing_strategy_args",
        errors: [message],
      },
    ]);
  };

  const handleSubmit = async () => {
    let values: FormValues;
    try {
      values = await form.validateFields();
    } catch {
      // The form already renders the per-field errors; returning here only keeps the
      // rejection from escaping the Modal's onOk handler as an unhandled promise rejection.
      return;
    }

    const strategySupportsArgs = STRATEGIES_WITH_ARGS.has(String(values.routing_strategy));
    let parsedArgs: Record<string, unknown> | null = null;
    if (strategySupportsArgs && values.routing_strategy_args && values.routing_strategy_args.trim()) {
      let candidate: unknown;
      try {
        candidate = JSON.parse(values.routing_strategy_args);
      } catch {
        setArgsError("Must be valid JSON");
        return;
      }
      // JSON.parse also accepts scalars and arrays; only an object (or a literal null,
      // meaning "no arguments") is a usable argument map.
      if (candidate !== null && (typeof candidate !== "object" || Array.isArray(candidate))) {
        setArgsError("Must be a JSON object");
        return;
      }
      parsedArgs = candidate as Record<string, unknown> | null;
    }

    await onSubmit({
      group_name: values.group_name.trim(),
      models: values.models,
      routing_strategy: values.routing_strategy,
      routing_strategy_args: parsedArgs,
    });
  };

  return (
    <Modal
      title={mode === "create" ? "Create Routing Group" : `Edit ${initialValue?.group_name ?? ""}`}
      open={open}
      onCancel={onClose}
      onOk={handleSubmit}
      okText={mode === "create" ? "Create Group" : "Save Changes"}
      cancelText="Cancel"
      confirmLoading={saving}
      destroyOnClose
      width={560}
    >
      {/* The key forces a fresh form (and fresh initialValues) when the edited group changes. */}
      <Form<FormValues>
        key={mode === "edit" ? `edit-${initialValue?.group_name ?? ""}` : "create"}
        form={form}
        layout="vertical"
        preserve={false}
        initialValues={initialValues}
      >
        <Form.Item
          label="Group Name"
          name="group_name"
          rules={[
            { required: true, message: "Group name is required" },
            { max: GROUP_NAME_MAX_LENGTH, message: `Must be ${GROUP_NAME_MAX_LENGTH} characters or fewer` },
            {
              pattern: GROUP_NAME_PATTERN,
              message: "Only letters, numbers, dot, underscore, and dash are allowed",
            },
            {
              validator: (_, value: string) => {
                if (!value) return Promise.resolve();
                if (reservedNames.has(value.trim().toLowerCase())) {
                  return Promise.reject(new Error("A group with this name already exists"));
                }
                return Promise.resolve();
              },
            },
          ]}
          extra="Use this name as the model in API calls — LiteLLM routes the request to one of the group's models."
        >
          <Input placeholder="fast-chat" disabled={mode === "edit"} />
        </Form.Item>
        <Form.Item
          label="Models"
          name="models"
          rules={[{ required: true, message: "Select at least one model" }]}
          extra="Models from your model list that this group routes between."
        >
          <Select
            mode="multiple"
            allowClear
            placeholder="Select models"
            options={modelOptions.map((m) => ({ label: m, value: m }))}
            optionFilterProp="label"
          />
        </Form.Item>
        <Form.Item
          label="Routing Strategy"
          name="routing_strategy"
          rules={[{ required: true, message: "Strategy is required" }]}
        >
          <Select
            options={availableStrategies.map((s) => ({ label: s, value: s }))}
            placeholder="Select strategy"
          />
        </Form.Item>
        {selectedStrategy && strategyDescriptions[selectedStrategy] && (
          <Paragraph className="text-xs text-gray-500 -mt-2 mb-4">
            {strategyDescriptions[selectedStrategy]}
          </Paragraph>
        )}
        {STRATEGIES_WITH_ARGS.has(String(selectedStrategy)) && (
          <Form.Item
            label="Strategy Arguments (JSON)"
            name="routing_strategy_args"
            extra={
              selectedStrategy === "latency-based-routing"
                ? "Example: { \"ttl\": 3600, \"lowest_latency_buffer\": 0 }"
                : "Example: { \"ttl\": 60 }"
            }
          >
            <Input.TextArea
              rows={4}
              placeholder='{ "ttl": 3600 }'
              className="font-mono text-xs"
            />
          </Form.Item>
        )}
        <Space direction="vertical" className="w-full mt-2">
          <Text type="secondary" className="text-xs">
            Models not claimed by an explicit group fall through to the proxy&apos;s top-level routing
            strategy.
          </Text>
        </Space>
      </Form>
    </Modal>
  );
};
export default RoutingGroupModal;

View File

@ -0,0 +1,241 @@
"use client";
import React, { useState } from "react";
import { Flex, Table, Tabs, Tag, Tooltip, Typography, Button } from "antd";
import type { ColumnsType } from "antd/es/table";
import { BranchesOutlined, DeleteOutlined, EditOutlined, CodeOutlined } from "@ant-design/icons";
import type { RoutingGroup } from "./types";
const { Text, Paragraph } = Typography;
/** Props accepted by `RoutingGroupsTable`. */
interface RoutingGroupsTableProps {
// Rows to render; `group_name` is used as the row key.
groups: RoutingGroup[];
// Forwarded to the antd Table `loading` prop.
loading?: boolean;
// Invoked with the row's group when its edit button is clicked.
onEdit: (group: RoutingGroup) => void;
// Invoked with the row's group when its delete button is clicked.
onDelete: (group: RoutingGroup) => void;
// Base URL shown in the usage snippets; when blank, `resolveBaseUrl` picks a fallback.
proxyBaseUrl?: string;
}
// Display labels for the routing strategies the UI knows about.
const STRATEGY_LABELS = new Map<string, string>([
  ["simple-shuffle", "Simple Shuffle"],
  ["least-busy", "Least Busy"],
  ["usage-based-routing", "Usage Based"],
  ["latency-based-routing", "Latency Based"],
]);

/**
 * Map a routing-strategy identifier to its display label.
 * Identifiers without a known label are returned unchanged.
 */
const formatStrategyLabel = (strategy: string): string => {
  const label = STRATEGY_LABELS.get(strategy);
  return label === undefined ? strategy : label;
};
/**
 * Pick the base URL shown in the usage snippets.
 *
 * Order of preference: the configured `proxyBaseUrl`, then the browser's
 * `window.location.origin`, then a literal placeholder.
 *
 * The configured value is trimmed and stripped of trailing slashes because the
 * snippets append paths such as `/v1/chat/completions` to it; without this a
 * value like "https://host/" would render as "https://host//v1/...".
 *
 * @param proxyBaseUrl Optional configured base URL; blank values are ignored.
 */
const resolveBaseUrl = (proxyBaseUrl?: string): string => {
  const configured = proxyBaseUrl?.trim().replace(/\/+$/, "");
  if (configured) return configured;
  if (typeof window !== "undefined" && window.location?.origin) return window.location.origin;
  return "<your_proxy_base_url>";
};
/** Model name used in the usage snippets: the group's first model, or a placeholder when there is none. */
const exampleModel = (group: RoutingGroup): string => {
  const firstModel = group.models[0];
  if (firstModel == null) {
    return "<your-model>";
  }
  return firstModel;
};
/**
 * Build the cURL usage example for a group.
 *
 * The request targets `${baseUrl}/v1/chat/completions` and uses `exampleModel(group)` as the
 * `model` value. `$LITELLM_API_KEY` is not a template placeholder (no braces), so it is
 * emitted literally. All whitespace inside the template literal is part of the displayed text.
 */
const buildCurlSnippet = (group: RoutingGroup, baseUrl: string): string =>
`curl -X POST '${baseUrl}/v1/chat/completions' \\
-H 'Content-Type: application/json' \\
-H 'Authorization: Bearer $LITELLM_API_KEY' \\
-d '{
"model": "${exampleModel(group)}",
"messages": [{"role": "user", "content": "Hello!"}]
}'`;
/**
 * Build the Python (OpenAI SDK) usage example for a group.
 *
 * `baseUrl` is interpolated as the client's `base_url` and `exampleModel(group)` as the
 * `model`. The `"$LITELLM_API_KEY"` text is emitted literally. All whitespace inside the
 * template literal is part of the displayed text.
 */
const buildPythonSnippet = (group: RoutingGroup, baseUrl: string): string =>
`from openai import OpenAI
client = OpenAI(
api_key="$LITELLM_API_KEY",
base_url="${baseUrl}",
)
response = client.chat.completions.create(
model="${exampleModel(group)}",
messages=[{"role": "user", "content": "Hello!"}],
)
print(response)`;
/**
 * Build the JavaScript (OpenAI SDK) usage example for a group.
 *
 * `baseUrl` is interpolated as the client's `baseURL` and `exampleModel(group)` as the
 * `model`; the generated code reads the API key from `process.env.LITELLM_API_KEY`.
 * All whitespace inside the template literal is part of the displayed text.
 */
const buildJsSnippet = (group: RoutingGroup, baseUrl: string): string =>
`import OpenAI from "openai";
const client = new OpenAI({
apiKey: process.env.LITELLM_API_KEY,
baseURL: "${baseUrl}",
});
const response = await client.chat.completions.create({
model: "${exampleModel(group)}",
messages: [{ role: "user", content: "Hello!" }],
});
console.log(response);`;
/** Props accepted by `RoutingGroupSnippet`. */
interface RoutingGroupSnippetProps {
// Group whose first model is used in the generated examples.
group: RoutingGroup;
// Base URL interpolated into every generated example.
baseUrl: string;
}
// Inline style for the snippet code block: dark background with light text.
const SNIPPET_BLOCK_STYLE: React.CSSProperties = {
backgroundColor: "#111827",
color: "#f3f4f6",
borderRadius: 6,
padding: 16,
fontSize: 12,
// "pre" keeps the snippet's line breaks; long lines scroll horizontally instead of wrapping.
whiteSpace: "pre",
overflowX: "auto",
};
/**
 * Tabbed usage examples (cURL / Python / JavaScript) for one routing group.
 *
 * The copy control in the tab bar always copies the snippet of the currently
 * selected tab.
 */
const RoutingGroupSnippet: React.FC<RoutingGroupSnippetProps> = ({ group, baseUrl }) => {
  const codeByTab = {
    curl: buildCurlSnippet(group, baseUrl),
    python: buildPythonSnippet(group, baseUrl),
    javascript: buildJsSnippet(group, baseUrl),
  } as const;
  type TabKey = keyof typeof codeByTab;

  const [selectedTab, setSelectedTab] = useState<TabKey>("curl");

  // Tab order as shown to the user.
  const tabLabels: Array<[TabKey, string]> = [
    ["curl", "cURL"],
    ["python", "Python (OpenAI SDK)"],
    ["javascript", "JavaScript (OpenAI SDK)"],
  ];

  const tabItems = tabLabels.map(([tabKey, tabLabel]) => {
    const codeBlock = (
      <Paragraph code className="!mb-0" style={SNIPPET_BLOCK_STYLE}>
        {codeByTab[tabKey]}
      </Paragraph>
    );
    return { key: tabKey, label: tabLabel, children: codeBlock };
  });

  const copyControl = (
    <Paragraph
      copyable={{ text: codeByTab[selectedTab], tooltips: ["Copy", "Copied"] }}
      className="!mb-0"
    />
  );

  const handleTabChange = (nextKey: string) => {
    setSelectedTab(nextKey as TabKey);
  };

  return (
    <Tabs
      size="small"
      activeKey={selectedTab}
      onChange={handleTabChange}
      items={tabItems}
      tabBarExtraContent={copyControl}
    />
  );
};
/**
 * Table of routing groups with per-row edit/delete actions and an expandable
 * panel that explains the group's strategy and shows usage snippets.
 *
 * Rows are keyed by `group_name`. Pagination is disabled, so every group passed
 * in is rendered.
 */
const RoutingGroupsTable: React.FC<RoutingGroupsTableProps> = ({
  groups,
  loading,
  onEdit,
  onDelete,
  proxyBaseUrl,
}) => {
  const [expandedRowKeys, setExpandedRowKeys] = useState<React.Key[]>([]);
  const baseUrl = resolveBaseUrl(proxyBaseUrl);

  const columns: ColumnsType<RoutingGroup> = [
    {
      title: "GROUP NAME",
      dataIndex: "group_name",
      key: "group_name",
      render: (name: string) => (
        <Text strong className="text-blue-600">
          {name}
        </Text>
      ),
    },
    {
      title: "MODELS",
      dataIndex: "models",
      key: "models",
      render: (models: string[]) => (
        <Flex wrap="wrap" gap={4}>
          {/* Groups come from unvalidated server data: tolerate a missing list, and include
              the index in the key so a repeated model name does not produce duplicate keys. */}
          {(Array.isArray(models) ? models : []).map((m, index) => (
            <Tag key={`${m}-${index}`}>{m}</Tag>
          ))}
        </Flex>
      ),
    },
    {
      title: "STRATEGY",
      dataIndex: "routing_strategy",
      key: "routing_strategy",
      render: (strategy: string) => (
        <span className="inline-flex items-center gap-1.5">
          <BranchesOutlined className="text-gray-400" />
          <Text>{formatStrategyLabel(strategy)}</Text>
        </span>
      ),
    },
    {
      title: "ACTIONS",
      key: "actions",
      width: 120,
      align: "right",
      render: (_, group) => (
        <Flex justify="flex-end" align="center" gap={8}>
          <Tooltip title="Edit">
            <Button
              type="text"
              icon={<EditOutlined />}
              onClick={(e) => {
                // Keep the click from bubbling up to the table row.
                e.stopPropagation();
                onEdit(group);
              }}
            />
          </Tooltip>
          <Tooltip title="Delete">
            <Button
              type="text"
              danger
              icon={<DeleteOutlined />}
              onClick={(e) => {
                // Keep the click from bubbling up to the table row.
                e.stopPropagation();
                onDelete(group);
              }}
            />
          </Tooltip>
        </Flex>
      ),
    },
  ];

  return (
    <Table<RoutingGroup>
      rowKey="group_name"
      columns={columns}
      dataSource={groups}
      loading={loading}
      pagination={false}
      expandable={{
        expandedRowKeys,
        onExpandedRowsChange: (keys) => setExpandedRowKeys([...keys]),
        expandedRowRender: (group) => (
          <div className="bg-gray-50 border border-gray-200 rounded-md p-4 my-2">
            <Flex align="center" gap={8} className="mb-2">
              <CodeOutlined className="text-blue-500" />
              <Text strong>How routing works for this group</Text>
            </Flex>
            <Paragraph className="text-sm text-gray-600 mb-3">
              Callers request any model in the group by name — LiteLLM picks a deployment behind the
              scenes using the{" "}
              <Text strong>{formatStrategyLabel(group.routing_strategy)}</Text> strategy.
            </Paragraph>
            <RoutingGroupSnippet group={group} baseUrl={baseUrl} />
          </div>
        ),
      }}
    />
  );
};
export default RoutingGroupsTable;

View File

@ -0,0 +1,177 @@
"use client";
import React, { useMemo, useState } from "react";
import { Button, Card, Flex, Input, Modal, Space, Typography } from "antd";
import { PlusOutlined, ReloadOutlined, SearchOutlined } from "@ant-design/icons";
import { useRoutingGroups, useSaveRoutingGroups } from "@/app/(dashboard)/hooks/routingGroups/useRoutingGroups";
import { useRouterFields } from "@/app/(dashboard)/hooks/router/useRouterFields";
import { useModelHub } from "@/app/(dashboard)/hooks/models/useModels";
import useProxySettings from "@/app/(dashboard)/hooks/proxySettings/useProxySettings";
import RoutingGroupsTable from "./RoutingGroupsTable";
import RoutingGroupModal from "./RoutingGroupModal";
import NotificationsManager from "../molecules/notifications_manager";
import type { RoutingGroup } from "./types";
const { Text } = Typography;
/**
 * Routing-groups settings panel: searchable table of groups plus the
 * create/edit modal and the delete confirmation.
 *
 * Every save sends the complete, updated list of groups through
 * `useSaveRoutingGroups`; create appends, edit replaces the group whose name
 * matches the one being edited, delete filters it out. Success and failure are
 * reported through `NotificationsManager`.
 */
const RoutingGroups: React.FC = () => {
  const { data, isLoading, refetch, isFetching } = useRoutingGroups();
  const { data: routerFields } = useRouterFields();
  const { data: modelHub } = useModelHub();
  const proxySettings = useProxySettings();
  const saveMutation = useSaveRoutingGroups();

  const [searchQuery, setSearchQuery] = useState("");
  const [drawerOpen, setDrawerOpen] = useState(false);
  const [drawerMode, setDrawerMode] = useState<"create" | "edit">("create");
  const [editingGroup, setEditingGroup] = useState<RoutingGroup | null>(null);
  const [deletingGroup, setDeletingGroup] = useState<RoutingGroup | null>(null);

  // Memoized so the empty-array fallback keeps a stable identity while data is still
  // undefined; otherwise every render would invalidate the memos that depend on `groups`.
  const groups = useMemo<RoutingGroup[]>(() => data?.routingGroups ?? [], [data?.routingGroups]);

  // Case-insensitive match against group name, strategy, or any model name.
  const filteredGroups = useMemo(() => {
    const q = searchQuery.trim().toLowerCase();
    if (!q) return groups;
    return groups.filter(
      (g) =>
        g.group_name.toLowerCase().includes(q) ||
        g.routing_strategy.toLowerCase().includes(q) ||
        g.models.some((m) => m.toLowerCase().includes(q)),
    );
  }, [groups, searchQuery]);

  // Stable reference so the modal's duplicate-name memo is not recomputed on every render.
  const existingGroupNames = useMemo(() => groups.map((g) => g.group_name), [groups]);

  // Prefer the strategies returned with the routing groups; otherwise use the router fields.
  const availableStrategies = useMemo(() => {
    if (data?.availableStrategies?.length) return data.availableStrategies;
    const fromFields = routerFields?.fields?.find((f) => f.field_name === "routing_strategy")?.options;
    return fromFields ?? [];
  }, [data?.availableStrategies, routerFields]);

  const strategyDescriptions = routerFields?.routing_strategy_descriptions ?? {};

  // Distinct, non-empty `model_group` names from the model hub.
  const modelOptions = useMemo<string[]>(() => {
    const records = (modelHub?.data ?? []) as Array<{ model_group?: string }>;
    const names = records.map((r) => r.model_group).filter((n): n is string => Boolean(n));
    return Array.from(new Set(names));
  }, [modelHub]);

  const openCreate = () => {
    setDrawerMode("create");
    setEditingGroup(null);
    setDrawerOpen(true);
  };

  const openEdit = (group: RoutingGroup) => {
    setDrawerMode("edit");
    setEditingGroup(group);
    setDrawerOpen(true);
  };

  const handleSubmit = async (incoming: RoutingGroup) => {
    const next: RoutingGroup[] =
      drawerMode === "create"
        ? [...groups, incoming]
        : groups.map((g) => (g.group_name === editingGroup?.group_name ? incoming : g));
    try {
      await saveMutation.mutateAsync(next);
      NotificationsManager.success(
        drawerMode === "create"
          ? `Created routing group "${incoming.group_name}"`
          : `Updated routing group "${incoming.group_name}"`,
      );
      setDrawerOpen(false);
    } catch (err) {
      // Leave the modal open so the user can correct the input and retry.
      NotificationsManager.error(
        err instanceof Error ? err.message : "Failed to save routing group",
      );
    }
  };

  const confirmDelete = async () => {
    if (!deletingGroup) return;
    const next = groups.filter((g) => g.group_name !== deletingGroup.group_name);
    try {
      await saveMutation.mutateAsync(next);
      NotificationsManager.success(`Deleted routing group "${deletingGroup.group_name}"`);
      setDeletingGroup(null);
    } catch (err) {
      NotificationsManager.error(
        err instanceof Error ? err.message : "Failed to delete routing group",
      );
    }
  };

  return (
    <Space direction="vertical" size={16} className="w-full">
      <Card bodyStyle={{ padding: 16 }}>
        <Flex justify="space-between" align="center" gap={12} className="mb-4">
          <Input
            allowClear
            prefix={<SearchOutlined className="text-gray-400" />}
            placeholder="Search groups..."
            value={searchQuery}
            onChange={(e) => setSearchQuery(e.target.value)}
            className="max-w-sm"
          />
          <Flex align="center" gap={12}>
            <Button
              icon={<ReloadOutlined />}
              onClick={() => refetch()}
              loading={isFetching && !isLoading}
            >
              Refresh
            </Button>
            <Button type="primary" icon={<PlusOutlined />} onClick={openCreate}>
              Create Group
            </Button>
            <Text type="secondary" className="text-sm whitespace-nowrap">
              Showing {filteredGroups.length} {filteredGroups.length === 1 ? "result" : "results"}
            </Text>
          </Flex>
        </Flex>
        <RoutingGroupsTable
          groups={filteredGroups}
          loading={isLoading}
          onEdit={openEdit}
          onDelete={(g) => setDeletingGroup(g)}
          proxyBaseUrl={
            proxySettings.LITELLM_UI_API_DOC_BASE_URL?.trim() ||
            proxySettings.PROXY_BASE_URL ||
            ""
          }
        />
      </Card>
      <RoutingGroupModal
        open={drawerOpen}
        mode={drawerMode}
        initialValue={editingGroup}
        availableStrategies={availableStrategies}
        strategyDescriptions={strategyDescriptions}
        modelOptions={modelOptions}
        existingGroupNames={existingGroupNames}
        onClose={() => setDrawerOpen(false)}
        onSubmit={handleSubmit}
        saving={saveMutation.isPending}
      />
      <Modal
        open={Boolean(deletingGroup)}
        title="Delete routing group?"
        okText="Delete"
        okButtonProps={{ danger: true, loading: saveMutation.isPending }}
        cancelText="Cancel"
        onOk={confirmDelete}
        onCancel={() => setDeletingGroup(null)}
      >
        <Text>
          Models in <Text strong>{deletingGroup?.group_name}</Text> will fall back to the proxy&apos;s
          top-level routing strategy. This cannot be undone.
        </Text>
      </Modal>
    </Space>
  );
};
export default RoutingGroups;

View File

@ -0,0 +1,12 @@
/** Routing-strategy identifiers that this UI names explicitly. */
export type RoutingStrategy =
| "simple-shuffle"
| "least-busy"
| "usage-based-routing"
| "latency-based-routing";
/** A named set of models together with the routing strategy applied to them. */
export interface RoutingGroup {
group_name: string;
models: string[];
// A listed `RoutingStrategy`, or any other strategy string.
routing_strategy: RoutingStrategy | string;
// Optional strategy arguments as a key/value object; may be null or omitted.
routing_strategy_args?: Record<string, unknown> | null;
}

View File

@ -3,6 +3,91 @@ import { cleanup } from "@testing-library/react";
import React from "react";
import { afterEach, vi } from "vitest";
/**
 * Install an in-memory `localStorage` for the test environment when the one
 * provided is missing or incomplete.
 *
 * Does nothing when there is no `window` / `window.Storage`, or when
 * `window.localStorage` already exposes callable `getItem` and `clear`.
 * Otherwise `getItem`, `setItem`, `removeItem`, `clear` and `key` are defined on
 * `Storage.prototype` (each storage object gets its own Map, created lazily),
 * and a fresh object created from that prototype is exposed as `localStorage`
 * on both `window` and `globalThis`.
 */
const ensureTestLocalStorage = () => {
  if (typeof window === "undefined" || typeof window.Storage === "undefined") {
    return;
  }
  if (typeof window.localStorage?.getItem === "function" && typeof window.localStorage?.clear === "function") {
    return;
  }

  // One backing Map per storage object; weak so discarded storages can be collected.
  const backingMaps = new WeakMap<Storage, Map<string, string>>();
  const proto = window.Storage.prototype;

  const entriesFor = (target: Storage): Map<string, string> => {
    const known = backingMaps.get(target);
    if (known !== undefined) {
      return known;
    }
    const created = new Map<string, string>();
    backingMaps.set(target, created);
    return created;
  };

  // Descriptor for a replaceable method, so tests can still spy on or override it.
  const asMethod = (value: (this: Storage, ...args: any[]) => unknown): PropertyDescriptor => ({
    configurable: true,
    writable: true,
    value,
  });

  Object.defineProperties(proto, {
    getItem: asMethod(function (key: string) {
      const entries = entriesFor(this);
      const name = String(key);
      return entries.has(name) ? entries.get(name)! : null;
    }),
    setItem: asMethod(function (key: string, value: string) {
      const entries = entriesFor(this);
      entries.set(String(key), String(value));
    }),
    removeItem: asMethod(function (key: string) {
      const entries = entriesFor(this);
      entries.delete(String(key));
    }),
    clear: asMethod(function () {
      const entries = entriesFor(this);
      entries.clear();
    }),
    key: asMethod(function (index: number) {
      const entries = entriesFor(this);
      return Array.from(entries.keys())[index] ?? null;
    }),
  });

  const polyfilled: Storage = Object.create(proto);
  backingMaps.set(polyfilled, new Map<string, string>());

  // `length` is defined on the instance itself and reflects the number of stored entries.
  Object.defineProperty(polyfilled, "length", {
    configurable: true,
    get: () => entriesFor(polyfilled).size,
  });

  Object.defineProperty(window, "localStorage", {
    configurable: true,
    value: polyfilled,
  });
  Object.defineProperty(globalThis, "localStorage", {
    configurable: true,
    value: polyfilled,
  });
};
ensureTestLocalStorage();
// Global mock for NotificationManager to prevent React rendering issues in tests
// This avoids "window is not defined" errors when notifications try to render
// after test environment is torn down
@ -31,7 +116,15 @@ vi.mock("@tremor/react", async (importOriginal) => {
return React.createElement(React.Fragment, null, children);
},
// Render as a plain checkbox so toggle interactions are testable without Tremor internals
Switch: ({ checked, onChange, className }: { checked?: boolean; onChange?: (v: boolean) => void; className?: string }) =>
Switch: ({
checked,
onChange,
className,
}: {
checked?: boolean;
onChange?: (v: boolean) => void;
className?: string;
}) =>
React.createElement("input", {
type: "checkbox",
role: "switch",

View File

@ -14,7 +14,7 @@
"moduleResolution": "bundler",
"resolveJsonModule": true,
"isolatedModules": true,
"jsx": "preserve",
"jsx": "react-jsx",
"incremental": true,
"plugins": [
{

File diff suppressed because one or more lines are too long

687
uv.lock generated

File diff suppressed because it is too large Load Diff