Merge remote-tracking branch 'upstream/litellm_internal_staging' into codex/skills-containers-tenant-guard

# Conflicts: # litellm/proxy/auth/auth_utils.py
2026-05-05 01:41:25 +00:00 · 2026-05-05 01:41:25 +00:00 · 3dcb6bd3f9
commit 3dcb6bd3f9
parent 4699b3dc81 281296f9cf
80 changed files with 6747 additions and 1183 deletions
--- a/cookbook/litellm-ollama-docker-image/requirements.txt
+++ b/cookbook/litellm-ollama-docker-image/requirements.txt
@ -1 +1 @@
-litellm==1.83.5
+litellm==1.83.14
--- a/enterprise/enterprise_hooks/banned_keywords.py
+++ b/enterprise/enterprise_hooks/banned_keywords.py
@ -11,6 +11,10 @@ from typing import Literal
 import litellm
 from litellm.caching.caching import DualCache
 from litellm.proxy._types import UserAPIKeyAuth
+from litellm.proxy.guardrails._content_utils import (
+    is_text_content_call_type,
+    iter_message_text,
+)
 from litellm.integrations.custom_logger import CustomLogger
 from litellm._logging import verbose_proxy_logger
 from fastapi import HTTPException
@ -73,10 +77,9 @@ class _ENTERPRISE_BannedKeywords(CustomLogger):
            - check if user id part of blocked list
            """
            self.print_verbose("Inside Banned Keyword List Pre-Call Hook")
-            if call_type == "completion" and "messages" in data:
-                for m in data["messages"]:
-                    if "content" in m and isinstance(m["content"], str):
-                        self.test_violation(test_str=m["content"])
+            if is_text_content_call_type(call_type):
+                for text in iter_message_text(data):
+                    self.test_violation(test_str=text)

        except HTTPException as e:
            raise e
@ -93,11 +96,16 @@ class _ENTERPRISE_BannedKeywords(CustomLogger):
        user_api_key_dict: UserAPIKeyAuth,
        response,
    ):
-        if isinstance(response, litellm.ModelResponse) and isinstance(
-            response.choices[0], litellm.utils.Choices
-        ):
-            for word in self.banned_keywords_list:
-                self.test_violation(test_str=response.choices[0].message.content or "")
+        if not isinstance(response, litellm.ModelResponse):
+            return
+
+        for choice in response.choices:
+            if not isinstance(choice, litellm.utils.Choices):
+                continue
+            message = getattr(choice, "message", None)
+            content = getattr(message, "content", None)
+            if isinstance(content, str):
+                self.test_violation(test_str=content)

    async def async_post_call_streaming_hook(
        self,
--- a/enterprise/enterprise_hooks/google_text_moderation.py
+++ b/enterprise/enterprise_hooks/google_text_moderation.py
@ -12,6 +12,7 @@ import litellm
 from litellm._logging import verbose_proxy_logger
 from litellm.integrations.custom_logger import CustomLogger
 from litellm.proxy._types import UserAPIKeyAuth
+from litellm.proxy.guardrails._content_utils import iter_message_text
 from litellm.types.utils import CallTypesLiteral


@ -94,11 +95,9 @@ class _ENTERPRISE_GoogleTextModeration(CustomLogger):
        - Calls Google's Text Moderation API
        - Rejects request if it fails safety check
        """
-        if "messages" in data and isinstance(data["messages"], list):
-            text = ""
-            for m in data["messages"]:  # assume messages is a list
-                if "content" in m and isinstance(m["content"], str):
-                    text += m["content"]
+        # Covers multimodal list content + Responses-API input.
+        text = "".join(iter_message_text(data))
+        if text:
            document = self.language_document(content=text, type_=self.document_type)

            request = self.moderate_text_request(
--- a/enterprise/enterprise_hooks/openai_moderation.py
+++ b/enterprise/enterprise_hooks/openai_moderation.py
@ -19,6 +19,7 @@ import litellm
 from litellm._logging import verbose_proxy_logger
 from litellm.integrations.custom_logger import CustomLogger
 from litellm.proxy._types import UserAPIKeyAuth
+from litellm.proxy.guardrails._content_utils import iter_message_text
 from litellm.types.utils import CallTypesLiteral


@ -37,11 +38,8 @@ class _ENTERPRISE_OpenAI_Moderation(CustomLogger):
        user_api_key_dict: UserAPIKeyAuth,
        call_type: CallTypesLiteral,
    ):
-        text = ""
-        if "messages" in data and isinstance(data["messages"], list):
-            for m in data["messages"]:  # assume messages is a list
-                if "content" in m and isinstance(m["content"], str):
-                    text += m["content"]
+        # Covers multimodal list content + Responses-API input.
+        text = "".join(iter_message_text(data))

        from litellm.proxy.proxy_server import llm_router

--- a/enterprise/litellm_enterprise/enterprise_callbacks/secret_detection.py
+++ b/enterprise/litellm_enterprise/enterprise_callbacks/secret_detection.py
@ -18,6 +18,7 @@ from litellm._logging import verbose_proxy_logger
 from litellm.caching.caching import DualCache
 from litellm.integrations.custom_guardrail import CustomGuardrail
 from litellm.proxy._types import UserAPIKeyAuth
+from litellm.proxy.guardrails._content_utils import walk_user_text

 GUARDRAIL_NAME = "hide_secrets"

@ -473,23 +474,19 @@ class _ENTERPRISE_SecretDetection(CustomGuardrail):
        if await self.should_run_check(user_api_key_dict) is False:
            return

-        if "messages" in data and isinstance(data["messages"], list):
-            for message in data["messages"]:
-                if "content" in message and isinstance(message["content"], str):
-                    detected_secrets = self.scan_message_for_secrets(message["content"])
+        # Covers multimodal list content + Responses-API input.
+        def _redact_message_text(text: str) -> str:
+            detected_secrets = self.scan_message_for_secrets(text)
+            for secret in detected_secrets:
+                text = text.replace(secret["value"], "[REDACTED]")
+            if detected_secrets:
+                secret_types = [secret["type"] for secret in detected_secrets]
+                verbose_proxy_logger.warning(
+                    f"Detected and redacted secrets in message: {secret_types}"
+                )
+            return text

-                    for secret in detected_secrets:
-                        message["content"] = message["content"].replace(
-                            secret["value"], "[REDACTED]"
-                        )
-
-                    if len(detected_secrets) > 0:
-                        secret_types = [secret["type"] for secret in detected_secrets]
-                        verbose_proxy_logger.warning(
-                            f"Detected and redacted secrets in message: {secret_types}"
-                        )
-                    else:
-                        verbose_proxy_logger.debug("No secrets detected on input.")
+        walk_user_text(data, _redact_message_text)

        if "prompt" in data:
            if isinstance(data["prompt"], str):
@ -504,11 +501,15 @@ class _ENTERPRISE_SecretDetection(CustomGuardrail):
                        f"Detected and redacted secrets in prompt: {secret_types}"
                    )
            elif isinstance(data["prompt"], list):
-                for item in data["prompt"]:
+                # Index back into the list — assigning to ``item`` would only
+                # rebind the loop variable and leave ``data["prompt"]``
+                # carrying the unredacted secret.
+                for idx, item in enumerate(data["prompt"]):
                    if isinstance(item, str):
                        detected_secrets = self.scan_message_for_secrets(item)
                        for secret in detected_secrets:
                            item = item.replace(secret["value"], "[REDACTED]")
+                        data["prompt"][idx] = item
                        if len(detected_secrets) > 0:
                            secret_types = [
                                secret["type"] for secret in detected_secrets
@ -517,31 +518,6 @@ class _ENTERPRISE_SecretDetection(CustomGuardrail):
                                f"Detected and redacted secrets in prompt: {secret_types}"
                            )

-        if "input" in data:
-            if isinstance(data["input"], str):
-                detected_secrets = self.scan_message_for_secrets(data["input"])
-                for secret in detected_secrets:
-                    data["input"] = data["input"].replace(secret["value"], "[REDACTED]")
-                if len(detected_secrets) > 0:
-                    secret_types = [secret["type"] for secret in detected_secrets]
-                    verbose_proxy_logger.warning(
-                        f"Detected and redacted secrets in input: {secret_types}"
-                    )
-            elif isinstance(data["input"], list):
-                _input_in_request = data["input"]
-                for idx, item in enumerate(_input_in_request):
-                    if isinstance(item, str):
-                        detected_secrets = self.scan_message_for_secrets(item)
-                        for secret in detected_secrets:
-                            _input_in_request[idx] = item.replace(
-                                secret["value"], "[REDACTED]"
-                            )
-                        if len(detected_secrets) > 0:
-                            secret_types = [
-                                secret["type"] for secret in detected_secrets
-                            ]
-                            verbose_proxy_logger.warning(
-                                f"Detected and redacted secrets in input: {secret_types}"
-                            )
-                verbose_proxy_logger.debug("Data after redacting input %s", data)
+        # ``data["input"]`` (Responses API and embeddings/moderation) is
+        # already covered by ``walk_user_text`` above.
        return
--- a/enterprise/pyproject.toml
+++ b/enterprise/pyproject.toml
@ -16,7 +16,7 @@ Repository = "https://github.com/BerriAI/litellm"
 Documentation = "https://docs.litellm.ai"

 [build-system]
-requires = ["uv_build==0.10.7"]
+requires = ["uv_build==0.11.8"]
 build-backend = "uv_build"

 [tool.uv]
--- a/litellm-js/proxy/package-lock.json
+++ b/litellm-js/proxy/package-lock.json
--- a/litellm-js/proxy/package.json
+++ b/litellm-js/proxy/package.json
@ -4,11 +4,11 @@
    "deploy": "wrangler deploy --minify src/index.ts"
  },
  "dependencies": {
-    "hono": "4.12.12",
+    "hono": "4.12.16",
    "openai": "4.29.2"
  },
  "devDependencies": {
-    "@cloudflare/workers-types": "4.20240208.0",
-    "wrangler": "3.32.0"
+    "@cloudflare/workers-types": "4.20260501.1",
+    "wrangler": "4.87.0"
  }
 }
--- a/litellm-js/spend-logs/package-lock.json
+++ b/litellm-js/spend-logs/package-lock.json
@ -6,7 +6,7 @@
    "": {
      "dependencies": {
        "@hono/node-server": "1.19.13",
-        "hono": "4.12.12"
+        "hono": "4.12.16"
      },
      "devDependencies": {
        "@types/node": "20.19.25",
@ -548,9 +548,9 @@
      }
    },
    "node_modules/hono": {
-      "version": "4.12.12",
-      "resolved": "https://registry.npmjs.org/hono/-/hono-4.12.12.tgz",
-      "integrity": "sha512-p1JfQMKaceuCbpJKAPKVqyqviZdS0eUxH9v82oWo1kb9xjQ5wA6iP3FNVAPDFlz5/p7d45lO+BpSk1tuSZMF4Q==",
+      "version": "4.12.16",
+      "resolved": "https://registry.npmjs.org/hono/-/hono-4.12.16.tgz",
+      "integrity": "sha512-jN0ZewiNAWSe5khM3EyCmBb250+b40wWbwNILNfEvq84VREWwOIkuUsFONk/3i3nqkz7Oe1PcpM2mwQEK2L9Kg==",
      "license": "MIT",
      "engines": {
        "node": ">=16.9.0"
--- a/litellm-js/spend-logs/package.json
+++ b/litellm-js/spend-logs/package.json
@ -4,7 +4,7 @@
  },
  "dependencies": {
    "@hono/node-server": "1.19.13",
-    "hono": "4.12.12"
+    "hono": "4.12.16"
  },
  "devDependencies": {
    "@types/node": "20.19.25",
--- a/litellm-proxy-extras/pyproject.toml
+++ b/litellm-proxy-extras/pyproject.toml
@ -16,7 +16,7 @@ Repository = "https://github.com/BerriAI/litellm"
 Documentation = "https://docs.litellm.ai"

 [build-system]
-requires = ["uv_build==0.10.7"]
+requires = ["uv_build==0.11.8"]
 build-backend = "uv_build"

 [tool.uv]
--- a/litellm/__init__.py
+++ b/litellm/__init__.py
@ -166,7 +166,7 @@ langfuse_default_tags: Optional[List[str]] = None
 langsmith_batch_size: Optional[int] = None
 prometheus_initialize_budget_metrics: Optional[bool] = False
 prometheus_latency_buckets: Optional[List[float]] = None
-require_auth_for_metrics_endpoint: Optional[bool] = False
+require_auth_for_metrics_endpoint: Optional[bool] = True
 argilla_batch_size: Optional[int] = None
 datadog_use_v1: Optional[bool] = False  # if you want to use v1 datadog logged payload.
 gcs_pub_sub_use_v1: Optional[bool] = (
--- a/litellm/integrations/arize/arize_phoenix_prompt_manager.py
+++ b/litellm/integrations/arize/arize_phoenix_prompt_manager.py
@ -5,7 +5,8 @@ Fetches prompt versions from Arize Phoenix and provides workspace-based access c

 from typing import Any, Dict, List, Optional, Tuple, Union

-from jinja2 import DictLoader, Environment, select_autoescape
+from jinja2 import DictLoader, select_autoescape
+from jinja2.sandbox import ImmutableSandboxedEnvironment

 from litellm.integrations.custom_prompt_management import CustomPromptManagement
 from litellm.integrations.prompt_management_base import (
@ -74,7 +75,13 @@ class ArizePhoenixTemplateManager:
            api_key=self.api_key, api_base=self.api_base
        )

-        self.jinja_env = Environment(
+        # Templates fetched from Arize Phoenix come from external workspace
+        # users; in a plain `Environment()` a malicious template could reach
+        # `__class__.__init__.__globals__` and execute arbitrary code on the
+        # proxy host. The sandbox blocks that attribute traversal while
+        # leaving normal `{{ var }}` substitution intact. Matches the
+        # dotprompt manager's hardening.
+        self.jinja_env = ImmutableSandboxedEnvironment(
            loader=DictLoader({}),
            autoescape=select_autoescape(["html", "xml"]),
            # Use Mustache/Handlebars-style delimiters
--- a/litellm/integrations/bitbucket/bitbucket_prompt_manager.py
+++ b/litellm/integrations/bitbucket/bitbucket_prompt_manager.py
@ -5,7 +5,8 @@ Fetches .prompt files from BitBucket repositories and provides team-based access

 from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Union

-from jinja2 import DictLoader, Environment, select_autoescape
+from jinja2 import DictLoader, select_autoescape
+from jinja2.sandbox import ImmutableSandboxedEnvironment

 from litellm.integrations.custom_prompt_management import CustomPromptManagement

@ -74,7 +75,13 @@ class BitBucketTemplateManager:
        self.prompts: Dict[str, BitBucketPromptTemplate] = {}
        self.bitbucket_client = BitBucketClient(bitbucket_config)

-        self.jinja_env = Environment(
+        # Templates fetched from a BitBucket repo are not trustworthy:
+        # anyone with repo write access can ship Jinja syntax that, in a
+        # plain `Environment()`, would reach `__class__.__init__.__globals__`
+        # and pivot into RCE on the proxy host. The sandbox blocks that
+        # attribute traversal while leaving normal `{{ var }}` substitution
+        # intact. Matches the dotprompt manager's hardening.
+        self.jinja_env = ImmutableSandboxedEnvironment(
            loader=DictLoader({}),
            autoescape=select_autoescape(["html", "xml"]),
            # Use Handlebars-style delimiters to match Dotprompt spec
--- a/litellm/integrations/gitlab/gitlab_prompt_manager.py
+++ b/litellm/integrations/gitlab/gitlab_prompt_manager.py
@ -4,7 +4,8 @@ GitLab prompt manager with configurable prompts folder.

 from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Union

-from jinja2 import DictLoader, Environment, select_autoescape
+from jinja2 import DictLoader, select_autoescape
+from jinja2.sandbox import ImmutableSandboxedEnvironment

 from litellm.integrations.custom_prompt_management import CustomPromptManagement

@ -90,7 +91,13 @@ class GitLabTemplateManager:
            or ""
        ).strip("/")

-        self.jinja_env = Environment(
+        # Templates fetched from a GitLab repo are not trustworthy:
+        # anyone with repo write access can ship Jinja syntax that, in a
+        # plain `Environment()`, would reach `__class__.__init__.__globals__`
+        # and pivot into RCE on the proxy host. The sandbox blocks that
+        # attribute traversal while leaving normal `{{ var }}` substitution
+        # intact. Matches the dotprompt manager's hardening.
+        self.jinja_env = ImmutableSandboxedEnvironment(
            loader=DictLoader({}),
            autoescape=select_autoescape(["html", "xml"]),
            variable_start_string="{{",
--- a/litellm/proxy/_types.py
+++ b/litellm/proxy/_types.py
@ -617,13 +617,12 @@ class LiteLLMRoutes(enum.Enum):
            "/",
            "/health/liveliness",
            "/health/liveness",
-            "/health/readiness",
            "/test",
            "/config/yaml",
-            "/metrics",
            "/litellm/.well-known/litellm-ui-config",
            "/.well-known/litellm-ui-config",
            "/public/model_hub",
+            "/public/model_hub/info",
            "/public/agent_hub",
            "/public/mcp_hub",
            "/public/skill_hub",
--- a/litellm/proxy/auth/auth_utils.py
+++ b/litellm/proxy/auth/auth_utils.py
@ -216,20 +216,15 @@ _EXTRA_BANNED_OBSERVABILITY_PARAMS: FrozenSet[str] = frozenset(
 def _build_banned_observability_params() -> FrozenSet[str]:
    """Derive the observability ban list from the canonical allowlist.

-    ``_supported_callback_params`` in
+    ``_supported_callback_params`` and ``_request_blocked_callback_params`` in
    ``litellm/litellm_core_utils/initialize_dynamic_callback_params.py`` is
-    the single place that enumerates every observability field
-    integrations resolve from kwargs/metadata. Subtract the small set of
-    informational fields (``_SAFE_CLIENT_CALLBACK_PARAMS``) and union with
-    the extras the canonical allowlist hasn't caught up to yet. New
-    integrations added to the canonical allowlist are banned by default,
-    which is the safe failure mode.
-
-    ``_request_blocked_callback_params`` (e.g. ``gcs_bucket_name``,
-    ``gcs_path_service_account``) is the GCS-logging-specific deny list
-    that lives alongside the allowlist; fold it in here so a single
-    declaration of "this field must not be caller-supplied" covers both
-    the request-body bouncer and the dynamic callback initializer.
+    the single place that enumerates every observability field integrations
+    resolve from kwargs/metadata, plus fields that integration code explicitly
+    blocks from request-supplied callback params. Subtract the small set of
+    informational fields (``_SAFE_CLIENT_CALLBACK_PARAMS``) and union with the
+    extras the canonical allowlist hasn't caught up to yet. New integrations
+    added to the canonical allowlist are banned by default, which is the safe
+    failure mode.
    """
    from litellm.litellm_core_utils.initialize_dynamic_callback_params import (
        _request_blocked_callback_params,
@ -238,8 +233,8 @@ def _build_banned_observability_params() -> FrozenSet[str]:

    return (
        (frozenset(_supported_callback_params) - _SAFE_CLIENT_CALLBACK_PARAMS)
-        | _EXTRA_BANNED_OBSERVABILITY_PARAMS
        | frozenset(_request_blocked_callback_params)
+        | _EXTRA_BANNED_OBSERVABILITY_PARAMS
    )


--- a/litellm/proxy/auth/user_api_key_auth.py
+++ b/litellm/proxy/auth/user_api_key_auth.py
@ -87,6 +87,23 @@ except ImportError as e:

 user_api_key_service_logger_obj = ServiceLogging()  # used for tracking latency on OTEL

+
+def _normalize_public_auth_route(route: str) -> str:
+    """Return ``route`` without trailing slashes, leaving the root ``"/"`` as is.
+
+    Routes that do not end in ``"/"`` are returned unchanged.
+    """
+    # Guard clause: the root route and already-normalised routes pass through.
+    if route == "/" or not route.endswith("/"):
+        return route
+    return route.rstrip("/")
+
+
+def _route_requires_auth_despite_public(
+    route: str, general_settings: Optional[dict]
+) -> bool:
+    """Return True when ``route`` must be authenticated even if listed as public.
+
+    Only ``/metrics`` (after trailing-slash normalisation) is affected: it
+    requires auth unless ``litellm.require_auth_for_metrics_endpoint`` is
+    explicitly ``False`` — ``None`` therefore still means "auth required".
+    ``general_settings`` is accepted but not read here.
+    """
+    if _normalize_public_auth_route(route) != "/metrics":
+        return False
+    return litellm.require_auth_for_metrics_endpoint is not False
+
+
 custom_litellm_key_header = APIKeyHeader(
    name=SpecialHeaders.custom_litellm_api_key.value,
    auto_error=False,
@ -714,7 +731,9 @@ async def _user_api_key_auth_builder(  # noqa: PLR0915
        """

        ######## Route Checks Before Reading DB / Cache for "token" ################
-        if (
+        if not _route_requires_auth_despite_public(
+            route=route, general_settings=general_settings
+        ) and (
            route in LiteLLMRoutes.public_routes.value  # type: ignore
            or route_in_additonal_public_routes(current_route=route)
        ):
@ -1698,7 +1717,7 @@ async def _run_centralized_common_checks(
        user_custom_auth,
    )

-    # Public routes (e.g. /health/readiness, /metrics) are exempt from
+    # Public routes (e.g. /health/liveness) are exempt from
    # auth in the builder — the wrapper must not retroactively apply
    # authz on top, or k8s readiness probes and other unauthenticated
    # callers get 401.
--- a/litellm/proxy/common_utils/debug_utils.py
+++ b/litellm/proxy/common_utils/debug_utils.py
@ -50,7 +50,10 @@ def configure_gc_thresholds():
 configure_gc_thresholds()


-@router.get("/debug/asyncio-tasks")
+@router.get(
+    "/debug/asyncio-tasks",
+    dependencies=[Depends(user_api_key_auth)],
+)
 async def get_active_tasks_stats():
    """
    Returns:
@ -103,7 +106,11 @@ if os.environ.get("LITELLM_PROFILE", "false").lower() == "true":

    tracemalloc.start(10)

-    @router.get("/memory-usage", include_in_schema=False)
+    @router.get(
+        "/memory-usage",
+        dependencies=[Depends(user_api_key_auth)],
+        include_in_schema=False,
+    )
    async def memory_usage():
        # Take a snapshot of the current memory usage
        snapshot = tracemalloc.take_snapshot()
@ -711,7 +718,11 @@ async def configure_gc_thresholds_endpoint(
    }


-@router.get("/otel-spans", include_in_schema=False)
+@router.get(
+    "/otel-spans",
+    dependencies=[Depends(user_api_key_auth)],
+    include_in_schema=False,
+)
 async def get_otel_spans():
    from litellm.proxy.proxy_server import open_telemetry_logger

--- a/litellm/proxy/guardrails/_content_utils.py
+++ b/litellm/proxy/guardrails/_content_utils.py
@ -0,0 +1,236 @@
+"""
+Shared helpers for guardrail hooks: extract text from a request body
+regardless of whether it uses Chat Completions ``messages``, Responses-API
+``input``, or multimodal list-format ``content`` parts.
+
+Hooks that only check ``data["messages"]`` for string content silently
+skip the other shapes — these helpers normalise that so every hook sees
+every text fragment.
+"""
+
+from typing import Any, Callable, Dict, FrozenSet, Iterator, List
+
+
+# Call types whose body carries free-form chat / prompt text that
+# text-content guardrails (banned keywords, content moderation, secret
+# detection, …) should inspect. The proxy ingress passes ``route_type``
+# straight through as ``call_type``, so the literal values here are
+# what the guardrail dispatcher actually receives:
+#
+#   /v1/chat/completions   -> "acompletion"
+#   /v1/responses          -> "aresponses"
+#
+# ``"completion"`` is included for SDK / internal callers that invoke
+# ``pre_call_hook`` directly with the sync name. Embedding, moderation,
+# audio, and transcription endpoints are deliberately excluded — text
+# guardrails on those paths are a separate scope.
+TEXT_CONTENT_CALL_TYPES: FrozenSet[str] = frozenset(
+    ("completion", "acompletion", "aresponses")
+)
+
+
+def is_text_content_call_type(call_type: str) -> bool:
+    """Report whether ``call_type`` is listed in ``TEXT_CONTENT_CALL_TYPES``,
+    i.e. whether text guardrails should inspect the request body."""
+    is_text_call = call_type in TEXT_CONTENT_CALL_TYPES
+    return is_text_call
+
+
+def _iter_text_parts_in_content(content: Any) -> Iterator[str]:
+    """Yield the non-empty text fragments of a ``message.content`` value.
+
+    ``content`` may be a plain string or a list of parts. Inside a list,
+    bare strings and dict parts typed ``"text"`` (Chat Completions),
+    ``"input_text"`` or ``"output_text"`` (Responses API) are yielded.
+    Every other part (images, audio, …) and any ``content`` that is
+    neither a string nor a list is skipped.
+    """
+    # The Responses API tags text parts ``input_text`` / ``output_text``
+    # rather than ``text``; matching only ``text`` would let that content
+    # slip past every guardrail built on this helper. A tuple (not a set)
+    # keeps the membership test safe for unhashable ``type`` values.
+    text_part_types = ("text", "input_text", "output_text")
+
+    if isinstance(content, str):
+        if content:
+            yield content
+        return
+    if not isinstance(content, list):
+        return
+
+    for part in content:
+        if isinstance(part, str):
+            # A bare string in a content/input list is itself a text
+            # fragment (Responses-API mixed-list shape).
+            if part:
+                yield part
+        elif isinstance(part, dict) and part.get("type") in text_part_types:
+            text = part.get("text")
+            if isinstance(text, str) and text:
+                yield text
+
+
+def _coerce_input_to_messages(input_value: Any) -> List[Dict[str, Any]]:
+    """Coerce a Responses-API ``data["input"]`` value into chat-style messages.
+
+    * ``str`` -> one synthetic user message holding the string.
+    * ``list`` with no role-bearing dicts (content parts, bare strings, or
+      empty) -> one synthetic user message whose ``content`` is the list.
+    * ``list`` containing role-bearing dicts -> those dicts are returned
+      as messages in order; each run of role-less items between them is
+      wrapped in its own synthetic user message.
+    * anything else -> ``[]``.
+    """
+    if isinstance(input_value, str):
+        return [{"role": "user", "content": input_value}]
+    if not isinstance(input_value, list):
+        return []
+
+    def _is_message(item: Any) -> bool:
+        return isinstance(item, dict) and "role" in item
+
+    if not any(_is_message(item) for item in input_value):
+        # Pure content-part / string list: the content iterator handles
+        # each element type uniformly.
+        return [{"role": "user", "content": input_value}]
+
+    # Responses-API conversations routinely mix role messages with
+    # role-less items (e.g. ``function_call_output``). Requiring *every*
+    # item to carry a role would demote the whole list to "content parts"
+    # and leave the role messages' text uninspected, so dispatch per item.
+    messages: List[Dict[str, Any]] = []
+    loose_parts: List[Any] = []
+    for item in input_value:
+        if _is_message(item):
+            if loose_parts:
+                messages.append({"role": "user", "content": loose_parts})
+                loose_parts = []
+            messages.append(item)
+        else:
+            loose_parts.append(item)
+    if loose_parts:
+        messages.append({"role": "user", "content": loose_parts})
+    return messages
+
+
+def _iter_inspection_messages(data: Dict[str, Any]) -> Iterator[Dict[str, Any]]:
+    """Yield the entries of ``data["messages"]`` (when it is a list), then the
+    messages obtained by coercing ``data["input"]``."""
+    chat_messages = data.get("messages")
+    if isinstance(chat_messages, list):
+        for entry in chat_messages:
+            yield entry
+    # ``input`` is only read once the chat messages have been exhausted.
+    for entry in _coerce_input_to_messages(data.get("input")):
+        yield entry
+
+
+def iter_message_text(data: Dict[str, Any]) -> Iterator[str]:
+    """Yield each text fragment found in ``messages`` and ``input``.
+
+    No role filtering is applied: user, assistant, system and any other
+    role are all walked, so guardrails see the whole conversation.
+    Entries that are not dicts are ignored.
+    """
+    for entry in _iter_inspection_messages(data):
+        if isinstance(entry, dict):
+            for fragment in _iter_text_parts_in_content(entry.get("content")):
+                yield fragment
+
+
+def walk_user_text(data: Dict[str, Any], visit: Callable[[str], str]) -> int:
+    """Rewrite every text fragment in place via ``visit``.
+
+    Mutates ``data["messages"]`` and ``data["input"]``. Despite the name,
+    no role filtering is applied — every message's content is rewritten.
+    ``input`` is handled as a string, as a list of role messages, as a
+    list of content parts / bare strings, or as a mix of those.
+
+    Args:
+        data: Request body; mutated in place.
+        visit: Called once per non-empty text fragment; its return value
+            replaces the fragment.
+
+    Returns:
+        The number of fragments passed to ``visit``, so callers can
+        short-circuit when nothing was inspected.
+    """
+    # Chat Completions tags text parts ``text``; the Responses API uses
+    # ``input_text`` / ``output_text``. All three must be rewritten or
+    # Responses-API content reaches the LLM unredacted.
+    text_part_types = ("text", "input_text", "output_text")
+    visited = 0
+
+    def _rewrite_part(part: Any) -> Any:
+        """Return the rewritten form of one string / content-part dict."""
+        nonlocal visited
+        if isinstance(part, str):
+            if not part:
+                return part
+            visited += 1
+            return visit(part)
+        if (
+            isinstance(part, dict)
+            and part.get("type") in text_part_types
+            and isinstance(part.get("text"), str)
+            and part["text"]
+        ):
+            visited += 1
+            # Build a new dict rather than mutating the caller's part.
+            return {**part, "text": visit(part["text"])}
+        # Non-text parts (images, audio, …) pass through untouched.
+        return part
+
+    def _rewrite_content(content: Any) -> Any:
+        """Rewrite a ``content`` value (string or list of parts)."""
+        if isinstance(content, str):
+            return _rewrite_part(content)
+        if isinstance(content, list):
+            return [_rewrite_part(part) for part in content]
+        return content
+
+    messages = data.get("messages")
+    if isinstance(messages, list):
+        for message in messages:
+            if isinstance(message, dict) and "content" in message:
+                message["content"] = _rewrite_content(message["content"])
+
+    input_value = data.get("input")
+    if isinstance(input_value, str):
+        data["input"] = _rewrite_part(input_value)
+    elif isinstance(input_value, list):
+        # Dispatch per item: Responses-API input lists may interleave role
+        # messages with role-less items (e.g. ``function_call_output``),
+        # and an all-or-nothing role check would skip the role messages.
+        for idx, item in enumerate(input_value):
+            if isinstance(item, dict) and "role" in item and "content" in item:
+                item["content"] = _rewrite_content(item["content"])
+            else:
+                input_value[idx] = _rewrite_part(item)
+
+    return visited
+
+
+def apply_redacted_messages_back(
+    data: Dict[str, Any], redacted_messages: List[Dict[str, Any]]
+) -> None:
+    """Write guardrail-redacted messages back onto the request body.
+
+    Intended for mask/anonymize hooks that sent a synthesised messages
+    list (see :func:`build_inspection_messages`) to a third-party
+    guardrail and received a redacted copy.
+
+    * If ``data`` has a ``"messages"`` key it is replaced with
+      ``redacted_messages``.
+    * If ``data["input"]`` is currently a string it is replaced with the
+      newline-joined text of every dict in ``redacted_messages``, so the
+      Responses-API field does not keep the unredacted text.
+
+    A non-string ``input`` (e.g. a list) is left untouched by this helper.
+    """
+    if "messages" in data:
+        data["messages"] = redacted_messages
+
+    if not isinstance(data.get("input"), str):
+        return
+
+    data["input"] = "\n".join(
+        fragment
+        for msg in redacted_messages
+        if isinstance(msg, dict)
+        for fragment in _iter_text_parts_in_content(msg.get("content"))
+    )
+
+
+def has_non_string_content(data: Dict[str, Any]) -> bool:
+    """Report whether any inspected content is something other than a string.
+
+    True when a dict in ``data["messages"]`` has a ``content`` that is
+    neither ``None`` nor a ``str``, or when ``data["input"]`` is present
+    and not a ``str``. Hooks whose mask/redact path works on string
+    offsets use this to fall back to block-on-detect, so image/audio
+    parts are not silently stripped by in-place masking.
+    """
+    messages = data.get("messages")
+    if isinstance(messages, list):
+        for message in messages:
+            if not isinstance(message, dict):
+                continue
+            content = message.get("content")
+            if content is not None and not isinstance(content, str):
+                return True
+
+    input_value = data.get("input")
+    return input_value is not None and not isinstance(input_value, str)
+
+
+def build_inspection_messages(data: Dict[str, Any]) -> List[Dict[str, str]]:
+    """Build a plain-text, chat-style messages list to send to a guardrail API.
+
+    Walks ``messages`` and the coerced ``input``; for each dict entry the
+    text fragments of its ``content`` are joined with newlines. Entries
+    that yield no text are omitted, and a missing or falsy ``role``
+    becomes ``"user"``.
+
+    Hooks that POST ``{"messages": [...]}`` to an external service should
+    use this rather than ``data.get("messages", [])`` so Responses-API
+    input and multimodal content are included.
+    """
+    result: List[Dict[str, str]] = []
+    for entry in _iter_inspection_messages(data):
+        if not isinstance(entry, dict):
+            continue
+        joined = "\n".join(_iter_text_parts_in_content(entry.get("content")))
+        if joined:
+            result.append({"role": entry.get("role") or "user", "content": joined})
+    return result
--- a/litellm/proxy/guardrails/guardrail_hooks/aim/aim.py
+++ b/litellm/proxy/guardrails/guardrail_hooks/aim/aim.py
@ -22,6 +22,11 @@ from litellm.llms.custom_httpx.http_handler import (
    httpxSpecialProvider,
 )
 from litellm.proxy._types import UserAPIKeyAuth
+from litellm.proxy.guardrails._content_utils import (
+    apply_redacted_messages_back,
+    build_inspection_messages,
+    has_non_string_content,
+)
 from litellm.types.utils import (
    CallTypesLiteral,
    Choices,
@ -101,10 +106,11 @@ class AimGuardrail(CustomGuardrail):
            user_email=user_email,
            litellm_call_id=call_id,
        )
+        # Covers multimodal list content + Responses-API input.
        response = await self.async_handler.post(
            f"{self.api_base}/fw/v1/analyze",
            headers=headers,
-            json={"messages": data.get("messages", [])},
+            json={"messages": build_inspection_messages(data)},
        )
        response.raise_for_status()
        res = response.json()
@ -137,13 +143,31 @@ class AimGuardrail(CustomGuardrail):
        redacted_chat = res.get("redacted_chat")
        if not redacted_chat:
            return data
-        data["messages"] = [
+        # Aim returns text-only redacted messages. Overwriting
+        # ``data["messages"]`` with that would silently strip image/audio
+        # parts from a multimodal request — degrade to block so the
+        # multimodal payload is never silently rewritten.
+        if has_non_string_content(data):
+            raise HTTPException(
+                status_code=400,
+                detail=(
+                    "Aim: anonymize action requested for multimodal input "
+                    "but mask-in-place would drop non-text parts. Send the "
+                    "request with plain string content to use anonymize, "
+                    "or rely on block-mode policies."
+                ),
+            )
+        redacted_messages = [
            {
                "role": message["role"],
                "content": message["content"],
            }
            for message in redacted_chat["all_redacted_messages"]
        ]
+        # Write back to ``messages`` AND ``input``. The Responses-API
+        # backend reads ``input``; writing only to ``messages`` would let
+        # unredacted text reach the LLM for ``/v1/responses`` calls.
+        apply_redacted_messages_back(data, redacted_messages)
        return data

    async def call_aim_guardrail_on_output(
@ -162,7 +186,7 @@ class AimGuardrail(CustomGuardrail):
                litellm_call_id=call_id,
            ),
            json={
-                "messages": request_data.get("messages", [])
+                "messages": build_inspection_messages(request_data)
                + [{"role": "assistant", "content": output}]
            },
        )
@ -233,15 +257,33 @@ class AimGuardrail(CustomGuardrail):
        user_api_key_dict: UserAPIKeyAuth,
        response: Union[Any, ModelResponse, EmbeddingResponse, ImageResponse],
    ) -> Any:
-        if (
-            isinstance(response, ModelResponse)
-            and response.choices
-            and isinstance(response.choices[0], Choices)
-        ):
-            content = response.choices[0].message.content or ""
-            aim_output_guardrail_result = await self.call_aim_guardrail_on_output(
-                data, content, hook="output", key_alias=user_api_key_dict.key_alias
-            )
+        if not (isinstance(response, ModelResponse) and response.choices):
+            return response
+        # Inspect every choice — when ``n>1`` the additional completions
+        # used to bypass Aim entirely because the hook only inspected
+        # ``choices[0]``. Run inspections concurrently so multi-completion
+        # responses don't pay an n× latency penalty.
+        choices_to_inspect = [c for c in response.choices if isinstance(c, Choices)]
+        if not choices_to_inspect:
+            return response
+        # ``return_exceptions=True`` lets every inspection finish even if
+        # one fails — without it, the first exception would propagate and
+        # leave the remaining tasks running in the background.
+        results = await asyncio.gather(
+            *(
+                self.call_aim_guardrail_on_output(
+                    data,
+                    choice.message.content or "",
+                    hook="output",
+                    key_alias=user_api_key_dict.key_alias,
+                )
+                for choice in choices_to_inspect
+            ),
+            return_exceptions=True,
+        )
+        for choice, aim_output_guardrail_result in zip(choices_to_inspect, results):
+            if isinstance(aim_output_guardrail_result, BaseException):
+                raise aim_output_guardrail_result
            if aim_output_guardrail_result and aim_output_guardrail_result.get(
                "detection_message"
            ):
@ -252,7 +294,7 @@ class AimGuardrail(CustomGuardrail):
            if aim_output_guardrail_result and aim_output_guardrail_result.get(
                "redacted_output"
            ):
-                response.choices[0].message.content = aim_output_guardrail_result.get(
+                choice.message.content = aim_output_guardrail_result.get(
                    "redacted_output"
                )
        return response
--- a/litellm/proxy/guardrails/guardrail_hooks/azure/text_moderation.py
+++ b/litellm/proxy/guardrails/guardrail_hooks/azure/text_moderation.py
@ -254,15 +254,16 @@ class AzureContentSafetyTextModerationGuardrail(AzureGuardrailBase, CustomGuardr
    ) -> Any:
        from litellm.types.utils import Choices, ModelResponse

-        if (
-            isinstance(response, ModelResponse)
-            and response.choices
-            and isinstance(response.choices[0], Choices)
-        ):
-            content = response.choices[0].message.content or ""
-            await self.async_make_request(
-                text=content,
-            )
+        if isinstance(response, ModelResponse) and response.choices:
+            for choice in response.choices:
+                if not isinstance(choice, Choices):
+                    continue
+                content = _message_content_to_text(choice.message.content)
+                if not content:
+                    continue
+                await self.async_make_request(
+                    text=content,
+                )
        return response

    async def async_post_call_streaming_hook(
@ -279,3 +280,16 @@ class AzureContentSafetyTextModerationGuardrail(AzureGuardrailBase, CustomGuardr

            error_returned = json.dumps({"error": e.detail})
            return f"data: {error_returned}\n\n"
+
+
+def _message_content_to_text(content: Any) -> str:  # flatten content to one str
+    if isinstance(content, str):  # plain string content is returned unchanged
+        return content
+    if isinstance(content, list):  # list content: keep dict items with a str "text"
+        text_parts = [
+            item.get("text")
+            for item in content
+            if isinstance(item, dict) and isinstance(item.get("text"), str)
+        ]
+        return "\n".join(part for part in text_parts if part)  # drop empty strings
+    return ""  # None or any other type contributes no text
--- a/litellm/proxy/guardrails/guardrail_hooks/ibm_guardrails/ibm_detector.py
+++ b/litellm/proxy/guardrails/guardrail_hooks/ibm_guardrails/ibm_detector.py
@ -20,6 +20,7 @@ from litellm.llms.custom_httpx.http_handler import (
    httpxSpecialProvider,
 )
 from litellm.proxy._types import UserAPIKeyAuth
+from litellm.proxy.guardrails._content_utils import iter_message_text
 from litellm.types.guardrails import GuardrailEventHooks
 from litellm.types.proxy.guardrails.guardrail_hooks.ibm import (
    IBMDetectorDetection,
@ -463,65 +464,53 @@ class IBMGuardrailDetector(CustomGuardrail):
        if self.should_run_guardrail(data=data, event_type=event_type) is not True:
            return data

-        _messages = data.get("messages")
-        if _messages:
-            contents_to_check: List[str] = []
-            for message in _messages:
-                _content = message.get("content")
-                if isinstance(_content, str):
-                    contents_to_check.append(_content)
+        # Covers multimodal list content + Responses-API input.
+        contents_to_check: List[str] = list(iter_message_text(data))
+        if contents_to_check:
+            if self.is_detector_server:
+                # Call detector server with all contents at once
+                result = await self._call_detector_server(
+                    contents=contents_to_check,
+                    request_data=data,
+                    event_type=GuardrailEventHooks.pre_call,
+                )

-            if contents_to_check:
-                if self.is_detector_server:
-                    # Call detector server with all contents at once
-                    result = await self._call_detector_server(
-                        contents=contents_to_check,
+                verbose_proxy_logger.debug(
+                    "IBM Detector Server async_pre_call_hook result: %s", result
+                )
+
+                # Check if any detections were found
+                has_violations = False
+                for message_detections in result:
+                    filtered = self._filter_detections_by_threshold(message_detections)
+                    if filtered:
+                        has_violations = True
+                        break
+
+                if has_violations and self.block_on_detection:
+                    error_message = self._create_error_message_detector_server(result)
+                    raise ValueError(error_message)
+
+            else:
+                # Call orchestrator for each content separately
+                for content in contents_to_check:
+                    orchestrator_result = await self._call_orchestrator(
+                        content=content,
                        request_data=data,
                        event_type=GuardrailEventHooks.pre_call,
                    )

                    verbose_proxy_logger.debug(
-                        "IBM Detector Server async_pre_call_hook result: %s", result
+                        "IBM Orchestrator async_pre_call_hook result: %s",
+                        orchestrator_result,
                    )

-                    # Check if any detections were found
-                    has_violations = False
-                    for message_detections in result:
-                        filtered = self._filter_detections_by_threshold(
-                            message_detections
-                        )
-                        if filtered:
-                            has_violations = True
-                            break
-
-                    if has_violations and self.block_on_detection:
-                        error_message = self._create_error_message_detector_server(
-                            result
-                        )
-                        raise ValueError(error_message)
-
-                else:
-                    # Call orchestrator for each content separately
-                    for content in contents_to_check:
-                        orchestrator_result = await self._call_orchestrator(
-                            content=content,
-                            request_data=data,
-                            event_type=GuardrailEventHooks.pre_call,
-                        )
-
-                        verbose_proxy_logger.debug(
-                            "IBM Orchestrator async_pre_call_hook result: %s",
-                            orchestrator_result,
-                        )
-
-                        filtered = self._filter_detections_by_threshold(
+                    filtered = self._filter_detections_by_threshold(orchestrator_result)
+                    if filtered and self.block_on_detection:
+                        error_message = self._create_error_message_orchestrator(
                            orchestrator_result
                        )
-                        if filtered and self.block_on_detection:
-                            error_message = self._create_error_message_orchestrator(
-                                orchestrator_result
-                            )
-                            raise ValueError(error_message)
+                        raise ValueError(error_message)

        # Add guardrail to applied guardrails header
        add_guardrail_to_applied_guardrails_header(
@ -550,65 +539,53 @@ class IBMGuardrailDetector(CustomGuardrail):
        if self.should_run_guardrail(data=data, event_type=event_type) is not True:
            return

-        _messages = data.get("messages")
-        if _messages:
-            contents_to_check: List[str] = []
-            for message in _messages:
-                _content = message.get("content")
-                if isinstance(_content, str):
-                    contents_to_check.append(_content)
+        # Covers multimodal list content + Responses-API input.
+        contents_to_check: List[str] = list(iter_message_text(data))
+        if contents_to_check:
+            if self.is_detector_server:
+                # Call detector server with all contents at once
+                result = await self._call_detector_server(
+                    contents=contents_to_check,
+                    request_data=data,
+                    event_type=GuardrailEventHooks.during_call,
+                )

-            if contents_to_check:
-                if self.is_detector_server:
-                    # Call detector server with all contents at once
-                    result = await self._call_detector_server(
-                        contents=contents_to_check,
+                verbose_proxy_logger.debug(
+                    "IBM Detector Server async_moderation_hook result: %s", result
+                )
+
+                # Check if any detections were found
+                has_violations = False
+                for message_detections in result:
+                    filtered = self._filter_detections_by_threshold(message_detections)
+                    if filtered:
+                        has_violations = True
+                        break
+
+                if has_violations and self.block_on_detection:
+                    error_message = self._create_error_message_detector_server(result)
+                    raise ValueError(error_message)
+
+            else:
+                # Call orchestrator for each content separately
+                for content in contents_to_check:
+                    orchestrator_result = await self._call_orchestrator(
+                        content=content,
                        request_data=data,
                        event_type=GuardrailEventHooks.during_call,
                    )

                    verbose_proxy_logger.debug(
-                        "IBM Detector Server async_moderation_hook result: %s", result
+                        "IBM Orchestrator async_moderation_hook result: %s",
+                        orchestrator_result,
                    )

-                    # Check if any detections were found
-                    has_violations = False
-                    for message_detections in result:
-                        filtered = self._filter_detections_by_threshold(
-                            message_detections
-                        )
-                        if filtered:
-                            has_violations = True
-                            break
-
-                    if has_violations and self.block_on_detection:
-                        error_message = self._create_error_message_detector_server(
-                            result
-                        )
-                        raise ValueError(error_message)
-
-                else:
-                    # Call orchestrator for each content separately
-                    for content in contents_to_check:
-                        orchestrator_result = await self._call_orchestrator(
-                            content=content,
-                            request_data=data,
-                            event_type=GuardrailEventHooks.during_call,
-                        )
-
-                        verbose_proxy_logger.debug(
-                            "IBM Orchestrator async_moderation_hook result: %s",
-                            orchestrator_result,
-                        )
-
-                        filtered = self._filter_detections_by_threshold(
+                    filtered = self._filter_detections_by_threshold(orchestrator_result)
+                    if filtered and self.block_on_detection:
+                        error_message = self._create_error_message_orchestrator(
                            orchestrator_result
                        )
-                        if filtered and self.block_on_detection:
-                            error_message = self._create_error_message_orchestrator(
-                                orchestrator_result
-                            )
-                            raise ValueError(error_message)
+                        raise ValueError(error_message)

        # Add guardrail to applied guardrails header
        add_guardrail_to_applied_guardrails_header(
--- a/litellm/proxy/guardrails/guardrail_hooks/lakera_ai_v2.py
+++ b/litellm/proxy/guardrails/guardrail_hooks/lakera_ai_v2.py
@ -13,6 +13,11 @@ from litellm.llms.custom_httpx.http_handler import (
    httpxSpecialProvider,
 )
 from litellm.proxy._types import UserAPIKeyAuth
+from litellm.proxy.guardrails._content_utils import (
+    apply_redacted_messages_back,
+    build_inspection_messages,
+    has_non_string_content,
+)
 from litellm.secret_managers.main import get_secret_str
 from litellm.types.guardrails import GuardrailEventHooks
 from litellm.types.llms.openai import AllMessageValues
@ -214,18 +219,26 @@ class LakeraAIGuardrail(CustomGuardrail):
            )
            return data

-        new_messages: Optional[List[AllMessageValues]] = data.get("messages")
-        if new_messages is None:
+        # Covers multimodal list content + Responses-API input.
+        new_messages = build_inspection_messages(data)
+        if not new_messages:
            verbose_proxy_logger.warning(
-                "Lakera AI: not running guardrail. No messages in data"
+                "Lakera AI: not running guardrail. No inspectable text in data"
            )
            return data

+        # Mask-in-place uses offsets returned by Lakera and can only
+        # preserve non-text parts (images, audio, …) when the original
+        # content is a plain string. For multimodal/Responses-API input
+        # we degrade to block-on-detect so we never silently strip image
+        # parts while attempting to redact text.
+        is_multimodal_input = has_non_string_content(data)
+
        #########################################################
        ########## 1. Make the Lakera AI v2 guard API request ##########
        #########################################################
        lakera_guardrail_response, masked_entity_count = await self.call_v2_guard(
-            messages=new_messages,
+            messages=new_messages,  # type: ignore[arg-type]
            request_data=data,
            event_type=GuardrailEventHooks.pre_call,
        )
@ -234,13 +247,20 @@ class LakeraAIGuardrail(CustomGuardrail):
        ########## 2. Handle flagged content ##########
        #########################################################
        if lakera_guardrail_response.get("flagged") is True:
-            # If only PII violations exist, mask the PII
-            if self._is_only_pii_violation(lakera_guardrail_response):
-                data["messages"] = self._mask_pii_in_messages(
-                    messages=new_messages,
+            # If only PII violations exist, mask the PII (string input only).
+            if (
+                self._is_only_pii_violation(lakera_guardrail_response)
+                and not is_multimodal_input
+            ):
+                redacted_messages = self._mask_pii_in_messages(
+                    messages=new_messages,  # type: ignore[arg-type]
                    lakera_response=lakera_guardrail_response,
                    masked_entity_count=masked_entity_count,
                )
+                # Write back to ``messages`` AND ``input``. The Responses-API
+                # backend reads ``input``; writing only to ``messages``
+                # would let unredacted PII reach the LLM for /v1/responses.
+                apply_redacted_messages_back(data, list(redacted_messages))  # type: ignore[arg-type]
                verbose_proxy_logger.debug(
                    "Lakera AI: Masked PII in messages instead of blocking request"
                )
@ -252,7 +272,9 @@ class LakeraAIGuardrail(CustomGuardrail):
                    )
                    # Log violation but continue
                elif self.on_flagged == "block":
-                    # If there are other violations or not set to mask PII, raise exception
+                    # Either non-PII violations, or PII on multimodal input
+                    # (which cannot be masked in place without dropping
+                    # image/audio parts) — raise the standard block error.
                    raise self._get_http_exception_for_blocked_guardrail(
                        lakera_guardrail_response
                    )
@ -280,18 +302,22 @@ class LakeraAIGuardrail(CustomGuardrail):
        if self.should_run_guardrail(data=data, event_type=event_type) is not True:
            return

-        new_messages: Optional[List[AllMessageValues]] = data.get("messages")
-        if new_messages is None:
+        new_messages = build_inspection_messages(data)
+        if not new_messages:
            verbose_proxy_logger.warning(
-                "Lakera AI: not running guardrail. No messages in data"
+                "Lakera AI: not running guardrail. No inspectable text in data"
            )
            return

+        # See ``async_pre_call_hook`` — multimodal input degrades to
+        # block-on-detect because mask-in-place would drop image parts.
+        is_multimodal_input = has_non_string_content(data)
+
        #########################################################
        ########## 1. Make the Lakera AI v2 guard API request ##########
        #########################################################
        lakera_guardrail_response, masked_entity_count = await self.call_v2_guard(
-            messages=new_messages,
+            messages=new_messages,  # type: ignore[arg-type]
            request_data=data,
            event_type=GuardrailEventHooks.during_call,
        )
@ -300,25 +326,28 @@ class LakeraAIGuardrail(CustomGuardrail):
        ########## 2. Handle flagged content ##########
        #########################################################
        if lakera_guardrail_response.get("flagged") is True:
-            # If only PII violations exist, mask the PII
-            if self._is_only_pii_violation(lakera_guardrail_response):
-                data["messages"] = self._mask_pii_in_messages(
-                    messages=new_messages,
+            if (
+                self._is_only_pii_violation(lakera_guardrail_response)
+                and not is_multimodal_input
+            ):
+                redacted_messages = self._mask_pii_in_messages(
+                    messages=new_messages,  # type: ignore[arg-type]
                    lakera_response=lakera_guardrail_response,
                    masked_entity_count=masked_entity_count,
                )
+                # Write back to ``messages`` AND ``input``. The Responses-API
+                # backend reads ``input``; writing only to ``messages``
+                # would let unredacted PII reach the LLM for /v1/responses.
+                apply_redacted_messages_back(data, list(redacted_messages))  # type: ignore[arg-type]
                verbose_proxy_logger.debug(
                    "Lakera AI: Masked PII in messages instead of blocking request"
                )
            else:
-                # Check on_flagged setting
                if self.on_flagged == "monitor":
                    verbose_proxy_logger.warning(
                        "Lakera Guardrail: Monitoring mode - violation detected but allowing request"
                    )
-                    # Log violation but continue
                elif self.on_flagged == "block":
-                    # If there are other violations or not set to mask PII, raise exception
                    raise self._get_http_exception_for_blocked_guardrail(
                        lakera_guardrail_response
                    )
--- a/litellm/proxy/guardrails/guardrail_hooks/lasso/lasso.py
+++ b/litellm/proxy/guardrails/guardrail_hooks/lasso/lasso.py
@ -50,6 +50,11 @@ from litellm.llms.custom_httpx.http_handler import (
    httpxSpecialProvider,
 )
 from litellm.proxy._types import UserAPIKeyAuth
+from litellm.proxy.guardrails._content_utils import (
+    apply_redacted_messages_back,
+    build_inspection_messages,
+    has_non_string_content,
+)
 from litellm.types.guardrails import GuardrailEventHooks
 import litellm

@ -366,16 +371,19 @@ class LassoGuardrail(CustomGuardrail):
            LassoGuardrailAPIError: If the Lasso API call fails
            HTTPException: If blocking violations are detected
        """
-        messages: List[Dict[str, str]] = data.get("messages", [])
+        # Covers multimodal list content + Responses-API input.
+        messages: List[Dict[str, str]] = build_inspection_messages(data)
        if not messages:
            return data

-        if self.mask:
+        # Lasso's classifix endpoint returns masked text that we copy back
+        # into ``data["messages"]``. For multimodal/Responses-API input we
+        # would silently strip image/audio parts, so fall back to the
+        # classify endpoint (which still raises on BLOCK actions) and
+        # leave the original payload intact.
+        if self.mask and not has_non_string_content(data):
            return await self._handle_masking(data, cache, message_type, messages)
-        else:
-            return await self._handle_classification(
-                data, cache, message_type, messages
-            )
+        return await self._handle_classification(data, cache, message_type, messages)

    async def _handle_classification(
        self,
@ -413,8 +421,9 @@ class LassoGuardrail(CustomGuardrail):
            self._process_lasso_response(response)

            # Apply masking to messages if violations detected and masked messages are available
-            if response.get("violations_detected") and response.get("messages"):
-                data["messages"] = response["messages"]
+            redacted_messages = response.get("messages")
+            if response.get("violations_detected") and redacted_messages:
+                apply_redacted_messages_back(data, list(redacted_messages))
                self._log_masking_applied(message_type, dict(response))

            return data
--- a/litellm/proxy/guardrails/guardrail_hooks/litellm_content_filter/content_filter.py
+++ b/litellm/proxy/guardrails/guardrail_hooks/litellm_content_filter/content_filter.py
@ -1873,8 +1873,9 @@ class ContentFilterGuardrail(CustomGuardrail):
        and the UI Request Lifecycle panel. Mirrors apply_guardrail's finally-block
        contract.
        """
-        accumulated_full_text = ""
-        yielded_masked_text_len = 0
+        accumulated_text_by_choice: Dict[int, str] = {}
+        yielded_masked_text_len_by_choice: Dict[int, int] = {}
+        latest_detections_by_choice: Dict[int, List[ContentFilterDetection]] = {}
        buffer_size = 50  # Increased buffer to catch patterns split across many chunks

        start_time = datetime.now()
@ -1890,79 +1891,90 @@ class ContentFilterGuardrail(CustomGuardrail):
        try:
            async for item in response:
                if isinstance(item, ModelResponseStream) and item.choices:
-                    delta_content = ""
-                    is_final = False
                    for choice in item.choices:
-                        if hasattr(choice, "delta") and choice.delta:
-                            content = getattr(choice.delta, "content", None)
-                            if content and isinstance(content, str):
-                                delta_content += content
-                        if getattr(choice, "finish_reason", None):
-                            is_final = True
+                        if not (hasattr(choice, "delta") and choice.delta):
+                            continue

-                    accumulated_full_text += delta_content
+                        choice_index = getattr(choice, "index", 0)
+                        if not isinstance(choice_index, int):
+                            choice_index = 0

-                    # Check for blocking or apply masking
-                    # Add a space at the end if it's the final chunk to trigger word boundaries (\b)
-                    text_to_check = accumulated_full_text
-                    if is_final:
-                        text_to_check += " "
+                        content = getattr(choice.delta, "content", None)
+                        is_final = bool(getattr(choice, "finish_reason", None))
+                        if isinstance(content, str) and content:
+                            accumulated_text_by_choice[choice_index] = (
+                                accumulated_text_by_choice.get(choice_index, "")
+                                + content
+                            )
+                        elif not is_final:
+                            continue

-                    try:
-                        # Reset before each scan: _filter_single_text scans the
-                        # whole accumulated buffer every chunk, so previous-chunk
-                        # matches are guaranteed to be re-found. Keeping only the
-                        # latest scan's detections avoids N× duplication in the
-                        # final log row. BLOCK still records correctly because
-                        # handlers append to detections before raising.
-                        detections.clear()
-                        masked_text = self._filter_single_text(
-                            text_to_check, detections=detections
+                        text_to_check = accumulated_text_by_choice.get(choice_index, "")
+                        if not text_to_check:
+                            continue
+
+                        # Add a space at the end if it's the final chunk to trigger word boundaries (\b)
+                        text_to_scan = text_to_check + (" " if is_final else "")
+                        choice_detections: List[ContentFilterDetection] = []
+
+                        try:
+                            # _filter_single_text scans the whole accumulated
+                            # choice buffer every chunk, so previous-chunk
+                            # matches are guaranteed to be re-found. Keeping
+                            # only each choice's latest scan avoids duplicate
+                            # detections in the final log row.
+                            masked_text = self._filter_single_text(
+                                text_to_scan, detections=choice_detections
+                            )
+                            if is_final and masked_text.endswith(" "):
+                                masked_text = masked_text[:-1]
+                            latest_detections_by_choice[choice_index] = (
+                                choice_detections
+                            )
+                        except HTTPException:
+                            latest_detections_by_choice[choice_index] = (
+                                choice_detections
+                            )
+                            raise
+                        except Exception as e:
+                            verbose_proxy_logger.error(
+                                f"ContentFilterGuardrail: Error in masking: {e}"
+                            )
+                            masked_text = text_to_scan  # Fallback to current text
+
+                        # Determine how much can be safely yielded
+                        if is_final:
+                            safe_to_yield_len = len(masked_text)
+                        else:
+                            safe_to_yield_len = max(0, len(masked_text) - buffer_size)
+
+                        yielded_masked_text_len = yielded_masked_text_len_by_choice.get(
+                            choice_index, 0
                        )
-                        if is_final and masked_text.endswith(" "):
-                            masked_text = masked_text[:-1]
-                    except HTTPException:
-                        raise
-                    except Exception as e:
-                        verbose_proxy_logger.error(
-                            f"ContentFilterGuardrail: Error in masking: {e}"
-                        )
-                        masked_text = text_to_check  # Fallback to current text
+                        if safe_to_yield_len > yielded_masked_text_len:
+                            new_masked_content = masked_text[
+                                yielded_masked_text_len:safe_to_yield_len
+                            ]
+                            choice.delta.content = new_masked_content
+                            yielded_masked_text_len_by_choice[choice_index] = (
+                                safe_to_yield_len
+                            )
+                        else:
+                            # Hold content by yielding empty content on this choice
+                            # while preserving chunk metadata and other choices.
+                            choice.delta.content = ""

-                    # Determine how much can be safely yielded
-                    if is_final:
-                        safe_to_yield_len = len(masked_text)
-                    else:
-                        safe_to_yield_len = max(0, len(masked_text) - buffer_size)
-
-                    if safe_to_yield_len > yielded_masked_text_len:
-                        new_masked_content = masked_text[
-                            yielded_masked_text_len:safe_to_yield_len
-                        ]
-                        # Modify the chunk to contain only the new masked content
-                        if (
-                            item.choices
-                            and hasattr(item.choices[0], "delta")
-                            and item.choices[0].delta
-                        ):
-                            item.choices[0].delta.content = new_masked_content
-                            yielded_masked_text_len = safe_to_yield_len
-                            yield item
-                    else:
-                        # Hold content by yielding empty content chunk (keeps metadata/structure)
-                        if (
-                            item.choices
-                            and hasattr(item.choices[0], "delta")
-                            and item.choices[0].delta
-                        ):
-                            item.choices[0].delta.content = ""
-                        yield item
+                    yield item
                else:
                    # Not a ModelResponseStream or no choices - yield as is
                    yield item

            # Any remaining content (should have been handled by is_final, but just in case)
-            if yielded_masked_text_len < len(accumulated_full_text):
+            if any(
+                yielded_masked_text_len_by_choice.get(choice_index, 0)
+                < len(accumulated_text)
+                for choice_index, accumulated_text in accumulated_text_by_choice.items()
+            ):
                # We already reached the end of the generator
                pass
        except HTTPException:
@ -1973,6 +1985,11 @@ class ContentFilterGuardrail(CustomGuardrail):
            exception_str = str(e)
            raise e
        finally:
+            detections = [
+                detection
+                for choice_detections in latest_detections_by_choice.values()
+                for detection in choice_detections
+            ]
            self._count_masked_entities(detections, masked_entity_count)
            self._log_guardrail_information(
                request_data=request_data,
--- a/litellm/proxy/guardrails/guardrail_hooks/semantic_guard/semantic_guard.py
+++ b/litellm/proxy/guardrails/guardrail_hooks/semantic_guard/semantic_guard.py
@ -187,11 +187,28 @@ def _extract_user_text(messages: List) -> str:


 def _extract_response_text(response: Any) -> str:
-    """Extract text from LLM response object."""
+    """Collect the text of every choice in an LLM response.
+
+    Each choice with a truthy ``message`` contributes its content, flattened
+    by ``_content_to_text``; non-empty results are joined with newlines.
+    Returns ``""`` when the response has no (or empty) ``choices``.
+    """
    if hasattr(response, "choices") and response.choices:
-        choice = response.choices[0]
-        if hasattr(choice, "message") and choice.message:
-            return choice.message.content or ""
+        flattened = (
+            _content_to_text(choice.message.content)
+            for choice in response.choices
+            if hasattr(choice, "message") and choice.message
+        )
+        # Drop choices that produced no text so they add no blank lines.
+        return "\n".join(text for text in flattened if text)
+    return ""
+
+
+def _content_to_text(content: Any) -> str:
+    """Flatten message content into plain text.
+
+    A string is returned unchanged. For a list, the non-empty string
+    ``text`` values of its dict blocks are joined with single spaces;
+    non-dict entries and blocks without a string ``text`` are ignored.
+    Any other content type yields ``""``.
+    """
+    if isinstance(content, str):
+        return content
+    if isinstance(content, list):
+        collected: List[str] = []
+        for block in content:
+            if not isinstance(block, dict):
+                continue
+            text = block.get("text")
+            if isinstance(text, str) and text:
+                collected.append(text)
+        return " ".join(collected)
    return ""


--- a/litellm/proxy/guardrails/guardrail_hooks/xecguard/xecguard.py
+++ b/litellm/proxy/guardrails/guardrail_hooks/xecguard/xecguard.py
@ -480,21 +480,32 @@ class XecGuardGuardrail(CustomGuardrail):
            choices = response.get("choices")
        if not choices:
            return None
-        first = choices[0]
-        if hasattr(first, "message"):
-            message = first.message
-        elif isinstance(first, dict):
-            message = first.get("message")
+        text_parts: List[str] = []
+        for choice in choices:
+            content = XecGuardGuardrail._extract_choice_content(choice)
+            text = XecGuardGuardrail._content_to_text(content)
+            if text:
+                text_parts.append(text)
+        return "\n".join(text_parts) or None
+
+    @staticmethod
+    def _extract_choice_content(choice: Any) -> Any:
+        """Return the raw ``content`` of a single response choice.
+
+        ``choice`` may be an object exposing ``.message`` or a dict with a
+        ``"message"`` key; the message may likewise be an object exposing
+        ``.content`` or a dict with a ``"content"`` key. Returns ``None``
+        when the choice or message has neither shape, or the message is
+        ``None``. The content is returned as found, without conversion.
+        """
+        if hasattr(choice, "message"):
+            message = choice.message
+        elif isinstance(choice, dict):
+            message = choice.get("message")
        else:
            return None
        if message is None:
            return None
        if hasattr(message, "content"):
-            content = message.content
-        elif isinstance(message, dict):
-            content = message.get("content")
-        else:
-            return None
+            return message.content
+        if isinstance(message, dict):
+            return message.get("content")
+        return None
+
+    @staticmethod
+    def _content_to_text(content: Any) -> Optional[str]:
        if isinstance(content, str) and content:
            return content
        if isinstance(content, list):
--- a/litellm/proxy/health_endpoints/_health_endpoints.py
+++ b/litellm/proxy/health_endpoints/_health_endpoints.py
@ -1447,14 +1447,11 @@ def callback_name(callback):
            return str(callback)


-@router.get(
-    "/health/readiness",
-    tags=["health"],
-    dependencies=[Depends(user_api_key_auth)],
-)
-async def health_readiness(response: Response):
+async def _get_health_readiness_details(
+    response: Optional[Response] = None,
+) -> Dict[str, Any]:
    """
-    Unprotected endpoint for checking if worker can receive requests
+    Detailed health payload for authenticated diagnostics.
    """
    from litellm.proxy.proxy_server import prisma_client, version

@ -1473,7 +1470,7 @@ async def health_readiness(response: Response):
            success_callback_names = litellm.success_callback

        # check Cache
-        cache_type = None
+        cache_type: Any = None
        if litellm.cache is not None:
            from litellm.caching.caching import RedisSemanticCache

@ -1482,6 +1479,7 @@ async def health_readiness(response: Response):
            if isinstance(litellm.cache.cache, RedisSemanticCache):
                # ping the cache
                # TODO: @ishaan-jaff - we should probably not ping the cache on every /health/readiness check
+                index_info: Any
                try:
                    index_info = await litellm.cache.cache._index_info()
                except Exception as e:
@ -1499,7 +1497,7 @@ async def health_readiness(response: Response):
            # serve requests that depend on persisted state (keys, budgets,
            # spend logs). Return 503 so orchestrators take this pod out of
            # rotation; "Not connected" (no DB configured at all) stays 200.
-            if db_health_status["status"] != "connected":
+            if response is not None and db_health_status["status"] != "connected":
                response.status_code = status.HTTP_503_SERVICE_UNAVAILABLE
            return {
                "status": "healthy",
@ -1526,6 +1524,52 @@ async def health_readiness(response: Response):
        raise HTTPException(status_code=503, detail=f"Service Unhealthy ({str(e)})")


+def _allow_public_health_readiness_details() -> bool:
+    """
+    Tell whether the public readiness probe may return the detailed payload.
+
+    Only the literal ``True`` in
+    ``general_settings["allow_public_health_readiness_details"]`` enables it;
+    a missing key or any other value keeps the payload low-detail.
+    """
+    from litellm.proxy.proxy_server import general_settings
+
+    opted_in = general_settings.get("allow_public_health_readiness_details")
+    return opted_in is True
+
+
+async def _set_public_readiness_status(response: Response) -> None:
+    """
+    Reflect database connectivity in the public probe's status code.
+
+    Does nothing when no database client is configured (``prisma_client`` is
+    ``None``). Otherwise sets ``response.status_code`` to 503 if the DB
+    readiness check reports anything other than ``"connected"``.
+    """
+    from litellm.proxy.proxy_server import prisma_client
+
+    if prisma_client is None:
+        return
+
+    db_status = (await _db_health_readiness_check())["status"]
+    if db_status != "connected":
+        response.status_code = status.HTTP_503_SERVICE_UNAVAILABLE
+
+
+@router.get("/health/readiness", tags=["health"])
+async def health_readiness(response: Response):
+    """
+    Unauthenticated readiness probe intended for load balancers.
+
+    Returns only ``{"status": "healthy"}`` by default. When
+    general_settings.allow_public_health_readiness_details is enabled, the
+    legacy detailed payload is returned instead.
+    """
+    if not _allow_public_health_readiness_details():
+        # Low-detail path: the helper may set a 503 status code, but the
+        # response body stays minimal.
+        await _set_public_readiness_status(response=response)
+        return {"status": "healthy"}
+
+    return await _get_health_readiness_details(response=response)
+
+
+@router.get(
+    "/health/readiness/details",
+    dependencies=[Depends(user_api_key_auth)],
+    tags=["health"],
+)
+async def health_readiness_details(response: Response):
+    """
+    Authenticated counterpart of the readiness probe.
+
+    Requires ``user_api_key_auth`` and returns the full diagnostics payload
+    built by ``_get_health_readiness_details``.
+    """
+    details = await _get_health_readiness_details(response=response)
+    return details
+
+
@router.get(
    "/health/backlog",
    tags=["health"],
@ -1561,7 +1605,6 @@ async def health_liveliness():
@router.options(
    "/health/readiness",
    tags=["health"],
-    dependencies=[Depends(user_api_key_auth)],
 )
 async def health_readiness_options():
    """
--- a/litellm/proxy/hooks/azure_content_safety.py
+++ b/litellm/proxy/hooks/azure_content_safety.py
@ -8,6 +8,10 @@ from litellm._logging import verbose_proxy_logger
 from litellm.caching.caching import DualCache
 from litellm.integrations.custom_logger import CustomLogger
 from litellm.proxy._types import UserAPIKeyAuth
+from litellm.proxy.guardrails._content_utils import (
+    is_text_content_call_type,
+    iter_message_text,
+)


 class _PROXY_AzureContentSafety(
@ -118,10 +122,9 @@ class _PROXY_AzureContentSafety(
    ):
        verbose_proxy_logger.debug("Inside Azure Content-Safety Pre-Call Hook")
        try:
-            if call_type == "completion" and "messages" in data:
-                for m in data["messages"]:
-                    if "content" in m and isinstance(m["content"], str):
-                        await self.test_violation(content=m["content"], source="input")
+            if is_text_content_call_type(call_type):
+                for text in iter_message_text(data):
+                    await self.test_violation(content=text, source="input")

        except HTTPException as e:
            raise e
@ -140,12 +143,16 @@ class _PROXY_AzureContentSafety(
        response,
    ):
        verbose_proxy_logger.debug("Inside Azure Content-Safety Post-Call Hook")
-        if isinstance(response, litellm.ModelResponse) and isinstance(
-            response.choices[0], litellm.utils.Choices
-        ):
-            await self.test_violation(
-                content=response.choices[0].message.content or "", source="output"
-            )
+        if not isinstance(response, litellm.ModelResponse):
+            return
+
+        for choice in response.choices:
+            if not isinstance(choice, litellm.utils.Choices):
+                continue
+            message = getattr(choice, "message", None)
+            content = getattr(message, "content", None)
+            if isinstance(content, str):
+                await self.test_violation(content=content, source="output")

    # async def async_post_call_streaming_hook(
    #    self,
--- a/litellm/proxy/litellm_pre_call_utils.py
+++ b/litellm/proxy/litellm_pre_call_utils.py
@ -61,6 +61,7 @@ from litellm.secret_managers.main import get_secret_bool
 from litellm.types.llms.anthropic import ANTHROPIC_API_HEADERS
 from litellm.types.services import ServiceTypes
 from litellm.types.utils import (
+    CustomPricingLiteLLMParams,
    LlmProviders,
    ProviderSpecificHeader,
    StandardLoggingUserAPIKeyMetadata,
@ -168,6 +169,20 @@ _ALLOW_CLIENT_MESSAGE_REDACTION_OPT_OUT_METADATA_KEY = (
    "allow_client_message_redaction_opt_out"
 )

+# Pricing parameters sent per request change cost-tracking output and, through
+# ``litellm.completion`` → ``register_model``, the process-wide
+# ``litellm.model_cost`` map. Both are deployment-configuration concerns rather
+# than something a request body should control, so the proxy removes these
+# fields before the call path sees them. The set is derived from the Pydantic
+# model, so pricing fields added there later are picked up automatically.
+_CLIENT_PRICING_CONTROL_FIELDS = frozenset(CustomPricingLiteLLMParams.model_fields)
+# ``use_custom_pricing_for_model`` reads the same pricing fields out of
+# ``model_info``, so that key is removed from request metadata as well.
+_CLIENT_PRICING_METADATA_FIELDS = frozenset(("model_info",))
+# Key/team metadata flag that opts a caller back in to pricing overrides.
+_ALLOW_CLIENT_PRICING_OVERRIDE_METADATA_KEY = "allow_client_pricing_override"
+
 # Request fields whose value, when URL-valued, becomes the outbound destination
 # for a provider call. Letting a proxy caller pin the destination is an SSRF
 # primitive (HuggingFace/Oobabooga `model`, Gemini files `file_id`); guard
@ -265,6 +280,46 @@ def _key_or_team_allows_client_message_redaction_opt_out(
    )


+def _key_or_team_allows_client_pricing_override(
+    user_api_key_dict: UserAPIKeyAuth,
+) -> bool:
+    """
+    Check the ``allow_client_pricing_override`` opt-in for this caller.
+
+    Delegates to ``_key_or_team_metadata_flag_is_true`` with the
+    ``_ALLOW_CLIENT_PRICING_OVERRIDE_METADATA_KEY`` metadata key.
+    """
+    allowed = _key_or_team_metadata_flag_is_true(
+        metadata_key=_ALLOW_CLIENT_PRICING_OVERRIDE_METADATA_KEY,
+        user_api_key_dict=user_api_key_dict,
+    )
+    return allowed
+
+
+def _strip_client_pricing_overrides(data: Dict[str, Any]) -> None:
+    """Drop pricing overrides from the request body and any metadata variant.
+
+    Mutates ``data`` in place: removes every top-level key listed in
+    ``_CLIENT_PRICING_CONTROL_FIELDS`` and, for dict-valued ``metadata`` /
+    ``litellm_metadata`` entries, every key listed in
+    ``_CLIENT_PRICING_METADATA_FIELDS``. Non-dict metadata is left untouched.
+
+    This helper strips unconditionally. The caller is responsible for
+    skipping the call when the key/team carries
+    ``allow_client_pricing_override: True`` in its metadata.
+
+    Emits a ``debug``-level log line naming the dropped fields so operators
+    can trace why a client-supplied pricing override stopped being applied
+    (otherwise the strip is invisible from the caller's perspective).
+    """
+    stripped: List[str] = []
+    # Iterate in sorted order: both field collections are frozensets of
+    # strings, whose iteration order differs between processes under hash
+    # randomization. Sorting keeps the log line stable and comparable.
+    for field in sorted(_CLIENT_PRICING_CONTROL_FIELDS):
+        if field in data:
+            stripped.append(field)
+            del data[field]
+    for metadata_key in ("metadata", "litellm_metadata"):
+        metadata = data.get(metadata_key)
+        if not isinstance(metadata, dict):
+            continue
+        for field in sorted(_CLIENT_PRICING_METADATA_FIELDS):
+            if field in metadata:
+                stripped.append(f"{metadata_key}.{field}")
+                del metadata[field]
+    if stripped:
+        verbose_proxy_logger.debug(
+            "Stripped client-supplied pricing fields from request body: %s. "
+            "Set `allow_client_pricing_override: true` on the key or team "
+            "metadata to keep these values.",
+            ", ".join(stripped),
+        )
+
+
 def _get_metadata_variable_name(request: Request) -> str:
    """
    Helper to return what the "metadata" field should be called in the request data
@ -1364,6 +1419,14 @@ async def add_litellm_data_to_request(  # noqa: PLR0915
            ]:
                _user_meta.pop(_k, None)

+    # Strip pricing overrides AFTER the litellm_metadata string-to-dict parse
+    # above, for the same reason as the user_api_key_* strip — JSON-string
+    # metadata (sent via multipart/form-data or extra_body) wouldn't be a
+    # dict yet at the earlier strip point and the isinstance(dict) guard
+    # would silently skip the field.
+    if not _key_or_team_allows_client_pricing_override(user_api_key_dict):
+        _strip_client_pricing_overrides(data)
+
    # Strip caller-supplied routing/budget tags unless the admin has opted
    # this key or team in via metadata.allow_client_tags=True. Tags drive
    # tag-based routing and tag budget attribution — accepting them from
--- a/litellm/proxy/management_endpoints/router_settings_endpoints.py
+++ b/litellm/proxy/management_endpoints/router_settings_endpoints.py
@ -104,11 +104,16 @@ async def get_router_settings(
        config = await proxy_config.get_config()
        router_settings_from_config = config.get("router_settings", {})

-        # Get current values from llm_router if initialized
-        current_values = {}
+        current_values: Dict[str, Any] = {}
        if llm_router is not None:
-            # Check all field names from the fields list
+            # Router exposes routing groups as private `_routing_groups`; the
+            # generic `hasattr` loop below would miss them.
+            current_values["routing_groups"] = [
+                group.model_dump() for group in llm_router._routing_groups.values()
+            ]
            for field in router_fields:
+                if field.field_name == "routing_groups":
+                    continue
                if hasattr(llm_router, field.field_name):
                    value = getattr(llm_router, field.field_name)
                    current_values[field.field_name] = value
--- a/litellm/proxy/middleware/prometheus_auth_middleware.py
+++ b/litellm/proxy/middleware/prometheus_auth_middleware.py
@ -20,13 +20,13 @@ class PrometheusAuthMiddleware:
    """
    Middleware to authenticate requests to the metrics endpoint.

-    By default, auth is not run on the metrics endpoint.
+    By default, auth is run on the metrics endpoint.

-    Enabled by setting the following in proxy_config.yaml:
+    To allow unauthenticated metrics in proxy_config.yaml:

    ```yaml
    litellm_settings:
-        require_auth_for_metrics_endpoint: true
+        require_auth_for_metrics_endpoint: false
    ```
    """

@ -39,8 +39,8 @@ class PrometheusAuthMiddleware:
            await self.app(scope, receive, send)
            return

-        # Only run auth if configured to do so
-        if litellm.require_auth_for_metrics_endpoint is True:
+        # Run auth by default; allow legacy public metrics only when explicitly disabled.
+        if litellm.require_auth_for_metrics_endpoint is not False:
            # user_api_key_auth reads the request body, which consumes ASGI `receive`.
            # Buffer those messages and replay them for the inner app; otherwise a
            # successful auth would forward an exhausted receive and /metrics hangs.
@ -52,10 +52,29 @@ class PrometheusAuthMiddleware:
                return message

            request = Request(scope, receive_for_auth)
-            api_key = request.headers.get(_AUTHORIZATION_HEADER) or ""

            try:
-                await user_api_key_auth(request=request, api_key=api_key)
+                await user_api_key_auth(
+                    request=request,
+                    api_key=request.headers.get(_AUTHORIZATION_HEADER) or "",
+                    azure_api_key_header=request.headers.get(
+                        SpecialHeaders.azure_authorization.value
+                    )
+                    or "",
+                    anthropic_api_key_header=request.headers.get(
+                        SpecialHeaders.anthropic_authorization.value
+                    ),
+                    google_ai_studio_api_key_header=request.headers.get(
+                        SpecialHeaders.google_ai_studio_authorization.value
+                    ),
+                    azure_apim_header=request.headers.get(
+                        SpecialHeaders.azure_apim_authorization.value
+                    )
+                    or "",
+                    custom_litellm_key_header=request.headers.get(
+                        SpecialHeaders.custom_litellm_api_key.value
+                    ),
+                )
            except Exception as e:
                # Send 401 response directly via ASGI protocol
                error_message = getattr(e, "message", str(e))
--- a/litellm/proxy/public_endpoints/public_endpoints.py
+++ b/litellm/proxy/public_endpoints/public_endpoints.py
@ -5,7 +5,7 @@ from importlib.resources import files
 from typing import Any, Dict, List, Optional

 import litellm
-from fastapi import APIRouter, Depends, HTTPException
+from fastapi import APIRouter, HTTPException

 from litellm._logging import verbose_logger
 from litellm.litellm_core_utils.get_blog_posts import (
@ -14,8 +14,9 @@ from litellm.litellm_core_utils.get_blog_posts import (
    GetBlogPosts,
    get_blog_posts,
 )
-from litellm.proxy._types import CommonProxyErrors
-from litellm.proxy.auth.user_api_key_auth import user_api_key_auth
+from litellm.proxy._types import (
+    CommonProxyErrors,
+)
 from litellm.types.agents import AgentCard
 from litellm.types.mcp import MCPPublicServer
 from litellm.types.proxy.management_endpoints.model_management_endpoints import (
@ -31,6 +32,7 @@ from litellm.types.utils import LlmProviders

 router = APIRouter()

+
 # ---------------------------------------------------------------------------
 # /public/endpoints — helpers
 # ---------------------------------------------------------------------------
@ -153,7 +155,6 @@ def _load_endpoints() -> List[Dict[str, Any]]:
@router.get(
    "/public/model_hub",
    tags=["public", "model management"],
-    dependencies=[Depends(user_api_key_auth)],
    response_model=List[ModelGroupInfoProxy],
 )
 async def public_model_hub():
@ -208,7 +209,6 @@ async def public_model_hub():
@router.get(
    "/public/agent_hub",
    tags=["[beta] Agents", "public"],
-    dependencies=[Depends(user_api_key_auth)],
    response_model=List[AgentCard],
 )
 async def get_agents():
@ -230,7 +230,6 @@ async def get_agents():
@router.get(
    "/public/mcp_hub",
    tags=["[beta] MCP", "public"],
-    dependencies=[Depends(user_api_key_auth)],
    response_model=List[MCPPublicServer],
 )
 async def get_mcp_servers():
--- a/litellm/proxy/spend_tracking/spend_management_endpoints.py
+++ b/litellm/proxy/spend_tracking/spend_management_endpoints.py
@ -3079,7 +3079,11 @@ async def global_spend_models(
    return response


-@router.get("/provider/budgets", response_model=ProviderBudgetResponse)
+@router.get(
+    "/provider/budgets",
+    dependencies=[Depends(user_api_key_auth)],
+    response_model=ProviderBudgetResponse,
+)
 async def provider_budgets() -> ProviderBudgetResponse:
    """
    Provider Budget Routing - Get Budget, Spend Details https://docs.litellm.ai/docs/proxy/provider_budget_routing
--- a/litellm/proxy/ui_crud_endpoints/proxy_setting_endpoints.py
+++ b/litellm/proxy/ui_crud_endpoints/proxy_setting_endpoints.py
@ -99,6 +99,11 @@ class UISettings(BaseModel):
        description="If true, requires authentication for accessing the public AI Hub.",
    )

+    allow_public_health_readiness_details: bool = Field(
+        default=False,
+        description="If true, returns the legacy detailed payload from the unauthenticated /health/readiness endpoint.",
+    )
+
    forward_client_headers_to_llm_api: bool = Field(
        default=False,
        description=(
@ -169,6 +174,7 @@ ALLOWED_UI_SETTINGS_FIELDS = {
    "disable_team_admin_delete_team_user",
    "enabled_ui_pages_internal_users",
    "require_auth_for_public_ai_hub",
+    "allow_public_health_readiness_details",
    "forward_client_headers_to_llm_api",
    "forward_llm_provider_auth_headers",
    "disable_agents_for_internal_users",
@ -183,6 +189,7 @@ ALLOWED_UI_SETTINGS_FIELDS = {
 # Flags that must be synced from the persisted UISettings into
 # general_settings at runtime (on both read and write).
 _RUNTIME_GENERAL_SETTINGS_FLAGS = [
+    "allow_public_health_readiness_details",
    "forward_client_headers_to_llm_api",
    "forward_llm_provider_auth_headers",
    "disable_agents_for_internal_users",
--- a/litellm/router.py
+++ b/litellm/router.py
@ -1052,11 +1052,17 @@ class Router:
            strategy = self._normalize_strategy(self.routing_strategy)
            attr = self._DEFAULT_SELECTOR_ATTR_BY_STRATEGY.get(strategy or "")
            selector = getattr(self, attr, None) if attr is not None else None
+            verbose_router_logger.debug(
+                "routing_group=default model=%s strategy=%s", model, strategy
+            )
            return strategy, selector

        group = self._routing_groups[group_name]
        strategy = self._normalize_strategy(group.routing_strategy)
        selector = self._group_selectors.get(group_name, {}).get(strategy or "")
+        verbose_router_logger.debug(
+            "routing_group=%s model=%s strategy=%s", group_name, model, strategy
+        )
        return strategy, selector

    async def _select_deployment_async(
--- a/litellm/types/management_endpoints/router_settings_endpoints.py
+++ b/litellm/types/management_endpoints/router_settings_endpoints.py
@ -112,6 +112,14 @@ ROUTER_SETTINGS_FIELDS: List[RouterSettingsField] = [
        field_default={},
        ui_field_name="Routing Strategy Args",
    ),
+    RouterSettingsField(
+        field_name="routing_groups",
+        field_type="List",
+        field_value=None,
+        field_description="Named subsets of model_names that share a routing strategy. Models not claimed by an explicit group fall through to the top-level routing_strategy.",
+        field_default=[],
+        ui_field_name="Routing Groups",
+    ),
    RouterSettingsField(
        field_name="num_retries",
        field_type="Integer",
--- a/litellm/utils.py
+++ b/litellm/utils.py
@ -2243,12 +2243,52 @@ def encode(model="", text="", custom_tokenizer: Optional[dict] = None):
    return enc


-def decode(model="", tokens: List[int] = [], custom_tokenizer: Optional[dict] = None):
+def decode(
+    model="",
+    tokens: List[int] = [],
+    custom_tokenizer: Optional[dict] = None,
+    skip_special_tokens: bool = True,
+):
+    """
+    Decodes token ids using the selected tokenizer.
+
+    The tokenizer is ``custom_tokenizer`` when a truthy one is supplied,
+    otherwise whatever ``_select_tokenizer(model=model)`` returns. Its
+    ``"type"`` and ``"tokenizer"`` keys are read here.
+
+    Args:
+        model: Model name forwarded to ``_select_tokenizer`` when no
+            ``custom_tokenizer`` is given.
+        tokens: Token ids to decode. The default is a shared list literal;
+            this function only rebinds the name and never mutates the list.
+        custom_tokenizer: Optional tokenizer dict that takes precedence over
+            the model-based selection.
+        skip_special_tokens: For HuggingFace tokenizers, keep the historical
+            LiteLLM round-trip behavior by omitting special tokens by default.
+            Set to False to inspect decoded BOS/EOS tokens. Not consulted for
+            any other tokenizer type.
+
+    Returns:
+        Whatever the underlying tokenizer's ``decode`` returns (presumably the
+        decoded text; confirm against the tokenizer in use).
+    """
     tokenizer_json = custom_tokenizer or _select_tokenizer(model=model)
+    if tokenizer_json["type"] == "huggingface_tokenizer":
+        if skip_special_tokens:
+            # Drop ids flagged as special before decoding; the same flag is
+            # also passed to the tokenizer's own decode call below.
+            tokens = _strip_huggingface_special_token_ids(
+                tokenizer_json["tokenizer"], tokens
+            )
+        dec = tokenizer_json["tokenizer"].decode(
+            tokens, skip_special_tokens=skip_special_tokens
+        )
+        return dec
     dec = tokenizer_json["tokenizer"].decode(tokens)
     return dec


+def _strip_huggingface_special_token_ids(
+    tokenizer: Tokenizer, tokens: List[int]
+) -> List[int]:
+    """
+    Return ``tokens`` without the ids the tokenizer marks as special.
+
+    Special ids are the keys of ``tokenizer.get_added_tokens_decoder()`` whose
+    value has a truthy ``special`` attribute.
+
+    Args:
+        tokenizer: Tokenizer exposing ``get_added_tokens_decoder()``.
+        tokens: Token ids to filter.
+
+    Returns:
+        The original ``tokens`` object, unchanged, when the added-token lookup
+        raises or yields no special ids; otherwise a new list with the special
+        ids removed and the order of the remaining ids preserved.
+    """
+    try:
+        added_tokens_decoder = tokenizer.get_added_tokens_decoder()
+    except Exception:
+        # Best effort: if the lookup fails for any reason, skip the filtering
+        # and hand the ids back untouched.
+        return tokens
+
+    special_token_ids = {
+        token_id
+        for token_id, added_token in added_tokens_decoder.items()
+        # Entries without a ``special`` attribute count as non-special.
+        if getattr(added_token, "special", False)
+    }
+    if not special_token_ids:
+        return tokens
+    return [token for token in tokens if token not in special_token_ids]
+
+
 def create_pretrained_tokenizer(
    identifier: str, revision="main", auth_token: Optional[str] = None
 ):
--- a/package-lock.json
+++ b/package-lock.json
@ -1,19 +1,19 @@
 {
-  "name": "litellm",
+  "name": "litellm-dependency-refresh",
  "lockfileVersion": 3,
  "requires": true,
  "packages": {
    "": {
      "dependencies": {
-        "prism-react-renderer": "^2.4.1",
-        "prisma": "^5.17.0",
-        "react-copy-to-clipboard": "^5.1.0"
+        "prism-react-renderer": "2.4.1",
+        "prisma": "5.17.0",
+        "react-copy-to-clipboard": "5.1.1"
      },
      "devDependencies": {
-        "@testing-library/jest-dom": "^6.8.0",
-        "@testing-library/react": "^14.3.1",
-        "@types/react-copy-to-clipboard": "^5.0.7",
-        "jest": "^29.7.0"
+        "@testing-library/jest-dom": "6.8.0",
+        "@testing-library/react": "14.3.1",
+        "@types/react-copy-to-clipboard": "5.0.7",
+        "jest": "29.7.0"
      }
    },
    "node_modules/@adobe/css-tools": {
@ -529,29 +529,6 @@
      "dev": true,
      "license": "MIT"
    },
-    "node_modules/@isaacs/balanced-match": {
-      "version": "4.0.1",
-      "resolved": "https://registry.npmjs.org/@isaacs/balanced-match/-/balanced-match-4.0.1.tgz",
-      "integrity": "sha512-yzMTt9lEb8Gv7zRioUilSglI0c0smZ9k5D65677DLWLtWJaXIS3CqcGyUFByYKlnUj6TkjLVs54fBl6+TiGQDQ==",
-      "dev": true,
-      "license": "MIT",
-      "engines": {
-        "node": "20 || >=22"
-      }
-    },
-    "node_modules/@isaacs/brace-expansion": {
-      "version": "5.0.0",
-      "resolved": "https://registry.npmjs.org/@isaacs/brace-expansion/-/brace-expansion-5.0.0.tgz",
-      "integrity": "sha512-ZT55BDLV0yv0RBm2czMiZ+SqCGO7AvmOM3G/w2xhVPH+te0aKgFjmBvGlL1dH+ql2tgGO3MVrbb3jCKyvpgnxA==",
-      "dev": true,
-      "license": "MIT",
-      "dependencies": {
-        "@isaacs/balanced-match": "^4.0.1"
-      },
-      "engines": {
-        "node": "20 || >=22"
-      }
-    },
    "node_modules/@istanbuljs/load-nyc-config": {
      "version": "1.1.0",
      "resolved": "https://registry.npmjs.org/@istanbuljs/load-nyc-config/-/load-nyc-config-1.1.0.tgz",
@ -957,48 +934,48 @@
      }
    },
    "node_modules/@prisma/debug": {
-      "version": "5.22.0",
-      "resolved": "https://registry.npmjs.org/@prisma/debug/-/debug-5.22.0.tgz",
-      "integrity": "sha512-AUt44v3YJeggO2ZU5BkXI7M4hu9BF2zzH2iF2V5pyXT/lRTyWiElZ7It+bRH1EshoMRxHgpYg4VB6rCM+mG5jQ==",
+      "version": "5.17.0",
+      "resolved": "https://registry.npmjs.org/@prisma/debug/-/debug-5.17.0.tgz",
+      "integrity": "sha512-l7+AteR3P8FXiYyo496zkuoiJ5r9jLQEdUuxIxNCN1ud8rdbH3GTxm+f+dCyaSv9l9WY+29L9czaVRXz9mULfg==",
      "license": "Apache-2.0"
    },
    "node_modules/@prisma/engines": {
-      "version": "5.22.0",
-      "resolved": "https://registry.npmjs.org/@prisma/engines/-/engines-5.22.0.tgz",
-      "integrity": "sha512-UNjfslWhAt06kVL3CjkuYpHAWSO6L4kDCVPegV6itt7nD1kSJavd3vhgAEhjglLJJKEdJ7oIqDJ+yHk6qO8gPA==",
+      "version": "5.17.0",
+      "resolved": "https://registry.npmjs.org/@prisma/engines/-/engines-5.17.0.tgz",
+      "integrity": "sha512-+r+Nf+JP210Jur+/X8SIPLtz+uW9YA4QO5IXA+KcSOBe/shT47bCcRMTYCbOESw3FFYFTwe7vU6KTWHKPiwvtg==",
      "hasInstallScript": true,
      "license": "Apache-2.0",
      "dependencies": {
-        "@prisma/debug": "5.22.0",
-        "@prisma/engines-version": "5.22.0-44.605197351a3c8bdd595af2d2a9bc3025bca48ea2",
-        "@prisma/fetch-engine": "5.22.0",
-        "@prisma/get-platform": "5.22.0"
+        "@prisma/debug": "5.17.0",
+        "@prisma/engines-version": "5.17.0-31.393aa359c9ad4a4bb28630fb5613f9c281cde053",
+        "@prisma/fetch-engine": "5.17.0",
+        "@prisma/get-platform": "5.17.0"
      }
    },
    "node_modules/@prisma/engines-version": {
-      "version": "5.22.0-44.605197351a3c8bdd595af2d2a9bc3025bca48ea2",
-      "resolved": "https://registry.npmjs.org/@prisma/engines-version/-/engines-version-5.22.0-44.605197351a3c8bdd595af2d2a9bc3025bca48ea2.tgz",
-      "integrity": "sha512-2PTmxFR2yHW/eB3uqWtcgRcgAbG1rwG9ZriSvQw+nnb7c4uCr3RAcGMb6/zfE88SKlC1Nj2ziUvc96Z379mHgQ==",
+      "version": "5.17.0-31.393aa359c9ad4a4bb28630fb5613f9c281cde053",
+      "resolved": "https://registry.npmjs.org/@prisma/engines-version/-/engines-version-5.17.0-31.393aa359c9ad4a4bb28630fb5613f9c281cde053.tgz",
+      "integrity": "sha512-tUuxZZysZDcrk5oaNOdrBnnkoTtmNQPkzINFDjz7eG6vcs9AVDmA/F6K5Plsb2aQc/l5M2EnFqn3htng9FA4hg==",
      "license": "Apache-2.0"
    },
    "node_modules/@prisma/fetch-engine": {
-      "version": "5.22.0",
-      "resolved": "https://registry.npmjs.org/@prisma/fetch-engine/-/fetch-engine-5.22.0.tgz",
-      "integrity": "sha512-bkrD/Mc2fSvkQBV5EpoFcZ87AvOgDxbG99488a5cexp5Ccny+UM6MAe/UFkUC0wLYD9+9befNOqGiIJhhq+HbA==",
+      "version": "5.17.0",
+      "resolved": "https://registry.npmjs.org/@prisma/fetch-engine/-/fetch-engine-5.17.0.tgz",
+      "integrity": "sha512-ESxiOaHuC488ilLPnrv/tM2KrPhQB5TRris/IeIV4ZvUuKeaicCl4Xj/JCQeG9IlxqOgf1cCg5h5vAzlewN91Q==",
      "license": "Apache-2.0",
      "dependencies": {
-        "@prisma/debug": "5.22.0",
-        "@prisma/engines-version": "5.22.0-44.605197351a3c8bdd595af2d2a9bc3025bca48ea2",
-        "@prisma/get-platform": "5.22.0"
+        "@prisma/debug": "5.17.0",
+        "@prisma/engines-version": "5.17.0-31.393aa359c9ad4a4bb28630fb5613f9c281cde053",
+        "@prisma/get-platform": "5.17.0"
      }
    },
    "node_modules/@prisma/get-platform": {
-      "version": "5.22.0",
-      "resolved": "https://registry.npmjs.org/@prisma/get-platform/-/get-platform-5.22.0.tgz",
-      "integrity": "sha512-pHhpQdr1UPFpt+zFfnPazhulaZYCUqeIcPpJViYoq9R+D/yw4fjE+CtnsnKzPYm0ddUbeXUzjGVGIRVgPDCk4Q==",
+      "version": "5.17.0",
+      "resolved": "https://registry.npmjs.org/@prisma/get-platform/-/get-platform-5.17.0.tgz",
+      "integrity": "sha512-UlDgbRozCP1rfJ5Tlkf3Cnftb6srGrEQ4Nm3og+1Se2gWmCZ0hmPIi+tQikGDUVLlvOWx3Gyi9LzgRP+HTXV9w==",
      "license": "Apache-2.0",
      "dependencies": {
-        "@prisma/debug": "5.22.0"
+        "@prisma/debug": "5.17.0"
      }
    },
    "node_modules/@sinclair/typebox": {
@ -1066,9 +1043,9 @@
      "license": "MIT"
    },
    "node_modules/@testing-library/jest-dom": {
-      "version": "6.9.1",
-      "resolved": "https://registry.npmjs.org/@testing-library/jest-dom/-/jest-dom-6.9.1.tgz",
-      "integrity": "sha512-zIcONa+hVtVSSep9UT3jZ5rizo2BsxgyDYU7WFD5eICBE7no3881HGeb/QkGfsJs6JTkY1aQhT7rIPC7e+0nnA==",
+      "version": "6.8.0",
+      "resolved": "https://registry.npmjs.org/@testing-library/jest-dom/-/jest-dom-6.8.0.tgz",
+      "integrity": "sha512-WgXcWzVM6idy5JaftTVC8Vs83NKRmGJz4Hqs4oyOuO2J4r/y79vvKZsb+CaGyCSEbUPI6OsewfPd0G1A0/TUZQ==",
      "dev": true,
      "license": "MIT",
      "dependencies": {
@ -1497,11 +1474,14 @@
      }
    },
    "node_modules/balanced-match": {
-      "version": "1.0.2",
-      "resolved": "https://registry.npmjs.org/balanced-match/-/balanced-match-1.0.2.tgz",
-      "integrity": "sha512-3oSeUO0TMV67hN1AmbXsK4yaqU7tjiHlbxRDZOpH0KW9+CeX4bRAaX0Anxt0tx2MrpRpWwQaPwIlISEJhYU5Pw==",
+      "version": "4.0.4",
+      "resolved": "https://registry.npmjs.org/balanced-match/-/balanced-match-4.0.4.tgz",
+      "integrity": "sha512-BLrgEcRTwX2o6gGxGOCNyMvGSp35YofuYzw9h1IMTRmKqttAZZVU67bdb9Pr2vUHA8+j3i2tJfjO6C6+4myGTA==",
      "dev": true,
-      "license": "MIT"
+      "license": "MIT",
+      "engines": {
+        "node": "18 || 20 || >=22"
+      }
    },
    "node_modules/baseline-browser-mapping": {
      "version": "2.8.30",
@ -1514,14 +1494,16 @@
      }
    },
    "node_modules/brace-expansion": {
-      "version": "1.1.12",
-      "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.12.tgz",
-      "integrity": "sha512-9T9UjW3r0UW5c1Q7GTwllptXwhvYmEzFhzMfZ9H7FQWt+uZePjZPjBP/W1ZEyZ1twGWom5/56TF4lPcqjnDHcg==",
+      "version": "5.0.5",
+      "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-5.0.5.tgz",
+      "integrity": "sha512-VZznLgtwhn+Mact9tfiwx64fA9erHH/MCXEUfB/0bX/6Fz6ny5EGTXYltMocqg4xFAQZtnO3DHWWXi8RiuN7cQ==",
      "dev": true,
      "license": "MIT",
      "dependencies": {
-        "balanced-match": "^1.0.0",
-        "concat-map": "0.0.1"
+        "balanced-match": "^4.0.2"
+      },
+      "engines": {
+        "node": "18 || 20 || >=22"
      }
    },
    "node_modules/braces": {
@ -1791,13 +1773,6 @@
      "dev": true,
      "license": "MIT"
    },
-    "node_modules/concat-map": {
-      "version": "0.0.1",
-      "resolved": "https://registry.npmjs.org/concat-map/-/concat-map-0.0.1.tgz",
-      "integrity": "sha512-/Srv4dswyQNBfohGpz9o6Yb3Gz3SrUDqBH5rTuhGR7ahtlbYKnVxw2bCFMRljaA7EXHaXZ8wsHdodFvbkhKmqg==",
-      "dev": true,
-      "license": "MIT"
-    },
    "node_modules/convert-source-map": {
      "version": "2.0.0",
      "resolved": "https://registry.npmjs.org/convert-source-map/-/convert-source-map-2.0.0.tgz",
@ -2258,6 +2233,7 @@
      "version": "2.3.3",
      "resolved": "https://registry.npmjs.org/fsevents/-/fsevents-2.3.3.tgz",
      "integrity": "sha512-5xoDfX+fL7faATnagmWPpbFtwh/R77WmMMqqHGS65C3vvB0YHrgF+B1YmZ3441tMj5n63k0212XNoJwzlhffQw==",
+      "dev": true,
      "hasInstallScript": true,
      "license": "MIT",
      "optional": true,
@ -4090,16 +4066,16 @@
      }
    },
    "node_modules/minimatch": {
-      "version": "10.1.1",
-      "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-10.1.1.tgz",
-      "integrity": "sha512-enIvLvRAFZYXJzkCYG5RKmPfrFArdLv+R+lbQ53BmIMLIry74bjKzX6iHAm8WYamJkhSSEabrWN5D97XnKObjQ==",
+      "version": "10.2.5",
+      "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-10.2.5.tgz",
+      "integrity": "sha512-MULkVLfKGYDFYejP07QOurDLLQpcjk7Fw+7jXS2R2czRQzR56yHRveU5NDJEOviH+hETZKSkIk5c+T23GjFUMg==",
      "dev": true,
      "license": "BlueOak-1.0.0",
      "dependencies": {
-        "@isaacs/brace-expansion": "^5.0.0"
+        "brace-expansion": "^5.0.5"
      },
      "engines": {
-        "node": "20 || >=22"
+        "node": "18 || 20 || >=22"
      },
      "funding": {
        "url": "https://github.com/sponsors/isaacs"
@ -4388,9 +4364,9 @@
      "license": "ISC"
    },
    "node_modules/picomatch": {
-      "version": "2.3.1",
-      "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-2.3.1.tgz",
-      "integrity": "sha512-JU3teHTNjmE2VCGFzuY8EXzCDVwEqB2a8fsIvwaStHhAWJEeVd1o1QD80CU6+ZdEXXSLbSsuLwJjkCBWqRQUVA==",
+      "version": "2.3.2",
+      "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-2.3.2.tgz",
+      "integrity": "sha512-V7+vQEJ06Z+c5tSye8S+nHUfI51xoXIXjHQ99cQtKUkQqqO1kO/KCJUfZXuB47h/YBlDhah2H3hdUGXn8ie0oA==",
      "dev": true,
      "license": "MIT",
      "engines": {
@ -4475,22 +4451,19 @@
      }
    },
    "node_modules/prisma": {
-      "version": "5.22.0",
-      "resolved": "https://registry.npmjs.org/prisma/-/prisma-5.22.0.tgz",
-      "integrity": "sha512-vtpjW3XuYCSnMsNVBjLMNkTj6OZbudcPPTPYHqX0CJfpcdWciI1dM8uHETwmDxxiqEwCIE6WvXucWUetJgfu/A==",
+      "version": "5.17.0",
+      "resolved": "https://registry.npmjs.org/prisma/-/prisma-5.17.0.tgz",
+      "integrity": "sha512-m4UWkN5lBE6yevqeOxEvmepnL5cNPEjzMw2IqDB59AcEV6w7D8vGljDLd1gPFH+W6gUxw9x7/RmN5dCS/WTPxA==",
      "hasInstallScript": true,
      "license": "Apache-2.0",
      "dependencies": {
-        "@prisma/engines": "5.22.0"
+        "@prisma/engines": "5.17.0"
      },
      "bin": {
        "prisma": "build/index.js"
      },
      "engines": {
        "node": ">=16.13"
-      },
-      "optionalDependencies": {
-        "fsevents": "2.3.3"
      }
    },
    "node_modules/prompts": {
@ -4555,16 +4528,16 @@
      }
    },
    "node_modules/react-copy-to-clipboard": {
-      "version": "5.1.0",
-      "resolved": "https://registry.npmjs.org/react-copy-to-clipboard/-/react-copy-to-clipboard-5.1.0.tgz",
-      "integrity": "sha512-k61RsNgAayIJNoy9yDsYzDe/yAZAzEbEgcz3DZMhF686LEyukcE1hzurxe85JandPUG+yTfGVFzuEw3xt8WP/A==",
+      "version": "5.1.1",
+      "resolved": "https://registry.npmjs.org/react-copy-to-clipboard/-/react-copy-to-clipboard-5.1.1.tgz",
+      "integrity": "sha512-s+HrzLyJBxrpGTYXF15dTgMjAJpEPZT/Yp6NytAtZMRngejxt6Pt5WrfFxLAcsqUDU6sY1Jz6tyHwIicE1U2Xg==",
      "license": "MIT",
      "dependencies": {
-        "copy-to-clipboard": "^3.3.1",
+        "copy-to-clipboard": "^3.3.3",
        "prop-types": "^15.8.1"
      },
      "peerDependencies": {
-        "react": "^15.3.0 || 16 || 17 || 18"
+        "react": ">=15.3.0"
      }
    },
    "node_modules/react-dom": {
@ -5068,19 +5041,6 @@
        "node": ">=8"
      }
    },
-    "node_modules/test-exclude/node_modules/minimatch": {
-      "version": "3.1.2",
-      "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-3.1.2.tgz",
-      "integrity": "sha512-J7p63hRiAjw1NDEww1W7i37+ByIrOWO5XQQAzZ3VOcL0PNybwpfmV/N05zFAzwQ9USyEcX6t3UO+K5aqBQOIHw==",
-      "dev": true,
-      "license": "ISC",
-      "dependencies": {
-        "brace-expansion": "^1.1.7"
-      },
-      "engines": {
-        "node": "*"
-      }
-    },
    "node_modules/tmpl": {
      "version": "1.0.5",
      "resolved": "https://registry.npmjs.org/tmpl/-/tmpl-1.0.5.tgz",
--- a/package.json
+++ b/package.json
@ -2,7 +2,7 @@
  "dependencies": {
    "prism-react-renderer": "2.4.1",
    "prisma": "5.17.0",
-    "react-copy-to-clipboard": "5.1.0"
+    "react-copy-to-clipboard": "5.1.1"
  },
  "devDependencies": {
    "@testing-library/jest-dom": "6.8.0",
@ -12,7 +12,8 @@
  },
  "overrides": {
    "glob": "13.0.0",
-    "minimatch": "10.1.1",
+    "minimatch": "10.2.5",
+    "picomatch": "2.3.2",
    "@isaacs/brace-expansion": "5.0.0",
    "@babel/traverse": "7.28.5",
    "braces": "3.0.3"
--- a/pyproject.toml
+++ b/pyproject.toml
@ -12,11 +12,11 @@ authors = [
 dependencies = [
    "fastuuid==0.14.0",
    "httpx==0.28.1",
-    "openai==2.24.0",
+    "openai==2.33.0",
    "python-dotenv==1.2.2",
    "tiktoken==0.12.0",
    "importlib-metadata==8.5.0",
-    "tokenizers==0.22.2",
+    "tokenizers==0.23.1",
    "click==8.1.8",
    "jinja2==3.1.6",
    "aiohttp==3.13.4",
@ -44,11 +44,11 @@ proxy = [
    "apscheduler==3.11.2",
    "fastapi-sso==0.19.0",
    "PyJWT==2.12.0",
-    "python-multipart==0.0.26",
+    "python-multipart==0.0.27",
    "cryptography==46.0.7",
    "pynacl==1.6.2",
    "websockets==15.0.1",
-    "boto3==1.42.59",
+    "boto3==1.43.1",
    "azure-identity==1.25.2",
    "azure-storage-blob==12.28.0",
    "mcp==1.26.0",
@ -120,9 +120,9 @@ dev = [
    "flake8==7.3.0",
    "black==24.10.0",
    "mypy==1.19.0",
-    "pytest==8.3.5",
+    "pytest==9.0.3",
    "pytest-mock==3.15.1",
-    "pytest-asyncio==1.2.0",
+    "pytest-asyncio==1.3.0",
    "pytest-postgresql==7.0.2",
    # pytest-postgresql imports psycopg v3 during pytest startup. Keep the base
    # package and the binary wheel in the default dev environment so local
@ -191,7 +191,7 @@ ci = [
    "pylint==4.0.5",
    "pyright==1.1.408",
    "langchain-mcp-adapters==0.2.1",
-    "langchain-openai==1.1.10",
+    "langchain-openai==1.1.14",
    "langgraph==1.0.10",
    # langgraph-prebuilt 1.0.9 imports ExecutionInfo/ServerInfo from
    # langgraph.runtime, which is not exported until langgraph 1.1.0.
@ -205,7 +205,7 @@ healthcheck = [
 ]

 [build-system]
-requires = ["uv_build==0.10.7"]
+requires = ["uv_build==0.11.8"]
 build-backend = "uv_build"

 [tool.uv]
--- a/tests/code_coverage_tests/liccheck.ini
+++ b/tests/code_coverage_tests/liccheck.ini
@ -150,6 +150,7 @@ jaraco.context: >=6.1.0 # Unknown license
 pypdf: >=6.6.2 # BSD-3-Clause license - https://github.com/py-pdf/pypdf/blob/main/LICENSE
 hf-xet: >=1.4.2 # Apache 2.0 License - https://github.com/huggingface/xet-tools/blob/main/LICENSE
 pytest-asyncio: >=1.2.0 # Apache 2.0 license
+pytest: >=9.0.3 # MIT license
 pytest-postgresql: >=7.0.2 # LGPLv3+ license
 pytest-xdist: >=3.8.0 # MIT License
 ruff: >=0.15.3 # MIT License
--- a/tests/guardrails_tests/test_semantic_guard.py
+++ b/tests/guardrails_tests/test_semantic_guard.py
@ -171,6 +171,25 @@ class TestHelperFunctions:
        mock_response.choices[0].message.content = "Hello from LLM"
        assert _extract_response_text(mock_response) == "Hello from LLM"

+    def test_extract_response_text_combines_all_choices(self):
+        """Text from every choice is extracted, not only the first one.
+
+        The first mocked choice carries plain string content and the second a
+        list of ``{"type": "text"}`` parts. The expected value shows the parts
+        joined by a space and the two choices joined by a newline.
+        """
+        from litellm.proxy.guardrails.guardrail_hooks.semantic_guard.semantic_guard import (
+            _extract_response_text,
+        )
+
+        first_choice = MagicMock()
+        first_choice.message.content = "first response"
+        second_choice = MagicMock()
+        # Content given as a list of text parts rather than a single string.
+        second_choice.message.content = [
+            {"type": "text", "text": "second"},
+            {"type": "text", "text": "response"},
+        ]
+        mock_response = MagicMock()
+        mock_response.choices = [first_choice, second_choice]
+
+        assert (
+            _extract_response_text(mock_response) == "first response\nsecond response"
+        )
+
    def test_extract_response_text_empty(self):
        from litellm.proxy.guardrails.guardrail_hooks.semantic_guard.semantic_guard import (
            _extract_response_text,
--- a/tests/local_testing/test_router_debug_logs.py
+++ b/tests/local_testing/test_router_debug_logs.py
@ -18,6 +18,7 @@ from litellm import Router

 # this tests debug logs from litellm router and litellm proxy server
 from litellm._logging import verbose_logger, verbose_proxy_logger, verbose_router_logger
+from litellm.llms.custom_httpx.async_client_cleanup import close_litellm_async_clients


 # this tests debug logs from litellm router and litellm proxy server
@ -74,6 +75,9 @@ def test_async_fallbacks(caplog):
            pytest.fail(f"An exception occurred: {e}")
        finally:
            router.reset()
+            # Close cached aiohttp/httpx clients before the event loop ends
+            # to prevent "Unclosed client session" / "Unclosed connector" warnings.
+            await close_litellm_async_clients()

    asyncio.run(_make_request())
    captured_logs = [rec.message for rec in caplog.records]
--- a/tests/local_testing/test_streaming.py
+++ b/tests/local_testing/test_streaming.py
@ -3640,7 +3640,7 @@ def test_mock_response_iterator_tool_use():
    [
        # "deepseek/deepseek-reasoner",
        # "anthropic/claude-3-7-sonnet-20250219",
-        "openrouter/anthropic/claude-3.7-sonnet",
+        "openrouter/anthropic/claude-sonnet-4.5",
    ],
 )
 def test_reasoning_content_completion(model):
--- a/tests/pass_through_tests/package-lock.json
+++ b/tests/pass_through_tests/package-lock.json
@ -8,8 +8,8 @@
      "name": "litellm-pass-through-tests",
      "version": "0.0.0",
      "dependencies": {
-        "@google-cloud/vertexai": "1.9.3",
-        "@google/generative-ai": "0.21.0"
+        "@google-cloud/vertexai": "1.12.0",
+        "@google/generative-ai": "0.24.1"
      },
      "devDependencies": {
        "jest": "29.7.0"
@ -512,21 +512,46 @@
      "license": "MIT"
    },
    "node_modules/@google-cloud/vertexai": {
-      "version": "1.9.3",
-      "resolved": "https://registry.npmjs.org/@google-cloud/vertexai/-/vertexai-1.9.3.tgz",
-      "integrity": "sha512-35o5tIEMLW3JeFJOaaMNR2e5sq+6rpnhrF97PuAxeOm0GlqVTESKhkGj7a5B5mmJSSSU3hUfIhcQCRRsw4Ipzg==",
+      "version": "1.12.0",
+      "resolved": "https://registry.npmjs.org/@google-cloud/vertexai/-/vertexai-1.12.0.tgz",
+      "integrity": "sha512-XMJIk7GIeavFLP5A3YEUlowKa5Y5PZRrnnuTJcqR0k+lFKkv7+IWpdRp+Xbqb8xNDrvQaE2hP2RYPUylyD5EdA==",
      "license": "Apache-2.0",
      "dependencies": {
+        "@google/genai": "^1.45.0",
        "google-auth-library": "^9.1.0"
      },
      "engines": {
        "node": ">=18.0.0"
      }
    },
+    "node_modules/@google/genai": {
+      "version": "1.51.0",
+      "resolved": "https://registry.npmjs.org/@google/genai/-/genai-1.51.0.tgz",
+      "integrity": "sha512-vTZZF3CSimN7cn2zsLpW2p5WF0eZa5Gz69ITMPCNHpPrDlAstOfGifSfi0p/s9Z9400f7xJRkgvkQNrcM7pJ6w==",
+      "hasInstallScript": true,
+      "license": "Apache-2.0",
+      "dependencies": {
+        "google-auth-library": "^10.3.0",
+        "p-retry": "^4.6.2",
+        "protobufjs": "^7.5.4",
+        "ws": "^8.18.0"
+      },
+      "engines": {
+        "node": ">=20.0.0"
+      },
+      "peerDependencies": {
+        "@modelcontextprotocol/sdk": "^1.25.2"
+      },
+      "peerDependenciesMeta": {
+        "@modelcontextprotocol/sdk": {
+          "optional": true
+        }
+      }
+    },
    "node_modules/@google/generative-ai": {
-      "version": "0.21.0",
-      "resolved": "https://registry.npmjs.org/@google/generative-ai/-/generative-ai-0.21.0.tgz",
-      "integrity": "sha512-7XhUbtnlkSEZK15kN3t+tzIMxsbKm/dSkKBFalj+20NvPKe1kBY7mR2P7vuijEn+f06z5+A8bVGKO0v39cr6Wg==",
+      "version": "0.24.1",
+      "resolved": "https://registry.npmjs.org/@google/generative-ai/-/generative-ai-0.24.1.tgz",
+      "integrity": "sha512-MqO+MLfM6kjxcKoy0p1wRzG3b4ZZXtPI+z2IE26UogS2Cm/XHO+7gGRBh6gcJsOiIVoH93UwKvW4HdgiOZCy9Q==",
      "license": "Apache-2.0",
      "engines": {
        "node": ">=18.0.0"
@ -901,6 +926,70 @@
        "@jridgewell/sourcemap-codec": "^1.4.14"
      }
    },
+    "node_modules/@protobufjs/aspromise": {
+      "version": "1.1.2",
+      "resolved": "https://registry.npmjs.org/@protobufjs/aspromise/-/aspromise-1.1.2.tgz",
+      "integrity": "sha512-j+gKExEuLmKwvz3OgROXtrJ2UG2x8Ch2YZUxahh+s1F2HZ+wAceUNLkvy6zKCPVRkU++ZWQrdxsUeQXmcg4uoQ==",
+      "license": "BSD-3-Clause"
+    },
+    "node_modules/@protobufjs/base64": {
+      "version": "1.1.2",
+      "resolved": "https://registry.npmjs.org/@protobufjs/base64/-/base64-1.1.2.tgz",
+      "integrity": "sha512-AZkcAA5vnN/v4PDqKyMR5lx7hZttPDgClv83E//FMNhR2TMcLUhfRUBHCmSl0oi9zMgDDqRUJkSxO3wm85+XLg==",
+      "license": "BSD-3-Clause"
+    },
+    "node_modules/@protobufjs/codegen": {
+      "version": "2.0.5",
+      "resolved": "https://registry.npmjs.org/@protobufjs/codegen/-/codegen-2.0.5.tgz",
+      "integrity": "sha512-zgXFLzW3Ap33e6d0Wlj4MGIm6Ce8O89n/apUaGNB/jx+hw+ruWEp7EwGUshdLKVRCxZW12fp9r40E1mQrf/34g==",
+      "license": "BSD-3-Clause"
+    },
+    "node_modules/@protobufjs/eventemitter": {
+      "version": "1.1.0",
+      "resolved": "https://registry.npmjs.org/@protobufjs/eventemitter/-/eventemitter-1.1.0.tgz",
+      "integrity": "sha512-j9ednRT81vYJ9OfVuXG6ERSTdEL1xVsNgqpkxMsbIabzSo3goCjDIveeGv5d03om39ML71RdmrGNjG5SReBP/Q==",
+      "license": "BSD-3-Clause"
+    },
+    "node_modules/@protobufjs/fetch": {
+      "version": "1.1.0",
+      "resolved": "https://registry.npmjs.org/@protobufjs/fetch/-/fetch-1.1.0.tgz",
+      "integrity": "sha512-lljVXpqXebpsijW71PZaCYeIcE5on1w5DlQy5WH6GLbFryLUrBD4932W/E2BSpfRJWseIL4v/KPgBFxDOIdKpQ==",
+      "license": "BSD-3-Clause",
+      "dependencies": {
+        "@protobufjs/aspromise": "^1.1.1",
+        "@protobufjs/inquire": "^1.1.0"
+      }
+    },
+    "node_modules/@protobufjs/float": {
+      "version": "1.0.2",
+      "resolved": "https://registry.npmjs.org/@protobufjs/float/-/float-1.0.2.tgz",
+      "integrity": "sha512-Ddb+kVXlXst9d+R9PfTIxh1EdNkgoRe5tOX6t01f1lYWOvJnSPDBlG241QLzcyPdoNTsblLUdujGSE4RzrTZGQ==",
+      "license": "BSD-3-Clause"
+    },
+    "node_modules/@protobufjs/inquire": {
+      "version": "1.1.1",
+      "resolved": "https://registry.npmjs.org/@protobufjs/inquire/-/inquire-1.1.1.tgz",
+      "integrity": "sha512-mnzgDV26ueAvk7rsbt9L7bE0SuAoqyuys/sMMrmVcN5x9VsxpcG3rqAUSgDyLp0UZlmNfIbQ4fHfCtreVBk8Ew==",
+      "license": "BSD-3-Clause"
+    },
+    "node_modules/@protobufjs/path": {
+      "version": "1.1.2",
+      "resolved": "https://registry.npmjs.org/@protobufjs/path/-/path-1.1.2.tgz",
+      "integrity": "sha512-6JOcJ5Tm08dOHAbdR3GrvP+yUUfkjG5ePsHYczMFLq3ZmMkAD98cDgcT2iA1lJ9NVwFd4tH/iSSoe44YWkltEA==",
+      "license": "BSD-3-Clause"
+    },
+    "node_modules/@protobufjs/pool": {
+      "version": "1.1.0",
+      "resolved": "https://registry.npmjs.org/@protobufjs/pool/-/pool-1.1.0.tgz",
+      "integrity": "sha512-0kELaGSIDBKvcgS4zkjz1PeddatrjYcmMWOlAuAPwAeccUrPHdUqo/J6LiymHHEiJT5NrF1UVwxY14f+fy4WQw==",
+      "license": "BSD-3-Clause"
+    },
+    "node_modules/@protobufjs/utf8": {
+      "version": "1.1.1",
+      "resolved": "https://registry.npmjs.org/@protobufjs/utf8/-/utf8-1.1.1.tgz",
+      "integrity": "sha512-oOAWABowe8EAbMyWKM0tYDKi8Yaox52D+HWZhAIJqQXbqe0xI/GV7FhLWqlEKreMkfDjshR5FKgi3mnle0h6Eg==",
+      "license": "BSD-3-Clause"
+    },
    "node_modules/@sinclair/typebox": {
      "version": "0.27.10",
      "resolved": "https://registry.npmjs.org/@sinclair/typebox/-/typebox-0.27.10.tgz",
@ -1014,12 +1103,17 @@
      "version": "25.6.0",
      "resolved": "https://registry.npmjs.org/@types/node/-/node-25.6.0.tgz",
      "integrity": "sha512-+qIYRKdNYJwY3vRCZMdJbPLJAtGjQBudzZzdzwQYkEPQd+PJGixUL5QfvCLDaULoLv+RhT3LDkwEfKaAkgSmNQ==",
-      "dev": true,
      "license": "MIT",
      "dependencies": {
        "undici-types": "~7.19.0"
      }
    },
+    "node_modules/@types/retry": {
+      "version": "0.12.0",
+      "resolved": "https://registry.npmjs.org/@types/retry/-/retry-0.12.0.tgz",
+      "integrity": "sha512-wWKOClTTiizcZhXnPY4wikVAwmdYHp8q6DmC+EJUzAMsycb7HB32Kh9RN4+0gExjmPmZSAQjgURXIGATPegAvA==",
+      "license": "MIT"
+    },
    "node_modules/@types/stack-utils": {
      "version": "2.0.3",
      "resolved": "https://registry.npmjs.org/@types/stack-utils/-/stack-utils-2.0.3.tgz",
@ -1560,6 +1654,15 @@
        "node": ">= 8"
      }
    },
+    "node_modules/data-uri-to-buffer": {
+      "version": "4.0.1",
+      "resolved": "https://registry.npmjs.org/data-uri-to-buffer/-/data-uri-to-buffer-4.0.1.tgz",
+      "integrity": "sha512-0R9ikRb668HB7QDxT1vkpuUBtqc53YyAwMwGeUFKRojY/NWKvdZ+9UYtRfGmhqNbRkTSVpMbmyhXipFFv2cb/A==",
+      "license": "MIT",
+      "engines": {
+        "node": ">= 12"
+      }
+    },
    "node_modules/debug": {
      "version": "4.4.3",
      "resolved": "https://registry.npmjs.org/debug/-/debug-4.4.3.tgz",
@ -1785,6 +1888,29 @@
        "bser": "2.1.1"
      }
    },
+    "node_modules/fetch-blob": {
+      "version": "3.2.0",
+      "resolved": "https://registry.npmjs.org/fetch-blob/-/fetch-blob-3.2.0.tgz",
+      "integrity": "sha512-7yAQpD2UMJzLi1Dqv7qFYnPbaPx7ZfFK6PiIxQ4PfkGPyNyl2Ugx+a/umUonmKqjhM4DnfbMvdX6otXq83soQQ==",
+      "funding": [
+        {
+          "type": "github",
+          "url": "https://github.com/sponsors/jimmywarting"
+        },
+        {
+          "type": "paypal",
+          "url": "https://paypal.me/jimmywarting"
+        }
+      ],
+      "license": "MIT",
+      "dependencies": {
+        "node-domexception": "^1.0.0",
+        "web-streams-polyfill": "^3.0.3"
+      },
+      "engines": {
+        "node": "^12.20 || >= 14.13"
+      }
+    },
    "node_modules/fill-range": {
      "version": "7.1.1",
      "resolved": "https://registry.npmjs.org/fill-range/-/fill-range-7.1.1.tgz",
@ -1812,6 +1938,18 @@
        "node": ">=8"
      }
    },
+    "node_modules/formdata-polyfill": {
+      "version": "4.0.10",
+      "resolved": "https://registry.npmjs.org/formdata-polyfill/-/formdata-polyfill-4.0.10.tgz",
+      "integrity": "sha512-buewHzMvYL29jdeQTVILecSaZKnt/RJWjoZCF5OW60Z67/GmSLBkOFM7qh1PI3zFNtJbaZL5eQu1vLfazOwj4g==",
+      "license": "MIT",
+      "dependencies": {
+        "fetch-blob": "^3.1.2"
+      },
+      "engines": {
+        "node": ">=12.20.0"
+      }
+    },
    "node_modules/fs.realpath": {
      "version": "1.0.0",
      "resolved": "https://registry.npmjs.org/fs.realpath/-/fs.realpath-1.0.0.tgz",
@ -1845,33 +1983,31 @@
      }
    },
    "node_modules/gaxios": {
-      "version": "6.7.1",
-      "resolved": "https://registry.npmjs.org/gaxios/-/gaxios-6.7.1.tgz",
-      "integrity": "sha512-LDODD4TMYx7XXdpwxAVRAIAuB0bzv0s+ywFonY46k126qzQHT9ygyoa9tncmOiQmmDrik65UYsEkv3lbfqQ3yQ==",
+      "version": "7.1.4",
+      "resolved": "https://registry.npmjs.org/gaxios/-/gaxios-7.1.4.tgz",
+      "integrity": "sha512-bTIgTsM2bWn3XklZISBTQX7ZSddGW+IO3bMdGaemHZ3tbqExMENHLx6kKZ/KlejgrMtj8q7wBItt51yegqalrA==",
      "license": "Apache-2.0",
      "dependencies": {
        "extend": "^3.0.2",
        "https-proxy-agent": "^7.0.1",
-        "is-stream": "^2.0.0",
-        "node-fetch": "^2.6.9",
-        "uuid": "^9.0.1"
+        "node-fetch": "^3.3.2"
      },
      "engines": {
-        "node": ">=14"
+        "node": ">=18"
      }
    },
    "node_modules/gcp-metadata": {
-      "version": "6.1.1",
-      "resolved": "https://registry.npmjs.org/gcp-metadata/-/gcp-metadata-6.1.1.tgz",
-      "integrity": "sha512-a4tiq7E0/5fTjxPAaH4jpjkSv/uCaU2p5KC6HVGrvl0cDjA8iBZv4vv1gyzlmK0ZUKqwpOyQMKzZQe3lTit77A==",
+      "version": "8.1.2",
+      "resolved": "https://registry.npmjs.org/gcp-metadata/-/gcp-metadata-8.1.2.tgz",
+      "integrity": "sha512-zV/5HKTfCeKWnxG0Dmrw51hEWFGfcF2xiXqcA3+J90WDuP0SvoiSO5ORvcBsifmx/FoIjgQN3oNOGaQ5PhLFkg==",
      "license": "Apache-2.0",
      "dependencies": {
-        "gaxios": "^6.1.1",
-        "google-logging-utils": "^0.0.2",
+        "gaxios": "^7.0.0",
+        "google-logging-utils": "^1.0.0",
        "json-bigint": "^1.0.0"
      },
      "engines": {
-        "node": ">=14"
+        "node": ">=18"
      }
    },
    "node_modules/gensync": {
@ -1940,26 +2076,26 @@
      }
    },
    "node_modules/google-auth-library": {
-      "version": "9.15.1",
-      "resolved": "https://registry.npmjs.org/google-auth-library/-/google-auth-library-9.15.1.tgz",
-      "integrity": "sha512-Jb6Z0+nvECVz+2lzSMt9u98UsoakXxA2HGHMCxh+so3n90XgYWkq5dur19JAJV7ONiJY22yBTyJB1TSkvPq9Ng==",
+      "version": "10.6.2",
+      "resolved": "https://registry.npmjs.org/google-auth-library/-/google-auth-library-10.6.2.tgz",
+      "integrity": "sha512-e27Z6EThmVNNvtYASwQxose/G57rkRuaRbQyxM2bvYLLX/GqWZ5chWq2EBoUchJbCc57eC9ArzO5wMsEmWftCw==",
      "license": "Apache-2.0",
      "dependencies": {
        "base64-js": "^1.3.0",
        "ecdsa-sig-formatter": "^1.0.11",
-        "gaxios": "^6.1.1",
-        "gcp-metadata": "^6.1.0",
-        "gtoken": "^7.0.0",
+        "gaxios": "^7.1.4",
+        "gcp-metadata": "8.1.2",
+        "google-logging-utils": "1.1.3",
        "jws": "^4.0.0"
      },
      "engines": {
-        "node": ">=14"
+        "node": ">=18"
      }
    },
    "node_modules/google-logging-utils": {
-      "version": "0.0.2",
-      "resolved": "https://registry.npmjs.org/google-logging-utils/-/google-logging-utils-0.0.2.tgz",
-      "integrity": "sha512-NEgUnEcBiP5HrPzufUkBzJOD/Sxsco3rLNo1F1TNf7ieU8ryUzBhqba8r756CjLX7rn3fHl6iLEwPYuqpoKgQQ==",
+      "version": "1.1.3",
+      "resolved": "https://registry.npmjs.org/google-logging-utils/-/google-logging-utils-1.1.3.tgz",
+      "integrity": "sha512-eAmLkjDjAFCVXg7A1unxHsLf961m6y17QFqXqAXGj/gVkKFrEICfStRfwUlGNfeCEjNRa32JEWOUTlYXPyyKvA==",
      "license": "Apache-2.0",
      "engines": {
        "node": ">=14"
@ -1972,19 +2108,6 @@
      "dev": true,
      "license": "ISC"
    },
-    "node_modules/gtoken": {
-      "version": "7.1.0",
-      "resolved": "https://registry.npmjs.org/gtoken/-/gtoken-7.1.0.tgz",
-      "integrity": "sha512-pCcEwRi+TKpMlxAQObHDQ56KawURgyAf6jtIY046fJ5tIv3zDe/LEIubckAO8fj6JnAxLdmWkUfNyulQ2iKdEw==",
-      "license": "MIT",
-      "dependencies": {
-        "gaxios": "^6.0.0",
-        "jws": "^4.0.0"
-      },
-      "engines": {
-        "node": ">=14.0.0"
-      }
-    },
    "node_modules/has-flag": {
      "version": "4.0.0",
      "resolved": "https://registry.npmjs.org/has-flag/-/has-flag-4.0.0.tgz",
@ -2144,6 +2267,7 @@
      "version": "2.0.1",
      "resolved": "https://registry.npmjs.org/is-stream/-/is-stream-2.0.1.tgz",
      "integrity": "sha512-hFoiJiTl63nn+kstHGBtewWSKnQLpyb155KHheA1l39uvtO9nWIop1p3udqPcUd/xbF1VLMO4n7OI6p7RbngDg==",
+      "dev": true,
      "license": "MIT",
      "engines": {
        "node": ">=8"
@ -2963,6 +3087,12 @@
        "node": ">=8"
      }
    },
+    "node_modules/long": {
+      "version": "5.3.2",
+      "resolved": "https://registry.npmjs.org/long/-/long-5.3.2.tgz",
+      "integrity": "sha512-mNAgZ1GmyNhD7AuqnTG3/VQ26o760+ZYBPKjPvugO8+nLbYfX6TVpJPseBvopbdY+qpZ/lKUnmEc1LeZYS3QAA==",
+      "license": "Apache-2.0"
+    },
    "node_modules/lru-cache": {
      "version": "5.1.1",
      "resolved": "https://registry.npmjs.org/lru-cache/-/lru-cache-5.1.1.tgz",
@ -3069,24 +3199,42 @@
      "dev": true,
      "license": "MIT"
    },
+    "node_modules/node-domexception": {
+      "version": "1.0.0",
+      "resolved": "https://registry.npmjs.org/node-domexception/-/node-domexception-1.0.0.tgz",
+      "integrity": "sha512-/jKZoMpw0F8GRwl4/eLROPA3cfcXtLApP0QzLmUT/HuPCZWyB7IY9ZrMeKw2O/nFIqPQB3PVM9aYm0F312AXDQ==",
+      "deprecated": "Use your platform's native DOMException instead",
+      "funding": [
+        {
+          "type": "github",
+          "url": "https://github.com/sponsors/jimmywarting"
+        },
+        {
+          "type": "github",
+          "url": "https://paypal.me/jimmywarting"
+        }
+      ],
+      "license": "MIT",
+      "engines": {
+        "node": ">=10.5.0"
+      }
+    },
    "node_modules/node-fetch": {
-      "version": "2.7.0",
-      "resolved": "https://registry.npmjs.org/node-fetch/-/node-fetch-2.7.0.tgz",
-      "integrity": "sha512-c4FRfUm/dbcWZ7U+1Wq0AwCyFL+3nt2bEw05wfxSz+DWpWsitgmSgYmy2dQdWyKC1694ELPqMs/YzUSNozLt8A==",
+      "version": "3.3.2",
+      "resolved": "https://registry.npmjs.org/node-fetch/-/node-fetch-3.3.2.tgz",
+      "integrity": "sha512-dRB78srN/l6gqWulah9SrxeYnxeddIG30+GOqK/9OlLVyLg3HPnr6SqOWTWOXKRwC2eGYCkZ59NNuSgvSrpgOA==",
      "license": "MIT",
      "dependencies": {
-        "whatwg-url": "^5.0.0"
+        "data-uri-to-buffer": "^4.0.0",
+        "fetch-blob": "^3.1.4",
+        "formdata-polyfill": "^4.0.10"
      },
      "engines": {
-        "node": "4.x || >=6.0.0"
+        "node": "^12.20.0 || ^14.13.1 || >=16.0.0"
      },
-      "peerDependencies": {
-        "encoding": "^0.1.0"
-      },
-      "peerDependenciesMeta": {
-        "encoding": {
-          "optional": true
-        }
+      "funding": {
+        "type": "opencollective",
+        "url": "https://opencollective.com/node-fetch"
      }
    },
    "node_modules/node-int64": {
@ -3197,6 +3345,19 @@
        "url": "https://github.com/sponsors/sindresorhus"
      }
    },
+    "node_modules/p-retry": {
+      "version": "4.6.2",
+      "resolved": "https://registry.npmjs.org/p-retry/-/p-retry-4.6.2.tgz",
+      "integrity": "sha512-312Id396EbJdvRONlngUx0NydfrIQ5lsYu0znKVUzVvArzEIt08V1qhtyESbGVd1FGX7UKtiFp5uwKZdM8wIuQ==",
+      "license": "MIT",
+      "dependencies": {
+        "@types/retry": "0.12.0",
+        "retry": "^0.13.1"
+      },
+      "engines": {
+        "node": ">=8"
+      }
+    },
    "node_modules/p-try": {
      "version": "2.2.0",
      "resolved": "https://registry.npmjs.org/p-try/-/p-try-2.2.0.tgz",
@ -3348,6 +3509,30 @@
        "node": ">= 6"
      }
    },
+    "node_modules/protobufjs": {
+      "version": "7.5.6",
+      "resolved": "https://registry.npmjs.org/protobufjs/-/protobufjs-7.5.6.tgz",
+      "integrity": "sha512-M71sTMB146U3u0di3yup8iM+zv8yPRNQVr1KK4tyBitl3qFvEGucq/rGDRShD2rsJhtN02RJaJ7j5X5hmy8SJg==",
+      "hasInstallScript": true,
+      "license": "BSD-3-Clause",
+      "dependencies": {
+        "@protobufjs/aspromise": "^1.1.2",
+        "@protobufjs/base64": "^1.1.2",
+        "@protobufjs/codegen": "^2.0.5",
+        "@protobufjs/eventemitter": "^1.1.0",
+        "@protobufjs/fetch": "^1.1.0",
+        "@protobufjs/float": "^1.0.2",
+        "@protobufjs/inquire": "^1.1.1",
+        "@protobufjs/path": "^1.1.2",
+        "@protobufjs/pool": "^1.1.0",
+        "@protobufjs/utf8": "^1.1.1",
+        "@types/node": ">=13.7.0",
+        "long": "^5.0.0"
+      },
+      "engines": {
+        "node": ">=12.0.0"
+      }
+    },
    "node_modules/pure-rand": {
      "version": "6.1.0",
      "resolved": "https://registry.npmjs.org/pure-rand/-/pure-rand-6.1.0.tgz",
@ -3437,6 +3622,15 @@
        "node": ">=10"
      }
    },
+    "node_modules/retry": {
+      "version": "0.13.1",
+      "resolved": "https://registry.npmjs.org/retry/-/retry-0.13.1.tgz",
+      "integrity": "sha512-XQBQ3I8W1Cge0Seh+6gjj03LbmRFWuoszgK9ooCpwYIrhhoO80pfq4cUkU5DkknwfOfFteRwlZ56PYOGYyFWdg==",
+      "license": "MIT",
+      "engines": {
+        "node": ">= 4"
+      }
+    },
    "node_modules/safe-buffer": {
      "version": "5.2.1",
      "resolved": "https://registry.npmjs.org/safe-buffer/-/safe-buffer-5.2.1.tgz",
@ -3691,12 +3885,6 @@
        "node": ">=8.0"
      }
    },
-    "node_modules/tr46": {
-      "version": "0.0.3",
-      "resolved": "https://registry.npmjs.org/tr46/-/tr46-0.0.3.tgz",
-      "integrity": "sha512-N3WMsuqV66lT30CrXNbEjx4GEwlow3v6rr4mCcv6prnfwhS01rkgyFdjPNBYd9br7LpXV1+Emh01fHnq2Gdgrw==",
-      "license": "MIT"
-    },
    "node_modules/type-detect": {
      "version": "4.0.8",
      "resolved": "https://registry.npmjs.org/type-detect/-/type-detect-4.0.8.tgz",
@ -3724,7 +3912,6 @@
      "version": "7.19.2",
      "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-7.19.2.tgz",
      "integrity": "sha512-qYVnV5OEm2AW8cJMCpdV20CDyaN3g0AjDlOGf1OW4iaDEx8MwdtChUp4zu4H0VP3nDRF/8RKWH+IPp9uW0YGZg==",
-      "dev": true,
      "license": "MIT"
    },
    "node_modules/update-browserslist-db": {
@ -3758,19 +3945,6 @@
        "browserslist": ">= 4.21.0"
      }
    },
-    "node_modules/uuid": {
-      "version": "9.0.1",
-      "resolved": "https://registry.npmjs.org/uuid/-/uuid-9.0.1.tgz",
-      "integrity": "sha512-b+1eJOlsR9K8HJpow9Ok3fiWOWSIcIzXodvv0rQjVoOVNpWMpxf1wZNpt4y9h10odCNrqnYp1OBzRktckBe3sA==",
-      "funding": [
-        "https://github.com/sponsors/broofa",
-        "https://github.com/sponsors/ctavan"
-      ],
-      "license": "MIT",
-      "bin": {
-        "uuid": "dist/bin/uuid"
-      }
-    },
    "node_modules/v8-to-istanbul": {
      "version": "9.3.0",
      "resolved": "https://registry.npmjs.org/v8-to-istanbul/-/v8-to-istanbul-9.3.0.tgz",
@ -3796,20 +3970,13 @@
        "makeerror": "1.0.12"
      }
    },
-    "node_modules/webidl-conversions": {
-      "version": "3.0.1",
-      "resolved": "https://registry.npmjs.org/webidl-conversions/-/webidl-conversions-3.0.1.tgz",
-      "integrity": "sha512-2JAn3z8AR6rjK8Sm8orRC0h/bcl/DqL7tRPdGZ4I1CjdF+EaMLmYxBHyXuKL849eucPFhvBoxMsflfOb8kxaeQ==",
-      "license": "BSD-2-Clause"
-    },
-    "node_modules/whatwg-url": {
-      "version": "5.0.0",
-      "resolved": "https://registry.npmjs.org/whatwg-url/-/whatwg-url-5.0.0.tgz",
-      "integrity": "sha512-saE57nupxk6v3HY35+jzBwYa0rKSy0XR8JSxZPwgLr7ys0IBzhGviA1/TUGJLmSVqs8pb9AnvICXEuOHLprYTw==",
+    "node_modules/web-streams-polyfill": {
+      "version": "3.3.3",
+      "resolved": "https://registry.npmjs.org/web-streams-polyfill/-/web-streams-polyfill-3.3.3.tgz",
+      "integrity": "sha512-d2JWLCivmZYTSIoge9MsgFCZrt571BikcWGYkjC1khllbTeDlGqZ2D8vD8E/lJa8WGWbb7Plm8/XJYV7IJHZZw==",
      "license": "MIT",
-      "dependencies": {
-        "tr46": "~0.0.3",
-        "webidl-conversions": "^3.0.0"
+      "engines": {
+        "node": ">= 8"
      }
    },
    "node_modules/which": {
@ -3867,6 +4034,27 @@
        "node": "^12.13.0 || ^14.15.0 || >=16.0.0"
      }
    },
+    "node_modules/ws": {
+      "version": "8.20.0",
+      "resolved": "https://registry.npmjs.org/ws/-/ws-8.20.0.tgz",
+      "integrity": "sha512-sAt8BhgNbzCtgGbt2OxmpuryO63ZoDk/sqaB/znQm94T4fCEsy/yV+7CdC1kJhOU9lboAEU7R3kquuycDoibVA==",
+      "license": "MIT",
+      "engines": {
+        "node": ">=10.0.0"
+      },
+      "peerDependencies": {
+        "bufferutil": "^4.0.1",
+        "utf-8-validate": ">=5.0.2"
+      },
+      "peerDependenciesMeta": {
+        "bufferutil": {
+          "optional": true
+        },
+        "utf-8-validate": {
+          "optional": true
+        }
+      }
+    },
    "node_modules/y18n": {
      "version": "5.0.8",
      "resolved": "https://registry.npmjs.org/y18n/-/y18n-5.0.8.tgz",
--- a/tests/pass_through_tests/package.json
+++ b/tests/pass_through_tests/package.json
@ -4,10 +4,15 @@
  "private": true,
  "description": "JS pass-through tests for Vertex AI / Google AI Studio routes. CI-only; not published.",
  "dependencies": {
-    "@google-cloud/vertexai": "1.9.3",
-    "@google/generative-ai": "0.21.0"
+    "@google-cloud/vertexai": "1.12.0",
+    "@google/generative-ai": "0.24.1"
  },
  "devDependencies": {
    "jest": "29.7.0"
+  },
+  "overrides": {
+    "@google-cloud/vertexai": {
+      "google-auth-library": "10.6.2"
+    }
  }
 }
--- a/tests/proxy_admin_ui_tests/ui_unit_tests/package-lock.json
+++ b/tests/proxy_admin_ui_tests/ui_unit_tests/package-lock.json
@ -8,22 +8,22 @@
            "name": "ui-unit-tests",
            "version": "1.0.0",
            "dependencies": {
-                "@ant-design/icons": "^5.0.0",
-                "antd": "^5.12.5",
-                "react": "^18.2.0",
-                "react-dom": "^18.2.0"
+                "@ant-design/icons": "5.6.1",
+                "antd": "5.29.1",
+                "react": "18.3.1",
+                "react-dom": "18.3.1"
            },
            "devDependencies": {
-                "@testing-library/jest-dom": "^6.0.0",
-                "@testing-library/react": "^14.0.0",
-                "@types/jest": "^29.5.0",
-                "@types/react": "^18.2.0",
-                "@types/react-dom": "^18.2.0",
-                "identity-obj-proxy": "^3.0.0",
-                "jest": "^29.5.0",
-                "jest-environment-jsdom": "^29.5.0",
-                "ts-jest": "^29.1.0",
-                "typescript": "^5.0.0"
+                "@testing-library/jest-dom": "6.9.1",
+                "@testing-library/react": "14.3.1",
+                "@types/jest": "29.5.14",
+                "@types/react": "18.3.27",
+                "@types/react-dom": "18.3.7",
+                "identity-obj-proxy": "3.0.0",
+                "jest": "29.7.0",
+                "jest-environment-jsdom": "29.7.0",
+                "ts-jest": "29.4.5",
+                "typescript": "5.9.3"
            }
        },
        "node_modules/@adobe/css-tools": {
@ -647,29 +647,6 @@
            "integrity": "sha512-OWORNpfjMsSSUBVrRBVGECkhWcULOAJz9ZW8uK9qgxD+87M7jHRcvh/A96XXNhXTLmKcoYSQtBEX7lHMO7YRwg==",
            "license": "MIT"
        },
-        "node_modules/@isaacs/balanced-match": {
-            "version": "4.0.1",
-            "resolved": "https://registry.npmjs.org/@isaacs/balanced-match/-/balanced-match-4.0.1.tgz",
-            "integrity": "sha512-yzMTt9lEb8Gv7zRioUilSglI0c0smZ9k5D65677DLWLtWJaXIS3CqcGyUFByYKlnUj6TkjLVs54fBl6+TiGQDQ==",
-            "dev": true,
-            "license": "MIT",
-            "engines": {
-                "node": "20 || >=22"
-            }
-        },
-        "node_modules/@isaacs/brace-expansion": {
-            "version": "5.0.0",
-            "resolved": "https://registry.npmjs.org/@isaacs/brace-expansion/-/brace-expansion-5.0.0.tgz",
-            "integrity": "sha512-ZT55BDLV0yv0RBm2czMiZ+SqCGO7AvmOM3G/w2xhVPH+te0aKgFjmBvGlL1dH+ql2tgGO3MVrbb3jCKyvpgnxA==",
-            "dev": true,
-            "license": "MIT",
-            "dependencies": {
-                "@isaacs/balanced-match": "^4.0.1"
-            },
-            "engines": {
-                "node": "20 || >=22"
-            }
-        },
        "node_modules/@istanbuljs/load-nyc-config": {
            "version": "1.1.0",
            "resolved": "https://registry.npmjs.org/@istanbuljs/load-nyc-config/-/load-nyc-config-1.1.0.tgz",
@ -1326,9 +1303,9 @@
            }
        },
        "node_modules/@tootallnate/once": {
-            "version": "2.0.0",
-            "resolved": "https://registry.npmjs.org/@tootallnate/once/-/once-2.0.0.tgz",
-            "integrity": "sha512-XCuKFP5PS55gnMVu3dty8KPatLqUoy/ZYzDzAGCQ8JNFCkLXzmI7vNHCR+XpbZaMWQK/vQubr7PkYq8g470J/A==",
+            "version": "3.0.1",
+            "resolved": "https://registry.npmjs.org/@tootallnate/once/-/once-3.0.1.tgz",
+            "integrity": "sha512-VyMVKRrpHTT8PnotUeV8L/mDaMwD5DaAKCFLP73zAqAtvF0FCqky+Ki7BYbFCYQmqFyTe9316Ed5zS70QUR9eg==",
            "dev": true,
            "license": "MIT",
            "engines": {
@ -1907,11 +1884,14 @@
            }
        },
        "node_modules/balanced-match": {
-            "version": "1.0.2",
-            "resolved": "https://registry.npmjs.org/balanced-match/-/balanced-match-1.0.2.tgz",
-            "integrity": "sha512-3oSeUO0TMV67hN1AmbXsK4yaqU7tjiHlbxRDZOpH0KW9+CeX4bRAaX0Anxt0tx2MrpRpWwQaPwIlISEJhYU5Pw==",
+            "version": "4.0.4",
+            "resolved": "https://registry.npmjs.org/balanced-match/-/balanced-match-4.0.4.tgz",
+            "integrity": "sha512-BLrgEcRTwX2o6gGxGOCNyMvGSp35YofuYzw9h1IMTRmKqttAZZVU67bdb9Pr2vUHA8+j3i2tJfjO6C6+4myGTA==",
            "dev": true,
-            "license": "MIT"
+            "license": "MIT",
+            "engines": {
+                "node": "18 || 20 || >=22"
+            }
        },
        "node_modules/baseline-browser-mapping": {
            "version": "2.8.30",
@ -1924,14 +1904,16 @@
            }
        },
        "node_modules/brace-expansion": {
-            "version": "1.1.12",
-            "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.12.tgz",
-            "integrity": "sha512-9T9UjW3r0UW5c1Q7GTwllptXwhvYmEzFhzMfZ9H7FQWt+uZePjZPjBP/W1ZEyZ1twGWom5/56TF4lPcqjnDHcg==",
+            "version": "5.0.5",
+            "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-5.0.5.tgz",
+            "integrity": "sha512-VZznLgtwhn+Mact9tfiwx64fA9erHH/MCXEUfB/0bX/6Fz6ny5EGTXYltMocqg4xFAQZtnO3DHWWXi8RiuN7cQ==",
            "dev": true,
            "license": "MIT",
            "dependencies": {
-                "balanced-match": "^1.0.0",
-                "concat-map": "0.0.1"
+                "balanced-match": "^4.0.2"
+            },
+            "engines": {
+                "node": "18 || 20 || >=22"
            }
        },
        "node_modules/braces": {
@ -2230,13 +2212,6 @@
            "integrity": "sha512-VRhuHOLoKYOy4UbilLbUzbYg93XLjv2PncJC50EuTWPA3gaja1UjBsUP/D/9/juV3vQFr6XBEzn9KCAHdUvOHw==",
            "license": "MIT"
        },
-        "node_modules/concat-map": {
-            "version": "0.0.1",
-            "resolved": "https://registry.npmjs.org/concat-map/-/concat-map-0.0.1.tgz",
-            "integrity": "sha512-/Srv4dswyQNBfohGpz9o6Yb3Gz3SrUDqBH5rTuhGR7ahtlbYKnVxw2bCFMRljaA7EXHaXZ8wsHdodFvbkhKmqg==",
-            "dev": true,
-            "license": "MIT"
-        },
        "node_modules/convert-source-map": {
            "version": "2.0.0",
            "resolved": "https://registry.npmjs.org/convert-source-map/-/convert-source-map-2.0.0.tgz",
@ -3015,9 +2990,9 @@
            "license": "ISC"
        },
        "node_modules/handlebars": {
-            "version": "4.7.8",
-            "resolved": "https://registry.npmjs.org/handlebars/-/handlebars-4.7.8.tgz",
-            "integrity": "sha512-vafaFqs8MZkRrSX7sFVUdo3ap/eNiLnb4IakshzvP56X5Nr1iGKAIqdX6tMlm6HcNRIkr6AxO5jFEoJzzpT8aQ==",
+            "version": "4.7.9",
+            "resolved": "https://registry.npmjs.org/handlebars/-/handlebars-4.7.9.tgz",
+            "integrity": "sha512-4E71E0rpOaQuJR2A3xDZ+GM1HyWYv1clR58tC8emQNeQe3RH7MAzSbat+V0wG78LQBo6m6bzSG/L4pBuCsgnUQ==",
            "dev": true,
            "license": "MIT",
            "dependencies": {
@ -4920,16 +4895,16 @@
            }
        },
        "node_modules/minimatch": {
-            "version": "10.1.1",
-            "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-10.1.1.tgz",
-            "integrity": "sha512-enIvLvRAFZYXJzkCYG5RKmPfrFArdLv+R+lbQ53BmIMLIry74bjKzX6iHAm8WYamJkhSSEabrWN5D97XnKObjQ==",
+            "version": "10.2.5",
+            "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-10.2.5.tgz",
+            "integrity": "sha512-MULkVLfKGYDFYejP07QOurDLLQpcjk7Fw+7jXS2R2czRQzR56yHRveU5NDJEOviH+hETZKSkIk5c+T23GjFUMg==",
            "dev": true,
            "license": "BlueOak-1.0.0",
            "dependencies": {
-                "@isaacs/brace-expansion": "^5.0.0"
+                "brace-expansion": "^5.0.5"
            },
            "engines": {
-                "node": "20 || >=22"
+                "node": "18 || 20 || >=22"
            },
            "funding": {
                "url": "https://github.com/sponsors/isaacs"
@ -5246,9 +5221,9 @@
            "license": "ISC"
        },
        "node_modules/picomatch": {
-            "version": "2.3.1",
-            "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-2.3.1.tgz",
-            "integrity": "sha512-JU3teHTNjmE2VCGFzuY8EXzCDVwEqB2a8fsIvwaStHhAWJEeVd1o1QD80CU6+ZdEXXSLbSsuLwJjkCBWqRQUVA==",
+            "version": "2.3.2",
+            "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-2.3.2.tgz",
+            "integrity": "sha512-V7+vQEJ06Z+c5tSye8S+nHUfI51xoXIXjHQ99cQtKUkQqqO1kO/KCJUfZXuB47h/YBlDhah2H3hdUGXn8ie0oA==",
            "dev": true,
            "license": "MIT",
            "engines": {
@ -6562,19 +6537,6 @@
                "node": ">=8"
            }
        },
-        "node_modules/test-exclude/node_modules/minimatch": {
-            "version": "3.1.2",
-            "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-3.1.2.tgz",
-            "integrity": "sha512-J7p63hRiAjw1NDEww1W7i37+ByIrOWO5XQQAzZ3VOcL0PNybwpfmV/N05zFAzwQ9USyEcX6t3UO+K5aqBQOIHw==",
-            "dev": true,
-            "license": "ISC",
-            "dependencies": {
-                "brace-expansion": "^1.1.7"
-            },
-            "engines": {
-                "node": "*"
-            }
-        },
        "node_modules/throttle-debounce": {
            "version": "5.0.2",
            "resolved": "https://registry.npmjs.org/throttle-debounce/-/throttle-debounce-5.0.2.tgz",
--- a/tests/proxy_admin_ui_tests/ui_unit_tests/package.json
+++ b/tests/proxy_admin_ui_tests/ui_unit_tests/package.json
@ -25,10 +25,13 @@
    },
    "overrides": {
        "glob": "13.0.0",
-        "minimatch": "10.1.1",
+        "minimatch": "10.2.5",
+        "picomatch": "2.3.2",
+        "handlebars": "4.7.9",
+        "@tootallnate/once": "3.0.1",
        "@isaacs/brace-expansion": "5.0.0",
        "@babel/traverse": "7.28.5",
        "ws": "8.18.3",
        "braces": "3.0.3"
    }
-}
+}
--- a/tests/spend_tracking_tests/test_spend_accuracy_tests.py
+++ b/tests/spend_tracking_tests/test_spend_accuracy_tests.py
@ -128,8 +128,8 @@ async def get_spend_info(session, entity_type: str, entity_id: str):


 async def get_proxy_readiness(session):
-    """Fetch /health/readiness. Used both as a fail-fast gate and as a diagnostic on poll timeout."""
-    url = "http://0.0.0.0:4000/health/readiness"
+    """Fetch authenticated readiness details. Used both as a fail-fast gate and as a diagnostic on poll timeout."""
+    url = "http://0.0.0.0:4000/health/readiness/details"
    headers = {"Authorization": "Bearer sk-1234"}
    async with session.get(url, headers=headers) as response:
        return response.status, await response.json()
@ -140,7 +140,7 @@ async def assert_proxy_healthy(session):
    status, body = await get_proxy_readiness(session)
    if status != 200 or body.get("db") != "connected":
        pytest.fail(
-            f"Proxy /health/readiness unhealthy (status={status}). "
+            f"Proxy /health/readiness/details unhealthy (status={status}). "
            f"Cannot run spend accuracy test. Response: {body}"
        )
    print(f"Proxy readiness OK: {body}")
--- a/tests/test_health.py
+++ b/tests/test_health.py
@ -73,13 +73,32 @@ async def test_health_readiness():
            response_json = await response.json()

            print(response_json)
-            assert "litellm_version" in response_json
            assert "status" in response_json

            if status != 200:
                raise Exception(f"Request did not return a 200 status code: {status}")


+@pytest.mark.asyncio
+async def test_health_readiness_details():
+    """
+    Check if authenticated readiness diagnostics expose version metadata.
+
+    Sends a GET to ``/health/readiness/details`` with a bearer token and
+    expects a 200 response whose JSON body contains both ``status`` and
+    ``litellm_version``.
+    """
+    async with aiohttp.ClientSession() as session:
+        url = "http://0.0.0.0:4000/health/readiness/details"
+        # This endpoint is called with an Authorization header.
+        headers = {"Authorization": "Bearer sk-1234"}
+        async with session.get(url, headers=headers) as response:
+            status = response.status
+            response_json = await response.json()
+
+            # Printed so the payload shows up in the test output.
+            print(response_json)
+            assert "status" in response_json
+            assert "litellm_version" in response_json
+
+            # NOTE(review): the body assertions above run before this status
+            # check, so a non-200 response that lacks those keys fails with an
+            # AssertionError instead of the status-code message below.
+            if status != 200:
+                raise Exception(f"Request did not return a 200 status code: {status}")
+
+
@pytest.mark.asyncio
 async def test_health_liveliness():
    """
--- a/tests/test_litellm/integrations/test_prompt_manager_ssti.py
+++ b/tests/test_litellm/integrations/test_prompt_manager_ssti.py
@ -0,0 +1,125 @@
+"""SSTI regression coverage for non-dotprompt prompt managers.
+
+DotpromptManager was hardened to render through
+``ImmutableSandboxedEnvironment``. The sibling managers (gitlab, arize,
+bitbucket) ship the exact same attacker-controlled-template surface —
+repository write access or workspace edit access turns into RCE on the
+proxy host if the renderer is unsandboxed. This suite locks in the sandbox
+so the regression can't recur.
+"""
+
+from unittest.mock import MagicMock
+
+import pytest
+from jinja2.exceptions import SecurityError
+from jinja2.sandbox import ImmutableSandboxedEnvironment
+
+from litellm.integrations.arize.arize_phoenix_prompt_manager import (
+    ArizePhoenixTemplateManager,
+)
+from litellm.integrations.bitbucket.bitbucket_prompt_manager import (
+    BitBucketTemplateManager,
+)
+from litellm.integrations.gitlab.gitlab_prompt_manager import GitLabTemplateManager
+
+# Classic Jinja2 SSTI payloads. Each one must raise ``SecurityError`` at render
+# time; a payload that renders successfully means the sandbox isn't engaged.
+_SSTI_PAYLOADS = [
+    "{{ ''.__class__.__mro__[1].__subclasses__() }}",
+    "{{ config.__class__.__init__.__globals__['os'].popen('id').read() }}",
+    "{{ cycler.__init__.__globals__.os.popen('id').read() }}",
+    "{{ ().__class__.__bases__[0].__subclasses__() }}",
+]
+
+
+def _build_gitlab_manager() -> GitLabTemplateManager:
+    """Build a ``GitLabTemplateManager`` with a mocked client and no prompt id."""
+    # The constructor calls into a GitLab client when prompt_id is set; pass
+    # None so __init__ stops at jinja_env construction and we can assert on it.
+    return GitLabTemplateManager(
+        gitlab_config={"project": "p", "access_token": "t", "branch": "main"},
+        prompt_id=None,
+        gitlab_client=MagicMock(),
+    )
+
+
+def _build_bitbucket_manager(monkeypatch) -> BitBucketTemplateManager:
+    """Build a ``BitBucketTemplateManager`` with ``BitBucketClient`` stubbed out.
+
+    ``monkeypatch`` is the pytest fixture; it replaces ``BitBucketClient`` in
+    the ``bitbucket_prompt_manager`` module with a factory returning a
+    ``MagicMock`` before the manager is constructed.
+    """
+    # Stub the BitBucket client so we don't need network or real config.
+    from litellm.integrations.bitbucket import bitbucket_prompt_manager
+
+    monkeypatch.setattr(
+        bitbucket_prompt_manager, "BitBucketClient", lambda *a, **kw: MagicMock()
+    )
+    return BitBucketTemplateManager(
+        bitbucket_config={"workspace": "w", "repository": "r", "access_token": "t"},
+        prompt_id=None,
+    )
+
+
+def _build_arize_manager(monkeypatch) -> ArizePhoenixTemplateManager:
+    """Build an ``ArizePhoenixTemplateManager`` with ``ArizePhoenixClient`` stubbed out.
+
+    ``monkeypatch`` is the pytest fixture; it replaces ``ArizePhoenixClient``
+    in the ``arize_phoenix_prompt_manager`` module with a factory returning a
+    ``MagicMock`` before the manager is constructed.
+    """
+    from litellm.integrations.arize import arize_phoenix_prompt_manager
+
+    # Replace the client class with a factory that accepts any arguments and
+    # hands back a MagicMock.
+    monkeypatch.setattr(
+        arize_phoenix_prompt_manager, "ArizePhoenixClient", lambda *a, **kw: MagicMock()
+    )
+    return ArizePhoenixTemplateManager(
+        api_key="k",
+        api_base="https://example.test",
+        prompt_id=None,
+    )
+
+
+@pytest.mark.parametrize(
+    "manager_factory",
+    [
+        ("gitlab", lambda mp: _build_gitlab_manager()),
+        ("bitbucket", _build_bitbucket_manager),
+        ("arize", _build_arize_manager),
+    ],
+    ids=lambda v: v[0] if isinstance(v, tuple) else v,
+)
+def test_jinja_env_is_sandboxed(manager_factory, monkeypatch):
+    """Each prompt manager must render via ``ImmutableSandboxedEnvironment``."""
+    # Each parameter is a (label, factory) tuple: the label becomes the test
+    # id and the factory is called with the monkeypatch fixture. The gitlab
+    # entry wraps its builder in a lambda because that builder takes no
+    # arguments.
+    _, factory = manager_factory
+    manager = factory(monkeypatch)
+    assert isinstance(manager.jinja_env, ImmutableSandboxedEnvironment)
+
+
+@pytest.mark.parametrize(
+    "manager_factory",
+    [
+        ("gitlab", lambda mp: _build_gitlab_manager()),
+        ("bitbucket", _build_bitbucket_manager),
+        ("arize", _build_arize_manager),
+    ],
+    ids=lambda v: v[0] if isinstance(v, tuple) else v,
+)
+@pytest.mark.parametrize("payload", _SSTI_PAYLOADS)
+def test_jinja_env_blocks_ssti_payloads(manager_factory, payload, monkeypatch):
+    """Attribute-traversal payloads must raise ``SecurityError`` at render time.
+
+    A plain ``Environment()`` would happily evaluate these and execute
+    arbitrary Python on the proxy host.
+
+    The two ``parametrize`` decorators combine, so every manager is exercised
+    against every entry in ``_SSTI_PAYLOADS``.
+    """
+    _, factory = manager_factory
+    manager = factory(monkeypatch)
+    # Compiling the template sits outside the ``pytest.raises`` block, so it
+    # must succeed; only ``render()`` is expected to raise.
+    template = manager.jinja_env.from_string(payload)
+    with pytest.raises(SecurityError):
+        template.render()
+
+
+@pytest.mark.parametrize(
+    "manager_factory",
+    [
+        ("gitlab", lambda mp: _build_gitlab_manager()),
+        ("bitbucket", _build_bitbucket_manager),
+        ("arize", _build_arize_manager),
+    ],
+    ids=lambda v: v[0] if isinstance(v, tuple) else v,
+)
+def test_jinja_env_still_renders_normal_variables(manager_factory, monkeypatch):
+    """The sandbox must not break the legitimate use case: plain
+    ``{{ var }}`` substitution must keep working unchanged."""
+    _, factory = manager_factory
+    manager = factory(monkeypatch)
+    # A template that only substitutes a variable should render as usual.
+    template = manager.jinja_env.from_string("Hello {{ name }}!")
+    assert template.render(name="world") == "Hello world!"
--- a/tests/test_litellm/litellm_core_utils/test_decode_special_tokens.py
+++ b/tests/test_litellm/litellm_core_utils/test_decode_special_tokens.py
@ -0,0 +1,37 @@
+from tokenizers import AddedToken, Tokenizer
+from tokenizers.models import WordLevel
+from tokenizers.pre_tokenizers import Whitespace
+from tokenizers.processors import TemplateProcessing
+
+from litellm import decode, encode
+
+
+def _create_custom_tokenizer():
+    """Build a tiny word-level tokenizer that prepends a ``[BOS]`` special token.
+
+    Returns a dict with ``type`` set to ``"huggingface_tokenizer"`` and the
+    tokenizer under ``tokenizer``; the test below passes it as
+    ``custom_tokenizer`` to ``encode`` and ``decode``.
+    """
+    # Three-entry vocabulary; the unknown-token entry is "[UNK]".
+    tokenizer = Tokenizer(
+        WordLevel({"[UNK]": 0, "Hello": 1, "World": 2}, unk_token="[UNK]")
+    )
+    tokenizer.pre_tokenizer = Whitespace()
+    # Register [BOS] as a special token and look up the id it was given.
+    tokenizer.add_special_tokens([AddedToken("[BOS]", special=True)])
+    bos_token_id = tokenizer.token_to_id("[BOS]")
+    assert bos_token_id is not None
+    # Post-processing template: every single-sequence encoding starts with [BOS].
+    tokenizer.post_processor = TemplateProcessing(
+        single="[BOS] $A",
+        special_tokens=[("[BOS]", bos_token_id)],
+    )
+    return {"type": "huggingface_tokenizer", "tokenizer": tokenizer}
+
+
+def test_decode_can_preserve_huggingface_special_tokens():
+    """``decode`` drops special tokens by default and keeps them on request.
+
+    Encodes "Hello World" with the custom tokenizer, then checks that a plain
+    ``decode`` returns the original text while ``skip_special_tokens=False``
+    returns the text with the leading ``[BOS]`` token.
+    """
+    custom_tokenizer = _create_custom_tokenizer()
+    sample_text = "Hello World"
+    tokens = encode(text=sample_text, custom_tokenizer=custom_tokenizer)
+
+    # Default call: no skip_special_tokens argument is passed.
+    decoded_text = decode(tokens=tokens, custom_tokenizer=custom_tokenizer)
+    decoded_text_with_special_tokens = decode(
+        tokens=tokens,
+        custom_tokenizer=custom_tokenizer,
+        skip_special_tokens=False,
+    )
+
+    assert decoded_text == sample_text
+    assert decoded_text_with_special_tokens == "[BOS] Hello World"
--- a/tests/test_litellm/proxy/auth/test_auth_utils.py
+++ b/tests/test_litellm/proxy/auth/test_auth_utils.py
@ -1493,6 +1493,7 @@ def test_observability_ban_covers_canonical_supported_callback_params():
    safe is an explicit decision recorded in
    ``_SAFE_CLIENT_CALLBACK_PARAMS``."""
    from litellm.litellm_core_utils.initialize_dynamic_callback_params import (
+        _request_blocked_callback_params,
        _supported_callback_params,
    )
    from litellm.proxy.auth.auth_utils import (
@ -1508,3 +1509,8 @@ def test_observability_ban_covers_canonical_supported_callback_params():
            f"informational per-request field; otherwise the derivation will "
            f"ban it automatically."
        )
+    for param in _request_blocked_callback_params:
+        assert param in banned, (
+            f"{param} is in _request_blocked_callback_params but is not banned "
+            "at the proxy request-body boundary."
+        )
--- a/tests/test_litellm/proxy/auth/test_user_api_key_auth.py
+++ b/tests/test_litellm/proxy/auth/test_user_api_key_auth.py
@ -10,9 +10,11 @@ sys.path.insert(

 import pytest

+import litellm
 import litellm.proxy.proxy_server
 from litellm.caching.dual_cache import DualCache
 from litellm.proxy._types import (
+    LiteLLMRoutes,
    LiteLLM_JWTAuth,
    LiteLLM_BudgetTable,
    LiteLLM_EndUserTable,
@ -27,6 +29,7 @@ from litellm.proxy.auth.handle_jwt import JWTHandler
 from litellm.proxy.auth.auth_checks import get_key_object, _cache_key_object
 from litellm.proxy.auth.route_checks import RouteChecks
 from litellm.proxy.auth.user_api_key_auth import (
+    _route_requires_auth_despite_public,
    _reserve_budget_after_common_checks,
    _run_centralized_common_checks,
    _run_post_custom_auth_checks,
@ -59,6 +62,29 @@ def test_get_api_key():
    ) == (api_key, passed_in_key)


+def test_route_requires_auth_despite_public_for_metrics(monkeypatch):
+    """``/metrics`` requires auth only when the litellm flag is enabled.
+
+    With ``litellm.require_auth_for_metrics_endpoint`` set to True, both
+    ``/metrics`` and ``/metrics/`` must report that auth is required; with the
+    flag set to False, ``/metrics`` must not.
+    """
+    monkeypatch.setattr(litellm, "require_auth_for_metrics_endpoint", True)
+
+    # The trailing-slash form is checked as well as the bare route.
+    assert _route_requires_auth_despite_public("/metrics", {}) is True
+    assert _route_requires_auth_despite_public("/metrics/", {}) is True
+
+    monkeypatch.setattr(litellm, "require_auth_for_metrics_endpoint", False)
+
+    assert _route_requires_auth_despite_public("/metrics", {}) is False
+
+
+def test_public_ai_hub_routes_remain_public():
+    """The ``/public/*`` hub routes stay public and are not forced to authenticate.
+
+    Each listed route must be present in ``LiteLLMRoutes.public_routes`` and
+    ``_route_requires_auth_despite_public`` must return False for it.
+    """
+    for route in (
+        "/public/model_hub",
+        "/public/model_hub/info",
+        "/public/agent_hub",
+        "/public/mcp_hub",
+        "/public/skill_hub",
+    ):
+        assert route in LiteLLMRoutes.public_routes.value
+        assert _route_requires_auth_despite_public(route, {}) is False
+
+
@pytest.mark.asyncio
 async def test_should_clear_stale_budget_reservation_when_budget_checks_skip():
    user_api_key_auth_obj = UserAPIKeyAuth(
@ -2352,18 +2378,18 @@ async def test_centralized_common_checks_short_circuits_when_master_key_unset():

@pytest.mark.asyncio
 async def test_centralized_common_checks_skips_public_routes():
-    """Regression: public routes (e.g. /health/readiness) are exempted
+    """Regression: public routes (e.g. /health/liveness) are exempted
    by the builder fast-path. The wrapper must not retroactively run
    common_checks on top — the synthetic INTERNAL_USER_VIEW_ONLY token
    has no user_id, so common_checks would reject the request as
-    admin-only. Breaks k8s readiness probes when master_key is set."""
+    admin-only."""
    import litellm.proxy.proxy_server as _proxy_server_mod
    from fastapi import Request
    from starlette.datastructures import URL

    token = UserAPIKeyAuth(user_role=LitellmUserRoles.INTERNAL_USER_VIEW_ONLY)
    request = Request(scope={"type": "http"})
-    request._url = URL(url="/health/readiness")
+    request._url = URL(url="/health/liveness")

    attrs = _proxy_attrs_for_centralized_checks(user_custom_auth=None)
    originals = {a: getattr(_proxy_server_mod, a, None) for a in attrs}
@ -2378,7 +2404,7 @@ async def test_centralized_common_checks_skips_public_routes():
                user_api_key_auth_obj=token,
                request=request,
                request_data={},
-                route="/health/readiness",
+                route="/health/liveness",
            )
            mock_checks.assert_not_awaited()
    finally:
--- a/tests/test_litellm/proxy/guardrails/guardrail_hooks/azure/test_azure_text_moderation.py
+++ b/tests/test_litellm/proxy/guardrails/guardrail_hooks/azure/test_azure_text_moderation.py
@ -232,6 +232,52 @@ async def test_azure_text_moderation_guardrail_post_call_success_hook():
        assert mock_async_make_request.call_args.kwargs["text"] == "Hello world"


+@pytest.mark.asyncio
+async def test_azure_text_moderation_guardrail_post_call_checks_all_choices():
+    azure_text_moderation_guardrail = AzureContentSafetyTextModerationGuardrail(
+        guardrail_name="azure_text_moderation",
+        api_key="azure_text_moderation_api_key",
+        api_base="azure_text_moderation_api_base",
+    )
+    with patch.object(
+        azure_text_moderation_guardrail, "async_make_request"
+    ) as mock_async_make_request:
+        mock_async_make_request.side_effect = [
+            {
+                "blocklistsMatch": [],
+                "categoriesAnalysis": [{"category": "Hate", "severity": 0}],
+            },
+            HTTPException(
+                status_code=400,
+                detail={"error": "blocked second choice"},
+            ),
+        ]
+
+        with pytest.raises(HTTPException):
+            await azure_text_moderation_guardrail.async_post_call_success_hook(
+                data={},
+                user_api_key_dict=UserAPIKeyAuth(
+                    api_key="azure_text_moderation_api_key"
+                ),
+                response=ModelResponse(
+                    choices=[
+                        Choices(
+                            index=0,
+                            message=Message(content="safe response"),
+                        ),
+                        Choices(
+                            index=1,
+                            message=Message(content="unsafe response"),
+                        ),
+                    ]
+                ),
+            )
+
+        assert [
+            call.kwargs["text"] for call in mock_async_make_request.call_args_list
+        ] == ["safe response", "unsafe response"]
+
+
@pytest.mark.asyncio
 async def test_azure_text_moderation_guardrail_post_call_streaming_hook():

--- a/tests/test_litellm/proxy/guardrails/guardrail_hooks/content_filter/test_content_filter.py
+++ b/tests/test_litellm/proxy/guardrails/guardrail_hooks/content_filter/test_content_filter.py
@ -453,6 +453,71 @@ class TestContentFilterGuardrail:
        assert "[EMAIL_REDACTED]" in full_content
        assert "Contact me at [EMAIL_REDACTED] for info" in full_content

+    @pytest.mark.asyncio
+    async def test_streaming_hook_mask_checks_all_choices(self):
+        from litellm.types.utils import Delta, ModelResponseStream, StreamingChoices
+
+        patterns = [
+            ContentFilterPattern(
+                pattern_type="prebuilt",
+                pattern_name="email",
+                action=ContentFilterAction.MASK,
+            ),
+        ]
+
+        guardrail = ContentFilterGuardrail(
+            guardrail_name="test-streaming-mask-all-choices",
+            patterns=patterns,
+            event_hook=GuardrailEventHooks.during_call,
+        )
+
+        async def mock_stream():
+            yield ModelResponseStream(
+                id="chunk1",
+                choices=[
+                    StreamingChoices(
+                        delta=Delta(content="Contact first@ex"),
+                        index=0,
+                    ),
+                    StreamingChoices(
+                        delta=Delta(content="Email second@ex"),
+                        index=1,
+                    ),
+                ],
+                model="gpt-4",
+            )
+            yield ModelResponseStream(
+                id="chunk2",
+                choices=[
+                    StreamingChoices(
+                        delta=Delta(content="ample.com for help"),
+                        index=0,
+                        finish_reason="stop",
+                    ),
+                    StreamingChoices(
+                        delta=Delta(content="ample.com for support"),
+                        index=1,
+                        finish_reason="stop",
+                    ),
+                ],
+                model="gpt-4",
+            )
+
+        content_by_choice = {0: "", 1: ""}
+        async for chunk in guardrail.async_post_call_streaming_iterator_hook(
+            user_api_key_dict=MagicMock(),
+            response=mock_stream(),
+            request_data={},
+        ):
+            for choice in chunk.choices:
+                if choice.delta.content:
+                    content_by_choice[choice.index] += choice.delta.content
+
+        assert "first@example.com" not in content_by_choice[0]
+        assert "second@example.com" not in content_by_choice[1]
+        assert content_by_choice[0] == "Contact [EMAIL_REDACTED] for help"
+        assert content_by_choice[1] == "Email [EMAIL_REDACTED] for support"
+
    @pytest.mark.asyncio
    async def test_streaming_hook_block(self):
        """
--- a/tests/test_litellm/proxy/guardrails/guardrail_hooks/test_xecguard.py
+++ b/tests/test_litellm/proxy/guardrails/guardrail_hooks/test_xecguard.py
@ -6,7 +6,6 @@ branch coverage. Network calls are always mocked; the companion live
 suite lives in ``test_xecguard_live.py``.
 """

-import asyncio
 import os
 from unittest.mock import MagicMock, patch

@ -1196,6 +1195,26 @@ class TestXecGuardMessageAssembly:
            is None
        )

+    def test_extract_assistant_text_combines_all_choices(self, xecguard_guardrail):
+        assert (
+            xecguard_guardrail._extract_assistant_text_from_response(
+                {
+                    "choices": [
+                        {"message": {"content": "first response"}},
+                        {
+                            "message": {
+                                "content": [
+                                    {"type": "text", "text": "second"},
+                                    {"type": "text", "text": "response"},
+                                ]
+                            }
+                        },
+                    ]
+                }
+            )
+            == "first response\nsecond\nresponse"
+        )
+
    def test_synthesize_user_inputs_not_dict(self, xecguard_guardrail):
        assert xecguard_guardrail._synthesize_user_from_inputs("not-dict") is None

--- a/tests/test_litellm/proxy/guardrails/test_content_utils.py
+++ b/tests/test_litellm/proxy/guardrails/test_content_utils.py
@ -0,0 +1,303 @@
+"""Tests for the shared guardrail content extraction helpers."""
+
+from litellm.proxy.guardrails._content_utils import (
+    apply_redacted_messages_back,
+    build_inspection_messages,
+    has_non_string_content,
+    iter_message_text,
+    walk_user_text,
+)
+
+
+# ── iter_message_text ────────────────────────────────────────────────────────────
+
+
+def test_iter_message_text_string_messages():
+    data = {
+        "messages": [
+            {"role": "user", "content": "hello"},
+            {"role": "assistant", "content": "hi"},
+        ]
+    }
+    assert list(iter_message_text(data)) == ["hello", "hi"]
+
+
+def test_iter_message_text_multimodal_list_content():
+    """VERIA-11: list-format content must be inspected, not silently skipped."""
+    data = {
+        "messages": [
+            {
+                "role": "user",
+                "content": [
+                    {"type": "text", "text": "AWS_KEY=AKIA..."},
+                    {"type": "image_url", "image_url": {"url": "..."}},
+                    {"type": "text", "text": "more secrets"},
+                ],
+            }
+        ]
+    }
+    assert list(iter_message_text(data)) == ["AWS_KEY=AKIA...", "more secrets"]
+
+
+def test_iter_message_text_responses_api_string_input():
+    """fniVO9-F: Responses-API ``input`` must be inspectable when ``messages`` absent."""
+    data = {"input": "tell me a secret"}
+    assert list(iter_message_text(data)) == ["tell me a secret"]
+
+
+def test_iter_message_text_responses_api_list_input_messages():
+    data = {
+        "input": [
+            {"role": "user", "content": "first"},
+            {"role": "user", "content": "second"},
+        ]
+    }
+    assert list(iter_message_text(data)) == ["first", "second"]
+
+
+def test_iter_message_text_responses_api_list_input_content_parts():
+    data = {
+        "input": [
+            {"type": "text", "text": "alpha"},
+            {"type": "image_url", "image_url": {"url": "..."}},
+            {"type": "text", "text": "beta"},
+        ]
+    }
+    assert list(iter_message_text(data)) == ["alpha", "beta"]
+
+
+def test_iter_message_text_responses_api_list_input_mixed_dicts_and_strings():
+    """Greptile P2: mixed-list ``input`` with content-part dicts AND bare
+    strings must yield every text fragment — read helpers used to truncate
+    the bare strings."""
+    data = {
+        "input": [
+            {"type": "text", "text": "from-dict"},
+            "from-bare-string",
+            {"type": "image_url", "image_url": {"url": "..."}},
+            "another-bare-string",
+        ]
+    }
+    assert list(iter_message_text(data)) == [
+        "from-dict",
+        "from-bare-string",
+        "another-bare-string",
+    ]
+
+
+def test_iter_message_text_walks_messages_and_input_independently():
+    """When both are present (rare), every fragment from either field is
+    inspected — a stricter guarantee than "first one wins"."""
+    data = {
+        "messages": [{"role": "user", "content": "msg-content"}],
+        "input": "input-content",
+    }
+    assert list(iter_message_text(data)) == ["msg-content", "input-content"]
+
+
+def test_iter_message_text_empty_data():
+    assert list(iter_message_text({})) == []
+    assert list(iter_message_text({"messages": []})) == []
+    assert list(iter_message_text({"input": ""})) == []
+
+
+# ── walk_user_text ────────────────────────────────────────────────────────────
+
+
+def test_walk_user_text_redacts_string_messages_in_place():
+    data = {
+        "messages": [
+            {"role": "user", "content": "leak: AKIAEXAMPLE"},
+            {"role": "assistant", "content": "ok"},
+        ]
+    }
+    visited = walk_user_text(data, lambda s: s.replace("AKIAEXAMPLE", "[REDACTED]"))
+    assert visited == 2
+    assert data["messages"][0]["content"] == "leak: [REDACTED]"
+    assert data["messages"][1]["content"] == "ok"
+
+
+def test_walk_user_text_redacts_multimodal_text_parts():
+    """VERIA-11: list-content text parts must be mutable for in-place redaction."""
+    data = {
+        "messages": [
+            {
+                "role": "user",
+                "content": [
+                    {"type": "text", "text": "AKIAEXAMPLE here"},
+                    {"type": "image_url", "image_url": {"url": "..."}},
+                    {"type": "text", "text": "no secret"},
+                ],
+            }
+        ]
+    }
+    visited = walk_user_text(data, lambda s: s.replace("AKIAEXAMPLE", "[REDACTED]"))
+    assert visited == 2
+    parts = data["messages"][0]["content"]
+    assert parts[0] == {"type": "text", "text": "[REDACTED] here"}
+    # Non-text part must be left untouched.
+    assert parts[1] == {"type": "image_url", "image_url": {"url": "..."}}
+    assert parts[2] == {"type": "text", "text": "no secret"}
+
+
+def test_walk_user_text_redacts_responses_api_string_input():
+    data = {"input": "leak AKIAEXAMPLE"}
+    visited = walk_user_text(data, lambda s: s.replace("AKIAEXAMPLE", "[REDACTED]"))
+    assert visited == 1
+    assert data["input"] == "leak [REDACTED]"
+
+
+def test_walk_user_text_redacts_responses_api_list_input():
+    data = {
+        "input": [
+            {"type": "text", "text": "AKIAEXAMPLE"},
+            {"type": "image_url", "image_url": {"url": "..."}},
+        ]
+    }
+    visited = walk_user_text(data, lambda s: f"[redacted]{s}[/]")
+    assert visited == 1
+    assert data["input"][0] == {"type": "text", "text": "[redacted]AKIAEXAMPLE[/]"}
+    assert data["input"][1] == {"type": "image_url", "image_url": {"url": "..."}}
+
+
+def test_walk_user_text_redacts_mixed_list_input():
+    """Read and write helpers must agree on coverage — bare strings inside
+    a mixed ``input`` list are inspected by both."""
+    data = {
+        "input": [
+            {"type": "text", "text": "secret-one"},
+            "secret-two",
+            {"type": "image_url", "image_url": {"url": "..."}},
+        ]
+    }
+    visited = walk_user_text(data, lambda s: f"<{s}>")
+    assert visited == 2
+    assert data["input"][0] == {"type": "text", "text": "<secret-one>"}
+    assert data["input"][1] == "<secret-two>"
+    assert data["input"][2] == {"type": "image_url", "image_url": {"url": "..."}}
+
+
+# ── build_inspection_messages ─────────────────────────────────────────────────
+
+
+def test_build_inspection_messages_chat_completion_passthrough():
+    data = {
+        "messages": [
+            {"role": "system", "content": "be helpful"},
+            {"role": "user", "content": "hi"},
+        ]
+    }
+    assert build_inspection_messages(data) == [
+        {"role": "system", "content": "be helpful"},
+        {"role": "user", "content": "hi"},
+    ]
+
+
+def test_build_inspection_messages_joins_multimodal_text_parts():
+    data = {
+        "messages": [
+            {
+                "role": "user",
+                "content": [
+                    {"type": "text", "text": "first part"},
+                    {"type": "image_url", "image_url": {"url": "..."}},
+                    {"type": "text", "text": "second part"},
+                ],
+            }
+        ]
+    }
+    assert build_inspection_messages(data) == [
+        {"role": "user", "content": "first part\nsecond part"}
+    ]
+
+
+def test_build_inspection_messages_lifts_responses_api_input():
+    """fniVO9-F: ``input`` must be visible to hooks that POST messages to a remote API."""
+    data = {"input": "responses-api content"}
+    assert build_inspection_messages(data) == [
+        {"role": "user", "content": "responses-api content"}
+    ]
+
+
+def test_build_inspection_messages_drops_messages_with_no_text():
+    data = {
+        "messages": [
+            {"role": "user", "content": ""},
+            {
+                "role": "user",
+                "content": [{"type": "image_url", "image_url": {"url": "..."}}],
+            },
+            {"role": "user", "content": "kept"},
+        ]
+    }
+    assert build_inspection_messages(data) == [{"role": "user", "content": "kept"}]
+
+
+def test_build_inspection_messages_empty_data():
+    assert build_inspection_messages({}) == []
+    assert build_inspection_messages({"messages": []}) == []
+    assert build_inspection_messages({"input": ""}) == []
+
+
+# ── has_non_string_content ────────────────────────────────────────────────────
+
+
+def test_has_non_string_content_string_messages():
+    data = {"messages": [{"role": "user", "content": "hello"}]}
+    assert has_non_string_content(data) is False
+
+
+def test_has_non_string_content_multimodal_messages():
+    data = {"messages": [{"role": "user", "content": [{"type": "text", "text": "hi"}]}]}
+    assert has_non_string_content(data) is True
+
+
+def test_has_non_string_content_responses_api_string_input():
+    assert has_non_string_content({"input": "plain string"}) is False
+
+
+def test_has_non_string_content_responses_api_list_input():
+    assert has_non_string_content({"input": ["a", "b"]}) is True
+
+
+def test_has_non_string_content_empty_data():
+    assert has_non_string_content({}) is False
+    assert has_non_string_content({"messages": []}) is False
+    assert has_non_string_content({"input": ""}) is False
+
+
+# ── apply_redacted_messages_back ──────────────────────────────────────────────
+
+
+def test_apply_redacted_messages_back_chat_completion():
+    data = {"messages": [{"role": "user", "content": "secret"}]}
+    apply_redacted_messages_back(data, [{"role": "user", "content": "[REDACTED]"}])
+    assert data["messages"] == [{"role": "user", "content": "[REDACTED]"}]
+    assert "input" not in data
+
+
+def test_apply_redacted_messages_back_responses_api_string_input():
+    """A Responses-API request reads ``data["input"]``; writing only to
+    ``messages`` would let unredacted text reach the LLM."""
+    data = {"input": "secret payload"}
+    apply_redacted_messages_back(data, [{"role": "user", "content": "[REDACTED]"}])
+    assert data["input"] == "[REDACTED]"
+
+
+def test_apply_redacted_messages_back_both_fields():
+    """Defensive: when both fields are present, both are updated."""
+    data = {
+        "messages": [{"role": "user", "content": "old"}],
+        "input": "old",
+    }
+    apply_redacted_messages_back(data, [{"role": "user", "content": "[REDACTED]"}])
+    assert data["messages"] == [{"role": "user", "content": "[REDACTED]"}]
+    assert data["input"] == "[REDACTED]"
+
+
+def test_apply_redacted_messages_back_skips_input_when_not_string():
+    """List ``input`` (multimodal Responses-API) is left alone — the
+    multimodal-degrades-to-block guard runs upstream."""
+    data = {"input": [{"type": "text", "text": "leak"}]}
+    apply_redacted_messages_back(data, [{"role": "user", "content": "[REDACTED]"}])
+    assert data["input"] == [{"type": "text", "text": "leak"}]
--- a/tests/test_litellm/proxy/guardrails/test_guardrail_coverage.py
+++ b/tests/test_litellm/proxy/guardrails/test_guardrail_coverage.py
@ -0,0 +1,811 @@
+"""
+Regression tests for guardrail-coverage gaps.
+
+Each test confirms that a previously-bypassable input shape now triggers
+inspection by the relevant guardrail hook:
+
+- VERIA-11: multimodal list-format ``content`` is inspected (no longer
+  silently skipped because of an ``isinstance(content, str)`` check).
+- fniVO9-F: Responses-API ``data["input"]`` is inspected (no longer
+  silently skipped because the hook only looked at ``data["messages"]``).
+- yVS0wMDO: Aim's post-call hook inspects every choice when ``n>1``,
+  not just ``choices[0]``.
+"""
+
+from typing import Any, Dict
+from unittest.mock import AsyncMock, MagicMock, patch
+
+import pytest
+from httpx import Request, Response
+
+from litellm import DualCache
+from litellm.proxy._types import UserAPIKeyAuth
+from litellm.types.utils import Choices, Message, ModelResponse
+
+
+@pytest.fixture
+def user_api_key():
+    return UserAPIKeyAuth(api_key="hashed", user_id="u", key_alias=None)
+
+
+# ── Aim ───────────────────────────────────────────────────────────────────────
+
+
+def _aim_no_action_response() -> Response:
+    return Response(
+        status_code=200,
+        json={"required_action": None},
+        request=Request("POST", "https://api.aim.security/fw/v1/analyze"),
+    )
+
+
+@pytest.mark.asyncio
+async def test_aim_inspects_multimodal_list_content(user_api_key, monkeypatch):
+    monkeypatch.setenv("AIM_API_KEY", "hs-aim-key")
+    from litellm.proxy.guardrails.guardrail_hooks.aim.aim import AimGuardrail
+
+    guard = AimGuardrail()
+    sent_payload: Dict[str, Any] = {}
+
+    async def capture(url, headers, json):
+        sent_payload.update(json)
+        return _aim_no_action_response()
+
+    with patch.object(guard.async_handler, "post", side_effect=capture):
+        await guard.async_pre_call_hook(
+            user_api_key_dict=user_api_key,
+            cache=DualCache(),
+            data={
+                "messages": [
+                    {
+                        "role": "user",
+                        "content": [
+                            {"type": "text", "text": "secret payload"},
+                            {"type": "image_url", "image_url": {"url": "..."}},
+                        ],
+                    }
+                ]
+            },
+            call_type="acompletion",
+        )
+
+    # The multimodal text part must be visible to Aim.
+    assert sent_payload["messages"] == [{"role": "user", "content": "secret payload"}]
+
+
+@pytest.mark.asyncio
+async def test_aim_inspects_responses_api_input(user_api_key, monkeypatch):
+    monkeypatch.setenv("AIM_API_KEY", "hs-aim-key")
+    from litellm.proxy.guardrails.guardrail_hooks.aim.aim import AimGuardrail
+
+    guard = AimGuardrail()
+    sent_payload: Dict[str, Any] = {}
+
+    async def capture(url, headers, json):
+        sent_payload.update(json)
+        return _aim_no_action_response()
+
+    with patch.object(guard.async_handler, "post", side_effect=capture):
+        await guard.async_pre_call_hook(
+            user_api_key_dict=user_api_key,
+            cache=DualCache(),
+            data={"input": "responses-api content"},
+            call_type="acompletion",
+        )
+
+    assert sent_payload["messages"] == [
+        {"role": "user", "content": "responses-api content"}
+    ]
+
+
+@pytest.mark.asyncio
+async def test_aim_post_call_inspects_all_choices(user_api_key, monkeypatch):
+    """yVS0wMDO: ``n>1`` no longer bypasses Aim by hiding violations in
+    ``choices[1+]``."""
+    monkeypatch.setenv("AIM_API_KEY", "hs-aim-key")
+    from litellm.proxy.guardrails.guardrail_hooks.aim.aim import AimGuardrail
+
+    guard = AimGuardrail()
+    inspected_outputs = []
+
+    async def capture(request_data, output, hook, key_alias):
+        inspected_outputs.append(output)
+        return {"redacted_output": output}
+
+    response = ModelResponse(
+        choices=[
+            Choices(index=0, message=Message(role="assistant", content="first")),
+            Choices(index=1, message=Message(role="assistant", content="second")),
+            Choices(index=2, message=Message(role="assistant", content="third")),
+        ]
+    )
+
+    with patch.object(guard, "call_aim_guardrail_on_output", side_effect=capture):
+        await guard.async_post_call_success_hook(
+            data={"messages": [{"role": "user", "content": "hi"}]},
+            user_api_key_dict=user_api_key,
+            response=response,
+        )
+
+    # ``asyncio.gather`` is used for parallelism, so order of inspection is
+    # not guaranteed.
+    assert sorted(inspected_outputs) == ["first", "second", "third"]
+
+
+# ── Lakera v2 ─────────────────────────────────────────────────────────────────
+
+
+@pytest.mark.asyncio
+async def test_lakera_v2_inspects_responses_api_input(user_api_key, monkeypatch):
+    monkeypatch.setenv("LAKERA_API_KEY", "lk-test")
+    from litellm.proxy.guardrails.guardrail_hooks.lakera_ai_v2 import (
+        LakeraAIGuardrail,
+    )
+
+    guard = LakeraAIGuardrail(api_key="lk-test", on_flagged="monitor")
+
+    seen_messages = []
+
+    async def fake_call_v2_guard(messages, request_data, event_type):
+        seen_messages.append(messages)
+        return {"flagged": False}, {}
+
+    with patch.object(guard, "call_v2_guard", side_effect=fake_call_v2_guard):
+        await guard.async_pre_call_hook(
+            user_api_key_dict=user_api_key,
+            cache=DualCache(),
+            data={"input": "responses-api content"},
+            call_type="responses",
+        )
+
+    assert seen_messages == [[{"role": "user", "content": "responses-api content"}]]
+
+
+@pytest.mark.asyncio
+async def test_lakera_v2_responses_api_input_redacted_writeback(
+    user_api_key, monkeypatch
+):
+    """Greptile P1: when input arrives via Responses-API ``data["input"]``
+    (string) and Lakera flags PII, the redacted content must be written
+    back to ``data["input"]`` — the Responses-API backend reads from
+    ``input``, so writing only to ``messages`` would let unredacted PII
+    reach the LLM."""
+    monkeypatch.setenv("LAKERA_API_KEY", "lk-test")
+    from litellm.proxy.guardrails.guardrail_hooks.lakera_ai_v2 import (
+        LakeraAIGuardrail,
+    )
+
+    guard = LakeraAIGuardrail(api_key="lk-test", on_flagged="block")
+
+    async def fake_call_v2_guard(messages, request_data, event_type):
+        return ({"flagged": True, "payload": []}, {"EMAIL": 1})
+
+    def fake_mask(messages, lakera_response, masked_entity_count):
+        return [{"role": "user", "content": "[REDACTED EMAIL]"}]
+
+    with (
+        patch.object(guard, "call_v2_guard", side_effect=fake_call_v2_guard),
+        patch.object(guard, "_is_only_pii_violation", return_value=True),
+        patch.object(guard, "_mask_pii_in_messages", side_effect=fake_mask),
+    ):
+        data = {"input": "user@example.com leaked"}
+        await guard.async_pre_call_hook(
+            user_api_key_dict=user_api_key,
+            cache=DualCache(),
+            data=data,
+            call_type="responses",
+        )
+
+    assert data["input"] == "[REDACTED EMAIL]"
+
+
+@pytest.mark.asyncio
+async def test_aim_responses_api_input_anonymize_writeback(user_api_key, monkeypatch):
+    """Greptile P1: Aim's anonymize action must redact ``data["input"]``
+    for Responses-API requests, not just ``data["messages"]``."""
+    monkeypatch.setenv("AIM_API_KEY", "hs-aim-key")
+    from litellm.proxy.guardrails.guardrail_hooks.aim.aim import AimGuardrail
+
+    guard = AimGuardrail()
+
+    aim_response_body = {
+        "required_action": {"action_type": "anonymize_action"},
+        "redacted_chat": {
+            "all_redacted_messages": [
+                {"role": "user", "content": "[REDACTED] anonymised"}
+            ]
+        },
+    }
+
+    async def capture(url, headers, json):
+        return Response(
+            status_code=200,
+            json=aim_response_body,
+            request=Request("POST", "https://api.aim.security/fw/v1/analyze"),
+        )
+
+    with patch.object(guard.async_handler, "post", side_effect=capture):
+        data = {"input": "user@example.com leaked"}
+        await guard.async_pre_call_hook(
+            user_api_key_dict=user_api_key,
+            cache=DualCache(),
+            data=data,
+            call_type="responses",
+        )
+
+    assert data["input"] == "[REDACTED] anonymised"
+
+
+@pytest.mark.asyncio
+async def test_lakera_v2_multimodal_pii_degrades_to_block(user_api_key, monkeypatch):
+    """Mask-in-place uses Lakera offsets and cannot preserve image/audio
+    parts of multimodal input. When PII is detected on a multimodal
+    request, the hook must raise the block exception instead of silently
+    flattening ``data["messages"]`` to text-only."""
+    monkeypatch.setenv("LAKERA_API_KEY", "lk-test")
+    from fastapi import HTTPException
+
+    from litellm.proxy.guardrails.guardrail_hooks.lakera_ai_v2 import (
+        LakeraAIGuardrail,
+    )
+
+    guard = LakeraAIGuardrail(api_key="lk-test", on_flagged="block")
+
+    async def fake_call_v2_guard(messages, request_data, event_type):
+        return (
+            {
+                "flagged": True,
+                "payload": [{"detector_type": "pii/email", "start": 0, "end": 5}],
+            },
+            {"EMAIL": 1},
+        )
+
+    with (
+        patch.object(guard, "call_v2_guard", side_effect=fake_call_v2_guard),
+        patch.object(guard, "_is_only_pii_violation", return_value=True),
+        patch.object(
+            guard,
+            "_get_http_exception_for_blocked_guardrail",
+            return_value=HTTPException(status_code=400, detail="blocked"),
+        ),
+    ):
+        with pytest.raises(HTTPException):
+            await guard.async_pre_call_hook(
+                user_api_key_dict=user_api_key,
+                cache=DualCache(),
+                data={
+                    "messages": [
+                        {
+                            "role": "user",
+                            "content": [
+                                {"type": "text", "text": "leak"},
+                                {"type": "image_url", "image_url": {"url": "..."}},
+                            ],
+                        }
+                    ]
+                },
+                call_type="acompletion",
+            )
+
+
+@pytest.mark.asyncio
+async def test_lakera_v2_inspects_multimodal_list_content(user_api_key, monkeypatch):
+    monkeypatch.setenv("LAKERA_API_KEY", "lk-test")
+    from litellm.proxy.guardrails.guardrail_hooks.lakera_ai_v2 import (
+        LakeraAIGuardrail,
+    )
+
+    guard = LakeraAIGuardrail(api_key="lk-test", on_flagged="monitor")
+    seen_messages = []
+
+    async def fake_call_v2_guard(messages, request_data, event_type):
+        seen_messages.append(messages)
+        return {"flagged": False}, {}
+
+    with patch.object(guard, "call_v2_guard", side_effect=fake_call_v2_guard):
+        await guard.async_pre_call_hook(
+            user_api_key_dict=user_api_key,
+            cache=DualCache(),
+            data={
+                "messages": [
+                    {
+                        "role": "user",
+                        "content": [
+                            {"type": "text", "text": "AKIAEXAMPLE"},
+                            {"type": "image_url", "image_url": {"url": "..."}},
+                        ],
+                    }
+                ]
+            },
+            call_type="acompletion",
+        )
+
+    assert seen_messages == [[{"role": "user", "content": "AKIAEXAMPLE"}]]
+
+
+# ── Lasso ─────────────────────────────────────────────────────────────────────
+
+
+@pytest.mark.asyncio
+async def test_lasso_multimodal_falls_back_to_classify(user_api_key, monkeypatch):
+    """Lasso's classifix (mask) endpoint returns text that overwrites
+    ``data["messages"]``. For multimodal input that would silently strip
+    image parts — the hook must use the classify endpoint instead and
+    leave the original payload intact."""
+    monkeypatch.setenv("LASSO_API_KEY", "ls-test")
+    from litellm.proxy.guardrails.guardrail_hooks.lasso.lasso import LassoGuardrail
+
+    guard = LassoGuardrail(lasso_api_key="ls-test", mask=True)
+
+    masking_called = False
+    classify_called = False
+
+    async def fake_masking(data, cache, message_type, messages):
+        nonlocal masking_called
+        masking_called = True
+        return data
+
+    async def fake_classification(data, cache, message_type, messages):
+        nonlocal classify_called
+        classify_called = True
+        return data
+
+    with (
+        patch.object(guard, "_handle_masking", side_effect=fake_masking),
+        patch.object(guard, "_handle_classification", side_effect=fake_classification),
+    ):
+        await guard._run_lasso_guardrail(
+            data={
+                "messages": [
+                    {
+                        "role": "user",
+                        "content": [
+                            {"type": "text", "text": "hello"},
+                            {"type": "image_url", "image_url": {"url": "..."}},
+                        ],
+                    }
+                ]
+            },
+            cache=DualCache(),
+            message_type="PROMPT",
+        )
+
+    assert classify_called is True
+    assert masking_called is False
+
+
+@pytest.mark.asyncio
+async def test_lasso_inspects_responses_api_input(user_api_key, monkeypatch):
+    """A Responses-API ``input`` string must reach Lasso classification
+    wrapped as a single ``user`` message."""
+    monkeypatch.setenv("LASSO_API_KEY", "ls-test")
+    from litellm.proxy.guardrails.guardrail_hooks.lasso.lasso import LassoGuardrail
+
+    guard = LassoGuardrail(lasso_api_key="ls-test")
+    captured = []
+
+    async def record_classification(data, cache, message_type, messages):
+        # Remember what the guardrail handed to classification, then pass
+        # the payload through untouched.
+        captured.append(messages)
+        return data
+
+    classification_patch = patch.object(
+        guard, "_handle_classification", side_effect=record_classification
+    )
+    request_body = {"input": "responses-api content"}
+
+    with classification_patch:
+        await guard._run_lasso_guardrail(
+            data=request_body,
+            cache=DualCache(),
+            message_type="PROMPT",
+        )
+
+    expected_messages = [{"role": "user", "content": "responses-api content"}]
+    assert captured == [expected_messages]
+
+
+@pytest.mark.asyncio
+async def test_lasso_masking_writes_back_responses_api_input(user_api_key, monkeypatch):
+    """Krrish blocker: Lasso classifix masking must update ``data["input"]``
+    for Responses-API requests, not only ``data["messages"]``."""
+    monkeypatch.setenv("LASSO_API_KEY", "ls-test")
+    from litellm.proxy.guardrails.guardrail_hooks.lasso.lasso import LassoGuardrail
+
+    guard = LassoGuardrail(lasso_api_key="ls-test", mask=True)
+    # Canned Lasso reply: a violation with an AUTO_MASKING finding whose
+    # ``messages`` carry the masked text expected to be written back.
+    lasso_response = {
+        "violations_detected": True,
+        "deputies": {"pii": True},
+        "findings": {"pii": [{"action": "AUTO_MASKING"}]},
+        "messages": [{"role": "user", "content": "[REDACTED]"}],
+    }
+
+    # Stand-in for the outbound API call; always returns the canned reply.
+    async def fake_call_lasso_api(headers, payload, api_url=None):
+        return lasso_response
+
+    # Responses-API shaped request: text lives under ``input``, no ``messages``.
+    data = {"input": "user@example.com leaked"}
+
+    with patch.object(guard, "_call_lasso_api", side_effect=fake_call_lasso_api):
+        await guard._run_lasso_guardrail(
+            data=data,
+            cache=DualCache(),
+            message_type="PROMPT",
+        )
+
+    # The caller's dict must have been updated in place with the masked text.
+    assert data["input"] == "[REDACTED]"
+
+
+# ── Banned Keywords ───────────────────────────────────────────────────────────
+
+
+def test_banned_keywords_blocks_multimodal_content(monkeypatch):
+    """VERIA-11: a banned word inside a list-format text part must be caught.
+
+    The hook is invoked with ``acompletion`` — the value the proxy ingress
+    actually passes for ``/v1/chat/completions``. Testing with the sync
+    literal ``"completion"`` would still pass if the hook's call-type gate
+    were out of step with the runtime, so it would not catch regressions.
+    """
+    monkeypatch.setattr("litellm.banned_keywords_list", ["forbidden"], raising=False)
+    import asyncio
+
+    from enterprise.enterprise_hooks.banned_keywords import _ENTERPRISE_BannedKeywords
+    from fastapi import HTTPException
+
+    hook = _ENTERPRISE_BannedKeywords()
+    multimodal_message = {
+        "role": "user",
+        "content": [
+            {"type": "text", "text": "forbidden word here"},
+            {"type": "image_url", "image_url": {"url": "..."}},
+        ],
+    }
+
+    with pytest.raises(HTTPException) as raised:
+        asyncio.run(
+            hook.async_pre_call_hook(
+                user_api_key_dict=UserAPIKeyAuth(api_key="hashed", user_id="u"),
+                cache=DualCache(),
+                data={"messages": [multimodal_message]},
+                call_type="acompletion",
+            )
+        )
+
+    detail_text = str(raised.value.detail).lower()
+    assert "forbidden" in detail_text
+
+
+def test_banned_keywords_blocks_responses_api_input(monkeypatch):
+    """A banned word in a Responses-API ``input`` string must be rejected
+    when the hook runs with ``call_type="aresponses"``."""
+    monkeypatch.setattr("litellm.banned_keywords_list", ["forbidden"], raising=False)
+    import asyncio
+
+    from enterprise.enterprise_hooks.banned_keywords import _ENTERPRISE_BannedKeywords
+    from fastapi import HTTPException
+
+    hook = _ENTERPRISE_BannedKeywords()
+    request_body = {"input": "this contains forbidden content"}
+
+    with pytest.raises(HTTPException):
+        asyncio.run(
+            hook.async_pre_call_hook(
+                user_api_key_dict=UserAPIKeyAuth(api_key="hashed", user_id="u"),
+                cache=DualCache(),
+                data=request_body,
+                call_type="aresponses",
+            )
+        )
+
+
+@pytest.mark.parametrize("call_type", ["completion", "acompletion", "aresponses"])
+def test_banned_keywords_fires_on_text_content_call_types(monkeypatch, call_type):
+    """Pin the call-type gate to the ``route_type`` values the proxy emits.
+
+    Guards against the regression where the hook compared
+    ``call_type == "completion"`` and therefore silently did nothing for
+    ``acompletion`` (chat completions) and ``aresponses`` (Responses API).
+    """
+    monkeypatch.setattr("litellm.banned_keywords_list", ["forbidden"], raising=False)
+    import asyncio
+
+    from enterprise.enterprise_hooks.banned_keywords import _ENTERPRISE_BannedKeywords
+    from fastapi import HTTPException
+
+    hook = _ENTERPRISE_BannedKeywords()
+    # The banned word is present in both request shapes, so every
+    # parametrized call type has something to trip on.
+    payload = {
+        "messages": [{"role": "user", "content": "forbidden text"}],
+        "input": "forbidden text",
+    }
+
+    async def _invoke_hook():
+        await hook.async_pre_call_hook(
+            user_api_key_dict=UserAPIKeyAuth(api_key="hashed", user_id="u"),
+            cache=DualCache(),
+            data=payload,
+            call_type=call_type,
+        )
+
+    with pytest.raises(HTTPException):
+        asyncio.run(_invoke_hook())
+
+
+@pytest.mark.parametrize(
+    "call_type", ["aembedding", "amoderation", "aspeech", "atranscription"]
+)
+def test_banned_keywords_skips_non_text_call_types(monkeypatch, call_type):
+    """Embedding / moderation / audio paths don't carry chat text and
+    aren't in the text-guardrail scope. They must not trigger the hook
+    even when the request body otherwise looks like a chat payload.
+
+    Parametrized (rather than looping inside one test) so each call type is
+    reported on its own and a failure on one does not hide the others.
+    """
+    monkeypatch.setattr("litellm.banned_keywords_list", ["forbidden"], raising=False)
+    from enterprise.enterprise_hooks.banned_keywords import _ENTERPRISE_BannedKeywords
+
+    guard = _ENTERPRISE_BannedKeywords()
+
+    import asyncio
+
+    # Should return without raising, even though the data carries the banned word.
+    asyncio.run(
+        guard.async_pre_call_hook(
+            user_api_key_dict=UserAPIKeyAuth(api_key="hashed", user_id="u"),
+            cache=DualCache(),
+            data={"input": "forbidden text"},
+            call_type=call_type,
+        )
+    )
+
+
+@pytest.mark.asyncio
+async def test_banned_keywords_post_call_checks_all_choices(monkeypatch, user_api_key):
+    """Krrish blocker: with ``n>1`` the post-call check must look at every
+    choice, so banned text in ``choices[1+]`` cannot slip through."""
+    monkeypatch.setattr("litellm.banned_keywords_list", ["forbidden"], raising=False)
+    from enterprise.enterprise_hooks.banned_keywords import _ENTERPRISE_BannedKeywords
+    from fastapi import HTTPException
+
+    hook = _ENTERPRISE_BannedKeywords()
+    # First choice is harmless; only the second one carries the banned word.
+    clean_choice = Choices(
+        index=0, message=Message(role="assistant", content="clean")
+    )
+    banned_choice = Choices(
+        index=1, message=Message(role="assistant", content="forbidden")
+    )
+    multi_choice_response = ModelResponse(choices=[clean_choice, banned_choice])
+
+    with pytest.raises(HTTPException) as raised:
+        await hook.async_post_call_success_hook(
+            data={},
+            user_api_key_dict=user_api_key,
+            response=multi_choice_response,
+        )
+
+    detail_text = str(raised.value.detail).lower()
+    assert "forbidden" in detail_text
+
+
+# ── Azure Content Safety ──────────────────────────────────────────────────────
+
+
+@pytest.mark.asyncio
+@pytest.mark.parametrize(
+    "call_type, data",
+    [
+        # Chat-completions shape: list-format content with one text part.
+        (
+            "acompletion",
+            {
+                "messages": [
+                    {
+                        "role": "user",
+                        "content": [
+                            {"type": "text", "text": "scan me"},
+                            {"type": "image_url", "image_url": {"url": "..."}},
+                        ],
+                    }
+                ]
+            },
+        ),
+        # Responses-API shape: plain string under ``input``.
+        ("aresponses", {"input": "scan me"}),
+    ],
+)
+async def test_azure_content_safety_pre_call_fires_on_runtime_call_types(
+    user_api_key, call_type, data
+):
+    """The proxy ingress passes ``route_type`` straight through as
+    ``call_type`` — ``acompletion`` for chat completions and
+    ``aresponses`` for the Responses API. The hook must inspect text
+    fragments under both, not only the literal ``"completion"`` string
+    used by some SDK callers."""
+    from litellm.proxy.hooks.azure_content_safety import _PROXY_AzureContentSafety
+
+    # Allocate the hook with __new__ so its __init__ is not executed.
+    guard = _PROXY_AzureContentSafety.__new__(_PROXY_AzureContentSafety)
+    seen = []
+
+    # Recorder that replaces the real violation check on this instance.
+    async def fake_test_violation(content, source=None):
+        seen.append((content, source))
+
+    guard.test_violation = fake_test_violation
+    await guard.async_pre_call_hook(
+        user_api_key_dict=user_api_key,
+        cache=DualCache(),
+        data=data,
+        call_type=call_type,
+    )
+    # Both payloads carry "scan me"; it must be checked with source "input".
+    assert ("scan me", "input") in seen
+
+
+@pytest.mark.asyncio
+async def test_azure_content_safety_post_call_checks_all_choices(user_api_key):
+    """Krrish blocker: ``n>1`` responses must not bypass Azure Content Safety
+    by placing the unsafe text in ``choices[1+]``."""
+    from fastapi import HTTPException
+    from litellm.proxy.hooks.azure_content_safety import _PROXY_AzureContentSafety
+
+    # Allocate the hook with __new__ so its __init__ is not executed.
+    guard = _PROXY_AzureContentSafety.__new__(_PROXY_AzureContentSafety)
+    seen_outputs = []
+
+    # Recorder that also raises for any content containing "unsafe",
+    # simulating a content-safety rejection.
+    async def fake_test_violation(content, source=None):
+        seen_outputs.append((content, source))
+        if "unsafe" in content:
+            raise HTTPException(status_code=400, detail={"error": "unsafe"})
+
+    guard.test_violation = fake_test_violation
+    # Three choices: clean, unsafe, and one that comes after the unsafe one.
+    response = ModelResponse(
+        choices=[
+            Choices(index=0, message=Message(role="assistant", content="clean")),
+            Choices(index=1, message=Message(role="assistant", content="unsafe")),
+            Choices(index=2, message=Message(role="assistant", content="later")),
+        ]
+    )
+
+    with pytest.raises(HTTPException):
+        await guard.async_post_call_success_hook(
+            data={},
+            user_api_key_dict=user_api_key,
+            response=response,
+        )
+
+    # Choices are expected to be checked in order with source "output";
+    # "later" is absent because the check on "unsafe" raised first.
+    assert seen_outputs == [("clean", "output"), ("unsafe", "output")]
+
+
+# ── Secret Detection ──────────────────────────────────────────────────────────
+
+
+@pytest.mark.asyncio
+async def test_secret_detection_redacts_multimodal_text_parts(user_api_key):
+    """A secret inside a list-format text part must be redacted in place,
+    while the neighbouring non-text part is left untouched."""
+    from enterprise.litellm_enterprise.enterprise_callbacks.secret_detection import (
+        _ENTERPRISE_SecretDetection,
+    )
+
+    detector = _ENTERPRISE_SecretDetection()
+    secret_text_part = {
+        "type": "text",
+        "text": "AKIAIOSFODNN7EXAMPLE is the key",
+    }
+    user_message = {
+        "role": "user",
+        "content": [
+            secret_text_part,
+            {"type": "image_url", "image_url": {"url": "..."}},
+        ],
+    }
+    data = {"messages": [user_message]}
+
+    await detector.async_pre_call_hook(
+        user_api_key_dict=user_api_key,
+        cache=DualCache(),
+        data=data,
+        call_type="completion",
+    )
+
+    # Re-read the parts from ``data`` so the assertions see whatever the
+    # hook left in the request body.
+    text_part, image_part = data["messages"][0]["content"][:2]
+    redacted_text = text_part["text"]
+    assert "AKIAIOSFODNN7EXAMPLE" not in redacted_text
+    assert "[REDACTED]" in redacted_text
+    # Non-text part is preserved untouched (compared against a fresh literal).
+    assert image_part == {"type": "image_url", "image_url": {"url": "..."}}
+
+
+@pytest.mark.asyncio
+async def test_secret_detection_redacts_responses_api_input(user_api_key):
+    """A secret in a plain-string ``input`` field must be replaced by a
+    ``[REDACTED]`` marker in the request body."""
+    from enterprise.litellm_enterprise.enterprise_callbacks.secret_detection import (
+        _ENTERPRISE_SecretDetection,
+    )
+
+    guard = _ENTERPRISE_SecretDetection()
+    data = {"input": "leak: AKIAIOSFODNN7EXAMPLE"}
+
+    # NOTE(review): the test name refers to the Responses API, but the hook is
+    # invoked with call_type="moderation" rather than "aresponses" as used by
+    # the other Responses-API tests in this file — confirm this is intended.
+    await guard.async_pre_call_hook(
+        user_api_key_dict=user_api_key,
+        cache=DualCache(),
+        data=data,
+        call_type="moderation",
+    )
+
+    # The hook is expected to mutate ``data`` in place.
+    assert "AKIAIOSFODNN7EXAMPLE" not in data["input"]
+    assert "[REDACTED]" in data["input"]
+
+
+# ── OpenAI Moderation ─────────────────────────────────────────────────────────
+
+
+@pytest.mark.asyncio
+async def test_openai_moderation_inspects_multimodal_content(monkeypatch, user_api_key):
+    """The aggregated text passed to ``llm_router.amoderation`` must include
+    list-format text parts and Responses-API input — without this, multimodal
+    content silently passed moderation."""
+    from enterprise.enterprise_hooks.openai_moderation import (
+        _ENTERPRISE_OpenAI_Moderation,
+    )
+
+    guard = _ENTERPRISE_OpenAI_Moderation()
+
+    seen_inputs = []
+
+    # Minimal moderation reply: a single result with ``flagged`` set to False.
+    class FakeModeration:
+        results = [type("R", (), {"flagged": False})()]
+
+    # Records the text handed to moderation and returns the unflagged reply.
+    async def fake_amoderation(model, input):
+        seen_inputs.append(input)
+        return FakeModeration()
+
+    fake_router = MagicMock()
+    fake_router.amoderation = AsyncMock(side_effect=fake_amoderation)
+
+    # Swap the proxy's module-level router for the fake one.
+    monkeypatch.setattr(
+        "litellm.proxy.proxy_server.llm_router", fake_router, raising=False
+    )
+
+    await guard.async_moderation_hook(
+        data={
+            "messages": [
+                {
+                    "role": "user",
+                    "content": [
+                        {"type": "text", "text": "alpha "},
+                        {"type": "image_url", "image_url": {"url": "..."}},
+                        {"type": "text", "text": "beta"},
+                    ],
+                }
+            ]
+        },
+        user_api_key_dict=user_api_key,
+        call_type="acompletion",
+    )
+
+    # One moderation call, with the two text parts joined and the image skipped.
+    assert seen_inputs == ["alpha beta"]
+
+
+# ── Google Text Moderation ────────────────────────────────────────────────────
+
+
+@pytest.mark.asyncio
+async def test_google_text_moderation_inspects_multimodal_content(user_api_key):
+    """The text passed to Google's moderation client must include list-format
+    text parts."""
+    from enterprise.enterprise_hooks.google_text_moderation import (
+        _ENTERPRISE_GoogleTextModeration,
+    )
+
+    # Allocate the hook with __new__ so its __init__ is not executed; every
+    # attribute the test needs is assigned by hand below.
+    guard = _ENTERPRISE_GoogleTextModeration.__new__(_ENTERPRISE_GoogleTextModeration)
+    seen_documents = []
+
+    # Records the text used to build each document.
+    def fake_language_document(content, type_):
+        seen_documents.append(content)
+        return MagicMock()
+
+    # Moderation reply with no categories.
+    fake_response = MagicMock()
+    fake_response.moderation_categories = []
+
+    guard.language_document = fake_language_document
+    guard.moderate_text_request = MagicMock(return_value=MagicMock())
+    guard.document_type = MagicMock()
+    guard.client = MagicMock()
+    guard.client.moderate_text = MagicMock(return_value=fake_response)
+
+    await guard.async_moderation_hook(
+        data={
+            "messages": [
+                {
+                    "role": "user",
+                    "content": [
+                        {"type": "text", "text": "hello "},
+                        {"type": "image_url", "image_url": {"url": "..."}},
+                        {"type": "text", "text": "world"},
+                    ],
+                }
+            ]
+        },
+        user_api_key_dict=user_api_key,
+        call_type="acompletion",
+    )
+
+    # One document, built from the two text parts joined with the image skipped.
+    assert seen_documents == ["hello world"]
--- a/tests/test_litellm/proxy/health_endpoints/test_health_endpoints.py
+++ b/tests/test_litellm/proxy/health_endpoints/test_health_endpoints.py
@ -11,10 +11,14 @@ sys.path.insert(

 import httpx
 import pytest
+from fastapi import FastAPI
+from fastapi.testclient import TestClient
 from prisma.errors import ClientNotConnectedError, HTTPClientClosedError, PrismaError

 import litellm.proxy.health_endpoints._health_endpoints as _health_endpoints_module

+from litellm.proxy._types import LitellmUserRoles, UserAPIKeyAuth
+from litellm.proxy.auth.user_api_key_auth import user_api_key_auth
 from litellm.proxy.health_endpoints._health_endpoints import (
    _db_health_readiness_check,
    get_callback_identifier,
@ -512,7 +516,7 @@ def proxy_client(monkeypatch):

    Redis cache:
    - If REDIS_HOST is set in environment, Redis cache will be automatically configured
-    - Cache configuration is included in /health/readiness endpoint response
+    - Cache diagnostics are included in the authenticated /health/readiness/details response
    """
    client = create_proxy_test_client(monkeypatch)
    with client:
@ -588,11 +592,7 @@ def test_health_liveness_endpoint(proxy_client):
 def test_health_readiness(proxy_client):
    """
    Test /health/readiness endpoint.
-    Database and Redis are optional - the endpoint should work whether they're available or not.
-
-    If DATABASE_URL is set, the endpoint will check database connectivity.
-    If REDIS_HOST is set, the endpoint will report cache status.
-    If neither is set, the endpoint should still return a valid health status.
+    Database and Redis are optional - the public endpoint should work whether they're available or not.
    """
    # Measure the time taken for the health check call
    start_time = time.perf_counter()
@ -614,40 +614,57 @@ def test_health_readiness(proxy_client):
        duration_ms < 500
    ), f"Health check took {duration_ms:.2f}ms, expected < 500ms for readiness endpoint"

-    # Assert response contains expected fields
+    # Assert response contains only low-detail public probe fields
    response_data = response.json()
-    assert "status" in response_data, "Response should contain 'status' field"
-    assert (
-        "litellm_version" in response_data
-    ), "Response should contain 'litellm_version' field"
-
-    # Display all health endpoint response fields (matches what /health/readiness returns)
-    print("\n" + "-" * 60)
-    print("HEALTH ENDPOINT RESPONSE")
-    print("-" * 60)
-    print(f"Status: {response_data.get('status', 'unknown')}")
-    print(f"Database: {response_data.get('db', 'not reported')}")
-    print(f"LiteLLM Version: {response_data.get('litellm_version', 'unknown')}")
-    print(f"Success Callbacks: {response_data.get('success_callbacks', [])}")
-    print(f"Cache: {response_data.get('cache', 'none')}")
-    print(
-        f"Use AioHTTP Transport: {response_data.get('use_aiohttp_transport', 'unknown')}"
-    )
+    assert response_data == {"status": "healthy"}
    print(f"Response time: {duration_ms:.2f}ms")

-    # If database status is reported, verify it's a valid status
-    # Database may be "connected", "disconnected", "unknown", or "Not connected" (when prisma_client is None)
-    if "db" in response_data:
-        db_status = response_data["db"]
-        # Database status can be any of these valid states
-        assert db_status in [
-            "connected",
-            "disconnected",
-            "unknown",
-            "Not connected",
-        ], f"Unexpected db status: {db_status}"

-    print("=" * 60 + "\n")
+def test_health_readiness_details_returns_diagnostic_fields(monkeypatch):
+    """
+    The detailed readiness route still reports diagnostics when the auth
+    dependency resolves to a proxy admin.
+    """
+
+    def _admin_auth():
+        return UserAPIKeyAuth(user_role=LitellmUserRoles.PROXY_ADMIN)
+
+    app = FastAPI()
+    app.include_router(_health_endpoints_module.router)
+    app.dependency_overrides[user_api_key_auth] = _admin_auth
+    client = TestClient(app)
+
+    monkeypatch.setattr("litellm.proxy.proxy_server.prisma_client", None)
+
+    response = client.get("/health/readiness/details")
+
+    assert response.status_code == 200, response.text
+    payload = response.json()
+    assert payload["status"] == "healthy"
+    for diagnostic_key in ("litellm_version", "success_callbacks", "cache"):
+        assert diagnostic_key in payload
+
+
+def test_health_readiness_allows_explicit_legacy_public_details(monkeypatch):
+    """
+    Operators can explicitly preserve the legacy public readiness payload.
+    """
+    # No auth dependency override is installed: the route is hit as a
+    # public endpoint.
+    app = FastAPI()
+    app.include_router(_health_endpoints_module.router)
+    client = TestClient(app)
+
+    monkeypatch.setattr("litellm.proxy.proxy_server.prisma_client", None)
+    # Opt-in flag under test.
+    monkeypatch.setattr(
+        "litellm.proxy.proxy_server.general_settings",
+        {"allow_public_health_readiness_details": True},
+    )
+
+    response = client.get("/health/readiness")
+
+    assert response.status_code == 200, response.text
+    response_data = response.json()
+    # With the flag set, the diagnostic fields are expected in the body.
+    assert response_data["status"] == "healthy"
+    assert "litellm_version" in response_data
+    assert "success_callbacks" in response_data
+    assert "cache" in response_data


 def test_get_callback_identifier_string_and_object_with_callback_name():
@ -1503,8 +1520,7 @@ async def test_health_readiness_returns_503_when_db_disconnected():
        result = await health_readiness(response=response)

    assert response.status_code == 503
-    assert result["db"] == "disconnected"
-    assert result["status"] == "healthy"  # body shape unchanged for back-compat
+    assert result == {"status": "healthy"}


@pytest.mark.asyncio
@ -1527,7 +1543,7 @@ async def test_health_readiness_returns_200_when_db_connected():
        result = await health_readiness(response=response)

    assert response.status_code == 200
-    assert result["db"] == "connected"
+    assert result == {"status": "healthy"}


@pytest.mark.asyncio
@ -1546,7 +1562,7 @@ async def test_health_readiness_returns_200_when_no_db_configured():
        result = await health_readiness(response=response)

    assert response.status_code == 200
-    assert result["db"] == "Not connected"
+    assert result == {"status": "healthy"}


 def test_clean_endpoint_data_strips_credentials_keeps_routing_fields():
--- a/tests/test_litellm/proxy/management_endpoints/test_router_settings_endpoints.py
+++ b/tests/test_litellm/proxy/management_endpoints/test_router_settings_endpoints.py
@ -13,7 +13,13 @@ from fastapi.testclient import TestClient

 sys.path.insert(0, os.path.abspath("../../../.."))

+from litellm.proxy import proxy_server
+from litellm.proxy._types import LitellmUserRoles, UserAPIKeyAuth
+from litellm.proxy.management_endpoints.router_settings_endpoints import (
+    get_router_settings,
+)
 from litellm.proxy.proxy_server import app
+from litellm.router import Router

 client = TestClient(app)

@ -71,3 +77,48 @@ class TestRouterSettingsEndpoints:
        assert "options" in routing_strategy_field
        assert isinstance(routing_strategy_field["options"], list)
        assert len(routing_strategy_field["options"]) > 0
+
+    @pytest.mark.asyncio
+    async def test_get_router_settings_includes_routing_groups_from_live_router(
+        self, monkeypatch
+    ):
+        """GET /router/settings returns routing_groups from the live router."""
+        # Routing-group definition handed to the Router and expected back
+        # unchanged from the endpoint.
+        groups = [
+            {
+                "group_name": "test-group",
+                "models": ["latency-model"],
+                "routing_strategy": "latency-based-routing",
+                "routing_strategy_args": {},
+            }
+        ]
+        llm_router = Router(
+            model_list=[
+                {
+                    "model_name": "latency-model",
+                    "litellm_params": {
+                        "model": "openai/gpt-4o",
+                        "api_key": "sk-x",
+                    },
+                }
+            ],
+            routing_groups=groups,
+        )
+
+        # Install the router as the proxy's module-level router.
+        monkeypatch.setattr(proxy_server, "llm_router", llm_router)
+
+        # Stub config loading to return an empty dict, so the config
+        # contributes no routing_groups of its own.
+        async def fake_get_config(self, config_file_path=None):
+            return {}
+
+        monkeypatch.setattr(
+            proxy_server.ProxyConfig, "get_config", fake_get_config, raising=True
+        )
+
+        admin_user = UserAPIKeyAuth(
+            user_role=LitellmUserRoles.PROXY_ADMIN, api_key="sk-x"
+        )
+        # Call the endpoint function directly instead of going through HTTP.
+        response = await get_router_settings(user_api_key_dict=admin_user)
+
+        assert response.current_values.get("routing_groups") == groups
+
+        # The per-field listing must carry the same value.
+        rg_field = next(f for f in response.fields if f.field_name == "routing_groups")
+        assert rg_field.field_value == groups
--- a/tests/test_litellm/proxy/middleware/test_prometheus_auth_middleware.py
+++ b/tests/test_litellm/proxy/middleware/test_prometheus_auth_middleware.py
@ -1,18 +1,5 @@
-import json
-import os
-import sys
-
-import pytest
-from fastapi.testclient import TestClient
-
-sys.path.insert(
-    0, os.path.abspath("../../..")
-)  # Adds the parent directory to the system path
-
-
 import pytest
 from fastapi import FastAPI
-from fastapi.responses import JSONResponse
 from fastapi.testclient import TestClient

 import litellm
@ -21,7 +8,7 @@ from litellm.proxy.middleware.prometheus_auth_middleware import PrometheusAuthMi


 # Fake auth functions to simulate valid and invalid auth behavior.
-async def fake_valid_auth(request, api_key):
+async def fake_valid_auth(request, api_key, **kwargs):
    # Simulate valid authentication: do nothing (i.e. pass)
    return

@ -35,15 +22,11 @@ async def fake_valid_auth_reads_body(request, api_key, **kwargs):
    return


-async def fake_invalid_auth(request, api_key):
-    print("running fake invalid auth", request, api_key)
+async def fake_invalid_auth(request, api_key, **kwargs):
    # Simulate invalid auth by raising an exception.
    raise Exception("Invalid API key")


-from litellm.proxy.auth.user_api_key_auth import user_api_key_auth
-
-
@pytest.fixture
 def app_with_middleware():
    """Create a FastAPI app with the PrometheusAuthMiddleware and dummy endpoints."""
@ -98,7 +81,7 @@ def test_valid_auth_metrics(app_with_middleware, monkeypatch):
    Test that a request to /metrics (and /metrics/) with valid auth headers passes.
    """
    # Enable auth on metrics endpoints.
-    litellm.require_auth_for_metrics_endpoint = True
+    monkeypatch.setattr(litellm, "require_auth_for_metrics_endpoint", True)
    # Patch the auth function to simulate a valid authentication.
    monkeypatch.setattr(
        "litellm.proxy.middleware.prometheus_auth_middleware.user_api_key_auth",
@ -123,7 +106,7 @@ def test_invalid_auth_metrics(app_with_middleware, monkeypatch):
    """
    Test that a request to /metrics with invalid auth headers fails with a 401.
    """
-    litellm.require_auth_for_metrics_endpoint = True
+    monkeypatch.setattr(litellm, "require_auth_for_metrics_endpoint", True)
    # Patch the auth function to simulate a failed authentication.
    monkeypatch.setattr(
        "litellm.proxy.middleware.prometheus_auth_middleware.user_api_key_auth",
@ -138,12 +121,48 @@ def test_invalid_auth_metrics(app_with_middleware, monkeypatch):
    assert "Unauthorized access to metrics endpoint" in response.text


+def test_metrics_auth_uses_real_auth_when_route_is_public(
+    app_with_middleware, monkeypatch
+):
+    """
+    Regression: although /metrics is statically public, enabling
+    require_auth_for_metrics_endpoint must still send the request through
+    the real auth path.
+    """
+    monkeypatch.setattr(litellm, "require_auth_for_metrics_endpoint", True)
+    proxy_server_overrides = (
+        ("litellm.proxy.proxy_server.master_key", "sk-master"),
+        ("litellm.proxy.proxy_server.general_settings", {}),
+    )
+    for target, value in proxy_server_overrides:
+        monkeypatch.setattr(target, value)
+
+    # No credentials are supplied with the request.
+    resp = TestClient(app_with_middleware).get("/metrics")
+
+    assert resp.status_code == 401, resp.text
+    assert "Unauthorized access to metrics endpoint" in resp.text
+
+
+def test_metrics_auth_is_required_by_default(app_with_middleware, monkeypatch):
+    """
+    Without any explicit configuration, /metrics must go through auth and
+    reject a request whose auth check fails.
+    """
+    auth_target = (
+        "litellm.proxy.middleware.prometheus_auth_middleware.user_api_key_auth"
+    )
+    monkeypatch.setattr(auth_target, fake_invalid_auth)
+
+    resp = TestClient(app_with_middleware).get("/metrics")
+
+    assert resp.status_code == 401, resp.text
+    assert "Unauthorized access to metrics endpoint" in resp.text
+
+
 def test_no_auth_metrics_when_disabled(app_with_middleware, monkeypatch):
    """
    Test that when require_auth_for_metrics_endpoint is False, requests to /metrics
    bypass the auth check.
    """
-    litellm.require_auth_for_metrics_endpoint = False
+    monkeypatch.setattr(litellm, "require_auth_for_metrics_endpoint", False)

    # To ensure auth is not run, patch the auth function with one that will raise if called.
    def should_not_be_called(*args, **kwargs):
@ -160,11 +179,11 @@ def test_no_auth_metrics_when_disabled(app_with_middleware, monkeypatch):
    assert response.json() == {"msg": "metrics OK"}


-def test_non_metrics_requests_pass_through(app_with_middleware):
+def test_non_metrics_requests_pass_through(app_with_middleware, monkeypatch):
    """
    Test that non-metrics endpoints pass through the middleware unaffected.
    """
-    litellm.require_auth_for_metrics_endpoint = True
+    monkeypatch.setattr(litellm, "require_auth_for_metrics_endpoint", True)

    client = TestClient(app_with_middleware)

@ -182,7 +201,7 @@ def test_non_metrics_requests_dont_trigger_auth(app_with_middleware, monkeypatch
    Test that non-metrics requests never trigger auth, even when auth is enabled
    and the auth function would reject the request.
    """
-    litellm.require_auth_for_metrics_endpoint = True
+    monkeypatch.setattr(litellm, "require_auth_for_metrics_endpoint", True)

    def should_not_be_called(*args, **kwargs):
        raise Exception("Auth should not be called for non-metrics requests")
--- a/tests/test_litellm/proxy/public_endpoints/test_public_endpoints.py
+++ b/tests/test_litellm/proxy/public_endpoints/test_public_endpoints.py
@ -91,6 +91,19 @@ def test_get_litellm_model_cost_map_returns_cost_map():
    )


+def test_public_ai_hub_info_is_public_by_default(monkeypatch):
+    """With empty general settings and a master key configured, the model
+    hub info route answers 200 to an unauthenticated request."""
+    hub_app = FastAPI()
+    hub_app.include_router(router)
+    http_client = TestClient(hub_app)
+
+    monkeypatch.setattr("litellm.proxy.proxy_server.general_settings", {})
+    monkeypatch.setattr("litellm.proxy.proxy_server.master_key", "sk-master")
+
+    resp = http_client.get("/public/model_hub/info")
+
+    assert resp.status_code == 200, resp.text
+
+
 def test_watsonx_provider_fields():
    """Test that Watsonx provider has all required credential fields including multiple auth options."""
    app = FastAPI()
@ -166,9 +179,9 @@ def test_anthropic_provider_fields_support_byok():
        "Anthropic api_key must be optional so admins can configure BYOK models "
        "without entering a key. See BYOK tutorial."
    )
-    assert fields_by_key["api_key"].get("tooltip"), (
-        "Anthropic api_key must have a tooltip explaining the BYOK use case."
-    )
+    assert fields_by_key["api_key"].get(
+        "tooltip"
+    ), "Anthropic api_key must have a tooltip explaining the BYOK use case."
    assert "api_base" in fields_by_key, (
        "Anthropic provider form must expose api_base so cloud customers "
        "can override the upstream URL without env var access."
@ -176,16 +189,16 @@ def test_anthropic_provider_fields_support_byok():
    api_base_field = fields_by_key["api_base"]
    assert api_base_field["required"] is False
    assert api_base_field["field_type"] == "text"
-    assert api_base_field.get("tooltip"), (
-        "api_base should have a tooltip explaining it is optional."
-    )
+    assert api_base_field.get(
+        "tooltip"
+    ), "api_base should have a tooltip explaining it is optional."

    # UI forms render fields in credential_fields order; api_base should come first
    # so an admin sees the URL override before the key field.
    field_order = [f["key"] for f in anthropic["credential_fields"]]
-    assert field_order.index("api_base") < field_order.index("api_key"), (
-        "api_base must appear before api_key in credential_fields (matches AI21 and ANTHROPIC_TEXT convention)."
-    )
+    assert field_order.index("api_base") < field_order.index(
+        "api_key"
+    ), "api_base must appear before api_key in credential_fields (matches AI21 and ANTHROPIC_TEXT convention)."


 def test_public_model_hub_with_healthy_model():
--- a/tests/test_litellm/proxy/test_pricing_field_strip.py
+++ b/tests/test_litellm/proxy/test_pricing_field_strip.py
@ -0,0 +1,312 @@
+"""Proxy strips client-supplied pricing parameters from request bodies.
+
+`litellm.completion` accepts pricing fields (`input_cost_per_token`,
+`output_cost_per_token`, the rest of `CustomPricingLiteLLMParams`,
+`metadata.model_info`) as part of its kwarg surface. On direct SDK use that
+is intentional. On the proxy, those same fields would let any caller rewrite
+their own per-request cost and — via `litellm.register_model` — mutate
+`litellm.model_cost` for every subsequent caller in the worker. The proxy
+strips them at the boundary; an opt-in key/team flag preserves the override
+for operators who actually want it.
+"""
+
+import os
+import sys
+from unittest.mock import MagicMock
+
+import pytest
+from fastapi import Request
+
+import litellm
+from litellm.proxy._types import UserAPIKeyAuth
+from litellm.proxy.litellm_pre_call_utils import (
+    _CLIENT_PRICING_CONTROL_FIELDS,
+    _CLIENT_PRICING_METADATA_FIELDS,
+    _strip_client_pricing_overrides,
+    add_litellm_data_to_request,
+)
+from litellm.types.utils import CustomPricingLiteLLMParams
+
+sys.path.insert(0, os.path.abspath("../../.."))
+
+
+def _make_request_mock() -> Request:
+    """Build a ``MagicMock`` standing in for a POST to ``/v1/chat/completions``.
+
+    Returns:
+        A mock created with ``spec=Request`` whose ``url``, ``method``,
+        ``query_params``, ``headers`` and ``client.host`` are pre-populated.
+    """
+    request_mock = MagicMock(spec=Request)
+    # Replace ``url`` first: assigning ``url.path`` before swapping ``url`` for
+    # a fresh mock would throw the path away with the old child mock.
+    request_mock.url = MagicMock()
+    request_mock.url.path = "/v1/chat/completions"
+    request_mock.url.__str__.return_value = "http://localhost/v1/chat/completions"
+    request_mock.method = "POST"
+    request_mock.query_params = {}
+    request_mock.headers = {"Content-Type": "application/json"}
+    request_mock.client = MagicMock()
+    request_mock.client.host = "127.0.0.1"
+    return request_mock
+
+
+def _user_api_key_auth(metadata=None, team_metadata=None) -> UserAPIKeyAuth:
+    """Return a ``UserAPIKeyAuth`` with fixed budgets and the given metadata.
+
+    Falsy ``metadata`` / ``team_metadata`` values are replaced by empty dicts.
+    """
+    key_metadata = metadata if metadata else {}
+    team_meta = team_metadata if team_metadata else {}
+    return UserAPIKeyAuth(
+        api_key="hashed-key",
+        metadata=key_metadata,
+        team_metadata=team_meta,
+        spend=0.0,
+        max_budget=100.0,
+        model_max_budget={},
+        team_spend=0.0,
+        team_max_budget=200.0,
+    )
+
+
+class TestStripClientPricingOverrides:
+    """Unit tests for ``_strip_client_pricing_overrides`` and its field sets."""
+
+    def test_pricing_field_set_tracks_pydantic_model(self):
+        # The strip set is built from the model so additions are picked up
+        # automatically — this test guards against the model and the strip
+        # set drifting apart if someone replaces the auto-derivation later.
+        assert _CLIENT_PRICING_CONTROL_FIELDS == frozenset(
+            CustomPricingLiteLLMParams.model_fields.keys()
+        )
+        # Sanity: the obvious top-level pricing fields are in the set.
+        for field in (
+            "input_cost_per_token",
+            "output_cost_per_token",
+            "input_cost_per_second",
+            "cache_creation_input_token_cost",
+        ):
+            assert field in _CLIENT_PRICING_CONTROL_FIELDS
+
+    def test_root_pricing_fields_dropped(self):
+        """Top-level pricing keys are removed; other keys are left in place."""
+        data = {
+            "model": "gpt-4",
+            "messages": [{"role": "user", "content": "hi"}],
+            "input_cost_per_token": 0.0,
+            "output_cost_per_token": 0.0,
+            "cache_creation_input_token_cost": 0.0,
+        }
+        _strip_client_pricing_overrides(data)
+        assert data == {
+            "model": "gpt-4",
+            "messages": [{"role": "user", "content": "hi"}],
+        }
+
+    def test_metadata_model_info_dropped(self):
+        """``model_info`` is removed from both metadata containers."""
+        data = {
+            "model": "gpt-4",
+            "metadata": {
+                "user_session": "keep-me",
+                "model_info": {"input_cost_per_token": 0.0},
+            },
+            "litellm_metadata": {
+                "model_info": {"output_cost_per_token": 0.0},
+            },
+        }
+        _strip_client_pricing_overrides(data)
+        assert data["metadata"] == {"user_session": "keep-me"}
+        assert data["litellm_metadata"] == {}
+
+    def test_non_pricing_fields_untouched(self):
+        """A body without pricing fields comes out of the strip unchanged."""
+        data = {
+            "model": "gpt-4",
+            "temperature": 0.7,
+            "max_tokens": 100,
+            "tools": [{"type": "function"}],
+            "metadata": {"trace_id": "abc"},
+        }
+        # Independent literal (not a copy of ``data``) so in-place mutation of
+        # nested values would still be detected by the comparison below.
+        snapshot = {
+            "model": "gpt-4",
+            "temperature": 0.7,
+            "max_tokens": 100,
+            "tools": [{"type": "function"}],
+            "metadata": {"trace_id": "abc"},
+        }
+        _strip_client_pricing_overrides(data)
+        assert data == snapshot
+
+    def test_metadata_strip_handles_non_dict_metadata(self):
+        # Defensive — Pydantic validation would normally reject non-dict
+        # metadata, but the strip mustn't crash if a malformed body sneaks in.
+        _strip_client_pricing_overrides({"metadata": "not-a-dict"})
+        _strip_client_pricing_overrides({"metadata": None})
+        _strip_client_pricing_overrides({"litellm_metadata": ["a", "b"]})
+
+    def test_metadata_field_set_contains_model_info(self):
+        """``model_info`` is one of the metadata keys the strip targets."""
+        assert "model_info" in _CLIENT_PRICING_METADATA_FIELDS
+
+    def test_strip_emits_debug_log_listing_dropped_fields(self, caplog):
+        # Operators need a paper trail so they can diagnose why a previously
+        # working override stopped applying after the strip landed.
+        import logging
+
+        from litellm._logging import verbose_proxy_logger
+
+        # ``caplog.at_level`` lowers the named logger to DEBUG for the block
+        # and restores the previous level afterwards; a manual ``setLevel``
+        # here would leave DEBUG enabled for every test that runs later.
+        with caplog.at_level(logging.DEBUG, logger=verbose_proxy_logger.name):
+            _strip_client_pricing_overrides(
+                {
+                    "model": "gpt-4",
+                    "input_cost_per_token": 0.0,
+                    "metadata": {"model_info": {"output_cost_per_token": 0.0}},
+                }
+            )
+        log_text = " ".join(record.getMessage() for record in caplog.records)
+        assert "input_cost_per_token" in log_text
+        assert "metadata.model_info" in log_text
+        assert "allow_client_pricing_override" in log_text
+
+    def test_strip_does_not_log_when_no_fields_present(self, caplog):
+        # No-op strips must stay silent so the log isn't filled with noise on
+        # every legitimate request.
+        import logging
+
+        from litellm._logging import verbose_proxy_logger
+
+        # The level change is scoped to the ``with`` block by ``caplog``; no
+        # manual ``setLevel`` so the logger's level does not leak to other tests.
+        with caplog.at_level(logging.DEBUG, logger=verbose_proxy_logger.name):
+            _strip_client_pricing_overrides({"model": "gpt-4", "temperature": 0.7})
+        assert not any(
+            "pricing" in record.getMessage().lower() for record in caplog.records
+        )
+
+
+@pytest.mark.asyncio
+async def test_add_litellm_data_to_request_strips_root_pricing_fields():
+    """Root-level pricing keys are absent from the processed request body."""
+    pricing_keys = ("input_cost_per_token", "output_cost_per_token")
+    request_body = {
+        "model": "gpt-4",
+        "messages": [{"role": "user", "content": "hi"}],
+    }
+    for key in pricing_keys:
+        request_body[key] = 0.0
+
+    processed = await add_litellm_data_to_request(
+        data=request_body,
+        request=_make_request_mock(),
+        user_api_key_dict=_user_api_key_auth(),
+        proxy_config=MagicMock(),
+        general_settings={},
+        version="test-version",
+    )
+
+    for key in pricing_keys:
+        assert key not in processed
+
+
+@pytest.mark.asyncio
+async def test_add_litellm_data_to_request_strips_metadata_model_info():
+    """``metadata.model_info`` sent by the caller is gone after processing."""
+    client_metadata = {"model_info": {"input_cost_per_token": 0.0}}
+    request_body = {
+        "model": "gpt-4",
+        "messages": [{"role": "user", "content": "hi"}],
+        "metadata": client_metadata,
+    }
+
+    processed = await add_litellm_data_to_request(
+        data=request_body,
+        request=_make_request_mock(),
+        user_api_key_dict=_user_api_key_auth(),
+        proxy_config=MagicMock(),
+        general_settings={},
+        version="test-version",
+    )
+
+    remaining_metadata = processed.get("metadata", {})
+    assert "model_info" not in remaining_metadata
+
+
+@pytest.mark.asyncio
+async def test_add_litellm_data_to_request_skips_strip_with_key_opt_in():
+    """Key metadata with ``allow_client_pricing_override`` keeps pricing fields."""
+    opted_in_key = _user_api_key_auth(
+        metadata={"allow_client_pricing_override": True}
+    )
+    request_body = {
+        "model": "gpt-4",
+        "messages": [{"role": "user", "content": "hi"}],
+        "input_cost_per_token": 0.0001,
+        "metadata": {"model_info": {"output_cost_per_token": 0.0002}},
+    }
+
+    processed = await add_litellm_data_to_request(
+        data=request_body,
+        request=_make_request_mock(),
+        user_api_key_dict=opted_in_key,
+        proxy_config=MagicMock(),
+        general_settings={},
+        version="test-version",
+    )
+
+    # Both the root-level field and the nested model_info survive untouched.
+    assert processed["input_cost_per_token"] == 0.0001
+    preserved_model_info = processed["metadata"]["model_info"]
+    assert preserved_model_info == {"output_cost_per_token": 0.0002}
+
+
+@pytest.mark.asyncio
+async def test_add_litellm_data_to_request_strips_json_string_litellm_metadata():
+    """A JSON-encoded ``litellm_metadata`` string is parsed and then stripped.
+
+    ``litellm_metadata`` can reach the proxy as a string (multipart/form-data
+    or ``extra_body``). If the strip ran before that string was parsed into a
+    dict, its ``isinstance(dict)`` guard would skip the field and
+    ``model_info`` would slip through.
+    """
+    import json
+
+    encoded_metadata = json.dumps({"model_info": {"input_cost_per_token": 0.0}})
+    request_body = {
+        "model": "gpt-4",
+        "messages": [{"role": "user", "content": "hi"}],
+        "litellm_metadata": encoded_metadata,
+    }
+
+    processed = await add_litellm_data_to_request(
+        data=request_body,
+        request=_make_request_mock(),
+        user_api_key_dict=_user_api_key_auth(),
+        proxy_config=MagicMock(),
+        general_settings={},
+        version="test-version",
+    )
+
+    parsed_metadata = processed.get("litellm_metadata")
+    assert isinstance(parsed_metadata, dict)
+    assert "model_info" not in parsed_metadata
+
+
+@pytest.mark.asyncio
+async def test_add_litellm_data_to_request_skips_strip_with_team_opt_in():
+    """Team metadata with ``allow_client_pricing_override`` keeps pricing fields."""
+    opted_in_team_key = _user_api_key_auth(
+        team_metadata={"allow_client_pricing_override": True}
+    )
+    request_body = {
+        "model": "gpt-4",
+        "messages": [{"role": "user", "content": "hi"}],
+        "input_cost_per_token": 0.0001,
+    }
+
+    processed = await add_litellm_data_to_request(
+        data=request_body,
+        request=_make_request_mock(),
+        user_api_key_dict=opted_in_team_key,
+        proxy_config=MagicMock(),
+        general_settings={},
+        version="test-version",
+    )
+
+    assert processed["input_cost_per_token"] == 0.0001
+
+
+@pytest.mark.asyncio
+async def test_global_model_cost_unmutated_after_stripped_request(monkeypatch):
+    """After a stripped request, ``litellm.model_cost`` must not carry the
+    caller's submitted pricing for the model. The mutation only happens when
+    the pricing fields reach ``litellm.completion``; the strip prevents that."""
+    import copy
+
+    # Deep copy on purpose: a shallow ``dict(...)`` shares every per-model
+    # dict with ``litellm.model_cost``, so an in-place edit of an existing
+    # entry would change the snapshot too and the final equality check could
+    # never fail.
+    snapshot = copy.deepcopy(litellm.model_cost)
+    data = {
+        "model": "test-pricing-canary-model",
+        "messages": [{"role": "user", "content": "hi"}],
+        "input_cost_per_token": 0.0,
+        "output_cost_per_token": 0.0,
+    }
+
+    await add_litellm_data_to_request(
+        data=data,
+        request=_make_request_mock(),
+        user_api_key_dict=_user_api_key_auth(),
+        proxy_config=MagicMock(),
+        general_settings={},
+        version="test-version",
+    )
+
+    # The strip prevents the pricing fields from ever reaching the path that
+    # would mutate the global model_cost map.
+    assert "test-pricing-canary-model" not in litellm.model_cost
+    # And no other entries were added, removed, or edited as a side effect.
+    assert litellm.model_cost == snapshot
--- a/tests/test_litellm/proxy/test_sensitive_route_auth.py
+++ b/tests/test_litellm/proxy/test_sensitive_route_auth.py
@ -0,0 +1,34 @@
+from fastapi.routing import APIRoute
+
+from litellm.proxy.auth.user_api_key_auth import user_api_key_auth
+from litellm.proxy.common_utils.debug_utils import router as debug_router
+from litellm.proxy.spend_tracking.spend_management_endpoints import (
+    router as spend_router,
+)
+
+
+def _get_route_dependency_calls(router, path: str, method: str):
+    """Return the dependency callables of the route matching ``path``/``method``.
+
+    Only ``APIRoute`` entries of ``router.routes`` are considered; the first
+    match wins. Raises ``AssertionError`` when no route matches.
+    """
+    for candidate in router.routes:
+        if not isinstance(candidate, APIRoute):
+            continue
+        if candidate.path != path or method not in candidate.methods:
+            continue
+        return [dep.call for dep in candidate.dependant.dependencies]
+    raise AssertionError(f"Route {method} {path} not found")
+
+
+def test_sensitive_debug_routes_require_auth_dependency():
+    """Each listed debug route declares ``user_api_key_auth`` as a dependency."""
+    guarded_routes = [
+        ("/debug/asyncio-tasks", "GET"),
+        ("/otel-spans", "GET"),
+    ]
+    for route_path, http_method in guarded_routes:
+        dependency_calls = _get_route_dependency_calls(
+            debug_router, route_path, http_method
+        )
+        assert user_api_key_auth in dependency_calls
+
+
+def test_provider_budgets_requires_auth_dependency():
+    """``GET /provider/budgets`` declares ``user_api_key_auth`` as a dependency."""
+    dependency_calls = _get_route_dependency_calls(
+        spend_router, "/provider/budgets", "GET"
+    )
+    assert user_api_key_auth in dependency_calls
--- a/tests/test_litellm/proxy/ui_crud_endpoints/test_proxy_setting_endpoints.py
+++ b/tests/test_litellm/proxy/ui_crud_endpoints/test_proxy_setting_endpoints.py
@ -868,6 +868,7 @@ class TestProxySettingEndpoints:
        mock_db_record = MagicMock()
        mock_db_record.ui_settings = {
            "disable_model_add_for_internal_users": True,
+            "require_auth_for_public_ai_hub": True,
            "unexpected_flag": True,
        }
        mock_prisma.db.litellm_uisettings.find_unique = AsyncMock(
@ -880,10 +881,12 @@ class TestProxySettingEndpoints:
        assert response.status_code == 200
        data = response.json()
        assert data["values"]["disable_model_add_for_internal_users"] is True
+        assert data["values"]["require_auth_for_public_ai_hub"] is True
        assert "unexpected_flag" not in data["values"]
        assert (
            "disable_model_add_for_internal_users" in data["field_schema"]["properties"]
        )
+        assert "require_auth_for_public_ai_hub" in data["field_schema"]["properties"]
        mock_prisma.db.litellm_uisettings.find_unique.assert_called_once_with(
            where={"id": "ui_settings"}
        )
@ -1070,6 +1073,43 @@ class TestProxySettingEndpoints:
        assert "unsupported_flag" not in stored_settings
        assert stored_settings["disable_model_add_for_internal_users"] is False

+    def test_update_ui_settings_preserves_public_ai_hub_auth_flag(
+        self, mock_auth, monkeypatch
+    ):
+        """PATCHing ``require_auth_for_public_ai_hub`` is accepted and stored.
+
+        The flag must come back in the response and appear in the JSON
+        written through the ``litellm_uisettings`` upsert.
+        """
+        from unittest.mock import AsyncMock, MagicMock
+
+        from litellm.proxy._types import UserAPIKeyAuth
+        from litellm.proxy.auth.user_api_key_auth import user_api_key_auth
+
+        admin_auth = UserAPIKeyAuth(
+            user_id="test-user-123",
+            user_role=LitellmUserRoles.PROXY_ADMIN,
+        )
+        app.dependency_overrides[user_api_key_auth] = lambda: admin_auth
+
+        monkeypatch.setattr("litellm.proxy.proxy_server.store_model_in_db", True)
+        # No existing row: ``find_unique`` resolves to None.
+        mock_prisma = MagicMock()
+        ui_settings_table = mock_prisma.db.litellm_uisettings
+        ui_settings_table.upsert = AsyncMock()
+        ui_settings_table.find_unique = AsyncMock(return_value=None)
+        monkeypatch.setattr("litellm.proxy.proxy_server.prisma_client", mock_prisma)
+
+        try:
+            response = client.patch(
+                "/update/ui_settings",
+                json={"require_auth_for_public_ai_hub": True},
+            )
+        finally:
+            # Always drop the auth override so it cannot affect other tests.
+            app.dependency_overrides.clear()
+
+        assert response.status_code == 200
+        body = response.json()
+        assert body["status"] == "success"
+        assert body["settings"]["require_auth_for_public_ai_hub"] is True
+
+        upsert_kwargs = ui_settings_table.upsert.call_args.kwargs
+        stored_settings = json.loads(upsert_kwargs["data"]["create"]["ui_settings"])
+        assert stored_settings["require_auth_for_public_ai_hub"] is True
+
    def test_update_ui_settings_persists_forward_llm_provider_auth_headers(
        self, mock_auth, monkeypatch
    ):
@ -1147,6 +1187,43 @@ class TestProxySettingEndpoints:
        assert response.status_code == 200
        assert general_settings.get("forward_llm_provider_auth_headers") is True

+    def test_update_ui_settings_syncs_public_health_readiness_details_to_general_settings(
+        self, mock_auth, monkeypatch
+    ):
+        """PATCHing ``allow_public_health_readiness_details`` updates general_settings.
+
+        The patched ``general_settings`` dict must carry the flag after the
+        request so the health route can read it.
+        """
+        from unittest.mock import AsyncMock, MagicMock
+
+        from litellm.proxy._types import UserAPIKeyAuth
+        from litellm.proxy.auth.user_api_key_auth import user_api_key_auth
+
+        admin_auth = UserAPIKeyAuth(
+            user_id="test-user-123",
+            user_role=LitellmUserRoles.PROXY_ADMIN,
+        )
+        app.dependency_overrides[user_api_key_auth] = lambda: admin_auth
+
+        monkeypatch.setattr("litellm.proxy.proxy_server.store_model_in_db", True)
+
+        # Fresh dict swapped in for the module-level settings so the sync can
+        # be observed without touching real configuration.
+        general_settings: dict = {}
+        monkeypatch.setattr(
+            "litellm.proxy.proxy_server.general_settings", general_settings
+        )
+
+        mock_prisma = MagicMock()
+        ui_settings_table = mock_prisma.db.litellm_uisettings
+        ui_settings_table.upsert = AsyncMock()
+        ui_settings_table.find_unique = AsyncMock(return_value=None)
+        monkeypatch.setattr("litellm.proxy.proxy_server.prisma_client", mock_prisma)
+
+        try:
+            response = client.patch(
+                "/update/ui_settings",
+                json={"allow_public_health_readiness_details": True},
+            )
+        finally:
+            # Always drop the auth override so it cannot affect other tests.
+            app.dependency_overrides.clear()
+
+        assert response.status_code == 200
+        synced_flag = general_settings.get("allow_public_health_readiness_details")
+        assert synced_flag is True
+
    def test_update_ui_settings_persists_and_syncs_disable_key_generate_for_org_admin(
        self, mock_auth, monkeypatch
    ):
--- a/ui/litellm-dashboard/package-lock.json
+++ b/ui/litellm-dashboard/package-lock.json
@ -8,12 +8,12 @@
      "name": "litellm-dashboard",
      "version": "0.1.0",
      "dependencies": {
-        "@anthropic-ai/sdk": "0.54.0",
+        "@anthropic-ai/sdk": "0.92.0",
        "@headlessui/tailwindcss": "0.2.2",
        "@heroicons/react": "1.0.6",
        "@remixicon/react": "4.9.0",
        "@tanstack/react-pacer": "0.2.0",
-        "@tanstack/react-query": "5.90.20",
+        "@tanstack/react-query": "5.100.7",
        "@tanstack/react-table": "8.21.3",
        "@tremor/react": "3.18.7",
        "@types/papaparse": "5.5.2",
@ -23,18 +23,18 @@
        "jwt-decode": "4.0.0",
        "lucide-react": "0.513.0",
        "moment": "2.30.1",
-        "next": "16.1.7",
+        "next": "16.2.4",
        "openai": "4.104.0",
        "papaparse": "5.5.3",
        "react": "18.3.1",
-        "react-copy-to-clipboard": "5.1.0",
+        "react-copy-to-clipboard": "5.1.1",
        "react-dom": "18.3.1",
        "react-json-view-lite": "2.5.0",
        "react-markdown": "9.1.0",
        "react-syntax-highlighter": "15.6.6",
        "remark-gfm": "4.0.1",
        "tailwind-merge": "3.4.0",
-        "uuid": "11.1.0"
+        "uuid": "14.0.0"
      },
      "devDependencies": {
        "@playwright/test": "1.58.1",
@ -61,7 +61,7 @@
        "eslint-plugin-unused-imports": "4.3.0",
        "jsdom": "27.4.0",
        "knip": "5.83.1",
-        "postcss": "8.5.6",
+        "postcss": "8.5.13",
        "prettier": "3.2.5",
        "tailwindcss": "3.4.19",
        "typescript": "5.9.3",
@ -69,7 +69,7 @@
        "vitest": "3.2.4"
      },
      "engines": {
-        "node": ">=18.17.0",
+        "node": ">=20.9.0",
        "npm": ">=8.3.0"
      }
    },
@ -211,12 +211,23 @@
      }
    },
    "node_modules/@anthropic-ai/sdk": {
-      "version": "0.54.0",
-      "resolved": "https://registry.npmjs.org/@anthropic-ai/sdk/-/sdk-0.54.0.tgz",
-      "integrity": "sha512-xyoCtHJnt/qg5GG6IgK+UJEndz8h8ljzt/caKXmq3LfBF81nC/BW6E4x2rOWCZcvsLyVW+e8U5mtIr6UCE/kJw==",
+      "version": "0.92.0",
+      "resolved": "https://registry.npmjs.org/@anthropic-ai/sdk/-/sdk-0.92.0.tgz",
+      "integrity": "sha512-l653JFC83wCglH8H83t1xpgDurCyPyslYW1maPRdCsfuNuGbLvQjQ81sWd3Go3LWRm0jNspzAhuqAYV8r9joSw==",
      "license": "MIT",
+      "dependencies": {
+        "json-schema-to-ts": "^3.1.1"
+      },
      "bin": {
        "anthropic-ai-sdk": "bin/cli"
+      },
+      "peerDependencies": {
+        "zod": "^3.25.0 || ^4.0.0"
+      },
+      "peerDependenciesMeta": {
+        "zod": {
+          "optional": true
+        }
      }
    },
    "node_modules/@asamuzakjp/css-color": {
@ -1817,9 +1828,9 @@
      }
    },
    "node_modules/@next/env": {
-      "version": "16.1.7",
-      "resolved": "https://registry.npmjs.org/@next/env/-/env-16.1.7.tgz",
-      "integrity": "sha512-rJJbIdJB/RQr2F1nylZr/PJzamvNNhfr3brdKP6s/GW850jbtR70QlSfFselvIBbcPUOlQwBakexjFzqLzF6pg==",
+      "version": "16.2.4",
+      "resolved": "https://registry.npmjs.org/@next/env/-/env-16.2.4.tgz",
+      "integrity": "sha512-dKkkOzOSwFYe5RX6y26fZgkSpVAlIOJKQHIiydQcrWH6y/97+RceSOAdjZ14Qa3zLduVUy0TXcn+EiM6t4rPgw==",
      "license": "MIT"
    },
    "node_modules/@next/eslint-plugin-next": {
@ -1833,9 +1844,9 @@
      }
    },
    "node_modules/@next/swc-darwin-arm64": {
-      "version": "16.1.7",
-      "resolved": "https://registry.npmjs.org/@next/swc-darwin-arm64/-/swc-darwin-arm64-16.1.7.tgz",
-      "integrity": "sha512-b2wWIE8sABdyafc4IM8r5Y/dS6kD80JRtOGrUiKTsACFQfWWgUQ2NwoUX1yjFMXVsAwcQeNpnucF2ZrujsBBPg==",
+      "version": "16.2.4",
+      "resolved": "https://registry.npmjs.org/@next/swc-darwin-arm64/-/swc-darwin-arm64-16.2.4.tgz",
+      "integrity": "sha512-OXTFFox5EKN1Ym08vfrz+OXxmCcEjT4SFMbNRsWZE99dMqt2Kcusl5MqPXcW232RYkMLQTy0hqgAMEsfEd/l2A==",
      "cpu": [
        "arm64"
      ],
@ -1849,9 +1860,9 @@
      }
    },
    "node_modules/@next/swc-darwin-x64": {
-      "version": "16.1.7",
-      "resolved": "https://registry.npmjs.org/@next/swc-darwin-x64/-/swc-darwin-x64-16.1.7.tgz",
-      "integrity": "sha512-zcnVaaZulS1WL0Ss38R5Q6D2gz7MtBu8GZLPfK+73D/hp4GFMrC2sudLky1QibfV7h6RJBJs/gOFvYP0X7UVlQ==",
+      "version": "16.2.4",
+      "resolved": "https://registry.npmjs.org/@next/swc-darwin-x64/-/swc-darwin-x64-16.2.4.tgz",
+      "integrity": "sha512-XhpVnUfmYWvD3YrXu55XdcAkQtOnvaI6wtQa8fuF5fGoKoxIUZ0kWPtcOfqJEWngFF/lOS9l3+O9CcownhiQxQ==",
      "cpu": [
        "x64"
      ],
@ -1865,12 +1876,15 @@
      }
    },
    "node_modules/@next/swc-linux-arm64-gnu": {
-      "version": "16.1.7",
-      "resolved": "https://registry.npmjs.org/@next/swc-linux-arm64-gnu/-/swc-linux-arm64-gnu-16.1.7.tgz",
-      "integrity": "sha512-2ant89Lux/Q3VyC8vNVg7uBaFVP9SwoK2jJOOR0L8TQnX8CAYnh4uctAScy2Hwj2dgjVHqHLORQZJ2wH6VxhSQ==",
+      "version": "16.2.4",
+      "resolved": "https://registry.npmjs.org/@next/swc-linux-arm64-gnu/-/swc-linux-arm64-gnu-16.2.4.tgz",
+      "integrity": "sha512-Mx/tjlNA3G8kg14QvuGAJ4xBwPk1tUHq56JxZ8CXnZwz1Etz714soCEzGQQzVMz4bEnGPowzkV6Xrp6wAkEWOQ==",
      "cpu": [
        "arm64"
      ],
+      "libc": [
+        "glibc"
+      ],
      "license": "MIT",
      "optional": true,
      "os": [
@ -1881,12 +1895,15 @@
      }
    },
    "node_modules/@next/swc-linux-arm64-musl": {
-      "version": "16.1.7",
-      "resolved": "https://registry.npmjs.org/@next/swc-linux-arm64-musl/-/swc-linux-arm64-musl-16.1.7.tgz",
-      "integrity": "sha512-uufcze7LYv0FQg9GnNeZ3/whYfo+1Q3HnQpm16o6Uyi0OVzLlk2ZWoY7j07KADZFY8qwDbsmFnMQP3p3+Ftprw==",
+      "version": "16.2.4",
+      "resolved": "https://registry.npmjs.org/@next/swc-linux-arm64-musl/-/swc-linux-arm64-musl-16.2.4.tgz",
+      "integrity": "sha512-iVMMp14514u7Nup2umQS03nT/bN9HurK8ufylC3FZNykrwjtx7V1A7+4kvhbDSCeonTVqV3Txnv0Lu+m2oDXNg==",
      "cpu": [
        "arm64"
      ],
+      "libc": [
+        "musl"
+      ],
      "license": "MIT",
      "optional": true,
      "os": [
@ -1897,12 +1914,15 @@
      }
    },
    "node_modules/@next/swc-linux-x64-gnu": {
-      "version": "16.1.7",
-      "resolved": "https://registry.npmjs.org/@next/swc-linux-x64-gnu/-/swc-linux-x64-gnu-16.1.7.tgz",
-      "integrity": "sha512-KWVf2gxYvHtvuT+c4MBOGxuse5TD7DsMFYSxVxRBnOzok/xryNeQSjXgxSv9QpIVlaGzEn/pIuI6Koosx8CGWA==",
+      "version": "16.2.4",
+      "resolved": "https://registry.npmjs.org/@next/swc-linux-x64-gnu/-/swc-linux-x64-gnu-16.2.4.tgz",
+      "integrity": "sha512-EZOvm1aQWgnI/N/xcWOlnS3RQBk0VtVav5Zo7n4p0A7UKyTDx047k8opDbXgBpHl4CulRqRfbw3QrX2w5UOXMQ==",
      "cpu": [
        "x64"
      ],
+      "libc": [
+        "glibc"
+      ],
      "license": "MIT",
      "optional": true,
      "os": [
@ -1913,12 +1933,15 @@
      }
    },
    "node_modules/@next/swc-linux-x64-musl": {
-      "version": "16.1.7",
-      "resolved": "https://registry.npmjs.org/@next/swc-linux-x64-musl/-/swc-linux-x64-musl-16.1.7.tgz",
-      "integrity": "sha512-HguhaGwsGr1YAGs68uRKc4aGWxLET+NevJskOcCAwXbwj0fYX0RgZW2gsOCzr9S11CSQPIkxmoSbuVaBp4Z3dA==",
+      "version": "16.2.4",
+      "resolved": "https://registry.npmjs.org/@next/swc-linux-x64-musl/-/swc-linux-x64-musl-16.2.4.tgz",
+      "integrity": "sha512-h9FxsngCm9cTBf71AR4fGznDEDx1hS7+kSEiIRjq5kO1oXWm07DxVGZjCvk0SGx7TSjlUqhI8oOyz7NfwAdPoA==",
      "cpu": [
        "x64"
      ],
+      "libc": [
+        "musl"
+      ],
      "license": "MIT",
      "optional": true,
      "os": [
@ -1929,9 +1952,9 @@
      }
    },
    "node_modules/@next/swc-win32-arm64-msvc": {
-      "version": "16.1.7",
-      "resolved": "https://registry.npmjs.org/@next/swc-win32-arm64-msvc/-/swc-win32-arm64-msvc-16.1.7.tgz",
-      "integrity": "sha512-S0n3KrDJokKTeFyM/vGGGR8+pCmXYrjNTk2ZozOL1C/JFdfUIL9O1ATaJOl5r2POe56iRChbsszrjMAdWSv7kQ==",
+      "version": "16.2.4",
+      "resolved": "https://registry.npmjs.org/@next/swc-win32-arm64-msvc/-/swc-win32-arm64-msvc-16.2.4.tgz",
+      "integrity": "sha512-3NdJV5OXMSOeJYijX+bjaLge3mJBlh4ybydbT4GFoB/2hAojWHtMhl3CYlYoMrjPuodp0nzFVi4Tj2+WaMg+Ow==",
      "cpu": [
        "arm64"
      ],
@ -1945,9 +1968,9 @@
      }
    },
    "node_modules/@next/swc-win32-x64-msvc": {
-      "version": "16.1.7",
-      "resolved": "https://registry.npmjs.org/@next/swc-win32-x64-msvc/-/swc-win32-x64-msvc-16.1.7.tgz",
-      "integrity": "sha512-mwgtg8CNZGYm06LeEd+bNnOUfwOyNem/rOiP14Lsz+AnUY92Zq/LXwtebtUiaeVkhbroRCQ0c8GlR4UT1U+0yg==",
+      "version": "16.2.4",
+      "resolved": "https://registry.npmjs.org/@next/swc-win32-x64-msvc/-/swc-win32-x64-msvc-16.2.4.tgz",
+      "integrity": "sha512-kMVGgsqhO5YTYODD9IPGGhA6iprWidQckK3LmPeW08PIFENRmgfb4MjXHO+p//d+ts2rpjvK5gXWzXSMrPl9cw==",
      "cpu": [
        "x64"
      ],
@ -2981,9 +3004,9 @@
      }
    },
    "node_modules/@tanstack/query-core": {
-      "version": "5.90.20",
-      "resolved": "https://registry.npmjs.org/@tanstack/query-core/-/query-core-5.90.20.tgz",
-      "integrity": "sha512-OMD2HLpNouXEfZJWcKeVKUgQ5n+n3A2JFmBaScpNDUqSrQSjiveC7dKMe53uJUg1nDG16ttFPz2xfilz6i2uVg==",
+      "version": "5.100.7",
+      "resolved": "https://registry.npmjs.org/@tanstack/query-core/-/query-core-5.100.7.tgz",
+      "integrity": "sha512-5R7i6ENJLhVeeJrrUz7jKBXUXv/BJrxf9FQJSkR13bPrb3zOcE8A0Z0PxYCcsKPOsiIlTibrBL/zZbtUO1TFyQ==",
      "license": "MIT",
      "funding": {
        "type": "github",
@ -3011,12 +3034,12 @@
      }
    },
    "node_modules/@tanstack/react-query": {
-      "version": "5.90.20",
-      "resolved": "https://registry.npmjs.org/@tanstack/react-query/-/react-query-5.90.20.tgz",
-      "integrity": "sha512-vXBxa+qeyveVO7OA0jX1z+DeyCA4JKnThKv411jd5SORpBKgkcVnYKCiBgECvADvniBX7tobwBmg01qq9JmMJw==",
+      "version": "5.100.7",
+      "resolved": "https://registry.npmjs.org/@tanstack/react-query/-/react-query-5.100.7.tgz",
+      "integrity": "sha512-LoISYWz8dOOuQbeIctF8K6yi42TWtR1WPGpwGuRUpF3u79JVVIg/PVR0MQdIA0VSHqD/ydf/b7PhKTkg3I4fLQ==",
      "license": "MIT",
      "dependencies": {
-        "@tanstack/query-core": "5.90.20"
+        "@tanstack/query-core": "5.100.7"
      },
      "funding": {
        "type": "github",
@ -7872,6 +7895,19 @@
      "dev": true,
      "license": "MIT"
    },
+    "node_modules/json-schema-to-ts": {
+      "version": "3.1.1",
+      "resolved": "https://registry.npmjs.org/json-schema-to-ts/-/json-schema-to-ts-3.1.1.tgz",
+      "integrity": "sha512-+DWg8jCJG2TEnpy7kOm/7/AxaYoaRbjVB4LFZLySZlWn8exGs3A4OLJR966cVvU26N7X9TWxl+Jsw7dzAqKT6g==",
+      "license": "MIT",
+      "dependencies": {
+        "@babel/runtime": "^7.18.3",
+        "ts-algebra": "^2.0.0"
+      },
+      "engines": {
+        "node": ">=16"
+      }
+    },
    "node_modules/json-schema-traverse": {
      "version": "0.4.1",
      "resolved": "https://registry.npmjs.org/json-schema-traverse/-/json-schema-traverse-0.4.1.tgz",
@ -9299,12 +9335,12 @@
      "license": "MIT"
    },
    "node_modules/next": {
-      "version": "16.1.7",
-      "resolved": "https://registry.npmjs.org/next/-/next-16.1.7.tgz",
-      "integrity": "sha512-WM0L7WrSvKwoLegLYr6V+mz+RIofqQgVAfHhMp9a88ms0cFX8iX9ew+snpWlSBwpkURJOUdvCEt3uLl3NNzvWg==",
+      "version": "16.2.4",
+      "resolved": "https://registry.npmjs.org/next/-/next-16.2.4.tgz",
+      "integrity": "sha512-kPvz56wF5frc+FxlHI5qnklCzbq53HTwORaWBGdT0vNoKh1Aya9XC8aPauH4NJxqtzbWsS5mAbctm4cr+EkQ2Q==",
      "license": "MIT",
      "dependencies": {
-        "@next/env": "16.1.7",
+        "@next/env": "16.2.4",
        "@swc/helpers": "0.5.15",
        "baseline-browser-mapping": "^2.9.19",
        "caniuse-lite": "^1.0.30001579",
@ -9318,15 +9354,15 @@
        "node": ">=20.9.0"
      },
      "optionalDependencies": {
-        "@next/swc-darwin-arm64": "16.1.7",
-        "@next/swc-darwin-x64": "16.1.7",
-        "@next/swc-linux-arm64-gnu": "16.1.7",
-        "@next/swc-linux-arm64-musl": "16.1.7",
-        "@next/swc-linux-x64-gnu": "16.1.7",
-        "@next/swc-linux-x64-musl": "16.1.7",
-        "@next/swc-win32-arm64-msvc": "16.1.7",
-        "@next/swc-win32-x64-msvc": "16.1.7",
-        "sharp": "^0.34.4"
+        "@next/swc-darwin-arm64": "16.2.4",
+        "@next/swc-darwin-x64": "16.2.4",
+        "@next/swc-linux-arm64-gnu": "16.2.4",
+        "@next/swc-linux-arm64-musl": "16.2.4",
+        "@next/swc-linux-x64-gnu": "16.2.4",
+        "@next/swc-linux-x64-musl": "16.2.4",
+        "@next/swc-win32-arm64-msvc": "16.2.4",
+        "@next/swc-win32-x64-msvc": "16.2.4",
+        "sharp": "^0.34.5"
      },
      "peerDependencies": {
        "@opentelemetry/api": "^1.1.0",
@ -9360,34 +9396,6 @@
        "tslib": "^2.8.0"
      }
    },
-    "node_modules/next/node_modules/postcss": {
-      "version": "8.4.31",
-      "resolved": "https://registry.npmjs.org/postcss/-/postcss-8.4.31.tgz",
-      "integrity": "sha512-PS08Iboia9mts/2ygV3eLpY5ghnUcfLV/EXTOW1E2qYxJKGGBUtNjN76FYHnMs36RmARn41bC0AZmn+rR0OVpQ==",
-      "funding": [
-        {
-          "type": "opencollective",
-          "url": "https://opencollective.com/postcss/"
-        },
-        {
-          "type": "tidelift",
-          "url": "https://tidelift.com/funding/github/npm/postcss"
-        },
-        {
-          "type": "github",
-          "url": "https://github.com/sponsors/ai"
-        }
-      ],
-      "license": "MIT",
-      "dependencies": {
-        "nanoid": "^3.3.6",
-        "picocolors": "^1.0.0",
-        "source-map-js": "^1.0.2"
-      },
-      "engines": {
-        "node": "^10 || ^12 || >=14"
-      }
-    },
    "node_modules/node-domexception": {
      "version": "1.0.0",
      "resolved": "https://registry.npmjs.org/node-domexception/-/node-domexception-1.0.0.tgz",
@ -9938,9 +9946,9 @@
      }
    },
    "node_modules/postcss": {
-      "version": "8.5.6",
-      "resolved": "https://registry.npmjs.org/postcss/-/postcss-8.5.6.tgz",
-      "integrity": "sha512-3Ybi1tAuwAP9s0r1UQ2J4n5Y0G05bJkpUIO0/bI9MhwmD70S5aTWbXGBwxHrelT+XM1k6dM0pk+SwNkpTRN7Pg==",
+      "version": "8.5.13",
+      "resolved": "https://registry.npmjs.org/postcss/-/postcss-8.5.13.tgz",
+      "integrity": "sha512-qif0+jGGZoLWdHey3UFHHWP0H7Gbmsk8T5VEqyYFbWqPr1XqvLGBbk/sl8V5exGmcYJklJOhOQq1pV9IcsiFag==",
      "funding": [
        {
          "type": "opencollective",
@ -10838,16 +10846,16 @@
      }
    },
    "node_modules/react-copy-to-clipboard": {
-      "version": "5.1.0",
-      "resolved": "https://registry.npmjs.org/react-copy-to-clipboard/-/react-copy-to-clipboard-5.1.0.tgz",
-      "integrity": "sha512-k61RsNgAayIJNoy9yDsYzDe/yAZAzEbEgcz3DZMhF686LEyukcE1hzurxe85JandPUG+yTfGVFzuEw3xt8WP/A==",
+      "version": "5.1.1",
+      "resolved": "https://registry.npmjs.org/react-copy-to-clipboard/-/react-copy-to-clipboard-5.1.1.tgz",
+      "integrity": "sha512-s+HrzLyJBxrpGTYXF15dTgMjAJpEPZT/Yp6NytAtZMRngejxt6Pt5WrfFxLAcsqUDU6sY1Jz6tyHwIicE1U2Xg==",
      "license": "MIT",
      "dependencies": {
-        "copy-to-clipboard": "^3.3.1",
+        "copy-to-clipboard": "^3.3.3",
        "prop-types": "^15.8.1"
      },
      "peerDependencies": {
-        "react": "^15.3.0 || 16 || 17 || 18"
+        "react": ">=15.3.0"
      }
    },
    "node_modules/react-day-picker": {
@ -12374,6 +12382,12 @@
        "url": "https://github.com/sponsors/wooorm"
      }
    },
+    "node_modules/ts-algebra": {
+      "version": "2.0.0",
+      "resolved": "https://registry.npmjs.org/ts-algebra/-/ts-algebra-2.0.0.tgz",
+      "integrity": "sha512-FPAhNPFMrkwz76P7cdjdmiShwMynZYN6SgOujD1urY4oNm80Ou9oMdmbR45LotcKOXoy7wSmHkRFE6Mxbrhefw==",
+      "license": "MIT"
+    },
    "node_modules/ts-api-utils": {
      "version": "2.4.0",
      "resolved": "https://registry.npmjs.org/ts-api-utils/-/ts-api-utils-2.4.0.tgz",
@ -12712,16 +12726,16 @@
      "license": "MIT"
    },
    "node_modules/uuid": {
-      "version": "11.1.0",
-      "resolved": "https://registry.npmjs.org/uuid/-/uuid-11.1.0.tgz",
-      "integrity": "sha512-0/A9rDy9P7cJ+8w1c9WD9V//9Wj15Ce2MPz8Ri6032usz+NfePxx5AcN3bN+r6ZL6jEo066/yNYB3tn4pQEx+A==",
+      "version": "14.0.0",
+      "resolved": "https://registry.npmjs.org/uuid/-/uuid-14.0.0.tgz",
+      "integrity": "sha512-Qo+uWgilfSmAhXCMav1uYFynlQO7fMFiMVZsQqZRMIXp0O7rR7qjkj+cPvBHLgBqi960QCoo/PH2/6ZtVqKvrg==",
      "funding": [
        "https://github.com/sponsors/broofa",
        "https://github.com/sponsors/ctavan"
      ],
      "license": "MIT",
      "bin": {
-        "uuid": "dist/esm/bin/uuid"
+        "uuid": "dist-node/bin/uuid"
      }
    },
    "node_modules/vfile": {
--- a/ui/litellm-dashboard/package.json
+++ b/ui/litellm-dashboard/package.json
@ -20,12 +20,12 @@
    "knip:fix": "knip --fix"
  },
  "dependencies": {
-    "@anthropic-ai/sdk": "0.54.0",
+    "@anthropic-ai/sdk": "0.92.0",
    "@headlessui/tailwindcss": "0.2.2",
    "@heroicons/react": "1.0.6",
    "@remixicon/react": "4.9.0",
    "@tanstack/react-pacer": "0.2.0",
-    "@tanstack/react-query": "5.90.20",
+    "@tanstack/react-query": "5.100.7",
    "@tanstack/react-table": "8.21.3",
    "@tremor/react": "3.18.7",
    "@types/papaparse": "5.5.2",
@ -35,18 +35,18 @@
    "jwt-decode": "4.0.0",
    "lucide-react": "0.513.0",
    "moment": "2.30.1",
-    "next": "16.1.7",
+    "next": "16.2.4",
    "openai": "4.104.0",
    "papaparse": "5.5.3",
    "react": "18.3.1",
-    "react-copy-to-clipboard": "5.1.0",
+    "react-copy-to-clipboard": "5.1.1",
    "react-dom": "18.3.1",
    "react-json-view-lite": "2.5.0",
    "react-markdown": "9.1.0",
    "react-syntax-highlighter": "15.6.6",
    "remark-gfm": "4.0.1",
    "tailwind-merge": "3.4.0",
-    "uuid": "11.1.0"
+    "uuid": "14.0.0"
  },
  "devDependencies": {
    "@playwright/test": "1.58.1",
@ -73,7 +73,7 @@
    "eslint-plugin-unused-imports": "4.3.0",
    "jsdom": "27.4.0",
    "knip": "5.83.1",
-    "postcss": "8.5.6",
+    "postcss": "8.5.13",
    "prettier": "3.2.5",
    "tailwindcss": "3.4.19",
    "typescript": "5.9.3",
@ -88,10 +88,11 @@
    "lodash": "4.18.1",
    "ws": "8.19.0",
    "braces": "3.0.3",
-    "axios": "1.13.6"
+    "axios": "1.13.6",
+    "postcss": "8.5.13"
  },
  "engines": {
-    "node": ">=18.17.0",
+    "node": ">=20.9.0",
    "npm": ">=8.3.0"
  }
 }
--- a/ui/litellm-dashboard/src/app/(dashboard)/hooks/routingGroups/useRoutingGroups.ts
+++ b/ui/litellm-dashboard/src/app/(dashboard)/hooks/routingGroups/useRoutingGroups.ts
@ -0,0 +1,51 @@
+"use client";
+
+import useAuthorized from "@/app/(dashboard)/hooks/useAuthorized";
+import { useMutation, useQuery, useQueryClient, UseMutationResult, UseQueryResult } from "@tanstack/react-query";
+import { getRouterSettingsCall, setCallbacksCall } from "@/components/networking";
+import { createQueryKeys } from "../common/queryKeysFactory";
+import type { RoutingGroup } from "@/components/routing_groups/types";
+
+const routingGroupsKeys = createQueryKeys("routingGroups");
+
+/** View model produced by `fetchRoutingGroups` from the router settings response. */
+interface RoutingGroupsQueryData {
+  // `current_values.routing_groups`, or [] when that value is not an array.
+  routingGroups: RoutingGroup[];
+  // `current_values.routing_strategy`, or null when it is null/undefined.
+  routingStrategy: string | null;
+  // `options` of the field whose `field_name` is "routing_strategy", or [] when not an array.
+  availableStrategies: string[];
+}
+
+/**
+ * Fetch the router settings and reduce them to the routing-group view model.
+ *
+ * Every piece of the response is read defensively: a missing `current_values`
+ * is treated as an empty object, and any list that is not an array becomes [].
+ */
+const fetchRoutingGroups = async (accessToken: string): Promise<RoutingGroupsQueryData> => {
+  const settings = await getRouterSettingsCall(accessToken);
+  const current = settings?.current_values ?? {};
+
+  // The selectable strategies are the `options` of the `routing_strategy` field descriptor.
+  const fieldList = Array.isArray(settings?.fields) ? settings.fields : [];
+  const strategyField = fieldList.find((field: any) => field?.field_name === "routing_strategy");
+
+  const routingGroups = Array.isArray(current.routing_groups) ? current.routing_groups : [];
+  const routingStrategy = current.routing_strategy ?? null;
+  const availableStrategies = Array.isArray(strategyField?.options) ? strategyField.options : [];
+
+  return { routingGroups, routingStrategy, availableStrategies };
+};
+
+/**
+ * Query hook exposing the configured routing groups, the current top-level
+ * strategy and the list of available strategies.
+ */
+export const useRoutingGroups = (): UseQueryResult<RoutingGroupsQueryData> => {
+  const { accessToken, userId, userRole } = useAuthorized();
+
+  // The query stays disabled until the token, user id and role are all present,
+  // which is what makes the non-null assertion on `accessToken` below acceptable.
+  const isReady = Boolean(accessToken && userId && userRole);
+  const loadGroups = () => fetchRoutingGroups(accessToken!);
+
+  return useQuery<RoutingGroupsQueryData>({
+    queryKey: routingGroupsKeys.lists(),
+    queryFn: loadGroups,
+    enabled: isReady,
+  });
+};
+
+/**
+ * Mutation hook that saves the complete list of routing groups under
+ * `router_settings.routing_groups` and invalidates the routing-groups list
+ * query once the save succeeds.
+ */
+export const useSaveRoutingGroups = (): UseMutationResult<unknown, Error, RoutingGroup[]> => {
+  const { accessToken } = useAuthorized();
+  const queryClient = useQueryClient();
+
+  // The whole list is sent on every save; callers pass the full next state.
+  const persistGroups = (routingGroups: RoutingGroup[]) => {
+    const payload = { router_settings: { routing_groups: routingGroups } };
+    return setCallbacksCall(accessToken!, payload);
+  };
+
+  const refreshGroups = () => {
+    queryClient.invalidateQueries({ queryKey: routingGroupsKeys.lists() });
+  };
+
+  return useMutation({ mutationFn: persistGroups, onSuccess: refreshGroups });
+};
--- a/ui/litellm-dashboard/src/components/general_settings.tsx
+++ b/ui/litellm-dashboard/src/components/general_settings.tsx
@ -24,6 +24,7 @@ import { TrashIcon, CheckCircleIcon } from "@heroicons/react/outline";

 import RouterSettings from "./router_settings";
 import Fallbacks from "./Settings/RouterSettings/Fallbacks/Fallbacks";
+import RoutingGroups from "./routing_groups";
 interface GeneralSettingsPageProps {
  accessToken: string | null;
  userRole: string | null;
@ -110,8 +111,9 @@ const GeneralSettings: React.FC<GeneralSettingsPageProps> = ({ accessToken, user
      <TabGroup className="h-[75vh] w-full">
        <TabList variant="line" defaultValue="1" className="px-8 pt-4">
          <Tab value="1">Loadbalancing</Tab>
-          <Tab value="2">Fallbacks</Tab>
-          <Tab value="3">General</Tab>
+          <Tab value="2">Routing Groups</Tab>
+          <Tab value="3">Fallbacks</Tab>
+          <Tab value="4">General</Tab>
        </TabList>
        <TabPanels className="px-8 py-6">
          <TabPanel>
@ -122,6 +124,9 @@ const GeneralSettings: React.FC<GeneralSettingsPageProps> = ({ accessToken, user
              modelData={modelData}
            />
          </TabPanel>
+          <TabPanel>
+            <RoutingGroups />
+          </TabPanel>
          <TabPanel>
            <Fallbacks
              accessToken={accessToken}
--- a/ui/litellm-dashboard/src/components/routing_groups/RoutingGroupModal.tsx
+++ b/ui/litellm-dashboard/src/components/routing_groups/RoutingGroupModal.tsx
@ -0,0 +1,194 @@
+"use client";
+
+import React, { useMemo } from "react";
+import { Form, Input, Modal, Select, Space, Typography } from "antd";
+import type { RoutingGroup, RoutingStrategy } from "./types";
+
+const { Text, Paragraph } = Typography;
+
+/** Props of the create/edit routing group modal. */
+interface RoutingGroupModalProps {
+  // Forwarded to the antd Modal `open` prop.
+  open: boolean;
+  // Selects title and OK-button text; in "edit" mode the group name input is disabled.
+  mode: "create" | "edit";
+  // Group used to seed the form's initial values; when null the form starts from defaults.
+  initialValue: RoutingGroup | null;
+  // Options of the strategy select; the first entry is the default strategy for a new group.
+  availableStrategies: string[];
+  // Help text shown under the strategy select, keyed by strategy name.
+  strategyDescriptions: Record<string, string>;
+  // Options of the models multi-select.
+  modelOptions: string[];
+  // Names checked (case-insensitively) for duplicates, excluding the name of `initialValue`.
+  existingGroupNames: string[];
+  // Called when the modal is cancelled.
+  onClose: () => void;
+  // Called with the assembled group once the form has validated.
+  onSubmit: (group: RoutingGroup) => Promise<void> | void;
+  // Forwarded to the Modal `confirmLoading` prop.
+  saving?: boolean;
+}
+
+/** Raw values held by the modal's form, before they are turned into a RoutingGroup. */
+interface FormValues {
+  group_name: string;
+  models: string[];
+  routing_strategy: RoutingStrategy | string;
+  // JSON text typed by the user; it is parsed into an object on submit.
+  routing_strategy_args?: string;
+}
+
+// Strategies for which the "Strategy Arguments (JSON)" field is shown and parsed on submit.
+const STRATEGIES_WITH_ARGS = new Set<string>(["latency-based-routing", "usage-based-routing"]);
+
+// Group names may contain only letters, digits, dot, underscore and dash.
+const GROUP_NAME_PATTERN = /^[A-Za-z0-9._-]+$/;
+// Upper bound on the group name length enforced by the form rule.
+const GROUP_NAME_MAX_LENGTH = 64;
+
+const RoutingGroupModal: React.FC<RoutingGroupModalProps> = ({
+  open,
+  mode,
+  initialValue,
+  availableStrategies,
+  strategyDescriptions,
+  modelOptions,
+  existingGroupNames,
+  onClose,
+  onSubmit,
+  saving,
+}) => {
+  const [form] = Form.useForm<FormValues>();
+  const selectedStrategy = Form.useWatch("routing_strategy", form);
+
+  const initialValues: FormValues = {
+    group_name: initialValue?.group_name ?? "",
+    models: initialValue?.models ?? [],
+    routing_strategy: initialValue?.routing_strategy ?? availableStrategies[0] ?? "simple-shuffle",
+    routing_strategy_args: initialValue?.routing_strategy_args
+      ? JSON.stringify(initialValue.routing_strategy_args, null, 2)
+      : "",
+  };
+
+  const reservedNames = useMemo(() => {
+    const others = existingGroupNames.filter((n) => n !== initialValue?.group_name);
+    return new Set(others.map((n) => n.toLowerCase()));
+  }, [existingGroupNames, initialValue]);
+
+  /**
+   * Validate the form, parse the optional strategy arguments and hand the
+   * resulting group to `onSubmit`.
+   *
+   * Strategy arguments are only read for strategies in STRATEGIES_WITH_ARGS.
+   * They must parse to a JSON object (or `null`); anything else is reported
+   * on the field and the submission is aborted.
+   */
+  const handleSubmit = async () => {
+    let values: FormValues;
+    try {
+      values = await form.validateFields();
+    } catch {
+      // Validation failed and the form already shows the field errors.
+      // Returning here keeps Modal's onOk from leaking an unhandled promise rejection.
+      return;
+    }
+
+    const rejectArgs = (message: string) =>
+      form.setFields([
+        {
+          name: "routing_strategy_args",
+          errors: [message],
+        },
+      ]);
+
+    const rawArgs = values.routing_strategy_args?.trim() ?? "";
+    let parsedArgs: Record<string, unknown> | null = null;
+    if (rawArgs && STRATEGIES_WITH_ARGS.has(String(values.routing_strategy))) {
+      let candidate: unknown;
+      try {
+        candidate = JSON.parse(rawArgs);
+      } catch {
+        rejectArgs("Must be valid JSON");
+        return;
+      }
+      // JSON.parse also accepts scalars and arrays (`42`, `"x"`, `[1]`), which are
+      // not a valid argument map. `null` is still allowed and means "no arguments".
+      if (candidate !== null && (typeof candidate !== "object" || Array.isArray(candidate))) {
+        rejectArgs("Must be a JSON object");
+        return;
+      }
+      parsedArgs = candidate as Record<string, unknown> | null;
+    }
+
+    await onSubmit({
+      group_name: values.group_name.trim(),
+      models: values.models,
+      routing_strategy: values.routing_strategy,
+      routing_strategy_args: parsedArgs,
+    });
+  };
+
+  return (
+    <Modal
+      title={mode === "create" ? "Create Routing Group" : `Edit ${initialValue?.group_name ?? ""}`}
+      open={open}
+      onCancel={onClose}
+      onOk={handleSubmit}
+      okText={mode === "create" ? "Create Group" : "Save Changes"}
+      cancelText="Cancel"
+      confirmLoading={saving}
+      destroyOnClose
+      width={560}
+    >
+      <Form<FormValues>
+        key={mode === "edit" ? `edit-${initialValue?.group_name ?? ""}` : "create"}
+        form={form}
+        layout="vertical"
+        preserve={false}
+        initialValues={initialValues}
+      >
+        <Form.Item
+          label="Group Name"
+          name="group_name"
+          rules={[
+            { required: true, message: "Group name is required" },
+            { max: GROUP_NAME_MAX_LENGTH, message: `Must be ${GROUP_NAME_MAX_LENGTH} characters or fewer` },
+            {
+              pattern: GROUP_NAME_PATTERN,
+              message: "Only letters, numbers, dot, underscore, and dash are allowed",
+            },
+            {
+              validator: (_, value: string) => {
+                if (!value) return Promise.resolve();
+                if (reservedNames.has(value.trim().toLowerCase())) {
+                  return Promise.reject(new Error("A group with this name already exists"));
+                }
+                return Promise.resolve();
+              },
+            },
+          ]}
+          extra="Use this name as the model in API calls — LiteLLM routes the request to one of the group's models."
+        >
+          <Input placeholder="fast-chat" disabled={mode === "edit"} />
+        </Form.Item>
+
+        <Form.Item
+          label="Models"
+          name="models"
+          rules={[{ required: true, message: "Select at least one model" }]}
+          extra="Models from your model list that this group routes between."
+        >
+          <Select
+            mode="multiple"
+            allowClear
+            placeholder="Select models"
+            options={modelOptions.map((m) => ({ label: m, value: m }))}
+            optionFilterProp="label"
+          />
+        </Form.Item>
+
+        <Form.Item
+          label="Routing Strategy"
+          name="routing_strategy"
+          rules={[{ required: true, message: "Strategy is required" }]}
+        >
+          <Select
+            options={availableStrategies.map((s) => ({ label: s, value: s }))}
+            placeholder="Select strategy"
+          />
+        </Form.Item>
+
+        {selectedStrategy && strategyDescriptions[selectedStrategy] && (
+          <Paragraph className="text-xs text-gray-500 -mt-2 mb-4">
+            {strategyDescriptions[selectedStrategy]}
+          </Paragraph>
+        )}
+
+        {STRATEGIES_WITH_ARGS.has(String(selectedStrategy)) && (
+          <Form.Item
+            label="Strategy Arguments (JSON)"
+            name="routing_strategy_args"
+            extra={
+              selectedStrategy === "latency-based-routing"
+                ? "Example: { \"ttl\": 3600, \"lowest_latency_buffer\": 0 }"
+                : "Example: { \"ttl\": 60 }"
+            }
+          >
+            <Input.TextArea
+              rows={4}
+              placeholder='{ "ttl": 3600 }'
+              className="font-mono text-xs"
+            />
+          </Form.Item>
+        )}
+
+        <Space direction="vertical" className="w-full mt-2">
+          <Text type="secondary" className="text-xs">
+            Models not claimed by an explicit group fall through to the proxy&apos;s top-level routing
+            strategy.
+          </Text>
+        </Space>
+      </Form>
+    </Modal>
+  );
+};
+
+export default RoutingGroupModal;
--- a/ui/litellm-dashboard/src/components/routing_groups/RoutingGroupsTable.tsx
+++ b/ui/litellm-dashboard/src/components/routing_groups/RoutingGroupsTable.tsx
@ -0,0 +1,241 @@
+"use client";
+
+import React, { useState } from "react";
+import { Flex, Table, Tabs, Tag, Tooltip, Typography, Button } from "antd";
+import type { ColumnsType } from "antd/es/table";
+import { BranchesOutlined, DeleteOutlined, EditOutlined, CodeOutlined } from "@ant-design/icons";
+import type { RoutingGroup } from "./types";
+
+const { Text, Paragraph } = Typography;
+
+/** Props of the routing groups table. */
+interface RoutingGroupsTableProps {
+  // Rows to render; `group_name` is used as the row key.
+  groups: RoutingGroup[];
+  // Forwarded to the antd Table `loading` prop.
+  loading?: boolean;
+  // Called with the row's group when its edit button is clicked.
+  onEdit: (group: RoutingGroup) => void;
+  // Called with the row's group when its delete button is clicked.
+  onDelete: (group: RoutingGroup) => void;
+  // Base URL used in the request snippets; when blank, the page origin or a placeholder is used.
+  proxyBaseUrl?: string;
+}
+
+/**
+ * Map a routing strategy identifier to its display label.
+ * Unknown identifiers are returned unchanged.
+ */
+const formatStrategyLabel = (strategy: string): string => {
+  const labels = new Map<string, string>([
+    ["simple-shuffle", "Simple Shuffle"],
+    ["least-busy", "Least Busy"],
+    ["usage-based-routing", "Usage Based"],
+    ["latency-based-routing", "Latency Based"],
+  ]);
+  return labels.get(strategy) ?? strategy;
+};
+
+/**
+ * Pick the base URL shown in the request snippets.
+ *
+ * Order of preference: the configured `proxyBaseUrl`, then the page origin,
+ * then a literal placeholder. The configured value is trimmed and stripped of
+ * trailing slashes so that appending "/v1/..." never yields "//v1/...".
+ */
+const resolveBaseUrl = (proxyBaseUrl?: string): string => {
+  const configured = (proxyBaseUrl ?? "").trim().replace(/\/+$/, "");
+  if (configured) return configured;
+  if (typeof window !== "undefined" && window.location?.origin) return window.location.origin;
+  return "<your_proxy_base_url>";
+};
+
+/** First model listed in the group, or a placeholder when the group lists none. */
+const exampleModel = (group: RoutingGroup): string => {
+  const [firstModel] = group.models;
+  return firstModel ?? "<your-model>";
+};
+
+/** Build the cURL example for a chat completion request against `baseUrl`. */
+const buildCurlSnippet = (group: RoutingGroup, baseUrl: string): string => {
+  const endpoint = `${baseUrl}/v1/chat/completions`;
+  const model = exampleModel(group);
+  // One entry per output line; `\\` renders as the shell line-continuation backslash.
+  const lines = [
+    `curl -X POST '${endpoint}' \\`,
+    `  -H 'Content-Type: application/json' \\`,
+    `  -H 'Authorization: Bearer $LITELLM_API_KEY' \\`,
+    `  -d '{`,
+    `    "model": "${model}",`,
+    `    "messages": [{"role": "user", "content": "Hello!"}]`,
+    `  }'`,
+  ];
+  return lines.join("\n");
+};
+
+/** Build the Python (OpenAI SDK) example for a chat completion request against `baseUrl`. */
+const buildPythonSnippet = (group: RoutingGroup, baseUrl: string): string => {
+  const model = exampleModel(group);
+  // One entry per output line; empty entries are the blank lines of the script.
+  const lines = [
+    `from openai import OpenAI`,
+    ``,
+    `client = OpenAI(`,
+    `    api_key="$LITELLM_API_KEY",`,
+    `    base_url="${baseUrl}",`,
+    `)`,
+    ``,
+    `response = client.chat.completions.create(`,
+    `    model="${model}",`,
+    `    messages=[{"role": "user", "content": "Hello!"}],`,
+    `)`,
+    ``,
+    `print(response)`,
+  ];
+  return lines.join("\n");
+};
+
+/** Build the JavaScript (OpenAI SDK) example for a chat completion request against `baseUrl`. */
+const buildJsSnippet = (group: RoutingGroup, baseUrl: string): string => {
+  const model = exampleModel(group);
+  // One entry per output line; empty entries are the blank lines of the script.
+  const lines = [
+    `import OpenAI from "openai";`,
+    ``,
+    `const client = new OpenAI({`,
+    `  apiKey: process.env.LITELLM_API_KEY,`,
+    `  baseURL: "${baseUrl}",`,
+    `});`,
+    ``,
+    `const response = await client.chat.completions.create({`,
+    `  model: "${model}",`,
+    `  messages: [{ role: "user", content: "Hello!" }],`,
+    `});`,
+    ``,
+    `console.log(response);`,
+  ];
+  return lines.join("\n");
+};
+
+interface RoutingGroupSnippetProps {
+  group: RoutingGroup;
+  baseUrl: string;
+}
+
+const SNIPPET_BLOCK_STYLE: React.CSSProperties = {
+  backgroundColor: "#111827",
+  color: "#f3f4f6",
+  borderRadius: 6,
+  padding: 16,
+  fontSize: 12,
+  whiteSpace: "pre",
+  overflowX: "auto",
+};
+
+/**
+ * Tabbed code samples (cURL / Python / JavaScript) for one routing group,
+ * with a copy control that copies the snippet of the active tab.
+ */
+const RoutingGroupSnippet: React.FC<RoutingGroupSnippetProps> = ({ group, baseUrl }) => {
+  const snippets = {
+    curl: buildCurlSnippet(group, baseUrl),
+    python: buildPythonSnippet(group, baseUrl),
+    javascript: buildJsSnippet(group, baseUrl),
+  } as const;
+  type SnippetKey = keyof typeof snippets;
+  const [activeKey, setActiveKey] = useState<SnippetKey>("curl");
+
+  // Build one tab definition whose body is the snippet rendered as a code block.
+  const toTab = (key: SnippetKey, label: string) => ({
+    key,
+    label,
+    children: (
+      <Paragraph code className="!mb-0" style={SNIPPET_BLOCK_STYLE}>
+        {snippets[key]}
+      </Paragraph>
+    ),
+  });
+  const tabs = [
+    toTab("curl", "cURL"),
+    toTab("python", "Python (OpenAI SDK)"),
+    toTab("javascript", "JavaScript (OpenAI SDK)"),
+  ];
+
+  // An empty Paragraph used only for its copy button; it copies the active snippet.
+  const copyControl = (
+    <Paragraph
+      copyable={{ text: snippets[activeKey], tooltips: ["Copy", "Copied"] }}
+      className="!mb-0"
+    />
+  );
+
+  const handleTabChange = (nextKey: string) => setActiveKey(nextKey as SnippetKey);
+
+  return (
+    <Tabs
+      size="small"
+      activeKey={activeKey}
+      onChange={handleTabChange}
+      items={tabs}
+      tabBarExtraContent={copyControl}
+    />
+  );
+};
+
+/**
+ * Table listing routing groups with per-row edit and delete actions.
+ * Each row can be expanded to show a short explanation of the group's
+ * strategy and copyable request snippets.
+ */
+const RoutingGroupsTable: React.FC<RoutingGroupsTableProps> = ({
+  groups,
+  loading,
+  onEdit,
+  onDelete,
+  proxyBaseUrl,
+}) => {
+  // Controlled expansion state; keys are `group_name` values (the table's rowKey).
+  const [expandedRowKeys, setExpandedRowKeys] = useState<React.Key[]>([]);
+  // Base URL interpolated into the snippets shown in expanded rows.
+  const baseUrl = resolveBaseUrl(proxyBaseUrl);
+
+  const columns: ColumnsType<RoutingGroup> = [
+    {
+      title: "GROUP NAME",
+      dataIndex: "group_name",
+      key: "group_name",
+      render: (name: string) => (
+        <Text strong className="text-blue-600">
+          {name}
+        </Text>
+      ),
+    },
+    // One tag per model in the group.
+    {
+      title: "MODELS",
+      dataIndex: "models",
+      key: "models",
+      render: (models: string[]) => (
+        <Flex wrap="wrap" gap={4}>
+          {models.map((m) => (
+            <Tag key={m}>{m}</Tag>
+          ))}
+        </Flex>
+      ),
+    },
+    // Strategy shown with its display label rather than the raw identifier.
+    {
+      title: "STRATEGY",
+      dataIndex: "routing_strategy",
+      key: "routing_strategy",
+      render: (strategy: string) => (
+        <span className="inline-flex items-center gap-1.5">
+          <BranchesOutlined className="text-gray-400" />
+          <Text>{formatStrategyLabel(strategy)}</Text>
+        </span>
+      ),
+    },
+    // Edit/delete buttons; both delegate to the callbacks supplied by the parent.
+    {
+      title: "ACTIONS",
+      key: "actions",
+      width: 120,
+      align: "right",
+      render: (_, group) => (
+        <Flex justify="flex-end" align="center" gap={8}>
+          <Tooltip title="Edit">
+            <Button
+              type="text"
+              icon={<EditOutlined />}
+              onClick={(e) => {
+                // NOTE(review): presumably keeps the click from reaching row-level handlers — confirm.
+                e.stopPropagation();
+                onEdit(group);
+              }}
+            />
+          </Tooltip>
+          <Tooltip title="Delete">
+            <Button
+              type="text"
+              danger
+              icon={<DeleteOutlined />}
+              onClick={(e) => {
+                e.stopPropagation();
+                onDelete(group);
+              }}
+            />
+          </Tooltip>
+        </Flex>
+      ),
+    },
+  ];
+
+  return (
+    <Table<RoutingGroup>
+      rowKey="group_name"
+      columns={columns}
+      dataSource={groups}
+      loading={loading}
+      pagination={false}
+      expandable={{
+        expandedRowKeys,
+        // Store a fresh copy of the keys reported by the table.
+        onExpandedRowsChange: (keys) => setExpandedRowKeys([...keys]),
+        expandedRowRender: (group) => (
+          <div className="bg-gray-50 border border-gray-200 rounded-md p-4 my-2">
+            <Flex align="center" gap={8} className="mb-2">
+              <CodeOutlined className="text-blue-500" />
+              <Text strong>How routing works for this group</Text>
+            </Flex>
+            <Paragraph className="text-sm text-gray-600 mb-3">
+              Callers request any model in the group by name — LiteLLM picks a deployment behind the
+              scenes using the{" "}
+              <Text strong>{formatStrategyLabel(group.routing_strategy)}</Text> strategy.
+            </Paragraph>
+            <RoutingGroupSnippet group={group} baseUrl={baseUrl} />
+          </div>
+        ),
+      }}
+    />
+  );
+};
+
+export default RoutingGroupsTable;
--- a/ui/litellm-dashboard/src/components/routing_groups/index.tsx
+++ b/ui/litellm-dashboard/src/components/routing_groups/index.tsx
@ -0,0 +1,177 @@
+"use client";
+
+import React, { useMemo, useState } from "react";
+import { Button, Card, Flex, Input, Modal, Space, Typography } from "antd";
+import { PlusOutlined, ReloadOutlined, SearchOutlined } from "@ant-design/icons";
+import { useRoutingGroups, useSaveRoutingGroups } from "@/app/(dashboard)/hooks/routingGroups/useRoutingGroups";
+import { useRouterFields } from "@/app/(dashboard)/hooks/router/useRouterFields";
+import { useModelHub } from "@/app/(dashboard)/hooks/models/useModels";
+import useProxySettings from "@/app/(dashboard)/hooks/proxySettings/useProxySettings";
+import RoutingGroupsTable from "./RoutingGroupsTable";
+import RoutingGroupModal from "./RoutingGroupModal";
+import NotificationsManager from "../molecules/notifications_manager";
+import type { RoutingGroup } from "./types";
+
+const { Text } = Typography;
+
+const RoutingGroups: React.FC = () => {
+  const { data, isLoading, refetch, isFetching } = useRoutingGroups();
+  const { data: routerFields } = useRouterFields();
+  const { data: modelHub } = useModelHub();
+  const proxySettings = useProxySettings();
+  const saveMutation = useSaveRoutingGroups();
+
+  const [searchQuery, setSearchQuery] = useState("");
+  const [drawerOpen, setDrawerOpen] = useState(false);
+  const [drawerMode, setDrawerMode] = useState<"create" | "edit">("create");
+  const [editingGroup, setEditingGroup] = useState<RoutingGroup | null>(null);
+  const [deletingGroup, setDeletingGroup] = useState<RoutingGroup | null>(null);
+
+  const groups = data?.routingGroups ?? [];
+
+  const filteredGroups = useMemo(() => {
+    const q = searchQuery.trim().toLowerCase();
+    if (!q) return groups;
+    return groups.filter(
+      (g) =>
+        g.group_name.toLowerCase().includes(q) ||
+        g.routing_strategy.toLowerCase().includes(q) ||
+        g.models.some((m) => m.toLowerCase().includes(q)),
+    );
+  }, [groups, searchQuery]);
+
+  const availableStrategies = useMemo(() => {
+    if (data?.availableStrategies?.length) return data.availableStrategies;
+    const fromFields = routerFields?.fields?.find((f) => f.field_name === "routing_strategy")?.options;
+    return fromFields ?? [];
+  }, [data?.availableStrategies, routerFields]);
+
+  const strategyDescriptions = routerFields?.routing_strategy_descriptions ?? {};
+
+  const modelOptions = useMemo<string[]>(() => {
+    const records = (modelHub?.data ?? []) as Array<{ model_group?: string }>;
+    const names = records.map((r) => r.model_group).filter((n): n is string => Boolean(n));
+    return Array.from(new Set(names));
+  }, [modelHub]);
+
+  const openCreate = () => {
+    setDrawerMode("create");
+    setEditingGroup(null);
+    setDrawerOpen(true);
+  };
+
+  const openEdit = (group: RoutingGroup) => {
+    setDrawerMode("edit");
+    setEditingGroup(group);
+    setDrawerOpen(true);
+  };
+
+  // Save a created or edited group: build the full next list, persist it, then
+  // notify and close the modal. On failure the modal stays open and the error is shown.
+  const handleSubmit = async (incoming: RoutingGroup) => {
+    const isCreate = drawerMode === "create";
+
+    let next: RoutingGroup[];
+    if (isCreate) {
+      next = [...groups, incoming];
+    } else {
+      // Replace the group being edited in place, keeping the list order.
+      next = groups.map((existing) =>
+        existing.group_name === editingGroup?.group_name ? incoming : existing,
+      );
+    }
+
+    try {
+      await saveMutation.mutateAsync(next);
+      const successMessage = isCreate
+        ? `Created routing group "${incoming.group_name}"`
+        : `Updated routing group "${incoming.group_name}"`;
+      NotificationsManager.success(successMessage);
+      setDrawerOpen(false);
+    } catch (err) {
+      const failureMessage = err instanceof Error ? err.message : "Failed to save routing group";
+      NotificationsManager.error(failureMessage);
+    }
+  };
+
+  // Delete the group awaiting confirmation: persist the list without it, then
+  // notify and clear the pending deletion. On failure the dialog stays open.
+  const confirmDelete = async () => {
+    const target = deletingGroup;
+    if (!target) return;
+
+    const remaining = groups.filter(({ group_name }) => group_name !== target.group_name);
+    try {
+      await saveMutation.mutateAsync(remaining);
+      NotificationsManager.success(`Deleted routing group "${target.group_name}"`);
+      setDeletingGroup(null);
+    } catch (err) {
+      const failureMessage = err instanceof Error ? err.message : "Failed to delete routing group";
+      NotificationsManager.error(failureMessage);
+    }
+  };
+
+  return (
+    <Space direction="vertical" size={16} className="w-full">
+      <Card bodyStyle={{ padding: 16 }}>
+        <Flex justify="space-between" align="center" gap={12} className="mb-4">
+          <Input
+            allowClear
+            prefix={<SearchOutlined className="text-gray-400" />}
+            placeholder="Search groups..."
+            value={searchQuery}
+            onChange={(e) => setSearchQuery(e.target.value)}
+            className="max-w-sm"
+          />
+          <Flex align="center" gap={12}>
+            <Button
+              icon={<ReloadOutlined />}
+              onClick={() => refetch()}
+              loading={isFetching && !isLoading}
+            >
+              Refresh
+            </Button>
+            <Button type="primary" icon={<PlusOutlined />} onClick={openCreate}>
+              Create Group
+            </Button>
+            <Text type="secondary" className="text-sm whitespace-nowrap">
+              Showing {filteredGroups.length} {filteredGroups.length === 1 ? "result" : "results"}
+            </Text>
+          </Flex>
+        </Flex>
+
+        <RoutingGroupsTable
+          groups={filteredGroups}
+          loading={isLoading}
+          onEdit={openEdit}
+          onDelete={(g) => setDeletingGroup(g)}
+          proxyBaseUrl={
+            proxySettings.LITELLM_UI_API_DOC_BASE_URL?.trim() ||
+            proxySettings.PROXY_BASE_URL ||
+            ""
+          }
+        />
+      </Card>
+
+      <RoutingGroupModal
+        open={drawerOpen}
+        mode={drawerMode}
+        initialValue={editingGroup}
+        availableStrategies={availableStrategies}
+        strategyDescriptions={strategyDescriptions}
+        modelOptions={modelOptions}
+        existingGroupNames={groups.map((g) => g.group_name)}
+        onClose={() => setDrawerOpen(false)}
+        onSubmit={handleSubmit}
+        saving={saveMutation.isPending}
+      />
+
+      <Modal
+        open={Boolean(deletingGroup)}
+        title="Delete routing group?"
+        okText="Delete"
+        okButtonProps={{ danger: true, loading: saveMutation.isPending }}
+        cancelText="Cancel"
+        onOk={confirmDelete}
+        onCancel={() => setDeletingGroup(null)}
+      >
+        <Text>
+          Models in <Text strong>{deletingGroup?.group_name}</Text> will fall back to the proxy&apos;s
+          top-level routing strategy. This cannot be undone.
+        </Text>
+      </Modal>
+    </Space>
+  );
+};
+
+export default RoutingGroups;
--- a/ui/litellm-dashboard/src/components/routing_groups/types.ts
+++ b/ui/litellm-dashboard/src/components/routing_groups/types.ts
@ -0,0 +1,12 @@
+/** Routing strategy identifiers the dashboard knows by name. */
+export type RoutingStrategy =
+  | "simple-shuffle"
+  | "least-busy"
+  | "usage-based-routing"
+  | "latency-based-routing";
+
+/** A named set of models together with the strategy used to route between them. */
+export interface RoutingGroup {
+  group_name: string;
+  models: string[];
+  // `| string` keeps identifiers outside the RoutingStrategy union assignable.
+  routing_strategy: RoutingStrategy | string;
+  // Optional strategy arguments; may be omitted or null.
+  routing_strategy_args?: Record<string, unknown> | null;
+}
--- a/ui/litellm-dashboard/tests/setupTests.ts
+++ b/ui/litellm-dashboard/tests/setupTests.ts
@ -3,6 +3,91 @@ import { cleanup } from "@testing-library/react";
 import React from "react";
 import { afterEach, vi } from "vitest";

+/**
+ * Install an in-memory `localStorage` for tests when the environment's own
+ * one is missing `getItem`/`clear`.
+ *
+ * The Storage prototype methods are replaced with Map-backed versions and a
+ * fresh object inheriting from that prototype is exposed as
+ * `window.localStorage` and `globalThis.localStorage`.
+ */
+const ensureTestLocalStorage = () => {
+  // Nothing to patch without a window or a Storage constructor.
+  if (typeof window === "undefined" || typeof window.Storage === "undefined") {
+    return;
+  }
+
+  // An existing localStorage exposing getItem and clear is left alone.
+  if (typeof window.localStorage?.getItem === "function" && typeof window.localStorage?.clear === "function") {
+    return;
+  }
+
+  // Backing data per Storage object, created lazily on first access.
+  const storageStores = new WeakMap<Storage, Map<string, string>>();
+  const storagePrototype = window.Storage.prototype;
+  const getStore = (storage: Storage) => {
+    let store = storageStores.get(storage);
+    if (store === undefined) {
+      store = new Map<string, string>();
+      storageStores.set(storage, store);
+    }
+    return store;
+  };
+
+  // Replace the methods on the shared prototype, so every object inheriting
+  // from it uses the Map-backed implementation. Keys and values are coerced
+  // with String().
+  Object.defineProperties(storagePrototype, {
+    getItem: {
+      configurable: true,
+      writable: true,
+      value(this: Storage, key: string) {
+        const store = getStore(this);
+        const normalizedKey = String(key);
+        // Missing keys yield null rather than undefined.
+        return store.has(normalizedKey) ? store.get(normalizedKey)! : null;
+      },
+    },
+    setItem: {
+      configurable: true,
+      writable: true,
+      value(this: Storage, key: string, value: string) {
+        const store = getStore(this);
+        store.set(String(key), String(value));
+      },
+    },
+    removeItem: {
+      configurable: true,
+      writable: true,
+      value(this: Storage, key: string) {
+        const store = getStore(this);
+        store.delete(String(key));
+      },
+    },
+    clear: {
+      configurable: true,
+      writable: true,
+      value(this: Storage) {
+        const store = getStore(this);
+        store.clear();
+      },
+    },
+    key: {
+      configurable: true,
+      writable: true,
+      value(this: Storage, index: number) {
+        const store = getStore(this);
+        // Keys are reported in Map insertion order; out-of-range indexes yield null.
+        return Array.from(store.keys())[index] ?? null;
+      },
+    },
+  });
+
+  // The replacement object inherits the patched methods; `length` is defined
+  // on the object itself as a getter over its backing Map.
+  const localStorage = Object.create(storagePrototype);
+  storageStores.set(localStorage, new Map<string, string>());
+  Object.defineProperty(localStorage, "length", {
+    configurable: true,
+    get() {
+      return getStore(localStorage).size;
+    },
+  });
+
+  // Expose the same object on both window and globalThis.
+  Object.defineProperty(window, "localStorage", {
+    configurable: true,
+    value: localStorage,
+  });
+  Object.defineProperty(globalThis, "localStorage", {
+    configurable: true,
+    value: localStorage,
+  });
+};
+
+ensureTestLocalStorage();
+
 // Global mock for NotificationManager to prevent React rendering issues in tests
 // This avoids "window is not defined" errors when notifications try to render
 // after test environment is torn down
@ -31,7 +116,15 @@ vi.mock("@tremor/react", async (importOriginal) => {
      return React.createElement(React.Fragment, null, children);
    },
    // Render as a plain checkbox so toggle interactions are testable without Tremor internals
-    Switch: ({ checked, onChange, className }: { checked?: boolean; onChange?: (v: boolean) => void; className?: string }) =>
+    Switch: ({
+      checked,
+      onChange,
+      className,
+    }: {
+      checked?: boolean;
+      onChange?: (v: boolean) => void;
+      className?: string;
+    }) =>
      React.createElement("input", {
        type: "checkbox",
        role: "switch",
--- a/ui/litellm-dashboard/tsconfig.json
+++ b/ui/litellm-dashboard/tsconfig.json
@ -14,7 +14,7 @@
    "moduleResolution": "bundler",
    "resolveJsonModule": true,
    "isolatedModules": true,
-    "jsx": "preserve",
+    "jsx": "react-jsx",
    "incremental": true,
    "plugins": [
      {
--- a/ui/litellm-dashboard/tsconfig.tsbuildinfo
+++ b/ui/litellm-dashboard/tsconfig.tsbuildinfo
--- a/uv.lock
+++ b/uv.lock