Guardrails - add logging to all unified_guardrails + link to custom code guardrail templates (#20900)

* feat(guardrail_hooks/): add guardrail logging to all unified guardrails

ensures unified guardrails use the 'log_guardrail_information' decorator for logging

* fix(custom_guardrail.py): don't log inputs on guardrail response - just emit state

* refactor: don't double log bedrock guardrail information

* feat: add in-product nudges for contributing + trying community custom code guardrails

allows users to contribute / share custom code guardrails
This commit is contained in:
Krish Dholakia 2026-02-10 15:13:54 -08:00 committed by GitHub
parent a6f90586ac
commit 10d891a365
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
53 changed files with 418 additions and 116 deletions

View File

@ -2277,6 +2277,7 @@ jobs:
- run: python ./tests/code_coverage_tests/router_code_coverage.py
- run: python ./tests/code_coverage_tests/test_chat_completion_imports.py
- run: python ./tests/code_coverage_tests/info_log_check.py
- run: python ./tests/code_coverage_tests/check_guardrail_apply_decorator.py
- run: python ./tests/code_coverage_tests/test_ban_set_verbose.py
- run: python ./tests/code_coverage_tests/code_qa_check_tests.py
- run: python ./tests/code_coverage_tests/check_get_model_cost_key_performance.py

View File

@ -616,6 +616,7 @@ class CustomGuardrail(CustomLogger):
end_time: Optional[float] = None,
duration: Optional[float] = None,
event_type: Optional[GuardrailEventHooks] = None,
original_inputs: Optional[Dict] = None,
):
"""
Add StandardLoggingGuardrailInformation to the request data
@ -625,6 +626,17 @@ class CustomGuardrail(CustomLogger):
# Convert None to empty dict to satisfy type requirements
guardrail_response = {} if response is None else response
# For apply_guardrail functions in custom_code_guardrail scenario,
# simplify the logged response to "allow", "deny", or "mask"
if original_inputs is not None and isinstance(response, dict):
# Check if inputs were modified by comparing them
if self._inputs_were_modified(original_inputs, response):
guardrail_response = "mask"
else:
guardrail_response = "allow"
verbose_logger.debug(f"Guardrail response: {response}")
self.add_standard_logging_guardrail_information_to_request_data(
guardrail_json_response=guardrail_response,
request_data=request_data,
@ -650,8 +662,14 @@ class CustomGuardrail(CustomLogger):
This gets logged on downstream Langfuse, DataDog, etc.
"""
# For custom_code_guardrail scenario, log as "deny" instead of full exception
# Check if this is from custom_code_guardrail by checking the class name
guardrail_response: Union[Exception, str] = e
if "CustomCodeGuardrail" in self.__class__.__name__:
guardrail_response = "deny"
self.add_standard_logging_guardrail_information_to_request_data(
guardrail_json_response=e,
guardrail_json_response=guardrail_response,
request_data=request_data,
guardrail_status="guardrail_failed_to_respond",
duration=duration,
@ -661,6 +679,25 @@ class CustomGuardrail(CustomLogger):
)
raise e
def _inputs_were_modified(self, original_inputs: Dict, response: Dict) -> bool:
    """
    Report whether the guardrail response differs from the inputs it was given.

    Values are looked up with ``.get`` on both sides, so a key that is absent
    from one dictionary compares equal to an explicit ``None`` in the other.

    Returns True when any key's value differs (mask scenario), False when every
    key matches (allow scenario).
    """
    # Union of keys so additions and removals are both considered
    candidate_keys = set(original_inputs.keys()) | set(response.keys())

    # Any single mismatching value means the content was modified
    return any(
        original_inputs.get(name) != response.get(name) for name in candidate_keys
    )
def mask_content_in_string(
self,
content_string: str,
@ -768,6 +805,12 @@ def log_guardrail_information(func):
self: CustomGuardrail = args[0]
request_data: dict = kwargs.get("data") or kwargs.get("request_data") or {}
event_type = _infer_event_type_from_function_name(func.__name__)
# Store original inputs for comparison (for apply_guardrail functions)
original_inputs = None
if func.__name__ == "apply_guardrail" and "inputs" in kwargs:
original_inputs = kwargs.get("inputs")
try:
response = await func(*args, **kwargs)
return self._process_response(
@ -777,6 +820,7 @@ def log_guardrail_information(func):
end_time=datetime.now().timestamp(),
duration=(datetime.now() - start_time).total_seconds(),
event_type=event_type,
original_inputs=original_inputs,
)
except Exception as e:
return self._process_error(
@ -794,6 +838,12 @@ def log_guardrail_information(func):
self: CustomGuardrail = args[0]
request_data: dict = kwargs.get("data") or kwargs.get("request_data") or {}
event_type = _infer_event_type_from_function_name(func.__name__)
# Store original inputs for comparison (for apply_guardrail functions)
original_inputs = None
if func.__name__ == "apply_guardrail" and "inputs" in kwargs:
original_inputs = kwargs.get("inputs")
try:
response = func(*args, **kwargs)
return self._process_response(
@ -801,6 +851,7 @@ def log_guardrail_information(func):
request_data=request_data,
duration=(datetime.now() - start_time).total_seconds(),
event_type=event_type,
original_inputs=original_inputs,
)
except Exception as e:
return self._process_error(

View File

@ -795,9 +795,9 @@ class BedrockGuardrail(CustomGuardrail, BaseAWSLLM):
#########################################################
########## 1. Make the Bedrock API request ##########
#########################################################
bedrock_guardrail_response: Optional[
Union[BedrockGuardrailResponse, str]
] = None
bedrock_guardrail_response: Optional[Union[BedrockGuardrailResponse, str]] = (
None
)
try:
bedrock_guardrail_response = await self.make_bedrock_api_request(
source="INPUT", messages=filtered_messages, request_data=data
@ -867,9 +867,9 @@ class BedrockGuardrail(CustomGuardrail, BaseAWSLLM):
#########################################################
########## 1. Make the Bedrock API request ##########
#########################################################
bedrock_guardrail_response: Optional[
Union[BedrockGuardrailResponse, str]
] = None
bedrock_guardrail_response: Optional[Union[BedrockGuardrailResponse, str]] = (
None
)
try:
bedrock_guardrail_response = await self.make_bedrock_api_request(
source="INPUT", messages=filtered_messages, request_data=data

View File

@ -35,7 +35,10 @@ from typing import TYPE_CHECKING, Any, Dict, Literal, Optional, Type, cast
from fastapi import HTTPException
from litellm._logging import verbose_proxy_logger
from litellm.integrations.custom_guardrail import CustomGuardrail
from litellm.integrations.custom_guardrail import (
CustomGuardrail,
log_guardrail_information,
)
from litellm.types.guardrails import GuardrailEventHooks
from litellm.types.proxy.guardrails.guardrail_hooks.base import GuardrailConfigModel
from litellm.types.utils import GenericGuardrailAPIInputs
@ -179,6 +182,7 @@ class CustomCodeGuardrail(CustomGuardrail):
self._compile_error = f"Failed to compile custom code: {e}"
raise CustomCodeCompilationError(self._compile_error) from e
@log_guardrail_information
async def apply_guardrail(
self,
inputs: GenericGuardrailAPIInputs,

View File

@ -23,7 +23,10 @@ import httpx
import litellm
from litellm._logging import verbose_proxy_logger
from litellm.caching.caching import DualCache
from litellm.integrations.custom_guardrail import CustomGuardrail
from litellm.integrations.custom_guardrail import (
CustomGuardrail,
log_guardrail_information,
)
from litellm.llms.custom_httpx.http_handler import (
get_async_httpx_client,
httpxSpecialProvider,
@ -483,6 +486,7 @@ class EnkryptAIGuardrails(CustomGuardrail):
request_data=data, guardrail_name=self.guardrail_name
)
@log_guardrail_information
async def apply_guardrail(
self,
inputs: "GenericGuardrailAPIInputs",

View File

@ -10,7 +10,10 @@ from typing import TYPE_CHECKING, Any, Dict, Literal, Optional
from litellm._logging import verbose_proxy_logger
from litellm.exceptions import GuardrailRaisedException
from litellm.integrations.custom_guardrail import CustomGuardrail
from litellm.integrations.custom_guardrail import (
CustomGuardrail,
log_guardrail_information,
)
from litellm.llms.custom_httpx.http_handler import (
get_async_httpx_client,
httpxSpecialProvider,
@ -150,6 +153,7 @@ class GenericGuardrailAPI(CustomGuardrail):
return result_metadata
@log_guardrail_information
async def apply_guardrail(
self,
inputs: GenericGuardrailAPIInputs,

View File

@ -9,7 +9,8 @@ from fastapi import HTTPException
from litellm._logging import verbose_proxy_logger
from litellm.integrations.custom_guardrail import (
CustomGuardrail,
ModifyResponseException
ModifyResponseException,
log_guardrail_information,
)
from litellm.litellm_core_utils.safe_json_dumps import safe_dumps
from litellm.litellm_core_utils.safe_json_loads import safe_json_loads
@ -108,7 +109,9 @@ class GraySwanGuardrail(CustomGuardrail):
self.categories = categories
self.policy_id = policy_id
self.fail_open = True if fail_open is None else bool(fail_open)
self.guardrail_timeout = 30.0 if guardrail_timeout is None else float(guardrail_timeout)
self.guardrail_timeout = (
30.0 if guardrail_timeout is None else float(guardrail_timeout)
)
# Streaming configuration
self.streaming_end_of_stream_only = streaming_end_of_stream_only
@ -155,6 +158,7 @@ class GraySwanGuardrail(CustomGuardrail):
# Unified Guardrail Interface (works with ALL endpoints automatically)
# ------------------------------------------------------------------
@log_guardrail_information
async def apply_guardrail(
self,
inputs: GenericGuardrailAPIInputs,
@ -208,7 +212,9 @@ class GraySwanGuardrail(CustomGuardrail):
messages = [{"role": role, "content": text} for text in texts]
# Get dynamic params from request metadata
dynamic_body = self.get_guardrail_dynamic_request_body_params(request_data) or {}
dynamic_body = (
self.get_guardrail_dynamic_request_body_params(request_data) or {}
)
if dynamic_body:
verbose_proxy_logger.debug(
"Gray Swan Guardrail: dynamic extra_body=%s", safe_dumps(dynamic_body)
@ -271,12 +277,12 @@ class GraySwanGuardrail(CustomGuardrail):
async def run_grayswan_guardrail(self, payload: dict) -> Dict[str, Any]:
"""
Run the GraySwan guardrail on a payload.
This is a legacy method for testing purposes.
Args:
payload: The payload to scan
Returns:
Dict containing the GraySwan API response
"""
@ -293,11 +299,11 @@ class GraySwanGuardrail(CustomGuardrail):
) -> None:
"""
Legacy method for processing GraySwan API responses.
This method is maintained for backward compatibility with existing tests.
It handles the test scenarios where responses need to be processed with
knowledge of the request context (pre/during/post call hooks).
Args:
response_json: Response from GraySwan API
data: Optional request data (for passthrough exceptions)
@ -365,7 +371,10 @@ class GraySwanGuardrail(CustomGuardrail):
)
# If hook_type is provided and in pre/during call, raise exception
if hook_type in [GuardrailEventHooks.pre_call, GuardrailEventHooks.during_call]:
if hook_type in [
GuardrailEventHooks.pre_call,
GuardrailEventHooks.during_call,
]:
# Raise ModifyResponseException to short-circuit LLM call
if data is None:
data = {}
@ -540,7 +549,9 @@ class GraySwanGuardrail(CustomGuardrail):
if isinstance(litellm_metadata, dict) and litellm_metadata:
cleaned_litellm_metadata = dict(litellm_metadata)
# cleaned_litellm_metadata.pop("user_api_key_auth", None)
sanitized = safe_json_loads(safe_dumps(cleaned_litellm_metadata), default={})
sanitized = safe_json_loads(
safe_dumps(cleaned_litellm_metadata), default={}
)
if isinstance(sanitized, dict) and sanitized:
payload["litellm_metadata"] = sanitized
@ -566,7 +577,9 @@ class GraySwanGuardrail(CustomGuardrail):
detection_info = detection_info[0]
# Extract fields from detection_info dict
detection_dict: dict = detection_info if isinstance(detection_info, dict) else {}
detection_dict: dict = (
detection_info if isinstance(detection_info, dict) else {}
)
violation_score = detection_dict.get("violation_score", 0.0)
violated_rules = detection_dict.get("violated_rules", [])
mutation = detection_dict.get("mutation", False)
@ -582,7 +595,9 @@ class GraySwanGuardrail(CustomGuardrail):
if violated_rules:
formatted_rules = self._format_violated_rules(violated_rules)
if formatted_rules:
message_parts.append(f"It was violating the rule(s): {formatted_rules}.")
message_parts.append(
f"It was violating the rule(s): {formatted_rules}."
)
if mutation:
message_parts.append(
@ -590,9 +605,7 @@ class GraySwanGuardrail(CustomGuardrail):
)
if ipi:
message_parts.append(
"Indirect Prompt Injection was DETECTED."
)
message_parts.append("Indirect Prompt Injection was DETECTED.")
return "\n".join(message_parts)

View File

@ -10,7 +10,10 @@ from httpx import HTTPStatusError
from requests.auth import HTTPBasicAuth
from litellm._logging import verbose_proxy_logger
from litellm.integrations.custom_guardrail import CustomGuardrail
from litellm.integrations.custom_guardrail import (
CustomGuardrail,
log_guardrail_information,
)
from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj
from litellm.llms.custom_httpx.http_handler import (
get_async_httpx_client,
@ -110,6 +113,7 @@ class HiddenlayerGuardrail(CustomGuardrail):
)
super().__init__(**kwargs)
@log_guardrail_information
async def apply_guardrail(
self,
inputs: GenericGuardrailAPIInputs,

View File

@ -28,7 +28,10 @@ from fastapi import HTTPException
from litellm import Router
from litellm._logging import verbose_proxy_logger
from litellm.integrations.custom_guardrail import CustomGuardrail
from litellm.integrations.custom_guardrail import (
CustomGuardrail,
log_guardrail_information,
)
from litellm.proxy._types import UserAPIKeyAuth
from litellm.types.utils import ModelResponseStream
@ -50,6 +53,7 @@ from litellm.types.proxy.guardrails.guardrail_hooks.litellm_content_filter impor
ContentFilterDetection,
PatternDetection,
)
from .patterns import PATTERN_EXTRA_CONFIG, get_compiled_pattern
MAX_KEYWORD_VALUE_GAP_WORDS = 1
@ -168,9 +172,9 @@ class ContentFilterGuardrail(CustomGuardrail):
self.image_model = image_model
# Store loaded categories
self.loaded_categories: Dict[str, CategoryConfig] = {}
self.category_keywords: Dict[
str, Tuple[str, str, ContentFilterAction]
] = {} # keyword -> (category, severity, action)
self.category_keywords: Dict[str, Tuple[str, str, ContentFilterAction]] = (
{}
) # keyword -> (category, severity, action)
# Load categories if provided
if categories:
@ -994,6 +998,7 @@ class ContentFilterGuardrail(CustomGuardrail):
masked_entity_count=masked_entity_count,
)
@log_guardrail_information
async def apply_guardrail(
self,
inputs: "GenericGuardrailAPIInputs",

View File

@ -12,7 +12,10 @@ import httpx
from fastapi import HTTPException
from litellm._logging import verbose_proxy_logger
from litellm.integrations.custom_guardrail import CustomGuardrail
from litellm.integrations.custom_guardrail import (
CustomGuardrail,
log_guardrail_information,
)
from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj
from litellm.llms.custom_httpx.http_handler import (
get_async_httpx_client,
@ -26,7 +29,11 @@ if TYPE_CHECKING:
class OnyxGuardrail(CustomGuardrail):
def __init__(
self, api_base: Optional[str] = None, api_key: Optional[str] = None, timeout: Optional[float] = 10.0, **kwargs
self,
api_base: Optional[str] = None,
api_key: Optional[str] = None,
timeout: Optional[float] = 10.0,
**kwargs,
):
timeout = timeout or int(os.getenv("ONYX_TIMEOUT", 10.0))
self.async_handler = get_async_httpx_client(
@ -79,6 +86,7 @@ class OnyxGuardrail(CustomGuardrail):
)
return result
@log_guardrail_information
async def apply_guardrail(
self,
inputs: GenericGuardrailAPIInputs,

View File

@ -58,7 +58,9 @@ class OpenAIModerationGuardrail(OpenAIGuardrailBase, CustomGuardrail):
guardrail_name: str,
api_key: Optional[str] = None,
api_base: Optional[str] = None,
model: Optional[Literal["omni-moderation-latest", "text-moderation-latest"]] = None,
model: Optional[
Literal["omni-moderation-latest", "text-moderation-latest"]
] = None,
**kwargs,
):
"""Initialize OpenAI Moderation guardrail handler."""
@ -75,7 +77,7 @@ class OpenAIModerationGuardrail(OpenAIGuardrailBase, CustomGuardrail):
supported_event_hooks=supported_event_hooks,
**kwargs,
)
self.async_handler = get_async_httpx_client(
llm_provider=httpxSpecialProvider.GuardrailCallback
)
@ -83,10 +85,14 @@ class OpenAIModerationGuardrail(OpenAIGuardrailBase, CustomGuardrail):
# Store configuration
self.api_key = api_key or self._get_api_key()
self.api_base = api_base or "https://api.openai.com/v1"
self.model: Literal["omni-moderation-latest", "text-moderation-latest"] = model or "omni-moderation-latest"
self.model: Literal["omni-moderation-latest", "text-moderation-latest"] = (
model or "omni-moderation-latest"
)
if not self.api_key:
raise ValueError("OpenAI Moderation: api_key is required. Set OPENAI_API_KEY environment variable or pass it in configuration.")
raise ValueError(
"OpenAI Moderation: api_key is required. Set OPENAI_API_KEY environment variable or pass it in configuration."
)
verbose_proxy_logger.debug(
f"Initialized OpenAI Moderation Guardrail: {guardrail_name} with model: {self.model}"
@ -98,7 +104,7 @@ class OpenAIModerationGuardrail(OpenAIGuardrailBase, CustomGuardrail):
import litellm
from litellm.secret_managers.main import get_secret_str
return (
os.environ.get("OPENAI_API_KEY")
or litellm.api_key
@ -106,21 +112,14 @@ class OpenAIModerationGuardrail(OpenAIGuardrailBase, CustomGuardrail):
or get_secret_str("OPENAI_API_KEY")
)
async def async_make_request(
self, input_text: str
) -> "OpenAIModerationResponse":
async def async_make_request(self, input_text: str) -> "OpenAIModerationResponse":
"""
Make a request to the OpenAI Moderation API.
"""
request_body = {
"model": self.model,
"input": input_text
}
verbose_proxy_logger.debug(
"OpenAI Moderation guard request: %s", request_body
)
request_body = {"model": self.model, "input": input_text}
verbose_proxy_logger.debug("OpenAI Moderation guard request: %s", request_body)
response = await self.async_handler.post(
url=f"{self.api_base}/moderations",
headers={
@ -133,7 +132,7 @@ class OpenAIModerationGuardrail(OpenAIGuardrailBase, CustomGuardrail):
verbose_proxy_logger.debug(
"OpenAI Moderation guard response: %s", response.json()
)
if response.status_code != 200:
raise HTTPException(
status_code=response.status_code,
@ -144,9 +143,12 @@ class OpenAIModerationGuardrail(OpenAIGuardrailBase, CustomGuardrail):
)
from litellm.types.llms.openai import OpenAIModerationResponse
return OpenAIModerationResponse(**response.json())
def _check_moderation_result(self, moderation_response: "OpenAIModerationResponse") -> None:
def _check_moderation_result(
self, moderation_response: "OpenAIModerationResponse"
) -> None:
"""
Check if the moderation response indicates harmful content and raise exception if needed.
"""
@ -168,10 +170,10 @@ class OpenAIModerationGuardrail(OpenAIGuardrailBase, CustomGuardrail):
}
verbose_proxy_logger.warning(
"OpenAI Moderation: Content flagged for violations: %s",
violation_details
"OpenAI Moderation: Content flagged for violations: %s",
violation_details,
)
raise HTTPException(
status_code=400,
detail={
@ -180,6 +182,7 @@ class OpenAIModerationGuardrail(OpenAIGuardrailBase, CustomGuardrail):
},
)
@log_guardrail_information
async def apply_guardrail(
self,
inputs: GenericGuardrailAPIInputs,
@ -189,51 +192,50 @@ class OpenAIModerationGuardrail(OpenAIGuardrailBase, CustomGuardrail):
) -> GenericGuardrailAPIInputs:
"""
Apply OpenAI moderation guardrail using the unified guardrail interface.
This method is called by the UnifiedLLMGuardrails system for all endpoint types
(chat completions, embeddings, responses API, etc.).
Args:
inputs: GenericGuardrailAPIInputs containing texts and/or structured_messages
request_data: The original request data
input_type: Whether this is a "request" (pre-call) or "response" (post-call)
logging_obj: Optional logging object
Returns:
The inputs unchanged (moderation doesn't modify content, only blocks)
Raises:
HTTPException: If content violates moderation policy
"""
# Extract text to moderate from inputs
text_to_moderate: Optional[str] = None
# Prefer structured_messages if available (has role context)
if structured_messages := inputs.get("structured_messages"):
text_to_moderate = self.get_user_prompt(structured_messages)
# Fall back to texts
if not text_to_moderate:
if texts := inputs.get("texts"):
# Join all texts for moderation
text_to_moderate = "\n".join(texts)
if not text_to_moderate:
verbose_proxy_logger.debug(
"OpenAI Moderation: No text content to moderate in inputs"
)
return inputs
# Make moderation request
moderation_response = await self.async_make_request(input_text=text_to_moderate)
# Check if content is flagged and raise exception if needed
self._check_moderation_result(moderation_response)
# Moderation doesn't modify content, just blocks - return inputs unchanged
return inputs
@log_guardrail_information
async def async_post_call_streaming_iterator_hook(
self,
@ -252,9 +254,7 @@ class OpenAIModerationGuardrail(OpenAIGuardrailBase, CustomGuardrail):
from litellm.main import stream_chunk_builder
from litellm.types.utils import TextCompletionResponse
verbose_proxy_logger.debug(
"OpenAI Moderation: Running streaming response scan"
)
verbose_proxy_logger.debug("OpenAI Moderation: Running streaming response scan")
# Collect all chunks to process them together
all_chunks: List["ModelResponseStream"] = []
@ -269,7 +269,7 @@ class OpenAIModerationGuardrail(OpenAIGuardrailBase, CustomGuardrail):
)
if isinstance(assembled_model_response, (type(None), TextCompletionResponse)):
# If we can't assemble a ModelResponse or it's a text completion,
# If we can't assemble a ModelResponse or it's a text completion,
# just yield the original chunks without moderation
verbose_proxy_logger.warning(
"OpenAI Moderation: Could not assemble ModelResponse from chunks, skipping moderation"
@ -284,19 +284,17 @@ class OpenAIModerationGuardrail(OpenAIGuardrailBase, CustomGuardrail):
verbose_proxy_logger.debug(
f"OpenAI Moderation: Streaming response text: {response_text[:100]}..." # Log first 100 chars
)
# Make moderation request - this will raise HTTPException if content is flagged
moderation_response = await self.async_make_request(
input_text=response_text,
)
# Check if content is flagged and raise exception if needed
self._check_moderation_result(moderation_response)
# If we reach here, content passed moderation - yield the original chunks
mock_response = MockResponseIterator(
model_response=assembled_model_response
)
mock_response = MockResponseIterator(model_response=assembled_model_response)
# Return the reconstructed stream
async for chunk in mock_response:
@ -306,34 +304,34 @@ class OpenAIModerationGuardrail(OpenAIGuardrailBase, CustomGuardrail):
"""
Extract text content from the model response for moderation.
"""
if not hasattr(response, 'choices') or not response.choices:
if not hasattr(response, "choices") or not response.choices:
return None
response_texts = []
for choice in response.choices:
try:
# Try to get content from message (chat completion)
message = getattr(choice, 'message', None)
message = getattr(choice, "message", None)
if message:
content = getattr(message, 'content', None)
content = getattr(message, "content", None)
if content and isinstance(content, str):
response_texts.append(content)
continue
# Try to get text (text completion)
text = getattr(choice, 'text', None)
text = getattr(choice, "text", None)
if text and isinstance(text, str):
response_texts.append(text)
continue
# Try to get content from delta (streaming)
delta = getattr(choice, 'delta', None)
delta = getattr(choice, "delta", None)
if delta:
content = getattr(delta, 'content', None)
content = getattr(delta, "content", None)
if content and isinstance(content, str):
response_texts.append(content)
continue
except (AttributeError, TypeError):
# Skip choices that don't have expected attributes
continue

View File

@ -9,10 +9,10 @@
import asyncio
import threading
import json
from datetime import datetime
import threading
from contextlib import asynccontextmanager
from datetime import datetime
from typing import (
TYPE_CHECKING,
Any,
@ -39,7 +39,10 @@ if TYPE_CHECKING:
from litellm._uuid import uuid
from litellm.caching.caching import DualCache
from litellm.exceptions import BlockedPiiEntityError
from litellm.integrations.custom_guardrail import CustomGuardrail
from litellm.integrations.custom_guardrail import (
CustomGuardrail,
log_guardrail_information,
)
from litellm.proxy._types import UserAPIKeyAuth
from litellm.types.guardrails import (
GuardrailEventHooks,
@ -568,9 +571,9 @@ class _OPTIONAL_PresidioPIIMasking(CustomGuardrail):
if messages is None:
return data
tasks = []
task_mappings: List[
Tuple[int, Optional[int]]
] = [] # Track (message_index, content_index) for each task
task_mappings: List[Tuple[int, Optional[int]]] = (
[]
) # Track (message_index, content_index) for each task
for msg_idx, m in enumerate(messages):
content = m.get("content", None)
@ -671,9 +674,9 @@ class _OPTIONAL_PresidioPIIMasking(CustomGuardrail):
): # /chat/completions requests
messages: Optional[List] = kwargs.get("messages", None)
tasks = []
task_mappings: List[
Tuple[int, Optional[int]]
] = [] # Track (message_index, content_index) for each task
task_mappings: List[Tuple[int, Optional[int]]] = (
[]
) # Track (message_index, content_index) for each task
if messages is None:
return kwargs, result
@ -792,11 +795,11 @@ class _OPTIONAL_PresidioPIIMasking(CustomGuardrail):
# Type narrowing: StreamingChoices doesn't have .message attribute
if not hasattr(choice, "message"):
continue
content = getattr(choice.message, "content", None)
content = getattr(choice.message, "content", None) # type: ignore
if content is None:
continue
if isinstance(content, str):
choice.message.content = await self.check_pii(
choice.message.content = await self.check_pii( # type: ignore
text=content,
output_parse_pii=False,
presidio_config=presidio_config,
@ -989,6 +992,7 @@ class _OPTIONAL_PresidioPIIMasking(CustomGuardrail):
except Exception:
pass
@log_guardrail_information
async def apply_guardrail(
self,
inputs: "GenericGuardrailAPIInputs",

View File

@ -6,7 +6,10 @@ from typing import TYPE_CHECKING, Any, List, Literal, Optional, Type
from fastapi import HTTPException
from litellm._logging import verbose_proxy_logger
from litellm.integrations.custom_guardrail import CustomGuardrail
from litellm.integrations.custom_guardrail import (
CustomGuardrail,
log_guardrail_information,
)
from litellm.llms.custom_httpx.http_handler import (
get_async_httpx_client,
httpxSpecialProvider,
@ -67,6 +70,7 @@ class PromptSecurityGuardrail(CustomGuardrail):
super().__init__(**kwargs)
@log_guardrail_information
async def apply_guardrail(
self,
inputs: GenericGuardrailAPIInputs,

View File

@ -12,10 +12,11 @@ from typing import Any, Dict, List, Literal, Optional, Type
from fastapi import HTTPException
from litellm._logging import verbose_proxy_logger
from litellm.integrations.custom_guardrail import CustomGuardrail
from litellm.litellm_core_utils.litellm_logging import (
Logging as LiteLLMLoggingObj,
from litellm.integrations.custom_guardrail import (
CustomGuardrail,
log_guardrail_information,
)
from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj
from litellm.llms.custom_httpx.http_handler import (
get_async_httpx_client,
httpxSpecialProvider,
@ -343,9 +344,7 @@ class QualifireGuardrail(CustomGuardrail):
)
url = f"{self.qualifire_api_base}/api/evaluation/evaluate"
verbose_proxy_logger.debug(
f"Qualifire Guardrail: Making request to {url}"
)
verbose_proxy_logger.debug(f"Qualifire Guardrail: Making request to {url}")
# Make the API request
response = await self.async_handler.post(
@ -393,6 +392,7 @@ class QualifireGuardrail(CustomGuardrail):
verbose_proxy_logger.exception(f"Qualifire Guardrail error: {e}")
raise
@log_guardrail_information
async def apply_guardrail(
self,
inputs: GenericGuardrailAPIInputs,

View File

@ -9,7 +9,10 @@ from typing import TYPE_CHECKING, Literal, Optional
from fastapi import HTTPException
from litellm._logging import verbose_proxy_logger
from litellm.integrations.custom_guardrail import CustomGuardrail
from litellm.integrations.custom_guardrail import (
CustomGuardrail,
log_guardrail_information,
)
from litellm.llms.custom_httpx.http_handler import (
get_async_httpx_client,
httpxSpecialProvider,
@ -70,6 +73,7 @@ class ZscalerAIGuard(CustomGuardrail):
return str(value).strip()
return "N/A"
@log_guardrail_information
async def apply_guardrail(
self,
inputs: "GenericGuardrailAPIInputs",
@ -92,7 +96,7 @@ class ZscalerAIGuard(CustomGuardrail):
Raises:
Exception: If content is blocked by Zscaler AI Guard
"""
texts = inputs.get("texts", [])
try:
verbose_proxy_logger.debug(f"ZscalerAIGuard: Checking {len(texts)} text(s)")
@ -102,8 +106,8 @@ class ZscalerAIGuard(CustomGuardrail):
team_metadata = metadata.get("team_metadata", {}) or {}
# Precedence for policy_id:
# 1. metadata.zguard_policy_id # request level
# 2. user_api_key_metadata.zguard_policy_id # Key level
# 1. metadata.zguard_policy_id # request level
# 2. user_api_key_metadata.zguard_policy_id # Key level
# 3. team_metadata.zguard_policy_id # Team level
# 4. self.policy_id (from environment) # Global
policy_id = (
@ -154,9 +158,7 @@ class ZscalerAIGuard(CustomGuardrail):
zscaler_ai_guard_result
and zscaler_ai_guard_result.get("action") == "BLOCK"
):
blocking_info = zscaler_ai_guard_result.get(
"zscaler_ai_guard_response"
)
blocking_info = zscaler_ai_guard_result.get("zscaler_ai_guard_response")
error_message = f"Content blocked by Zscaler AI Guard: {self.extract_blocking_info(blocking_info)}"
raise Exception(error_message)
except Exception as e:

View File

@ -0,0 +1,126 @@
"""
Test that all guardrail hooks with async def apply_guardrail use @log_guardrail_information decorator.
This ensures consistent logging and observability across all guardrail implementations.
"""
import ast
from pathlib import Path
from typing import List, Tuple
def find_apply_guardrail_methods(file_path: Path) -> List[Tuple[str, int, bool]]:
    """
    Find all async apply_guardrail methods and check if they have the decorator.

    The decorator is recognised both as a bare name
    (``@log_guardrail_information``) and as a qualified attribute access
    (``@custom_guardrail.log_guardrail_information``).

    Args:
        file_path: Path of the Python source file to inspect.

    Returns:
        List of tuples: (class_name, line_number, has_decorator), one per class
        that directly defines ``async def apply_guardrail``. Files that fail to
        parse yield an empty list.
    """
    decorator_name = "log_guardrail_information"

    # Read as UTF-8 explicitly so the result does not depend on the platform's
    # default locale encoding.
    with open(file_path, "r", encoding="utf-8") as f:
        content = f.read()

    try:
        tree = ast.parse(content)
    except SyntaxError:
        # Best-effort: a file that cannot be parsed contributes no results.
        return []

    def _is_log_decorator(decorator: ast.expr) -> bool:
        # Bare name: @log_guardrail_information
        if isinstance(decorator, ast.Name):
            return decorator.id == decorator_name
        # Qualified name: @some_module.log_guardrail_information
        if isinstance(decorator, ast.Attribute):
            return decorator.attr == decorator_name
        return False

    results: List[Tuple[str, int, bool]] = []

    for node in ast.walk(tree):
        if not isinstance(node, ast.ClassDef):
            continue

        # Only methods defined directly in the class body are considered
        for item in node.body:
            if (
                isinstance(item, ast.AsyncFunctionDef)
                and item.name == "apply_guardrail"
            ):
                has_decorator = any(
                    _is_log_decorator(decorator) for decorator in item.decorator_list
                )
                results.append((node.name, item.lineno, has_decorator))

    return results
def test_guardrail_apply_decorator():
    """
    Verify that each guardrail hook decorates ``apply_guardrail``.

    Walks every ``*.py`` file under ``litellm/proxy/guardrails/guardrail_hooks``
    (relative to the repository root, three levels above this file), collects
    the ``apply_guardrail`` methods reported as undecorated by
    ``find_apply_guardrail_methods``, prints a report and raises.

    Raises:
        AssertionError: if at least one undecorated method is found.
    """
    repo_root = Path(__file__).parent.parent.parent
    hooks_root = repo_root / "litellm" / "proxy" / "guardrails" / "guardrail_hooks"

    # File names that are never inspected:
    #   - __init__.py: package initialisers
    #   - base.py / primitives.py / patterns.py: base files and primitives
    #   - bedrock_guardrails.py: implements logging differently, via
    #     add_standard_logging_guardrail_information_to_request_data calls in
    #     its make_bedrock_api_request method instead of using the decorator
    excluded_names = (
        "__init__.py",
        "base.py",
        "primitives.py",
        "patterns.py",
        "bedrock_guardrails.py",
    )

    missing = []
    for source_file in hooks_root.rglob("*.py"):
        file_name = source_file.name
        # Test files are skipped as well as the explicitly excluded names.
        if file_name in excluded_names or file_name.startswith("test_"):
            continue
        for class_name, line_num, has_decorator in find_apply_guardrail_methods(
            source_file
        ):
            if has_decorator:
                continue
            missing.append((source_file.relative_to(repo_root), class_name, line_num))

    if not missing:
        return

    # Report every offender before failing so that all can be fixed in one pass.
    print(
        f"\nFound {len(missing)} guardrail hook(s) without @log_guardrail_information decorator:"
    )
    print(
        "\nAll guardrail hooks must use @log_guardrail_information decorator on their apply_guardrail method."
    )
    print(
        "This ensures consistent logging and observability across all guardrails.\n"
    )
    for file_path, class_name, line_num in missing:
        print(f" - {file_path}:{line_num} ({class_name}.apply_guardrail)")

    fix_hint_lines = (
        "\nTo fix, add the decorator:",
        " from litellm.integrations.custom_guardrail import log_guardrail_information",
        " ",
        " @log_guardrail_information",
        " async def apply_guardrail(self, ...):",
        " ...",
    )
    for hint in fix_hint_lines:
        print(hint)

    raise AssertionError(
        f"Found {len(missing)} guardrail hook(s) without @log_guardrail_information decorator"
    )
# Allow the check to be run directly as a script, outside of pytest.
if __name__ == "__main__":
    test_guardrail_apply_decorator()
    # Only reached when the check above did not raise AssertionError.
    print("✓ All guardrail hooks have @log_guardrail_information decorator")

View File

@ -13159,6 +13159,21 @@
"type": "github",
"url": "https://github.com/sponsors/wooorm"
}
},
"node_modules/@next/swc-win32-ia32-msvc": {
"version": "14.2.33",
"resolved": "https://registry.npmjs.org/@next/swc-win32-ia32-msvc/-/swc-win32-ia32-msvc-14.2.33.tgz",
"integrity": "sha512-pc9LpGNKhJ0dXQhZ5QMmYxtARwwmWLpeocFmVG5Z0DzWq5Uf0izcI8tLc+qOpqxO1PWqZ5A7J1blrUIKrIFc7Q==",
"cpu": [
"ia32"
],
"optional": true,
"os": [
"win32"
],
"engines": {
"node": ">= 10"
}
}
}
}

View File

@ -3,7 +3,7 @@
"version": "0.1.0",
"private": true,
"scripts": {
"dev": "next dev --webpack",
"dev": "next dev",
"build": "next build",
"start": "next start",
"lint": "next lint",

View File

@ -1,5 +1,5 @@
import React, { useState, useRef, useEffect } from "react";
import { Modal, Select, Switch, Collapse, Input } from "antd";
import { Modal, Select, Switch, Collapse, Input, Divider } from "antd";
import { Button, TextInput } from "@tremor/react";
import {
CodeOutlined,
@ -8,6 +8,8 @@ import {
CloseCircleOutlined,
CaretRightOutlined,
SaveOutlined,
UsergroupAddOutlined,
ExportOutlined,
} from "@ant-design/icons";
import { createGuardrailCall, updateGuardrailCall, testCustomCodeGuardrail } from "../../networking";
import NotificationsManager from "../../molecules/notifications_manager";
@ -91,6 +93,7 @@ const CODE_TEMPLATES = {
},
};
// Available primitives organized by category
const PRIMITIVES = {
"Return Values": [
@ -241,6 +244,8 @@ const CustomCodeModal: React.FC<CustomCodeModalProps> = ({
// Handle template change
const handleTemplateChange = (templateKey: string) => {
  setSelectedTemplate(templateKey);
  // Only the built-in (standard) templates carry code. Ignore any key that is
  // not present in CODE_TEMPLATES instead of dereferencing `undefined`.
  const template = CODE_TEMPLATES[templateKey as keyof typeof CODE_TEMPLATES];
  if (template) {
    setCode(template.code);
  }
};
@ -486,12 +491,45 @@ const CustomCodeModal: React.FC<CustomCodeModalProps> = ({
onChange={handleTemplateChange}
className="w-full"
size="middle"
dropdownRender={(menu) => (
<>
{menu}
<Divider style={{ margin: '8px 0' }} />
<div
style={{
padding: '8px 12px',
cursor: 'pointer',
color: '#1890ff',
fontSize: '12px',
display: 'flex',
alignItems: 'center',
gap: '4px',
}}
onClick={(e) => {
e.preventDefault();
window.open('https://models.litellm.ai/guardrails', '_blank');
}}
onMouseEnter={(e) => {
e.currentTarget.style.backgroundColor = '#f0f0f0';
}}
onMouseLeave={(e) => {
e.currentTarget.style.backgroundColor = 'transparent';
}}
>
<UsergroupAddOutlined />
<span>Browse Community templates</span>
<ExportOutlined style={{ fontSize: '10px' }} />
</div>
</>
)}
>
{Object.entries(CODE_TEMPLATES).map(([key, template]) => (
<Select.Option key={key} value={key}>
{template.name}
</Select.Option>
))}
<Select.OptGroup label="STANDARD">
{Object.entries(CODE_TEMPLATES).map(([key, template]) => (
<Select.Option key={key} value={key}>
{template.name}
</Select.Option>
))}
</Select.OptGroup>
</Select>
</div>
<div className="flex items-center gap-2 pt-5">
@ -632,6 +670,27 @@ const CustomCodeModal: React.FC<CustomCodeModalProps> = ({
</div>
</Panel>
</Collapse>
{/* Contribution CTA Banner */}
<div className="mt-3 p-4 bg-gradient-to-r from-blue-50 to-indigo-50 border border-blue-200 rounded-lg flex items-center justify-between flex-shrink-0">
<div className="flex items-center gap-3">
<div className="bg-blue-100 rounded-full p-2">
<UsergroupAddOutlined className="text-blue-600 text-lg" />
</div>
<div>
<div className="text-sm font-medium text-gray-900">Built a useful guardrail?</div>
<div className="text-xs text-gray-600">Share it with the community and help others build faster</div>
</div>
</div>
<Button
size="xs"
onClick={() => window.open('https://github.com/BerriAI/litellm-guardrails', '_blank')}
icon={ExportOutlined}
className="bg-blue-600 hover:bg-blue-700 text-white border-0"
>
Contribute Template
</Button>
</div>
</div>
{/* Primitives Panel */}

View File

@ -14,7 +14,7 @@
"moduleResolution": "bundler",
"resolveJsonModule": true,
"isolatedModules": true,
"jsx": "react-jsx",
"jsx": "preserve",
"incremental": true,
"plugins": [
{