Merge pull request #27403 from BerriAI/litellm_otelGenaiCaptureMessageContent

[Feat] Honor OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT
This commit is contained in:
yuneng-jiang 2026-05-08 18:17:00 -07:00 committed by GitHub
commit 0824c4c77e
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 202 additions and 9 deletions

View File

@ -57,6 +57,17 @@ LITELLM_PROXY_REQUEST_SPAN_NAME = "Received Proxy Server Request"
RAW_REQUEST_SPAN_NAME = "raw_gen_ai_request"
LITELLM_REQUEST_SPAN_NAME = "litellm_request"
# Content-capture modes understood by the OpenTelemetry handler. Content is
# recorded on spans for SPAN_ONLY / SPAN_AND_EVENT and on log events for
# EVENT_ONLY / SPAN_AND_EVENT; NO_CONTENT records it on neither.
CAPTURE_MODE_NO_CONTENT = "NO_CONTENT"
CAPTURE_MODE_SPAN_ONLY = "SPAN_ONLY"
CAPTURE_MODE_EVENT_ONLY = "EVENT_ONLY"
CAPTURE_MODE_SPAN_AND_EVENT = "SPAN_AND_EVENT"

# Membership set used to validate an upper-cased explicit setting.
_VALID_CAPTURE_MODES = {
    CAPTURE_MODE_SPAN_AND_EVENT,
    CAPTURE_MODE_EVENT_ONLY,
    CAPTURE_MODE_SPAN_ONLY,
    CAPTURE_MODE_NO_CONTENT,
}
@dataclass
class OpenTelemetryConfig:
@ -71,6 +82,9 @@ class OpenTelemetryConfig:
ignore_context_propagation: Optional[bool] = None
# When True, create a private TracerProvider instead of reusing or setting the global one.
skip_set_global: bool = False
# Programmatic override for OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT.
# One of NO_CONTENT, SPAN_ONLY, EVENT_ONLY, SPAN_AND_EVENT (case-insensitive).
# Legacy aliases: "true"/"1" -> EVENT_ONLY, "false"/"0" -> NO_CONTENT.
capture_message_content: Optional[str] = None
def __post_init__(self) -> None:
# If endpoint is specified but exporter is still the default "console",
@ -182,6 +196,9 @@ class OpenTelemetry(CustomLogger):
super().__init__(**kwargs)
self._init_metrics(meter_provider)
self._init_logs(logger_provider)
# Sample env-var / config / message_logging at init so subsequent
# _capture_in_span / _capture_in_event calls are deterministic.
self._capture_mode_cached = self._compute_capture_mode_from_init_state()
self._init_otel_logger_on_litellm_proxy()
@staticmethod
@ -306,6 +323,62 @@ class OpenTelemetry(CustomLogger):
hasattr(self, "callback_name") and self.callback_name == "langfuse_otel"
)
def _compute_capture_mode_from_init_state(self) -> Optional[str]:
    """Resolve the explicitly configured content-capture mode at init time.

    Candidates are consulted in precedence order; the first one that yields
    a recognised mode wins:

    1. ``self.config.capture_message_content`` (programmatic override).
    2. The ``OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT`` env var.

    Each candidate is stringified, stripped of surrounding whitespace and
    compared case-insensitively. ``"true"``/``"1"`` map to ``EVENT_ONLY``
    per the contrib convention and ``"false"``/``"0"`` map to
    ``NO_CONTENT``. Empty or unrecognised candidates are ignored, so a
    typo in the config field does not hide a valid env var.

    Returns:
        The resolved mode, or ``None`` if nothing explicit and valid is set
        (in which case the legacy ``self.message_logging`` flag is
        consulted dynamically per request).
    """
    legacy_aliases = {
        "TRUE": CAPTURE_MODE_EVENT_ONLY,
        "1": CAPTURE_MODE_EVENT_ONLY,
        "FALSE": CAPTURE_MODE_NO_CONTENT,
        "0": CAPTURE_MODE_NO_CONTENT,
    }
    candidates = (
        self.config.capture_message_content,
        os.getenv("OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT"),
    )
    for raw in candidates:
        if not raw:
            # Unset / empty: defer to the next source.
            continue
        # str() tolerates a non-string config value (e.g. a bool) instead of
        # raising AttributeError on .upper().
        normalized = str(raw).strip().upper()
        if normalized in legacy_aliases:
            return legacy_aliases[normalized]
        if normalized in _VALID_CAPTURE_MODES:
            return normalized
    return None
def _resolve_capture_mode(self) -> str:
    """Work out which capture mode applies to the current request.

    Sources are checked in this order and the first match wins:

    1. ``litellm.turn_off_message_logging`` -- when truthy the result is
       always ``NO_CONTENT``. Read on every call, so it acts as a
       kill-switch.
    2. ``self._capture_mode_cached`` -- the explicit setting sampled at init
       from ``OpenTelemetryConfig.capture_message_content`` or
       ``OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT``.
    3. Legacy ``self.message_logging`` -- read on every call; truthy gives
       ``SPAN_AND_EVENT``, otherwise ``NO_CONTENT``.
    """
    kill_switch_on = litellm.turn_off_message_logging
    if kill_switch_on:
        return CAPTURE_MODE_NO_CONTENT

    cached_mode = self._capture_mode_cached
    if cached_mode is not None:
        return cached_mode

    if self.message_logging:
        return CAPTURE_MODE_SPAN_AND_EVENT
    return CAPTURE_MODE_NO_CONTENT
def _capture_in_span(self) -> bool:
    """Return True when the active capture mode records content on spans."""
    active_mode = self._resolve_capture_mode()
    span_modes = (CAPTURE_MODE_SPAN_ONLY, CAPTURE_MODE_SPAN_AND_EVENT)
    return active_mode in span_modes
def _capture_in_event(self) -> bool:
    """Return True when the active capture mode records content on events."""
    active_mode = self._resolve_capture_mode()
    event_modes = (CAPTURE_MODE_EVENT_ONLY, CAPTURE_MODE_SPAN_AND_EVENT)
    return active_mode in event_modes
def _init_tracing(self, tracer_provider):
from opentelemetry import trace
from opentelemetry.sdk.trace import TracerProvider
@ -825,8 +898,7 @@ class OpenTelemetry(CustomLogger):
from opentelemetry import trace
from opentelemetry.trace import Status, StatusCode
# only log raw LLM request/response if message_logging is on and not globally turned off
if litellm.turn_off_message_logging or not self.message_logging:
if not self._capture_in_span():
return
litellm_params = kwargs.get("litellm_params", {})
@ -1117,9 +1189,14 @@ class OpenTelemetry(CustomLogger):
}
if role == "tool" and msg.get("id"):
attrs["id"] = msg["id"]
if self.message_logging and msg.get("content"):
capture_event_content = self._capture_in_event()
if capture_event_content and msg.get("content"):
attrs["gen_ai.prompt"] = msg["content"]
body = msg.copy()
if not capture_event_content:
body.pop("content", None)
log_record = SdkLogRecord(
timestamp=self._to_ns(datetime.now()),
trace_id=parent_ctx.trace_id,
@ -1127,7 +1204,7 @@ class OpenTelemetry(CustomLogger):
trace_flags=parent_ctx.trace_flags,
severity_number=SeverityNumber.INFO,
severity_text="INFO",
body=msg.copy(),
body=body,
attributes=attrs,
)
otel_logger.emit(log_record)
@ -1141,14 +1218,15 @@ class OpenTelemetry(CustomLogger):
"finish_reason": choice.get("finish_reason"),
}
body_msg = choice.get("message", {})
if self.message_logging and body_msg.get("content"):
capture_event_content = self._capture_in_event()
if capture_event_content and body_msg.get("content"):
attrs["message.content"] = body_msg["content"]
body = {
"index": idx,
"finish_reason": choice.get("finish_reason"),
"message": {"role": body_msg.get("role", "assistant")},
}
if self.message_logging and body_msg.get("content"):
if capture_event_content and body_msg.get("content"):
body["message"]["content"] = body_msg["content"]
log_record = SdkLogRecord(
@ -1674,9 +1752,7 @@ class OpenTelemetry(CustomLogger):
########## LLM Request Messages / tools / content Attributes ###########
#########################################################################
if litellm.turn_off_message_logging is True:
return
if self.message_logging is not True:
if not self._capture_in_span():
return
if optional_params.get("tools"):

View File

@ -442,6 +442,109 @@ class TestOpenTelemetryDualHandlerIsolation(unittest.TestCase):
)
class TestOpenTelemetryCaptureMessageContent(unittest.TestCase):
    """OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT and the
    OpenTelemetryConfig.capture_message_content programmatic override
    drive what the handler captures in spans vs events."""

    def setUp(self):
        # Pin the global kill-switch off so an ambient
        # litellm.turn_off_message_logging=True cannot force NO_CONTENT in
        # every test; the kill-switch test re-patches it to True locally.
        kill_switch = patch("litellm.turn_off_message_logging", False)
        kill_switch.start()
        self.addCleanup(kill_switch.stop)

    @staticmethod
    def _make(env=None, config_value=None, message_logging=True):
        """Build a handler and return ``(handler, resolved_capture_mode)``.

        ``env`` is the value given to the capture-content env var while the
        handler is constructed (``None`` becomes an empty string, i.e. no
        explicit setting). ``config_value`` is passed as
        ``OpenTelemetryConfig.capture_message_content``. ``message_logging``
        is assigned on the handler after construction to drive the legacy
        fallback.
        """
        env_dict = {
            "OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT": (
                env if env is not None else ""
            )
        }
        with patch.dict(os.environ, env_dict):
            handler = OpenTelemetry(
                config=OpenTelemetryConfig(
                    exporter="console", capture_message_content=config_value
                )
            )
        handler.message_logging = message_logging
        return handler, handler._resolve_capture_mode()

    def test_no_explicit_setting_falls_back_to_message_logging_true(self):
        _, mode = self._make()
        self.assertEqual(mode, "SPAN_AND_EVENT")

    def test_no_explicit_setting_falls_back_to_message_logging_false(self):
        _, mode = self._make(message_logging=False)
        self.assertEqual(mode, "NO_CONTENT")

    def test_env_var_no_content(self):
        _, mode = self._make(env="NO_CONTENT")
        self.assertEqual(mode, "NO_CONTENT")

    def test_env_var_span_only(self):
        _, mode = self._make(env="SPAN_ONLY")
        self.assertEqual(mode, "SPAN_ONLY")

    def test_env_var_event_only(self):
        _, mode = self._make(env="EVENT_ONLY")
        self.assertEqual(mode, "EVENT_ONLY")

    def test_env_var_span_and_event(self):
        _, mode = self._make(env="SPAN_AND_EVENT")
        self.assertEqual(mode, "SPAN_AND_EVENT")

    def test_env_var_legacy_true_maps_to_event_only(self):
        # Cover both documented truthy aliases, mirroring the false/0 test.
        for env in ("true", "1"):
            with self.subTest(env=env):
                _, mode = self._make(env=env)
                self.assertEqual(mode, "EVENT_ONLY")

    def test_env_var_legacy_false_maps_to_no_content(self):
        for env in ("false", "0"):
            with self.subTest(env=env):
                _, mode = self._make(env=env)
                self.assertEqual(mode, "NO_CONTENT")

    def test_env_var_unknown_value_falls_through_to_legacy(self):
        _, mode = self._make(env="garbage", message_logging=True)
        self.assertEqual(mode, "SPAN_AND_EVENT")

    def test_config_field_overrides_env(self):
        _, mode = self._make(env="EVENT_ONLY", config_value="SPAN_ONLY")
        self.assertEqual(mode, "SPAN_ONLY")

    def test_turn_off_message_logging_forces_no_content(self):
        with patch("litellm.turn_off_message_logging", True):
            _, mode = self._make(env="SPAN_AND_EVENT", message_logging=True)
        self.assertEqual(mode, "NO_CONTENT")

    def test_capture_in_span_and_event_predicates(self):
        cases = {
            "NO_CONTENT": (False, False),
            "SPAN_ONLY": (True, False),
            "EVENT_ONLY": (False, True),
            "SPAN_AND_EVENT": (True, True),
        }
        for mode, (in_span, in_event) in cases.items():
            # subTest keeps the remaining modes running after a failure.
            with self.subTest(mode=mode):
                handler, _ = self._make(env=mode)
                self.assertEqual(handler._capture_in_span(), in_span, msg=mode)
                self.assertEqual(handler._capture_in_event(), in_event, msg=mode)

    def test_two_handlers_can_have_different_modes(self):
        # FIL's stated requirement: one handler strips content, the other keeps it.
        with patch.dict(
            os.environ, {"OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT": ""}
        ):
            stripped = OpenTelemetry(
                config=OpenTelemetryConfig(
                    exporter="console", capture_message_content="NO_CONTENT"
                )
            )
            kept = OpenTelemetry(
                config=OpenTelemetryConfig(
                    exporter="console", capture_message_content="SPAN_AND_EVENT"
                )
            )
        self.assertEqual(stripped._resolve_capture_mode(), "NO_CONTENT")
        self.assertEqual(kept._resolve_capture_mode(), "SPAN_AND_EVENT")
        self.assertFalse(stripped._capture_in_span())
        self.assertFalse(stripped._capture_in_event())
        self.assertTrue(kept._capture_in_span())
        self.assertTrue(kept._capture_in_event())
class TestOpenTelemetry(unittest.TestCase):
POLL_INTERVAL = 0.05
POLL_TIMEOUT = 2.0
@ -1067,6 +1170,7 @@ class TestOpenTelemetry(unittest.TestCase):
result = otel._get_span_name(kwargs)
self.assertEqual(result, LITELLM_REQUEST_SPAN_NAME)
@patch.dict(os.environ, {"OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT": ""})
@patch("litellm.turn_off_message_logging", False)
def test_maybe_log_raw_request_creates_span(self):
"""Test _maybe_log_raw_request creates span when logging enabled"""
@ -2194,6 +2298,19 @@ class TestOpenTelemetrySemanticConventions138(unittest.TestCase):
See: https://github.com/BerriAI/litellm/issues/17794
"""
def setUp(self):
    """Remove the capture-content env var for the duration of each test."""
    # A shell-set OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT would
    # otherwise steer these tests away from the legacy default path
    # (message_logging=True). The removed value (None when the variable was
    # unset) is kept on the instance for later restoration.
    env_key = "OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT"
    self._prev = os.environ.pop(env_key, None)
def tearDown(self):
    """Return the capture-content env var to the state saved in ``self._prev``.

    If no value was saved (``self._prev`` is ``None``) the variable is
    removed, so a value set while a test ran does not leak into later
    tests. Otherwise the saved value is written back.
    """
    env_key = "OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT"
    if self._prev is None:
        os.environ.pop(env_key, None)
    else:
        os.environ[env_key] = self._prev
def test_input_messages_uses_parts_structure(self):
"""
Test that gen_ai.input.messages uses the OTEL 1.38 parts array structure.