From f047b1571ea931158a15d569c1260e61494a661a Mon Sep 17 00:00:00 2001 From: ryan-crabbe-berri Date: Tue, 2 Jun 2026 16:40:30 -0700 Subject: [PATCH] fix(otel): capture 401 error details in management endpoint spans (#29535) Auth failures on management endpoints such as team/list and organization/list (invalid or expired keys) were raised as ProxyException, whose __str__ returned an empty string, so the OTEL SERVER span recorded an error with no message. ProxyException now stringifies to its message, get_error_information prefers the explicit .message attribute, and the proxy exception handlers stamp a consistent error.type, error.code and error.message on the span. Resolves LIT-3515 --- litellm/integrations/opentelemetry.py | 26 ++++++++++++++ litellm/litellm_core_utils/litellm_logging.py | 8 +++-- litellm/proxy/_types.py | 1 + litellm/proxy/proxy_server.py | 14 +++++--- .../test_otel_exception_handler.py | 31 +++++++++++++++- .../test_litellm_logging.py | 36 +++++++++++++++++++ tests/test_litellm/proxy/test_proxy_types.py | 19 ++++++++++ 7 files changed, 128 insertions(+), 7 deletions(-) diff --git a/litellm/integrations/opentelemetry.py b/litellm/integrations/opentelemetry.py index cb619ae020..8bc61d960e 100644 --- a/litellm/integrations/opentelemetry.py +++ b/litellm/integrations/opentelemetry.py @@ -3287,6 +3287,32 @@ class OpenTelemetry(OTELGenAISemconvMixin, CustomLogger): value=int(status_code), ) + def record_error_attributes_on_span( + self, + span: Optional[Span], + exception: Optional[Exception], + status_code: int, + ) -> None: + """Stamp structured ``error.*`` attributes on the SERVER span from the + exception returned to the client, with ``error.code`` pinned to the real + response status. 
Idempotent (overwrites); emits no exception event.""" + if span is None or exception is None: + return + from litellm.litellm_core_utils.litellm_logging import ( + StandardLoggingPayloadSetup, + ) + + error_information = StandardLoggingPayloadSetup.get_error_information( + original_exception=exception + ) + error_information["error_code"] = str(status_code) + self._record_exception_on_span( + span=span, + kwargs={ + "standard_logging_object": {"error_information": error_information} + }, + ) + def set_preprocessing_duration_attribute( self, span: Optional[Span], container: Any ) -> None: diff --git a/litellm/litellm_core_utils/litellm_logging.py b/litellm/litellm_core_utils/litellm_logging.py index c127b3873a..abc22713be 100644 --- a/litellm/litellm_core_utils/litellm_logging.py +++ b/litellm/litellm_core_utils/litellm_logging.py @@ -5300,8 +5300,12 @@ class StandardLoggingPayloadSetup: tb_lines[:MAXIMUM_TRACEBACK_LINES_TO_LOG] ) # Limit to first 100 lines - # Get additional error details - error_message = str(original_exception) + explicit_message = getattr(original_exception, "message", None) + error_message = ( + explicit_message + if isinstance(explicit_message, str) and explicit_message + else str(original_exception) + ) return StandardLoggingPayloadErrorInformation( error_code=error_status, diff --git a/litellm/proxy/_types.py b/litellm/proxy/_types.py index 09ee88239f..6168097733 100644 --- a/litellm/proxy/_types.py +++ b/litellm/proxy/_types.py @@ -3673,6 +3673,7 @@ class ProxyException(Exception): provider_specific_fields: Optional[dict] = None, ): self.message = str(message) + super().__init__(self.message) self.type = type self.param = param self.openai_code = openai_code or code diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py index bd667298b4..a092df6cdf 100644 --- a/litellm/proxy/proxy_server.py +++ b/litellm/proxy/proxy_server.py @@ -1271,7 +1271,7 @@ async def openai_exception_handler(request: Request, exc: ProxyException): 
headers = exc.headers error_dict = exc.to_dict() status_code = int(exc.code) if exc.code else status.HTTP_500_INTERNAL_SERVER_ERROR - _close_dangling_otel_server_span(request, status_code) + _close_dangling_otel_server_span(request, status_code, exc=exc) return JSONResponse( status_code=status_code, content={"error": error_dict}, @@ -1279,7 +1279,9 @@ async def openai_exception_handler(request: Request, exc: ProxyException): ) -def _close_dangling_otel_server_span(request: Request, status_code: int) -> None: +def _close_dangling_otel_server_span( + request: Request, status_code: int, exc: Optional[Exception] = None +) -> None: parent_otel_span = getattr(request.state, "parent_otel_span", None) if parent_otel_span is None: return @@ -1302,6 +1304,10 @@ def _close_dangling_otel_server_span(request: Request, status_code: int) -> None open_telemetry_logger.set_response_status_code_attribute( parent_otel_span, status_code ) + if status_code >= 400: + open_telemetry_logger.record_error_attributes_on_span( + parent_otel_span, exc, status_code + ) parent_otel_span.set_status( Status(StatusCode.ERROR if status_code >= 400 else StatusCode.OK) ) @@ -1318,7 +1324,7 @@ def _close_dangling_otel_server_span(request: Request, status_code: int) -> None async def otel_request_validation_exception_handler( request: Request, exc: RequestValidationError ): - _close_dangling_otel_server_span(request, 422) + _close_dangling_otel_server_span(request, 422, exc=exc) return JSONResponse( status_code=422, content={"detail": jsonable_encoder(exc.errors())}, @@ -1332,7 +1338,7 @@ async def otel_unhandled_exception_handler(request: Request, exc: Exception): verbose_proxy_logger.exception( "Unhandled exception in request: %s", type(exc).__name__ ) - _close_dangling_otel_server_span(request, 500) + _close_dangling_otel_server_span(request, 500, exc=exc) return JSONResponse( status_code=500, content={ diff --git a/tests/test_litellm/integrations/open_telemetry/test_otel_exception_handler.py 
b/tests/test_litellm/integrations/open_telemetry/test_otel_exception_handler.py index 56059c260c..348ef5082e 100644 --- a/tests/test_litellm/integrations/open_telemetry/test_otel_exception_handler.py +++ b/tests/test_litellm/integrations/open_telemetry/test_otel_exception_handler.py @@ -18,7 +18,9 @@ from litellm.proxy.proxy_server import ( otel_unhandled_exception_handler, ) -from ._helpers import assert_server_span_attrs +from litellm.integrations._types.open_inference import ErrorAttributes + +from ._helpers import assert_server_span_attrs, get_server_span def _fake_request(parent_otel_span=None): @@ -92,6 +94,33 @@ def test_exception_handler_closes_span( ) +@pytest.mark.parametrize("path", ["/team/list", "/organization/list"]) +def test_openai_exception_handler_stamps_structured_error_on_span( + wired_otel, server_span_factory, path +): + """A ProxyException 401 (invalid/expired key on a management endpoint) must + leave error.type, error.code AND error.message on the SERVER span. Pre-fix, + ProxyException stringified to "" so error.message was dropped — the span + showed an error with no message.""" + msg = "Authentication Error, Invalid proxy server token passed." 
+ request = _fake_request(parent_otel_span=server_span_factory(path)) + exc = ProxyException(message=msg, type="auth_error", param="key", code=401) + + response = asyncio.run(openai_exception_handler(request, exc)) + assert response.status_code == 401 + + assert_server_span_attrs( + wired_otel, + expected_status=401, + expected_url_path=path, + where=f"openai_exception_handler ({path})", + ) + attrs = get_server_span(wired_otel).attributes + assert attrs.get(ErrorAttributes.ERROR_MESSAGE) == msg + assert attrs.get(ErrorAttributes.ERROR_TYPE) == "ProxyException" + assert attrs.get(ErrorAttributes.ERROR_CODE) == "401" + + def test_unhandled_handler_reraises_known_exceptions(wired_otel, server_span_factory): """ProxyException / HTTPException / RequestValidationError have dedicated handlers.""" request = _fake_request(parent_otel_span=server_span_factory("/key/generate")) diff --git a/tests/test_litellm/litellm_core_utils/test_litellm_logging.py b/tests/test_litellm/litellm_core_utils/test_litellm_logging.py index b64cb7c690..8acda74a15 100644 --- a/tests/test_litellm/litellm_core_utils/test_litellm_logging.py +++ b/tests/test_litellm/litellm_core_utils/test_litellm_logging.py @@ -3078,3 +3078,39 @@ class TestFirstApiCallStartTimeSetOnce: assert obj.model_call_details["api_call_start_time"] > first assert obj.model_call_details["first_api_call_start_time"] == first assert user_meta == {} + + +def test_get_error_information_proxy_exception_preserves_message(): + """ProxyException keeps its text in ``.message`` (str() was empty pre-fix), + so error_information must still surface the message and code.""" + from litellm.litellm_core_utils.litellm_logging import StandardLoggingPayloadSetup + from litellm.proxy._types import ProxyException + + msg = "Authentication Error, Invalid proxy server token passed." 
+ exc = ProxyException(message=msg, type="auth_error", param="key", code=401) + + info = StandardLoggingPayloadSetup.get_error_information(original_exception=exc) + assert info["error_message"] == msg + assert info["error_class"] == "ProxyException" + assert info["error_code"] == "401" + + +def test_get_error_information_prefers_message_attribute_over_empty_str(): + """error_message must come from a populated ``.message`` even when the + exception's __str__ is empty — guards classes that store the text on + ``.message`` without forwarding it to ``Exception.__init__``.""" + from litellm.litellm_core_utils.litellm_logging import StandardLoggingPayloadSetup + + class _SilentExc(Exception): + def __init__(self): + self.message = "real failure detail" + self.code = 401 + + def __str__(self): + return "" + + info = StandardLoggingPayloadSetup.get_error_information( + original_exception=_SilentExc() + ) + assert info["error_message"] == "real failure detail" + assert info["error_code"] == "401" diff --git a/tests/test_litellm/proxy/test_proxy_types.py b/tests/test_litellm/proxy/test_proxy_types.py index dbb952968e..b435244638 100644 --- a/tests/test_litellm/proxy/test_proxy_types.py +++ b/tests/test_litellm/proxy/test_proxy_types.py @@ -87,3 +87,22 @@ def test_user_api_key_auth_hashes_authorization_header_form_of_key(): assert from_header.api_key == baseline.api_key assert from_header.token == baseline.token assert not from_header.api_key.lower().startswith("bearer") + + +def test_proxy_exception_str_returns_message(): + """ProxyException must stringify to its message: OTEL's + ``span.record_exception`` and ``str(exc)``-based logging read the string + form, which was empty pre-fix. The OpenAI-mapped fields must stay intact.""" + from litellm.proxy._types import ProxyException + + msg = "Authentication Error, Invalid proxy server token passed." 
+ exc = ProxyException(message=msg, type="auth_error", param="key", code=401) + + assert str(exc) == msg + assert exc.message == msg + assert exc.to_dict() == { + "message": msg, + "type": "auth_error", + "param": "key", + "code": "401", + }