fix(otel): capture 401 error details in management endpoint spans (#29535)
Auth failures on management endpoints such as team/list and organization/list (invalid or expired keys) were raised as ProxyException, whose __str__ returned an empty string, so the OTEL SERVER span recorded an error with no message. ProxyException now stringifies to its message, get_error_information prefers the explicit .message attribute, and the proxy exception handlers stamp a consistent error.type, error.code and error.message on the span. Resolves LIT-3515.
This commit is contained in:
parent
9d9558e78f
commit
f047b1571e
@ -3287,6 +3287,32 @@ class OpenTelemetry(OTELGenAISemconvMixin, CustomLogger):
|
||||
value=int(status_code),
|
||||
)
|
||||
|
||||
def record_error_attributes_on_span(
    self,
    span: Optional[Span],
    exception: Optional[Exception],
    status_code: int,
) -> None:
    """Stamp structured ``error.*`` attributes on the SERVER span.

    The attributes are derived from the exception returned to the client,
    with ``error.code`` pinned to the real response status. Idempotent
    (overwrites); emits no exception event.

    Args:
        span: The span to annotate. Nothing is recorded when it is ``None``.
        exception: The exception handed back to the client. Nothing is
            recorded when it is ``None``.
        status_code: The HTTP status actually returned; stored as a string
            under ``error_code``, replacing whatever code was derived from
            the exception itself.
    """
    # Without both a span and an exception there is nothing to describe.
    if span is None or exception is None:
        return

    # Function-scope import, kept exactly where the original placed it
    # (presumably to avoid an import cycle between the two modules - confirm).
    from litellm.litellm_core_utils.litellm_logging import (
        StandardLoggingPayloadSetup,
    )

    error_information = StandardLoggingPayloadSetup.get_error_information(
        original_exception=exception
    )
    # The response status wins over any code carried by the exception.
    error_information["error_code"] = str(status_code)

    # Hand the details over in the kwargs shape that
    # ``_record_exception_on_span`` is called with here.
    self._record_exception_on_span(
        span=span,
        kwargs={
            "standard_logging_object": {"error_information": error_information}
        },
    )
|
||||
|
||||
def set_preprocessing_duration_attribute(
|
||||
self, span: Optional[Span], container: Any
|
||||
) -> None:
|
||||
|
||||
@ -5300,8 +5300,12 @@ class StandardLoggingPayloadSetup:
|
||||
tb_lines[:MAXIMUM_TRACEBACK_LINES_TO_LOG]
|
||||
) # Limit to first 100 lines
|
||||
|
||||
# Get additional error details
|
||||
error_message = str(original_exception)
|
||||
explicit_message = getattr(original_exception, "message", None)
|
||||
error_message = (
|
||||
explicit_message
|
||||
if isinstance(explicit_message, str) and explicit_message
|
||||
else str(original_exception)
|
||||
)
|
||||
|
||||
return StandardLoggingPayloadErrorInformation(
|
||||
error_code=error_status,
|
||||
|
||||
@ -3673,6 +3673,7 @@ class ProxyException(Exception):
|
||||
provider_specific_fields: Optional[dict] = None,
|
||||
):
|
||||
self.message = str(message)
|
||||
super().__init__(self.message)
|
||||
self.type = type
|
||||
self.param = param
|
||||
self.openai_code = openai_code or code
|
||||
|
||||
@ -1271,7 +1271,7 @@ async def openai_exception_handler(request: Request, exc: ProxyException):
|
||||
headers = exc.headers
|
||||
error_dict = exc.to_dict()
|
||||
status_code = int(exc.code) if exc.code else status.HTTP_500_INTERNAL_SERVER_ERROR
|
||||
_close_dangling_otel_server_span(request, status_code)
|
||||
_close_dangling_otel_server_span(request, status_code, exc=exc)
|
||||
return JSONResponse(
|
||||
status_code=status_code,
|
||||
content={"error": error_dict},
|
||||
@ -1279,7 +1279,9 @@ async def openai_exception_handler(request: Request, exc: ProxyException):
|
||||
)
|
||||
|
||||
|
||||
def _close_dangling_otel_server_span(request: Request, status_code: int) -> None:
|
||||
def _close_dangling_otel_server_span(
|
||||
request: Request, status_code: int, exc: Optional[Exception] = None
|
||||
) -> None:
|
||||
parent_otel_span = getattr(request.state, "parent_otel_span", None)
|
||||
if parent_otel_span is None:
|
||||
return
|
||||
@ -1302,6 +1304,10 @@ def _close_dangling_otel_server_span(request: Request, status_code: int) -> None
|
||||
open_telemetry_logger.set_response_status_code_attribute(
|
||||
parent_otel_span, status_code
|
||||
)
|
||||
if status_code >= 400:
|
||||
open_telemetry_logger.record_error_attributes_on_span(
|
||||
parent_otel_span, exc, status_code
|
||||
)
|
||||
parent_otel_span.set_status(
|
||||
Status(StatusCode.ERROR if status_code >= 400 else StatusCode.OK)
|
||||
)
|
||||
@ -1318,7 +1324,7 @@ def _close_dangling_otel_server_span(request: Request, status_code: int) -> None
|
||||
async def otel_request_validation_exception_handler(
|
||||
request: Request, exc: RequestValidationError
|
||||
):
|
||||
_close_dangling_otel_server_span(request, 422)
|
||||
_close_dangling_otel_server_span(request, 422, exc=exc)
|
||||
return JSONResponse(
|
||||
status_code=422,
|
||||
content={"detail": jsonable_encoder(exc.errors())},
|
||||
@ -1332,7 +1338,7 @@ async def otel_unhandled_exception_handler(request: Request, exc: Exception):
|
||||
verbose_proxy_logger.exception(
|
||||
"Unhandled exception in request: %s", type(exc).__name__
|
||||
)
|
||||
_close_dangling_otel_server_span(request, 500)
|
||||
_close_dangling_otel_server_span(request, 500, exc=exc)
|
||||
return JSONResponse(
|
||||
status_code=500,
|
||||
content={
|
||||
|
||||
@ -18,7 +18,9 @@ from litellm.proxy.proxy_server import (
|
||||
otel_unhandled_exception_handler,
|
||||
)
|
||||
|
||||
from ._helpers import assert_server_span_attrs
|
||||
from litellm.integrations._types.open_inference import ErrorAttributes
|
||||
|
||||
from ._helpers import assert_server_span_attrs, get_server_span
|
||||
|
||||
|
||||
def _fake_request(parent_otel_span=None):
|
||||
@ -92,6 +94,33 @@ def test_exception_handler_closes_span(
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("path", ["/team/list", "/organization/list"])
def test_openai_exception_handler_stamps_structured_error_on_span(
    wired_otel, server_span_factory, path
):
    """A ProxyException 401 must leave full error details on the SERVER span.

    Covers an invalid/expired key on a management endpoint: ``error.type``,
    ``error.code`` AND ``error.message`` must all be present. Pre-fix,
    ProxyException stringified to an empty string, so ``error.message`` was
    dropped and the span showed an error with no message.
    """
    msg = "Authentication Error, Invalid proxy server token passed."
    request = _fake_request(parent_otel_span=server_span_factory(path))
    exc = ProxyException(message=msg, type="auth_error", param="key", code=401)

    response = asyncio.run(openai_exception_handler(request, exc))
    assert response.status_code == 401

    # The generic SERVER-span checks (status and URL path) come first ...
    assert_server_span_attrs(
        wired_otel,
        expected_status=401,
        expected_url_path=path,
        where=f"openai_exception_handler ({path})",
    )

    # ... then the structured error attributes this test is about.
    attrs = get_server_span(wired_otel).attributes
    assert attrs.get(ErrorAttributes.ERROR_MESSAGE) == msg
    assert attrs.get(ErrorAttributes.ERROR_TYPE) == "ProxyException"
    assert attrs.get(ErrorAttributes.ERROR_CODE) == "401"
|
||||
|
||||
|
||||
def test_unhandled_handler_reraises_known_exceptions(wired_otel, server_span_factory):
|
||||
"""ProxyException / HTTPException / RequestValidationError have dedicated handlers."""
|
||||
request = _fake_request(parent_otel_span=server_span_factory("/key/generate"))
|
||||
|
||||
@ -3078,3 +3078,39 @@ class TestFirstApiCallStartTimeSetOnce:
|
||||
assert obj.model_call_details["api_call_start_time"] > first
|
||||
assert obj.model_call_details["first_api_call_start_time"] == first
|
||||
assert user_meta == {}
|
||||
|
||||
|
||||
def test_get_error_information_proxy_exception_preserves_message():
    """ProxyException's message and code must reach ``error_information``.

    ProxyException keeps its text in ``.message`` (``str()`` was empty
    pre-fix), so error_information must still surface the message and code.
    """
    from litellm.litellm_core_utils.litellm_logging import StandardLoggingPayloadSetup
    from litellm.proxy._types import ProxyException

    msg = "Authentication Error, Invalid proxy server token passed."
    exc = ProxyException(message=msg, type="auth_error", param="key", code=401)

    info = StandardLoggingPayloadSetup.get_error_information(original_exception=exc)

    assert info["error_message"] == msg
    assert info["error_class"] == "ProxyException"
    assert info["error_code"] == "401"
|
||||
|
||||
|
||||
def test_get_error_information_prefers_message_attribute_over_empty_str():
    """A populated ``.message`` must win over an empty ``__str__``.

    Guards classes that store the text on ``.message`` without forwarding it
    to ``Exception.__init__``.
    """
    from litellm.litellm_core_utils.litellm_logging import StandardLoggingPayloadSetup

    class _SilentExc(Exception):
        # Deliberately skips ``super().__init__`` so that the exception
        # carries no args and only ``.message`` holds the failure text.
        def __init__(self):
            self.message = "real failure detail"
            self.code = 401

        def __str__(self):
            return ""

    info = StandardLoggingPayloadSetup.get_error_information(
        original_exception=_SilentExc()
    )

    assert info["error_message"] == "real failure detail"
    assert info["error_code"] == "401"
|
||||
|
||||
@ -87,3 +87,22 @@ def test_user_api_key_auth_hashes_authorization_header_form_of_key():
|
||||
assert from_header.api_key == baseline.api_key
|
||||
assert from_header.token == baseline.token
|
||||
assert not from_header.api_key.lower().startswith("bearer")
|
||||
|
||||
|
||||
def test_proxy_exception_str_returns_message():
    """ProxyException must stringify to its message.

    OTEL's ``span.record_exception`` and ``str(exc)``-based logging read the
    string form, which was empty pre-fix. The OpenAI-mapped fields must stay
    intact.
    """
    from litellm.proxy._types import ProxyException

    msg = "Authentication Error, Invalid proxy server token passed."
    exc = ProxyException(message=msg, type="auth_error", param="key", code=401)

    # Both the string form and the attribute expose the same text.
    assert str(exc) == msg
    assert exc.message == msg

    # The serialized payload is unchanged; note the code is a string there.
    assert exc.to_dict() == {
        "message": msg,
        "type": "auth_error",
        "param": "key",
        "code": "401",
    }
|
||||
|
||||
Loading…
Reference in New Issue
Block a user