Merge pull request #26899 from BerriAI/litellm_suppress-spend-log-tracebacks-2208
feat(spend-logs): opt-in suppression of stack traces in spend-tracking error logs
This commit is contained in:
commit
790d8bbe1a
@ -60,6 +60,7 @@ from litellm.proxy.db.db_transaction_queue.tool_discovery_queue import (
|
|||||||
ToolDiscoveryQueue,
|
ToolDiscoveryQueue,
|
||||||
)
|
)
|
||||||
from litellm.proxy.route_llm_request import ROUTE_ENDPOINT_MAPPING
|
from litellm.proxy.route_llm_request import ROUTE_ENDPOINT_MAPPING
|
||||||
|
from litellm.proxy.spend_tracking.spend_log_error_logger import spend_log_error
|
||||||
|
|
||||||
if TYPE_CHECKING:
|
if TYPE_CHECKING:
|
||||||
from litellm.proxy.utils import PrismaClient, ProxyLogging
|
from litellm.proxy.utils import PrismaClient, ProxyLogging
|
||||||
@ -192,17 +193,16 @@ class DBSpendUpdateWriter:
|
|||||||
|
|
||||||
verbose_proxy_logger.debug("Runs spend update on all tables")
|
verbose_proxy_logger.debug("Runs spend update on all tables")
|
||||||
except Exception:
|
except Exception:
|
||||||
verbose_proxy_logger.error(
|
spend_log_error(
|
||||||
"Spend tracking - update_database failed. Spend log insertion or daily transaction enqueue "
|
"Spend tracking - update_database failed. Spend log insertion or daily transaction enqueue "
|
||||||
"may not have completed for this request. "
|
"may not have completed for this request. "
|
||||||
"response_cost=%s, token=%s, user_id=%s, team_id=%s, org_id=%s, end_user_id=%s - %s",
|
"response_cost=%s, token=%s, user_id=%s, team_id=%s, org_id=%s, end_user_id=%s",
|
||||||
response_cost,
|
response_cost,
|
||||||
token,
|
token,
|
||||||
user_id,
|
user_id,
|
||||||
team_id,
|
team_id,
|
||||||
org_id,
|
org_id,
|
||||||
end_user_id,
|
end_user_id,
|
||||||
traceback.format_exc(),
|
|
||||||
)
|
)
|
||||||
|
|
||||||
def _enqueue_tool_registry_upsert(
|
def _enqueue_tool_registry_upsert(
|
||||||
@ -491,9 +491,7 @@ class DBSpendUpdateWriter:
|
|||||||
)
|
)
|
||||||
)
|
)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
verbose_proxy_logger.exception(
|
spend_log_error("Update Key DB Call failed to execute - %s", str(e), exc=e)
|
||||||
f"Update Key DB Call failed to execute - {str(e)}"
|
|
||||||
)
|
|
||||||
raise e
|
raise e
|
||||||
|
|
||||||
async def _update_user_db(
|
async def _update_user_db(
|
||||||
@ -540,14 +538,14 @@ class DBSpendUpdateWriter:
|
|||||||
)
|
)
|
||||||
)
|
)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
verbose_proxy_logger.error(
|
spend_log_error(
|
||||||
"Spend tracking - failed to enqueue user spend update. "
|
"Spend tracking - failed to enqueue user spend update. "
|
||||||
"user_id=%s, end_user_id=%s, response_cost=%s - %s\n%s",
|
"user_id=%s, end_user_id=%s, response_cost=%s - %s",
|
||||||
user_id,
|
user_id,
|
||||||
end_user_id,
|
end_user_id,
|
||||||
response_cost,
|
response_cost,
|
||||||
str(e),
|
str(e),
|
||||||
traceback.format_exc(),
|
exc=e,
|
||||||
)
|
)
|
||||||
|
|
||||||
async def _update_team_db(
|
async def _update_team_db(
|
||||||
@ -585,23 +583,23 @@ class DBSpendUpdateWriter:
|
|||||||
)
|
)
|
||||||
)
|
)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
verbose_proxy_logger.error(
|
spend_log_error(
|
||||||
"Spend tracking - failed to enqueue team member spend update. "
|
"Spend tracking - failed to enqueue team member spend update. "
|
||||||
"team_id=%s, user_id=%s, response_cost=%s - %s\n%s",
|
"team_id=%s, user_id=%s, response_cost=%s - %s",
|
||||||
team_id,
|
team_id,
|
||||||
user_id,
|
user_id,
|
||||||
response_cost,
|
response_cost,
|
||||||
str(e),
|
str(e),
|
||||||
traceback.format_exc(),
|
exc=e,
|
||||||
)
|
)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
verbose_proxy_logger.error(
|
spend_log_error(
|
||||||
"Spend tracking - failed to enqueue team spend update. "
|
"Spend tracking - failed to enqueue team spend update. "
|
||||||
"team_id=%s, response_cost=%s - %s\n%s",
|
"team_id=%s, response_cost=%s - %s",
|
||||||
team_id,
|
team_id,
|
||||||
response_cost,
|
response_cost,
|
||||||
str(e),
|
str(e),
|
||||||
traceback.format_exc(),
|
exc=e,
|
||||||
)
|
)
|
||||||
raise e
|
raise e
|
||||||
|
|
||||||
@ -626,13 +624,13 @@ class DBSpendUpdateWriter:
|
|||||||
)
|
)
|
||||||
)
|
)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
verbose_proxy_logger.error(
|
spend_log_error(
|
||||||
"Spend tracking - failed to enqueue org spend update. "
|
"Spend tracking - failed to enqueue org spend update. "
|
||||||
"org_id=%s, response_cost=%s - %s\n%s",
|
"org_id=%s, response_cost=%s - %s",
|
||||||
org_id,
|
org_id,
|
||||||
response_cost,
|
response_cost,
|
||||||
str(e),
|
str(e),
|
||||||
traceback.format_exc(),
|
exc=e,
|
||||||
)
|
)
|
||||||
raise e
|
raise e
|
||||||
|
|
||||||
@ -654,13 +652,13 @@ class DBSpendUpdateWriter:
|
|||||||
)
|
)
|
||||||
)
|
)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
verbose_proxy_logger.error(
|
spend_log_error(
|
||||||
"Spend tracking - failed to enqueue agent spend update. "
|
"Spend tracking - failed to enqueue agent spend update. "
|
||||||
"agent_id=%s, response_cost=%s - %s\n%s",
|
"agent_id=%s, response_cost=%s - %s",
|
||||||
agent_id,
|
agent_id,
|
||||||
response_cost,
|
response_cost,
|
||||||
str(e),
|
str(e),
|
||||||
traceback.format_exc(),
|
exc=e,
|
||||||
)
|
)
|
||||||
raise e
|
raise e
|
||||||
|
|
||||||
@ -707,13 +705,13 @@ class DBSpendUpdateWriter:
|
|||||||
)
|
)
|
||||||
)
|
)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
verbose_proxy_logger.error(
|
spend_log_error(
|
||||||
"Spend tracking - failed to enqueue tag spend update. "
|
"Spend tracking - failed to enqueue tag spend update. "
|
||||||
"request_tags=%s, response_cost=%s - %s\n%s",
|
"request_tags=%s, response_cost=%s - %s",
|
||||||
request_tags,
|
request_tags,
|
||||||
response_cost,
|
response_cost,
|
||||||
str(e),
|
str(e),
|
||||||
traceback.format_exc(),
|
exc=e,
|
||||||
)
|
)
|
||||||
raise e
|
raise e
|
||||||
|
|
||||||
@ -906,11 +904,11 @@ class DBSpendUpdateWriter:
|
|||||||
daily_spend_transactions=daily_agent_spend_update_transactions,
|
daily_spend_transactions=daily_agent_spend_update_transactions,
|
||||||
)
|
)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
verbose_proxy_logger.error(
|
spend_log_error(
|
||||||
"Spend tracking - failed to commit spend updates from Redis to DB. "
|
"Spend tracking - failed to commit spend updates from Redis to DB. "
|
||||||
"Data already popped from Redis may be lost. Error: %s\n%s",
|
"Data already popped from Redis may be lost. Error: %s",
|
||||||
str(e),
|
str(e),
|
||||||
traceback.format_exc(),
|
exc=e,
|
||||||
)
|
)
|
||||||
finally:
|
finally:
|
||||||
await self.pod_lock_manager.release_lock(
|
await self.pod_lock_manager.release_lock(
|
||||||
@ -1074,11 +1072,11 @@ class DBSpendUpdateWriter:
|
|||||||
daily_spend_transactions=daily_tag_spend_update_transactions,
|
daily_spend_transactions=daily_tag_spend_update_transactions,
|
||||||
)
|
)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
verbose_proxy_logger.error(
|
spend_log_error(
|
||||||
"Spend tracking - failed to commit daily tag spend updates from Redis to DB. "
|
"Spend tracking - failed to commit daily tag spend updates from Redis to DB. "
|
||||||
"Data already popped from Redis may be lost. Error: %s\n%s",
|
"Data already popped from Redis may be lost. Error: %s",
|
||||||
str(e),
|
str(e),
|
||||||
traceback.format_exc(),
|
exc=e,
|
||||||
)
|
)
|
||||||
finally:
|
finally:
|
||||||
await self.pod_lock_manager.release_lock(
|
await self.pod_lock_manager.release_lock(
|
||||||
@ -1736,11 +1734,15 @@ class DBSpendUpdateWriter:
|
|||||||
except Exception as batch_error:
|
except Exception as batch_error:
|
||||||
# Log detailed error information for debugging batch upsert failures
|
# Log detailed error information for debugging batch upsert failures
|
||||||
# This helps diagnose issues like unique constraint violations
|
# This helps diagnose issues like unique constraint violations
|
||||||
verbose_proxy_logger.exception(
|
spend_log_error(
|
||||||
f"Daily {entity_type} spend batch upsert failed. "
|
"Daily %s spend batch upsert failed. "
|
||||||
f"Table: {table_name}, Constraint: {unique_constraint_name}, "
|
"Table: %s, Constraint: %s, Batch size: %d, Error: %s",
|
||||||
f"Batch size: {len(transactions_to_process)}, "
|
entity_type,
|
||||||
f"Error: {str(batch_error)}"
|
table_name,
|
||||||
|
unique_constraint_name,
|
||||||
|
len(transactions_to_process),
|
||||||
|
str(batch_error),
|
||||||
|
exc=batch_error,
|
||||||
)
|
)
|
||||||
raise
|
raise
|
||||||
|
|
||||||
|
|||||||
@ -19,6 +19,10 @@ from litellm.proxy.auth.auth_checks import (
|
|||||||
)
|
)
|
||||||
from litellm.proxy.auth.route_checks import RouteChecks
|
from litellm.proxy.auth.route_checks import RouteChecks
|
||||||
from litellm.proxy.litellm_pre_call_utils import LiteLLMProxyRequestSetup
|
from litellm.proxy.litellm_pre_call_utils import LiteLLMProxyRequestSetup
|
||||||
|
from litellm.proxy.spend_tracking.spend_log_error_logger import (
|
||||||
|
should_suppress_spend_log_tracebacks,
|
||||||
|
spend_log_error,
|
||||||
|
)
|
||||||
from litellm.proxy.utils import ProxyUpdateSpend
|
from litellm.proxy.utils import ProxyUpdateSpend
|
||||||
from litellm.types.utils import StandardLoggingPayload
|
from litellm.types.utils import StandardLoggingPayload
|
||||||
from litellm.utils import get_end_user_id_for_cost_tracking
|
from litellm.utils import get_end_user_id_for_cost_tracking
|
||||||
@ -74,12 +78,18 @@ class _ProxyDBLogger(CustomLogger):
|
|||||||
)
|
)
|
||||||
_metadata["user_api_key"] = user_api_key_dict.api_key
|
_metadata["user_api_key"] = user_api_key_dict.api_key
|
||||||
_metadata["status"] = "failure"
|
_metadata["status"] = "failure"
|
||||||
_metadata["error_information"] = (
|
_error_information = StandardLoggingPayloadSetup.get_error_information(
|
||||||
StandardLoggingPayloadSetup.get_error_information(
|
original_exception=original_exception,
|
||||||
original_exception=original_exception,
|
traceback_str=traceback_str,
|
||||||
traceback_str=traceback_str,
|
|
||||||
)
|
|
||||||
)
|
)
|
||||||
|
if should_suppress_spend_log_tracebacks():
|
||||||
|
# Drop the traceback key entirely so the per-row Metadata pane in
|
||||||
|
# the UI (which renders the JSON blob verbatim) doesn't show a
|
||||||
|
# noisy ``"traceback": ""`` line. Downstream consumers all use
|
||||||
|
# ``.get("traceback")`` / truthy checks, and the TypedDict marks
|
||||||
|
# the field as optional, so omitting is type-safe.
|
||||||
|
_error_information.pop("traceback", None)
|
||||||
|
_metadata["error_information"] = _error_information
|
||||||
|
|
||||||
_metadata = await _ProxyDBLogger._enrich_failure_metadata_with_key_info(
|
_metadata = await _ProxyDBLogger._enrich_failure_metadata_with_key_info(
|
||||||
metadata=_metadata,
|
metadata=_metadata,
|
||||||
@ -302,9 +312,7 @@ class _ProxyDBLogger(CustomLogger):
|
|||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
verbose_proxy_logger.exception(
|
spend_log_error("Error in tracking cost callback - %s", str(e), exc=e)
|
||||||
"Error in tracking cost callback - %s", str(e)
|
|
||||||
)
|
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
async def _enrich_failure_metadata_with_key_info(metadata: dict) -> dict:
|
async def _enrich_failure_metadata_with_key_info(metadata: dict) -> dict:
|
||||||
|
|||||||
85
litellm/proxy/spend_tracking/spend_log_error_logger.py
Normal file
85
litellm/proxy/spend_tracking/spend_log_error_logger.py
Normal file
@ -0,0 +1,85 @@
|
|||||||
|
"""
|
||||||
|
Logging helpers for spend-tracking error paths.
|
||||||
|
|
||||||
|
Proxy operators have asked for a way to keep both their downstream log sinks
|
||||||
|
and the SpendLogs UI free of the stack traces that the spend-tracking
|
||||||
|
machinery emits when it hits 4xx/5xx or transient DB errors. The errors still
|
||||||
|
need to be logged (and still flow to Sentry via
|
||||||
|
``proxy_logging_obj.failure_handler``), but the multi-line stack traces
|
||||||
|
dominate log volume and clutter the per-row Metadata pane in the UI.
|
||||||
|
|
||||||
|
The opt-in is a single env var, ``LITELLM_SUPPRESS_SPEND_LOG_TRACEBACKS=true``,
|
||||||
|
gated by ``should_suppress_spend_log_tracebacks``. When it returns ``True``:
|
||||||
|
* ``spend_log_error`` drops the traceback from the console / structured log
|
||||||
|
record (this module), and
|
||||||
|
* the failure callback in ``proxy_track_cost_callback`` drops the
|
||||||
|
``error_information.traceback`` field from the SpendLogs row before it is
|
||||||
|
persisted, so the UI's per-row Metadata pane (which renders the metadata
|
||||||
|
JSON verbatim) stays clean. The key is omitted entirely rather than set
|
||||||
|
to ``""`` — ``StandardLoggingPayloadErrorInformation`` marks the field
|
||||||
|
optional and every downstream consumer uses ``.get("traceback")``.
|
||||||
|
|
||||||
|
At DEBUG the full traceback is always preserved so operators can still
|
||||||
|
troubleshoot. The UI suppression follows the same gate.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import logging
|
||||||
|
import os
|
||||||
|
from typing import Any, Optional
|
||||||
|
|
||||||
|
from litellm._logging import verbose_proxy_logger
|
||||||
|
from litellm.secret_managers.main import str_to_bool
|
||||||
|
|
||||||
|
SUPPRESS_SPEND_LOG_TRACEBACKS_ENV = "LITELLM_SUPPRESS_SPEND_LOG_TRACEBACKS"
|
||||||
|
|
||||||
|
|
||||||
|
def _is_suppression_env_enabled() -> bool:
|
||||||
|
"""Read the opt-in env var fresh each call so dynamic flips are honored.
|
||||||
|
|
||||||
|
Kept separate from ``should_suppress_spend_log_tracebacks`` so tests and
|
||||||
|
other call sites can introspect just the env-var state without also
|
||||||
|
consulting the live logger level.
|
||||||
|
"""
|
||||||
|
return str_to_bool(os.getenv(SUPPRESS_SPEND_LOG_TRACEBACKS_ENV)) is True
|
||||||
|
|
||||||
|
|
||||||
|
def should_suppress_spend_log_tracebacks() -> bool:
|
||||||
|
"""Return ``True`` when spend-log traceback suppression should apply.
|
||||||
|
|
||||||
|
Suppression only kicks in when both:
|
||||||
|
* the operator opted in via the env var, and
|
||||||
|
* the proxy logger is at INFO or above (i.e. not DEBUG) — at DEBUG we
|
||||||
|
still want full tracebacks for troubleshooting.
|
||||||
|
"""
|
||||||
|
if not _is_suppression_env_enabled():
|
||||||
|
return False
|
||||||
|
return not verbose_proxy_logger.isEnabledFor(logging.DEBUG)
|
||||||
|
|
||||||
|
|
||||||
|
def spend_log_error(
|
||||||
|
message: str,
|
||||||
|
*args: Any,
|
||||||
|
exc: Optional[BaseException] = None,
|
||||||
|
) -> None:
|
||||||
|
"""Log a spend-tracking error, with the traceback gated on the env var.
|
||||||
|
|
||||||
|
By default this behaves like ``verbose_proxy_logger.exception`` — the
|
||||||
|
active exception (or ``exc`` if supplied) is attached so the formatter
|
||||||
|
renders its traceback. When ``LITELLM_SUPPRESS_SPEND_LOG_TRACEBACKS`` is
|
||||||
|
truthy and the logger is at INFO or above, the traceback is dropped and
|
||||||
|
only ``message % args`` is emitted.
|
||||||
|
|
||||||
|
Sentry / ``proxy_logging_obj.failure_handler`` is NOT invoked here — call
|
||||||
|
sites still own the alerting path. This helper is purely about console /
|
||||||
|
structured-log output volume.
|
||||||
|
"""
|
||||||
|
if should_suppress_spend_log_tracebacks():
|
||||||
|
verbose_proxy_logger.error(message, *args)
|
||||||
|
return
|
||||||
|
|
||||||
|
if exc is not None:
|
||||||
|
verbose_proxy_logger.error(
|
||||||
|
message, *args, exc_info=(type(exc), exc, exc.__traceback__)
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
verbose_proxy_logger.error(message, *args, exc_info=True)
|
||||||
@ -25,6 +25,7 @@ from litellm.litellm_core_utils.core_helpers import (
|
|||||||
)
|
)
|
||||||
from litellm.litellm_core_utils.safe_json_dumps import safe_dumps
|
from litellm.litellm_core_utils.safe_json_dumps import safe_dumps
|
||||||
from litellm.proxy._types import SpendLogsMetadata, SpendLogsPayload
|
from litellm.proxy._types import SpendLogsMetadata, SpendLogsPayload
|
||||||
|
from litellm.proxy.spend_tracking.spend_log_error_logger import spend_log_error
|
||||||
from litellm.proxy.utils import PrismaClient, hash_token
|
from litellm.proxy.utils import PrismaClient, hash_token
|
||||||
from litellm.types.utils import (
|
from litellm.types.utils import (
|
||||||
CostBreakdown,
|
CostBreakdown,
|
||||||
@ -471,9 +472,7 @@ def get_logging_payload( # noqa: PLR0915
|
|||||||
|
|
||||||
return payload
|
return payload
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
verbose_proxy_logger.exception(
|
spend_log_error("Error creating spendlogs object - %s", str(e), exc=e)
|
||||||
"Error creating spendlogs object - {}".format(str(e))
|
|
||||||
)
|
|
||||||
raise e
|
raise e
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@ -36,6 +36,7 @@ from litellm.proxy._types import (
|
|||||||
SpendLogsMetadata,
|
SpendLogsMetadata,
|
||||||
SpendLogsPayload,
|
SpendLogsPayload,
|
||||||
)
|
)
|
||||||
|
from litellm.proxy.spend_tracking.spend_log_error_logger import spend_log_error
|
||||||
from litellm.types.guardrails import GuardrailEventHooks
|
from litellm.types.guardrails import GuardrailEventHooks
|
||||||
from litellm.types.utils import CallTypes, CallTypesLiteral
|
from litellm.types.utils import CallTypes, CallTypesLiteral
|
||||||
|
|
||||||
@ -5106,6 +5107,11 @@ async def update_daily_tag_spend(
|
|||||||
proxy_logging_obj=proxy_logging_obj,
|
proxy_logging_obj=proxy_logging_obj,
|
||||||
)
|
)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
# NOTE: keep this as a plain ``error`` (no traceback) to match the
|
||||||
|
# historical behavior of this site. ``spend_log_error`` would attach
|
||||||
|
# the active exception's traceback whenever the suppression env var
|
||||||
|
# is unset, which would be a regression for operators who never saw
|
||||||
|
# one here before.
|
||||||
verbose_proxy_logger.error(f"Error updating daily tag spend: {e}")
|
verbose_proxy_logger.error(f"Error updating daily tag spend: {e}")
|
||||||
|
|
||||||
|
|
||||||
@ -5238,9 +5244,7 @@ async def _monitor_spend_logs_queue(
|
|||||||
|
|
||||||
await asyncio.sleep(current_interval)
|
await asyncio.sleep(current_interval)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
verbose_proxy_logger.error(
|
spend_log_error("Error in spend logs queue monitor: %s", str(e), exc=e)
|
||||||
f"Error in spend logs queue monitor: {str(e)}\n{traceback.format_exc()}"
|
|
||||||
)
|
|
||||||
# Continue monitoring even if there's an error, with exponential backoff
|
# Continue monitoring even if there's an error, with exponential backoff
|
||||||
current_interval = min(current_interval * backoff_multiplier, max_backoff)
|
current_interval = min(current_interval * backoff_multiplier, max_backoff)
|
||||||
await asyncio.sleep(current_interval)
|
await asyncio.sleep(current_interval)
|
||||||
|
|||||||
@ -1133,8 +1133,10 @@ async def test_update_daily_spend_logs_detailed_error_on_batch_upsert_failure():
|
|||||||
mock_proxy_logging = MagicMock()
|
mock_proxy_logging = MagicMock()
|
||||||
mock_proxy_logging.failure_handler = AsyncMock()
|
mock_proxy_logging.failure_handler = AsyncMock()
|
||||||
|
|
||||||
# Mock the logger to capture exception calls
|
# Capture the ERROR-level log emitted by the spend_log_error helper.
|
||||||
with patch.object(verbose_proxy_logger, "exception") as mock_exception_logger:
|
# We assert against the formatted message instead of patching a specific
|
||||||
|
# logger method so the test stays valid as the helper evolves.
|
||||||
|
with patch.object(verbose_proxy_logger, "error") as mock_error_logger:
|
||||||
# Call the method and expect it to raise the exception
|
# Call the method and expect it to raise the exception
|
||||||
with pytest.raises(Exception, match="Unique constraint violation"):
|
with pytest.raises(Exception, match="Unique constraint violation"):
|
||||||
await DBSpendUpdateWriter._update_daily_spend(
|
await DBSpendUpdateWriter._update_daily_spend(
|
||||||
@ -1148,17 +1150,20 @@ async def test_update_daily_spend_logs_detailed_error_on_batch_upsert_failure():
|
|||||||
unique_constraint_name="user_id_date_api_key_model_custom_llm_provider_mcp_namespaced_tool_name_endpoint",
|
unique_constraint_name="user_id_date_api_key_model_custom_llm_provider_mcp_namespaced_tool_name_endpoint",
|
||||||
)
|
)
|
||||||
|
|
||||||
# Verify that exception was logged with detailed information
|
# Verify that the error was logged with detailed information.
|
||||||
assert mock_exception_logger.called
|
# spend_log_error formats the message via ``%`` interpolation, so
|
||||||
call_args = mock_exception_logger.call_args[0][0]
|
# render the call args before asserting on substrings.
|
||||||
assert "Daily user spend batch upsert failed" in call_args
|
assert mock_error_logger.called
|
||||||
assert "Table: litellm_dailyuserspend" in call_args
|
call = mock_error_logger.call_args
|
||||||
|
formatted = call.args[0] % call.args[1:]
|
||||||
|
assert "Daily user spend batch upsert failed" in formatted
|
||||||
|
assert "Table: litellm_dailyuserspend" in formatted
|
||||||
assert (
|
assert (
|
||||||
"Constraint: user_id_date_api_key_model_custom_llm_provider_mcp_namespaced_tool_name_endpoint"
|
"Constraint: user_id_date_api_key_model_custom_llm_provider_mcp_namespaced_tool_name_endpoint"
|
||||||
in call_args
|
in formatted
|
||||||
)
|
)
|
||||||
assert "Batch size: 1" in call_args
|
assert "Batch size: 1" in formatted
|
||||||
assert "Unique constraint violation" in call_args
|
assert "Unique constraint violation" in formatted
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
|
|||||||
@ -990,3 +990,80 @@ async def test_async_post_call_failure_hook_uses_actual_start_time():
|
|||||||
# Duration should be approximately 60 seconds, not 0
|
# Duration should be approximately 60 seconds, not 0
|
||||||
duration = (call_args["end_time"] - call_args["start_time"]).total_seconds()
|
duration = (call_args["end_time"] - call_args["start_time"]).total_seconds()
|
||||||
assert duration >= 55, f"Duration should be ~60s, got {duration}s"
|
assert duration >= 55, f"Duration should be ~60s, got {duration}s"
|
||||||
|
|
||||||
|
|
||||||
|
async def _invoke_failure_hook_with_raised_exception():
|
||||||
|
"""Run the failure hook with an exception that has a real ``__traceback__``.
|
||||||
|
|
||||||
|
Returns the metadata dict that was forwarded to ``update_database`` so the
|
||||||
|
caller can assert on its ``error_information`` payload.
|
||||||
|
"""
|
||||||
|
logger = _ProxyDBLogger()
|
||||||
|
user_api_key_dict = UserAPIKeyAuth(
|
||||||
|
api_key="test_api_key",
|
||||||
|
user_id="u",
|
||||||
|
team_id="t",
|
||||||
|
)
|
||||||
|
request_data = {
|
||||||
|
"model": "gpt-4",
|
||||||
|
"messages": [{"role": "user", "content": "hi"}],
|
||||||
|
"metadata": {},
|
||||||
|
"proxy_server_request": {},
|
||||||
|
}
|
||||||
|
|
||||||
|
try:
|
||||||
|
raise RuntimeError("boom-with-traceback")
|
||||||
|
except RuntimeError as exc:
|
||||||
|
original_exception = exc
|
||||||
|
|
||||||
|
with patch(
|
||||||
|
"litellm.proxy.db.db_spend_update_writer.DBSpendUpdateWriter.update_database",
|
||||||
|
new_callable=AsyncMock,
|
||||||
|
) as mock_update_database:
|
||||||
|
await logger.async_post_call_failure_hook(
|
||||||
|
request_data=request_data,
|
||||||
|
original_exception=original_exception,
|
||||||
|
user_api_key_dict=user_api_key_dict,
|
||||||
|
)
|
||||||
|
call_args = mock_update_database.call_args[1]
|
||||||
|
return call_args["kwargs"]["litellm_params"]["metadata"]
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_failure_hook_keeps_error_information_traceback_by_default(monkeypatch):
|
||||||
|
"""Without the opt-in env var, the SpendLogs row carries the full traceback."""
|
||||||
|
monkeypatch.delenv("LITELLM_SUPPRESS_SPEND_LOG_TRACEBACKS", raising=False)
|
||||||
|
|
||||||
|
metadata = await _invoke_failure_hook_with_raised_exception()
|
||||||
|
|
||||||
|
error_information = metadata["error_information"]
|
||||||
|
assert error_information["error_class"] == "RuntimeError"
|
||||||
|
assert error_information["error_message"] == "boom-with-traceback"
|
||||||
|
assert error_information["traceback"], "expected a non-empty traceback by default"
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
|
||||||
|
async def test_failure_hook_drops_error_information_traceback_when_env_set(
|
||||||
|
monkeypatch,
|
||||||
|
):
|
||||||
|
"""With the opt-in env var, the traceback key is omitted from the
|
||||||
|
SpendLogs row entirely so the per-row Metadata pane in the UI (which
|
||||||
|
renders ``error_information`` as a JSON blob) doesn't show a noisy empty
|
||||||
|
``"traceback": ""`` line. The other fields (error_class / error_message /
|
||||||
|
error_code) are preserved."""
|
||||||
|
import logging
|
||||||
|
|
||||||
|
from litellm._logging import verbose_proxy_logger
|
||||||
|
|
||||||
|
monkeypatch.setenv("LITELLM_SUPPRESS_SPEND_LOG_TRACEBACKS", "true")
|
||||||
|
original_level = verbose_proxy_logger.level
|
||||||
|
verbose_proxy_logger.setLevel(logging.INFO)
|
||||||
|
try:
|
||||||
|
metadata = await _invoke_failure_hook_with_raised_exception()
|
||||||
|
finally:
|
||||||
|
verbose_proxy_logger.setLevel(original_level)
|
||||||
|
|
||||||
|
error_information = metadata["error_information"]
|
||||||
|
assert "traceback" not in error_information
|
||||||
|
assert error_information["error_class"] == "RuntimeError"
|
||||||
|
assert error_information["error_message"] == "boom-with-traceback"
|
||||||
|
|||||||
@ -0,0 +1,139 @@
|
|||||||
|
"""
|
||||||
|
Unit tests for ``litellm.proxy.spend_tracking.spend_log_error_logger``.
|
||||||
|
|
||||||
|
The helper exists to let proxy operators silence the multi-line stack traces
|
||||||
|
that the spend-tracking machinery normally emits on 4xx/5xx and DB errors.
|
||||||
|
These tests cover:
|
||||||
|
|
||||||
|
* the env-var gating behavior (opt-in, off by default),
|
||||||
|
* the interaction between the env var and the proxy log level (DEBUG always
|
||||||
|
keeps the traceback, INFO/WARNING honors the opt-in), and
|
||||||
|
* the fact that ``spend_log_error`` always emits an ERROR-level record so
|
||||||
|
operators can still see the failure summary.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import logging
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
from litellm._logging import verbose_proxy_logger
|
||||||
|
from litellm.proxy.spend_tracking.spend_log_error_logger import (
|
||||||
|
SUPPRESS_SPEND_LOG_TRACEBACKS_ENV,
|
||||||
|
should_suppress_spend_log_tracebacks,
|
||||||
|
spend_log_error,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
|
||||||
|
def reset_env_and_level(monkeypatch):
|
||||||
|
"""Restore both the env var and proxy logger level after each test."""
|
||||||
|
monkeypatch.delenv(SUPPRESS_SPEND_LOG_TRACEBACKS_ENV, raising=False)
|
||||||
|
original_level = verbose_proxy_logger.level
|
||||||
|
yield monkeypatch
|
||||||
|
verbose_proxy_logger.setLevel(original_level)
|
||||||
|
|
||||||
|
|
||||||
|
def test_should_suppress_default_is_false(reset_env_and_level):
|
||||||
|
"""With no env var set, suppression is off so existing operators see no change."""
|
||||||
|
verbose_proxy_logger.setLevel(logging.INFO)
|
||||||
|
assert should_suppress_spend_log_tracebacks() is False
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.parametrize("value", ["true", "True", "TRUE"])
|
||||||
|
def test_should_suppress_when_env_true_at_info(reset_env_and_level, value):
|
||||||
|
reset_env_and_level.setenv(SUPPRESS_SPEND_LOG_TRACEBACKS_ENV, value)
|
||||||
|
verbose_proxy_logger.setLevel(logging.INFO)
|
||||||
|
assert should_suppress_spend_log_tracebacks() is True
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.parametrize("value", ["false", "False", "no", "0", "", "garbage"])
|
||||||
|
def test_should_not_suppress_when_env_falsy(reset_env_and_level, value):
|
||||||
|
if value == "":
|
||||||
|
# ``""`` would be ambiguous; ensure the var is genuinely unset.
|
||||||
|
reset_env_and_level.delenv(SUPPRESS_SPEND_LOG_TRACEBACKS_ENV, raising=False)
|
||||||
|
else:
|
||||||
|
reset_env_and_level.setenv(SUPPRESS_SPEND_LOG_TRACEBACKS_ENV, value)
|
||||||
|
verbose_proxy_logger.setLevel(logging.INFO)
|
||||||
|
assert should_suppress_spend_log_tracebacks() is False
|
||||||
|
|
||||||
|
|
||||||
|
def test_debug_level_overrides_suppression(reset_env_and_level):
|
||||||
|
"""DEBUG always shows the traceback even when the env var is set."""
|
||||||
|
reset_env_and_level.setenv(SUPPRESS_SPEND_LOG_TRACEBACKS_ENV, "true")
|
||||||
|
verbose_proxy_logger.setLevel(logging.DEBUG)
|
||||||
|
assert should_suppress_spend_log_tracebacks() is False
|
||||||
|
|
||||||
|
|
||||||
|
def test_spend_log_error_includes_traceback_by_default(reset_env_and_level, caplog):
|
||||||
|
"""Default behavior: ERROR record carries exc_info so the formatter renders it."""
|
||||||
|
verbose_proxy_logger.setLevel(logging.INFO)
|
||||||
|
caplog.set_level(logging.ERROR, logger=verbose_proxy_logger.name)
|
||||||
|
|
||||||
|
try:
|
||||||
|
raise ValueError("boom")
|
||||||
|
except ValueError as e:
|
||||||
|
spend_log_error("update failed: %s", str(e), exc=e)
|
||||||
|
|
||||||
|
assert len(caplog.records) == 1
|
||||||
|
record = caplog.records[0]
|
||||||
|
assert record.levelno == logging.ERROR
|
||||||
|
assert "update failed: boom" in record.getMessage()
|
||||||
|
assert record.exc_info is not None
|
||||||
|
assert record.exc_info[0] is ValueError
|
||||||
|
|
||||||
|
|
||||||
|
def test_spend_log_error_drops_traceback_when_env_set(reset_env_and_level, caplog):
|
||||||
|
"""Opt-in path: ERROR record still emitted, but exc_info is stripped."""
|
||||||
|
reset_env_and_level.setenv(SUPPRESS_SPEND_LOG_TRACEBACKS_ENV, "true")
|
||||||
|
verbose_proxy_logger.setLevel(logging.INFO)
|
||||||
|
caplog.set_level(logging.ERROR, logger=verbose_proxy_logger.name)
|
||||||
|
|
||||||
|
try:
|
||||||
|
raise ValueError("boom")
|
||||||
|
except ValueError as e:
|
||||||
|
spend_log_error("update failed: %s", str(e), exc=e)
|
||||||
|
|
||||||
|
assert len(caplog.records) == 1
|
||||||
|
record = caplog.records[0]
|
||||||
|
assert record.levelno == logging.ERROR
|
||||||
|
assert "update failed: boom" in record.getMessage()
|
||||||
|
assert record.exc_info is None
|
||||||
|
|
||||||
|
|
||||||
|
def test_spend_log_error_keeps_traceback_at_debug_even_with_env(
|
||||||
|
reset_env_and_level, caplog
|
||||||
|
):
|
||||||
|
"""DEBUG operators always get tracebacks; the env var doesn't apply."""
|
||||||
|
reset_env_and_level.setenv(SUPPRESS_SPEND_LOG_TRACEBACKS_ENV, "true")
|
||||||
|
verbose_proxy_logger.setLevel(logging.DEBUG)
|
||||||
|
caplog.set_level(logging.DEBUG, logger=verbose_proxy_logger.name)
|
||||||
|
|
||||||
|
try:
|
||||||
|
raise RuntimeError("boom-at-debug")
|
||||||
|
except RuntimeError as e:
|
||||||
|
spend_log_error("update failed: %s", str(e), exc=e)
|
||||||
|
|
||||||
|
error_records = [r for r in caplog.records if r.levelno == logging.ERROR]
|
||||||
|
assert len(error_records) == 1
|
||||||
|
record = error_records[0]
|
||||||
|
assert record.exc_info is not None
|
||||||
|
assert record.exc_info[0] is RuntimeError
|
||||||
|
|
||||||
|
|
||||||
|
def test_spend_log_error_uses_active_exception_when_exc_omitted(
|
||||||
|
reset_env_and_level, caplog
|
||||||
|
):
|
||||||
|
"""When called inside an ``except`` block without ``exc=``, the active
|
||||||
|
exception's traceback should still be attached."""
|
||||||
|
verbose_proxy_logger.setLevel(logging.INFO)
|
||||||
|
caplog.set_level(logging.ERROR, logger=verbose_proxy_logger.name)
|
||||||
|
|
||||||
|
try:
|
||||||
|
raise KeyError("missing")
|
||||||
|
except KeyError:
|
||||||
|
spend_log_error("update failed without exc kwarg")
|
||||||
|
|
||||||
|
assert len(caplog.records) == 1
|
||||||
|
record = caplog.records[0]
|
||||||
|
assert record.exc_info is not None
|
||||||
|
assert record.exc_info[0] is KeyError
|
||||||
Loading…
Reference in New Issue
Block a user