From efaafbbd025c55657f0783012c827bf7693f00d5 Mon Sep 17 00:00:00 2001
From: milan-berri <milan@berri.ai>
Date: Tue, 2 Jun 2026 22:07:11 +0300
Subject: [PATCH] fix(proxy): strip NUL bytes from spend log payloads to
 prevent PostgreSQL 22P05 (#29515)

A raw NUL byte (\x00) in request/response content is serialized by json.dumps
into the \u0000 JSON escape. When update_spend_logs writes this to the
LiteLLM_SpendLogs jsonb columns, Postgres rejects the whole batch with
error 22P05 ("unsupported Unicode escape sequence ... cannot be converted to
text"), crashing the periodic update_spend job and dropping the spend-log batch.

Centralize stripping in safe_dumps (covers metadata/response paths and any
future caller) and route the messages, proxy_server_request, and request_tags
payloads through it instead of json.dumps. The response string branch, which
is used as-is rather than serialized, is stripped directly with the new
strip_null_bytes helper. Dict keys are stripped too.

Add regression tests for strip_null_bytes, safe_dumps, and the spend-log
messages, response, and request_tags payload builders.

Co-authored-by: Cursor <cursoragent@cursor.com>
---
 litellm/litellm_core_utils/safe_json_dumps.py | 14 +++-
 .../spend_tracking/spend_tracking_utils.py    | 12 ++--
 .../test_safe_json_dumps.py                   | 36 ++++++++++-
 .../test_spend_tracking_utils.py              | 64 +++++++++++++++++++
 4 files changed, 116 insertions(+), 10 deletions(-)

diff --git a/litellm/litellm_core_utils/safe_json_dumps.py b/litellm/litellm_core_utils/safe_json_dumps.py
index 051aa2f27a..154306d01b 100644
--- a/litellm/litellm_core_utils/safe_json_dumps.py
+++ b/litellm/litellm_core_utils/safe_json_dumps.py
@@ -6,10 +6,16 @@ from pydantic import BaseModel
 from litellm.constants import DEFAULT_MAX_RECURSE_DEPTH
 
 
+def strip_null_bytes(value: str) -> str:
+    """Strip NUL bytes, which PostgreSQL text/jsonb columns reject (error 22P05)."""
+    return value.replace("\x00", "")
+
+
 def safe_dumps(data: Any, max_depth: int = DEFAULT_MAX_RECURSE_DEPTH) -> str:
     """
     Recursively serialize data while detecting circular references.
     If a circular reference is detected then a marker string is returned.
+    NUL bytes are stripped from strings to prevent PostgreSQL 22P05 errors.
     """
 
     def _serialize(obj: Any, seen: set, depth: int) -> Any:
@@ -17,7 +23,9 @@ def safe_dumps(data: Any, max_depth: int = DEFAULT_MAX_RECURSE_DEPTH) -> str:
         if depth > max_depth:
             return "MaxDepthExceeded"
         # Base-case: if it is a primitive, simply return it.
-        if isinstance(obj, (str, int, float, bool, type(None))):
+        if isinstance(obj, str):
+            return strip_null_bytes(obj)
+        if isinstance(obj, (int, float, bool, type(None))):
             return obj
         # Check for circular reference.
         if id(obj) in seen:
@@ -28,7 +36,7 @@ def safe_dumps(data: Any, max_depth: int = DEFAULT_MAX_RECURSE_DEPTH) -> str:
             result = {}
             for k, v in obj.items():
                 if isinstance(k, (str)):
-                    result[k] = _serialize(v, seen, depth + 1)
+                    result[strip_null_bytes(k)] = _serialize(v, seen, depth + 1)
             seen.remove(id(obj))
             return result
         elif isinstance(obj, list):
@@ -51,7 +59,7 @@ def safe_dumps(data: Any, max_depth: int = DEFAULT_MAX_RECURSE_DEPTH) -> str:
         else:
             # Fall back to string conversion for non-serializable objects.
             try:
-                return str(obj)
+                return strip_null_bytes(str(obj))
             except Exception:
                 return "Unserializable Object"
 
diff --git a/litellm/proxy/spend_tracking/spend_tracking_utils.py b/litellm/proxy/spend_tracking/spend_tracking_utils.py
index e2881faca0..d215294fd0 100644
--- a/litellm/proxy/spend_tracking/spend_tracking_utils.py
+++ b/litellm/proxy/spend_tracking/spend_tracking_utils.py
@@ -24,7 +24,7 @@ from litellm.litellm_core_utils.core_helpers import (
     get_litellm_metadata_from_kwargs,
     reconstruct_model_name,
 )
-from litellm.litellm_core_utils.safe_json_dumps import safe_dumps
+from litellm.litellm_core_utils.safe_json_dumps import safe_dumps, strip_null_bytes
 from litellm.proxy._types import SpendLogsMetadata, SpendLogsPayload
 from litellm.proxy.spend_tracking.spend_log_error_logger import spend_log_error
 from litellm.proxy.utils import PrismaClient, hash_token
@@ -304,7 +304,7 @@ def get_logging_payload(  # noqa: PLR0915
     # BUG FIX: Don't overwrite api_key when standard_logging_payload is None
     # The api_key was already extracted from metadata (line 243) and hashed (lines 256-259)
     request_tags = (
-        json.dumps(metadata.get("tags", []))
+        safe_dumps(metadata.get("tags", []))
         if isinstance(metadata.get("tags", []), list)
         else "[]"
     )
@@ -312,7 +312,7 @@ def get_logging_payload(  # noqa: PLR0915
         standard_logging_payload is not None
         and standard_logging_payload.get("request_tags") is not None
     ):  # use 'tags' from standard logging payload instead
-        request_tags = json.dumps(standard_logging_payload["request_tags"])
+        request_tags = safe_dumps(standard_logging_payload["request_tags"])
 
     _model_id = metadata.get("model_info", {}).get("id", "")
     _model_group = metadata.get("model_group", "")
@@ -606,7 +606,7 @@ def _get_messages_for_spend_logs_payload(
                 messages = standard_logging_payload.get("messages")
                 if messages is not None:
                     try:
-                        return json.dumps(messages, default=str)
+                        return safe_dumps(messages)
                     except Exception:
                         return "{}"
     return "{}"
@@ -976,7 +976,7 @@ def _get_proxy_server_request_for_spend_logs_payload(
                     perform_redaction(model_call_details=_request_body, result=None)
 
             _request_body = _sanitize_request_body_for_spend_logs_payload(_request_body)
-            _request_body_json_str = json.dumps(_request_body, default=str)
+            _request_body_json_str = safe_dumps(_request_body)
             if LITELLM_TRUNCATED_PAYLOAD_FIELD in _request_body_json_str:
                 verbose_proxy_logger.info(
                     "Spend Log: request body was truncated before storing in DB. %s",
@@ -1059,7 +1059,7 @@ def _get_response_for_spend_logs_payload(
         if sanitized_response is None:
             return "{}"
         if isinstance(sanitized_response, str):
-            result_str = sanitized_response
+            result_str = strip_null_bytes(sanitized_response)
         else:
             result_str = safe_dumps(sanitized_response)
         if LITELLM_TRUNCATED_PAYLOAD_FIELD in result_str:
diff --git a/tests/test_litellm/litellm_core_utils/test_safe_json_dumps.py b/tests/test_litellm/litellm_core_utils/test_safe_json_dumps.py
index c71a229cca..74574370e4 100644
--- a/tests/test_litellm/litellm_core_utils/test_safe_json_dumps.py
+++ b/tests/test_litellm/litellm_core_utils/test_safe_json_dumps.py
@@ -8,7 +8,7 @@ sys.path.insert(
     0, os.path.abspath("../../..")
 )  # Adds the parent directory to the system path
 
-from litellm.litellm_core_utils.safe_json_dumps import safe_dumps
+from litellm.litellm_core_utils.safe_json_dumps import safe_dumps, strip_null_bytes
 
 
 def test_primitive_types():
@@ -140,6 +140,40 @@ def test_non_standard_dict_keys_complex():
         raise e
 
 
+def test_strip_null_bytes_helper():
+    assert strip_null_bytes("hello\x00world") == "helloworld"
+    assert strip_null_bytes("\x00\x00abc\x00") == "abc"
+    assert strip_null_bytes("no null here") == "no null here"
+
+
+def test_null_byte_stripped_from_string():
+    out = safe_dumps("hello\x00world")
+    assert "\\u0000" not in out
+    assert json.loads(out) == "helloworld"
+
+
+def test_null_byte_stripped_in_nested_structure():
+    data = {
+        "messages": [{"role": "user", "content": "bad\x00content"}],
+        "nested": {"k\x00ey": "v\x00alue"},
+    }
+    out = safe_dumps(data)
+    assert "\\u0000" not in out
+    result = json.loads(out)
+    assert result["messages"][0]["content"] == "badcontent"
+    assert result["nested"] == {"key": "value"}
+
+
+def test_null_byte_stripped_in_fallback_str():
+    class WithNullStr:
+        def __str__(self):
+            return "obj\x00repr"
+
+    out = safe_dumps({"obj": WithNullStr()})
+    assert "\\u0000" not in out
+    assert json.loads(out)["obj"] == "objrepr"
+
+
 def test_pydantic_base_model():
     from pydantic import BaseModel
 
diff --git a/tests/test_litellm/proxy/spend_tracking/test_spend_tracking_utils.py b/tests/test_litellm/proxy/spend_tracking/test_spend_tracking_utils.py
index 5ca058fc8d..0c7511589d 100644
--- a/tests/test_litellm/proxy/spend_tracking/test_spend_tracking_utils.py
+++ b/tests/test_litellm/proxy/spend_tracking/test_spend_tracking_utils.py
@@ -300,6 +300,25 @@ def test_get_messages_for_spend_logs_realtime_returns_messages(mock_should_store
     assert parsed[1]["content"] == "What is the weather today?"
 
 
+@patch(
+    "litellm.proxy.spend_tracking.spend_tracking_utils._should_store_prompts_and_responses_in_spend_logs"
+)
+def test_get_messages_for_spend_logs_strips_null_bytes(mock_should_store):
+    """Regression for PostgreSQL 22P05: NUL bytes must be stripped from messages."""
+    mock_should_store.return_value = True
+    payload = cast(
+        StandardLoggingPayload,
+        {
+            "call_type": "_arealtime",
+            "messages": [{"role": "user", "content": "hello\x00world"}],
+        },
+    )
+    result = _get_messages_for_spend_logs_payload(payload)
+    assert "\\u0000" not in result
+    parsed = json.loads(result)
+    assert parsed[0]["content"] == "helloworld"
+
+
 @patch(
     "litellm.proxy.spend_tracking.spend_tracking_utils._should_store_prompts_and_responses_in_spend_logs"
 )
@@ -370,6 +389,21 @@ def test_get_response_for_spend_logs_payload_truncates_large_base64(mock_should_
     assert parsed["data"][0]["other_field"] == "value"
 
 
+@patch(
+    "litellm.proxy.spend_tracking.spend_tracking_utils._should_store_prompts_and_responses_in_spend_logs"
+)
+def test_get_response_for_spend_logs_payload_strips_null_bytes(mock_should_store):
+    """Regression for PostgreSQL 22P05: NUL bytes must be stripped from response."""
+    mock_should_store.return_value = True
+    payload = cast(
+        StandardLoggingPayload,
+        {"response": {"content": "answer\x00here"}},
+    )
+    response_json = _get_response_for_spend_logs_payload(payload)
+    assert "\\u0000" not in response_json
+    assert json.loads(response_json)["content"] == "answerhere"
+
+
 @patch(
     "litellm.proxy.spend_tracking.spend_tracking_utils._should_store_prompts_and_responses_in_spend_logs"
 )
@@ -936,6 +970,36 @@ def test_get_logging_payload_includes_overhead_in_spend_logs_metadata():
     ), f"Expected overhead '{test_overhead_ms}', got '{metadata.get('litellm_overhead_time_ms')}'"
 
 
+@patch("litellm.proxy.proxy_server.master_key", None)
+@patch("litellm.proxy.proxy_server.general_settings", {})
+def test_get_logging_payload_strips_null_bytes_from_request_tags():
+    """Regression for PostgreSQL 22P05: NUL bytes must be stripped from request_tags."""
+    kwargs = {
+        "model": "gpt-3.5-turbo",
+        "litellm_params": {
+            "metadata": {
+                "user_api_key": "sk-test-key",
+                "tags": ["clean-tag", "bad\x00tag"],
+            }
+        },
+    }
+
+    start_time = datetime.datetime.now(timezone.utc)
+    end_time = datetime.datetime.now(timezone.utc)
+
+    payload = get_logging_payload(
+        kwargs=kwargs,
+        response_obj={},
+        start_time=start_time,
+        end_time=end_time,
+    )
+
+    request_tags = payload.get("request_tags")
+    assert request_tags is not None
+    assert "\\u0000" not in request_tags
+    assert json.loads(request_tags) == ["clean-tag", "badtag"]
+
+
 @patch("litellm.proxy.proxy_server.master_key", None)
 @patch("litellm.proxy.proxy_server.general_settings", {})
 def test_get_logging_payload_handles_missing_overhead_gracefully():