fix(chat-completions): decode unified file_id when model_file_id_mapping is unavailable (#27406)

* fix(chat-completions): decode unified file_id when model_file_id_mapping is unavailable
* fix(chat-completions): tolerate non-dict content items (e.g. token-ids from text_completion)
2026-05-07 22:17:04 +02:00 · 2026-05-07 22:17:04 +02:00 · 3b78a3a545
commit 3b78a3a545
parent b891a201f8
3 changed files with 165 additions and 11 deletions
--- a/litellm/litellm_core_utils/prompt_templates/common_utils.py
+++ b/litellm/litellm_core_utils/prompt_templates/common_utils.py
@ -436,12 +436,21 @@ def update_messages_with_model_file_ids(
    """
    Updates messages with model file ids.

+    For managed files (unified file IDs), uses model_file_id_mapping if it
+    resolves the id, otherwise decodes the base64-encoded unified file ID
+    and extracts the llm_output_file_id directly. Mirrors the Responses-API
+    sibling `update_responses_input_with_model_file_ids`.
+
    model_file_id_mapping: Dict[str, Dict[str, str]] = {
        "litellm_proxy/file_id": {
            "model_id": "provider_file_id"
        }
    }
    """
+    from litellm.proxy.openai_files_endpoints.common_utils import (
+        _is_base64_encoded_unified_file_id,
+        convert_b64_uid_to_unified_uid,
+    )

    for message in messages:
        if message.get("role") == "user":
@ -450,7 +459,13 @@ def update_messages_with_model_file_ids(
                if isinstance(content, str):
                    continue
                for c in content:
-                    if c["type"] == "file":
+                    if not isinstance(c, dict):
+                        # Content list items aren't always dicts. e.g.
+                        # text_completion forwards a token-ids list/list-of-
+                        # lists through this path. Skip non-dict items
+                        # instead of indexing into them.
+                        continue
+                    if c.get("type") == "file":
                        file_object = cast(ChatCompletionFileObject, c)
                        file_object_file_field = file_object.get("file")
                        if not isinstance(file_object_file_field, dict):
@ -468,9 +483,23 @@ def update_messages_with_model_file_ids(
                        if file_id:
                            provider_file_id = (
                                model_file_id_mapping.get(file_id, {}).get(model_id)
-                                or file_id
+                                if model_file_id_mapping
+                                else None
+                            )
+                            if (
+                                not provider_file_id
+                                and _is_base64_encoded_unified_file_id(file_id)
+                            ):
+                                unified_file_id = convert_b64_uid_to_unified_uid(
+                                    file_id
+                                )
+                                if "llm_output_file_id," in unified_file_id:
+                                    provider_file_id = unified_file_id.split(
+                                        "llm_output_file_id,"
+                                    )[1].split(";")[0]
+                            file_object_file_field["file_id"] = (
+                                provider_file_id or file_id
                            )
-                            file_object_file_field["file_id"] = provider_file_id
                        if format:
                            file_object_file_field["format"] = format
    return messages
--- a/litellm/main.py
+++ b/litellm/main.py
@ -1459,14 +1459,14 @@ def completion(  # type: ignore # noqa: PLR0915
            if eos_token:
                custom_prompt_dict[model]["eos_token"] = eos_token

-        if kwargs.get("model_file_id_mapping"):
-            messages = update_messages_with_model_file_ids(
-                messages=messages,
-                model_id=kwargs.get("model_info", {}).get("id", None),
-                model_file_id_mapping=cast(
-                    Dict[str, Dict[str, str]], kwargs.get("model_file_id_mapping")
-                ),
-            )
+        messages = update_messages_with_model_file_ids(
+            messages=messages,
+            model_id=kwargs.get("model_info", {}).get("id", None),
+            model_file_id_mapping=cast(
+                Dict[str, Dict[str, str]],
+                kwargs.get("model_file_id_mapping") or {},
+            ),
+        )

        provider_config: Optional[BaseConfig] = None
        if custom_llm_provider is not None and custom_llm_provider in [
--- a/tests/test_litellm/litellm_core_utils/prompt_templates/test_litellm_core_utils_prompt_templates_common_utils.py
+++ b/tests/test_litellm/litellm_core_utils/prompt_templates/test_litellm_core_utils_prompt_templates_common_utils.py
@ -367,3 +367,128 @@ def test_update_messages_with_model_file_ids_skips_non_openai_file_blocks():

    # Messages pass through unchanged when there is no `file` sub-dict to remap.
    assert updated == messages
+
+
+# Reusable fixture (decodes to: litellm_proxy:application/pdf;unified_id,...;
+# target_model_names,gpt-4o;llm_output_file_id,file-ECBPW7ML9g7XHdwGgUPZaM;
+# llm_output_file_model_id,...)
+UNIFIED_FILE_ID_B64 = (
+    "bGl0ZWxsbV9wcm94eTphcHBsaWNhdGlvbi9wZGY7dW5pZmllZF9pZCw2YzBiNTg5MC04OTE0"
+    "LTQ4ZTAtYjhmNC0wYWU1ZWQzYzE0YTU7dGFyZ2V0X21vZGVsX25hbWVzLGdwdC00bztsbG1f"
+    "b3V0cHV0X2ZpbGVfaWQsZmlsZS1FQ0JQVzdNTDlnN1hIZHdHZ1VQWmFNO2xsbV9vdXRwdXRf"
+    "ZmlsZV9tb2RlbF9pZCxlMjY0NTNmOWU3NmU3OTkzNjgwZDAwNjhkOThjMWY0Y2MyMDViYmFk"
+    "MDk2N2EzM2M2NjQ4OTM1NjhjYTc0M2My"
+)
+
+
+def test_update_messages_with_model_file_ids_decodes_unified_id_when_mapping_empty():
+    """When the mapping is empty (e.g. multi-replica cache miss), the function
+    must decode the base64-encoded unified file id and substitute the embedded
+    llm_output_file_id — mirroring the Responses-API sibling."""
+    messages = [
+        {
+            "role": "user",
+            "content": [
+                {"type": "text", "text": "What is in this recording?"},
+                {
+                    "type": "file",
+                    "file": {
+                        "file_id": UNIFIED_FILE_ID_B64,
+                        "format": "audio/wav",
+                    },
+                },
+            ],
+        }
+    ]
+
+    updated = update_messages_with_model_file_ids(messages, "any-model-id", {})
+
+    assert updated[0]["content"][1]["file"]["file_id"] == "file-ECBPW7ML9g7XHdwGgUPZaM"
+    # Customer-supplied format is preserved (this is the field whose absence
+    # the misleading error message used to complain about).
+    assert updated[0]["content"][1]["file"]["format"] == "audio/wav"
+
+
+def test_update_messages_with_model_file_ids_mapping_takes_precedence_over_decode():
+    """When both mapping and decode would resolve, the mapping must win
+    (preserves per-deployment routing precision)."""
+    mapping = {UNIFIED_FILE_ID_B64: {"model-A": "mapped-provider-file-id"}}
+    messages = [
+        {
+            "role": "user",
+            "content": [
+                {
+                    "type": "file",
+                    "file": {
+                        "file_id": UNIFIED_FILE_ID_B64,
+                        "format": "application/pdf",
+                    },
+                },
+            ],
+        }
+    ]
+
+    updated = update_messages_with_model_file_ids(messages, "model-A", mapping)
+
+    assert updated[0]["content"][0]["file"]["file_id"] == "mapped-provider-file-id"
+
+
+def test_update_messages_with_model_file_ids_non_unified_passes_through():
+    """A raw provider id (e.g. gs:// URI or a random string) must be left
+    untouched when the mapping doesn't resolve it. The decode fallback must
+    not corrupt non-unified ids."""
+    raw_id = "gs://my-bucket/uploads/abc-123.wav"
+    messages = [
+        {
+            "role": "user",
+            "content": [
+                {"type": "file", "file": {"file_id": raw_id, "format": "audio/wav"}},
+            ],
+        }
+    ]
+
+    updated = update_messages_with_model_file_ids(messages, "model-A", {})
+
+    assert updated[0]["content"][0]["file"]["file_id"] == raw_id
+
+
+def test_update_messages_with_model_file_ids_mapping_miss_falls_back_to_decode():
+    """A mapping that exists but doesn't contain this file_id should still
+    trigger the decode fallback — covers the case where the hook resolved
+    *some* ids but not this one."""
+    other_id = "some-other-file-id"
+    mapping = {other_id: {"model-A": "other-provider-id"}}
+    messages = [
+        {
+            "role": "user",
+            "content": [
+                {
+                    "type": "file",
+                    "file": {"file_id": UNIFIED_FILE_ID_B64, "format": "audio/wav"},
+                },
+            ],
+        }
+    ]
+
+    updated = update_messages_with_model_file_ids(messages, "model-A", mapping)
+
+    assert updated[0]["content"][0]["file"]["file_id"] == "file-ECBPW7ML9g7XHdwGgUPZaM"
+
+
+def test_update_messages_with_model_file_ids_tolerates_non_dict_content_items():
+    """Content list items aren't always dicts. text_completion forwards
+    token-ids (list of ints, or list of list of ints for batch) through
+    this path. The function must skip non-dict items instead of indexing
+    into them."""
+    messages_token_ids = [{"role": "user", "content": [15496, 995]}]
+    messages_token_ids_batch = [{"role": "user", "content": [[15496, 995], [9906, 0]]}]
+
+    # Both should pass through unchanged without raising.
+    assert (
+        update_messages_with_model_file_ids(messages_token_ids, "model-A", {})
+        == messages_token_ids
+    )
+    assert (
+        update_messages_with_model_file_ids(messages_token_ids_batch, "model-A", {})
+        == messages_token_ids_batch
+    )