fix(chat-completions): decode unified file_id when model_file_id_mapping is unavailable (#27406)

* fix(chat-completions): decode unified file_id when model_file_id_mapping is unavailable

* fix(chat-completions): tolerate non-dict content items (e.g. token-ids from text_completion)
This commit is contained in:
michelligabriele 2026-05-07 22:17:04 +02:00 committed by GitHub
parent b891a201f8
commit 3b78a3a545
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 165 additions and 11 deletions

View File

@ -436,12 +436,21 @@ def update_messages_with_model_file_ids(
"""
Updates messages with model file ids.
For managed files (unified file IDs), uses model_file_id_mapping if it
resolves the id, otherwise decodes the base64-encoded unified file ID
and extracts the llm_output_file_id directly. Mirrors the Responses-API
sibling `update_responses_input_with_model_file_ids`.
model_file_id_mapping: Dict[str, Dict[str, str]] = {
"litellm_proxy/file_id": {
"model_id": "provider_file_id"
}
}
"""
from litellm.proxy.openai_files_endpoints.common_utils import (
_is_base64_encoded_unified_file_id,
convert_b64_uid_to_unified_uid,
)
for message in messages:
if message.get("role") == "user":
@ -450,7 +459,13 @@ def update_messages_with_model_file_ids(
if isinstance(content, str):
continue
for c in content:
if c["type"] == "file":
if not isinstance(c, dict):
# Content list items aren't always dicts. e.g.
# text_completion forwards a token-ids list/list-of-
# lists through this path. Skip non-dict items
# instead of indexing into them.
continue
if c.get("type") == "file":
file_object = cast(ChatCompletionFileObject, c)
file_object_file_field = file_object.get("file")
if not isinstance(file_object_file_field, dict):
@ -468,9 +483,23 @@ def update_messages_with_model_file_ids(
if file_id:
provider_file_id = (
model_file_id_mapping.get(file_id, {}).get(model_id)
or file_id
if model_file_id_mapping
else None
)
if (
not provider_file_id
and _is_base64_encoded_unified_file_id(file_id)
):
unified_file_id = convert_b64_uid_to_unified_uid(
file_id
)
if "llm_output_file_id," in unified_file_id:
provider_file_id = unified_file_id.split(
"llm_output_file_id,"
)[1].split(";")[0]
file_object_file_field["file_id"] = (
provider_file_id or file_id
)
file_object_file_field["file_id"] = provider_file_id
if format:
file_object_file_field["format"] = format
return messages

View File

@ -1459,14 +1459,14 @@ def completion( # type: ignore # noqa: PLR0915
if eos_token:
custom_prompt_dict[model]["eos_token"] = eos_token
if kwargs.get("model_file_id_mapping"):
messages = update_messages_with_model_file_ids(
messages=messages,
model_id=kwargs.get("model_info", {}).get("id", None),
model_file_id_mapping=cast(
Dict[str, Dict[str, str]], kwargs.get("model_file_id_mapping")
),
)
messages = update_messages_with_model_file_ids(
messages=messages,
model_id=kwargs.get("model_info", {}).get("id", None),
model_file_id_mapping=cast(
Dict[str, Dict[str, str]],
kwargs.get("model_file_id_mapping") or {},
),
)
provider_config: Optional[BaseConfig] = None
if custom_llm_provider is not None and custom_llm_provider in [

View File

@ -367,3 +367,128 @@ def test_update_messages_with_model_file_ids_skips_non_openai_file_blocks():
# Messages pass through unchanged when there is no `file` sub-dict to remap.
assert updated == messages
# Shared fixture: a base64-encoded unified file id. Once decoded it reads
#   litellm_proxy:application/pdf;unified_id,...;target_model_names,gpt-4o;
#   llm_output_file_id,file-ECBPW7ML9g7XHdwGgUPZaM;llm_output_file_model_id,...
# The decode-fallback tests below expect the llm_output_file_id segment
# ("file-ECBPW7ML9g7XHdwGgUPZaM") to be what ends up in the message.
UNIFIED_FILE_ID_B64 = "".join(
    [
        "bGl0ZWxsbV9wcm94eTphcHBsaWNhdGlvbi9wZGY7dW5pZmllZF9pZCw2YzBiNTg5MC04OTE0",
        "LTQ4ZTAtYjhmNC0wYWU1ZWQzYzE0YTU7dGFyZ2V0X21vZGVsX25hbWVzLGdwdC00bztsbG1f",
        "b3V0cHV0X2ZpbGVfaWQsZmlsZS1FQ0JQVzdNTDlnN1hIZHdHZ1VQWmFNO2xsbV9vdXRwdXRf",
        "ZmlsZV9tb2RlbF9pZCxlMjY0NTNmOWU3NmU3OTkzNjgwZDAwNjhkOThjMWY0Y2MyMDViYmFk",
        "MDk2N2EzM2M2NjQ4OTM1NjhjYTc0M2My",
    ]
)
def test_update_messages_with_model_file_ids_decodes_unified_id_when_mapping_empty():
    """An empty mapping must not prevent a unified file id from resolving.

    With nothing in the mapping to consult, the base64-encoded unified id is
    expected to be replaced by the llm_output_file_id embedded inside it,
    while the caller-supplied ``format`` is left untouched.
    """
    text_block = {"type": "text", "text": "What is in this recording?"}
    file_block = {
        "type": "file",
        "file": {
            "file_id": UNIFIED_FILE_ID_B64,
            "format": "audio/wav",
        },
    }
    conversation = [{"role": "user", "content": [text_block, file_block]}]

    result = update_messages_with_model_file_ids(conversation, "any-model-id", {})

    remapped_file = result[0]["content"][1]["file"]
    assert remapped_file["file_id"] == "file-ECBPW7ML9g7XHdwGgUPZaM"
    # The format supplied by the caller must survive the id rewrite.
    assert remapped_file["format"] == "audio/wav"
def test_update_messages_with_model_file_ids_mapping_takes_precedence_over_decode():
    """A mapping hit wins over decoding the unified id.

    The file id used here is decodable, but the mapping also has an entry for
    it under the requested model id; the mapped provider id is the one that
    must end up in the message.
    """
    id_mapping = {UNIFIED_FILE_ID_B64: {"model-A": "mapped-provider-file-id"}}
    file_block = {
        "type": "file",
        "file": {
            "file_id": UNIFIED_FILE_ID_B64,
            "format": "application/pdf",
        },
    }
    conversation = [{"role": "user", "content": [file_block]}]

    result = update_messages_with_model_file_ids(conversation, "model-A", id_mapping)

    resolved_id = result[0]["content"][0]["file"]["file_id"]
    assert resolved_id == "mapped-provider-file-id"
def test_update_messages_with_model_file_ids_non_unified_passes_through():
    """Ids that are not unified file ids are left exactly as supplied.

    A raw provider id (here a gs:// URI) that the mapping does not resolve
    must come back unmodified; the decode fallback must not rewrite it.
    """
    raw_id = "gs://my-bucket/uploads/abc-123.wav"
    file_block = {"type": "file", "file": {"file_id": raw_id, "format": "audio/wav"}}
    conversation = [{"role": "user", "content": [file_block]}]

    result = update_messages_with_model_file_ids(conversation, "model-A", {})

    resolved_id = result[0]["content"][0]["file"]["file_id"]
    assert resolved_id == raw_id
def test_update_messages_with_model_file_ids_mapping_miss_falls_back_to_decode():
    """A non-empty mapping that lacks this file id still triggers the decode.

    Covers the case where the mapping resolved *some* ids but not the one in
    the message: the unified id must then be decoded to its embedded
    llm_output_file_id.
    """
    unrelated_id = "some-other-file-id"
    id_mapping = {unrelated_id: {"model-A": "other-provider-id"}}
    file_block = {
        "type": "file",
        "file": {"file_id": UNIFIED_FILE_ID_B64, "format": "audio/wav"},
    }
    conversation = [{"role": "user", "content": [file_block]}]

    result = update_messages_with_model_file_ids(conversation, "model-A", id_mapping)

    resolved_id = result[0]["content"][0]["file"]["file_id"]
    assert resolved_id == "file-ECBPW7ML9g7XHdwGgUPZaM"
def test_update_messages_with_model_file_ids_tolerates_non_dict_content_items():
    """Non-dict content items are skipped and the messages stay unchanged.

    Content list items aren't always dicts: text_completion forwards token ids
    (a list of ints, or a list of lists of ints for a batch) through this
    path. The function must skip such items instead of indexing into them.

    The function under test returns the very list it was given, so comparing
    the result with the input object would compare an object with itself and
    could never detect a mutation. Each input is therefore snapshotted with
    ``copy.deepcopy`` before the call and the result is compared against
    that snapshot.
    """
    import copy

    messages_token_ids = [{"role": "user", "content": [15496, 995]}]
    messages_token_ids_batch = [{"role": "user", "content": [[15496, 995], [9906, 0]]}]

    expected_token_ids = copy.deepcopy(messages_token_ids)
    expected_token_ids_batch = copy.deepcopy(messages_token_ids_batch)

    # Both calls must complete without raising ...
    result_token_ids = update_messages_with_model_file_ids(
        messages_token_ids, "model-A", {}
    )
    result_token_ids_batch = update_messages_with_model_file_ids(
        messages_token_ids_batch, "model-A", {}
    )

    # ... and must leave the payloads exactly as they were before the call.
    assert result_token_ids == expected_token_ids
    assert result_token_ids_batch == expected_token_ids_batch