Title: fix(proxy): resolve vector store file list credentials from team deployments (#29739)
* fix(proxy): resolve vector store file list credentials from team deployments
GET /v1/vector_stores/{id}/files now uses the same router credential routing as POST, including JWT team model hints and wildcard model selectors, so list requests no longer call OpenAI with Bearer None.
Co-authored-by: Cursor <cursoragent@cursor.com>
* fix(proxy): authorize model hints and fix credential routing for vector store file list
Resolves three review findings on the vector store file list path.
Authorize user-controlled model hints (?model= query param and the
x-litellm-model header) against the key's and team's allowed models via
can_key_call_model / _can_object_call_model before any deployment
credentials are resolved, closing a model access bypass where a normal
key could file-list using a restricted deployment's provider credentials.
Run the managed vector store registry resolution before the model routing
hint so the managed store sets the routing model first; the hint resolver
then selects credentials matching that model instead of a team fallback
deployment, avoiding a credential/model mismatch across deployments.
Skip team-fallback deployments whose provider cannot be determined instead
of treating them as OpenAI, so a deployment without an explicit
custom_llm_provider or "openai/" prefix no longer has its credentials
injected.
* fix(proxy): enforce vector store file model auth
Ensure vector store file listing routes authorize explicit and inferred model routing before resolving deployment credentials.
Co-authored-by: Cursor <cursoragent@cursor.com>
* fix(proxy): type guard vector store model hints
Keep vector store model hint authorization typed to string-only values so static checks pass.
Co-authored-by: Cursor <cursoragent@cursor.com>
---------
Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
parent
1fbb78d2a4
commit
fdade8a84e
@ -5,6 +5,7 @@ from fastapi.responses import ORJSONResponse
|
||||
|
||||
import litellm
|
||||
from litellm.proxy._types import UserAPIKeyAuth
|
||||
from litellm.proxy.auth.auth_checks import _can_object_call_model, can_key_call_model
|
||||
from litellm.proxy.auth.user_api_key_auth import user_api_key_auth
|
||||
from litellm.proxy.common_request_processing import ProxyBaseLLMRequestProcessing
|
||||
from litellm.proxy.common_utils.openai_endpoint_utils import (
|
||||
@ -191,6 +192,148 @@ def _replace_file_id_in_response(response, original_file_id: str):
|
||||
return response
|
||||
|
||||
|
||||
async def _authorize_model_routing_hint(
    *,
    model: str,
    llm_router: Optional["Router"],
    user_api_key_dict: Optional[UserAPIKeyAuth],
) -> None:
    """
    Check that the caller may use ``model`` as a routing hint.

    The key-level check (``can_key_call_model``) runs unless the key's
    ``models`` list contains ``"all-team-models"``. The team-level check
    (``_can_object_call_model``) runs whenever the key carries a non-empty
    ``team_models`` list. Nothing is caught here, so whatever either check
    raises propagates to the caller.

    Args:
        model: Model name or selector being requested.
        llm_router: Router handed through to both checks.
        user_api_key_dict: Auth object of the caller; ``None`` skips all checks.
    """
    if user_api_key_dict is None:
        return

    allowed_for_key = getattr(user_api_key_dict, "models", None)
    key_defers_to_team = (
        isinstance(allowed_for_key, list) and "all-team-models" in allowed_for_key
    )
    if not key_defers_to_team:
        await can_key_call_model(
            model=model,
            llm_model_list=None,
            valid_token=user_api_key_dict,
            llm_router=llm_router,
        )

    allowed_for_team = getattr(user_api_key_dict, "team_models", None)
    if isinstance(allowed_for_team, list) and allowed_for_team:
        _can_object_call_model(
            model=model,
            llm_router=llm_router,
            models=allowed_for_team,
            team_model_aliases=user_api_key_dict.team_model_aliases,
            team_id=user_api_key_dict.team_id,
            object_type="team",
        )
|
||||
|
||||
|
||||
async def _update_request_data_with_model_routing_hint(
    data: Dict,
    request: Request,
    llm_router: Optional["Router"] = None,
    user_api_key_dict: Optional[UserAPIKeyAuth] = None,
) -> Dict:
    """
    Fill ``data`` with deployment credentials chosen from a model hint.

    Resolution order:

    1. If ``data`` already has ``api_key`` or ``api_base``, it is returned
       untouched.
    2. A model hint is taken from ``data["model"]``, else the ``model`` query
       parameter, else the ``x-litellm-model`` header. The hint is
       access-checked only when it equals the caller-supplied value.
       Wildcard hints (containing ``*``) are looked up on the router directly;
       other hints go through ``handle_model_based_routing``. When credentials
       are found they are applied with ``prepare_data_with_credentials``.
    3. Otherwise the key's ``team_models`` are scanned for deployments whose
       provider is OpenAI. Exactly one match has its credentials applied; more
       than one is treated as ambiguous and ``data`` is returned unchanged.
       With no match and exactly one candidate name, that name is stored in
       ``data["model"]``.

    Args:
        data: Request payload; mutated in place and also returned.
        request: Incoming request; only ``query_params`` and ``headers`` are
            read here.
        llm_router: Router used to resolve deployment credentials.
        user_api_key_dict: Auth object of the caller.

    Returns:
        The same ``data`` dict.

    Raises:
        Whatever ``_authorize_model_routing_hint`` raises when the caller is
        not allowed to use the selected model.
    """
    # Credentials supplied upstream are never overridden.
    if data.get("api_key") is not None or data.get("api_base") is not None:
        return data

    user_controlled_model_hint = request.query_params.get(
        "model"
    ) or request.headers.get("x-litellm-model")
    model_hint = data.get("model") or user_controlled_model_hint
    # Only a hint that matches the caller-supplied value is access-checked
    # here; a different value already present in ``data`` is used as-is.
    should_authorize_model_hint = (
        isinstance(model_hint, str) and model_hint == user_controlled_model_hint
    )

    should_route = False
    credentials = None
    if isinstance(model_hint, str) and "*" in model_hint:
        # Wildcard selectors are resolved on the router directly.
        if llm_router is not None:
            if should_authorize_model_hint:
                await _authorize_model_routing_hint(
                    model=model_hint,
                    llm_router=llm_router,
                    user_api_key_dict=user_api_key_dict,
                )
            credentials = llm_router.get_deployment_credentials_with_provider(
                model_id=model_hint
            )
            should_route = credentials is not None
    else:
        if isinstance(model_hint, str) and should_authorize_model_hint:
            await _authorize_model_routing_hint(
                model=model_hint,
                llm_router=llm_router,
                user_api_key_dict=user_api_key_dict,
            )
        (
            should_route,
            _model_used,
            _original_file_id,
            credentials,
        ) = handle_model_based_routing(
            file_id="",
            request=request,
            llm_router=llm_router,
            data=data,
            check_file_id_encoding=False,
        )

    if should_route and credentials is not None:
        prepare_data_with_credentials(
            data=data,
            credentials=credentials,
        )
        return data

    # Team fallback needs both a router and an authenticated caller.
    if llm_router is None or user_api_key_dict is None:
        return data

    team_models = getattr(user_api_key_dict, "team_models", None) or []
    if not isinstance(team_models, list):
        return data

    placeholder_names = {
        "all-team-models",
        "all-proxy-models",
        "no-default-models",
    }
    # De-duplicate while keeping order: a model listed twice must not make a
    # single deployment look like two competing candidates below.
    model_names_to_check = list(
        dict.fromkeys(
            name
            for name in team_models
            if isinstance(name, str) and name not in placeholder_names
        )
    )

    openai_credentials = None
    for model_name in model_names_to_check:
        candidate = llm_router.get_deployment_credentials_with_provider(
            model_id=model_name
        )
        if candidate is None:
            continue

        provider = candidate.get("custom_llm_provider")
        deployment_model = candidate.get("model")
        # Without an explicit provider, fall back to the "<provider>/" prefix
        # of the deployment model; deployments with neither are skipped.
        if (
            provider is None
            and isinstance(deployment_model, str)
            and "/" in deployment_model
        ):
            provider = deployment_model.split("/", 1)[0]
        if provider != LlmProviders.OPENAI.value:
            continue

        await _authorize_model_routing_hint(
            model=model_name,
            llm_router=llm_router,
            user_api_key_dict=user_api_key_dict,
        )
        if openai_credentials is not None:
            # Second OpenAI deployment: ambiguous, so do not guess.
            return data
        openai_credentials = candidate

    if openai_credentials is not None:
        prepare_data_with_credentials(data=data, credentials=openai_credentials)
    elif len(model_names_to_check) == 1:
        # No credentials resolved, but a single candidate can still be handed
        # on as the routing model.
        await _authorize_model_routing_hint(
            model=model_names_to_check[0],
            llm_router=llm_router,
            user_api_key_dict=user_api_key_dict,
        )
        data["model"] = model_names_to_check[0]

    return data
|
||||
|
||||
|
||||
def _update_request_data_with_litellm_managed_vector_store_registry(
|
||||
data: Dict,
|
||||
vector_store_id: str,
|
||||
@ -488,6 +631,13 @@ async def vector_store_file_list(
|
||||
should_lookup_registry=False,
|
||||
)
|
||||
|
||||
data = await _update_request_data_with_model_routing_hint(
|
||||
data=data,
|
||||
request=request,
|
||||
llm_router=llm_router,
|
||||
user_api_key_dict=user_api_key_dict,
|
||||
)
|
||||
|
||||
provider_enum = await _resolve_provider(data=data, request=request)
|
||||
|
||||
_maybe_check_permissions(
|
||||
|
||||
@ -21,6 +21,9 @@ from litellm.proxy.vector_store_endpoints.endpoints import (
|
||||
_update_request_data_with_litellm_managed_vector_store_registry,
|
||||
index_create,
|
||||
)
|
||||
from litellm.proxy.vector_store_files_endpoints.endpoints import (
|
||||
_update_request_data_with_model_routing_hint,
|
||||
)
|
||||
from litellm.proxy.vector_store_endpoints.management_endpoints import (
|
||||
_check_vector_store_access,
|
||||
_resolve_embedding_config,
|
||||
@ -144,6 +147,309 @@ def test_router_vector_store_file_delete_passes_correct_args():
|
||||
assert call_kwargs["custom_llm_provider"] == "openai"
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_vector_store_file_list_resolves_credentials_from_model_query_param():
    """A ``model`` query parameter is resolved to that deployment's credentials."""
    router = MagicMock()
    router.get_deployment_credentials_with_provider.return_value = {
        "api_key": "sk-team-openai",
        "api_base": "https://api.openai.com/v1",
        "custom_llm_provider": "openai",
        "model": "openai/gpt-4o-mini",
    }

    http_request = MagicMock(spec=Request)
    http_request.headers = {}
    http_request.query_params = {"model": "team-openai"}

    payload = {
        "vector_store_id": "vs_123",
        "limit": "20",
    }

    result = await _update_request_data_with_model_routing_hint(
        data=payload,
        request=http_request,
        llm_router=router,
    )

    # Credentials are injected; the provider key itself is not copied over.
    assert result["api_key"] == "sk-team-openai"
    assert result["api_base"] == "https://api.openai.com/v1"
    assert result["model"] == "openai/gpt-4o-mini"
    assert "custom_llm_provider" not in result
    router.get_deployment_credentials_with_provider.assert_called_once_with(
        model_id="team-openai"
    )
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_vector_store_file_list_resolves_single_openai_team_deployment():
    """With no model hint, the team's only OpenAI deployment supplies credentials."""
    router = MagicMock()
    router.get_deployment_credentials_with_provider.return_value = {
        "api_key": "sk-team-openai",
        "api_base": "https://api.openai.com/v1",
        "custom_llm_provider": "openai",
        "model": "openai/gpt-4o-mini",
    }

    # No query parameter and no header: only the team fallback can apply.
    http_request = MagicMock(spec=Request)
    http_request.headers = {}
    http_request.query_params = {}

    caller = UserAPIKeyAuth(team_models=["team-openai"])

    result = await _update_request_data_with_model_routing_hint(
        data={"vector_store_id": "vs_123"},
        request=http_request,
        llm_router=router,
        user_api_key_dict=caller,
    )

    assert result["api_key"] == "sk-team-openai"
    assert result["api_base"] == "https://api.openai.com/v1"
    assert result["model"] == "openai/gpt-4o-mini"
    assert "custom_llm_provider" not in result
    router.get_deployment_credentials_with_provider.assert_called_once_with(
        model_id="team-openai"
    )
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_vector_store_file_list_wildcard_model_hint_falls_back_to_team_deployment():
    """A wildcard hint that resolves to nothing falls through to the team scan."""
    team_deployment_credentials = {
        "api_key": "sk-team-openai",
        "api_base": "https://api.openai.com/v1",
        "custom_llm_provider": "openai",
        "model": "openai/gpt-4o-mini",
    }

    router = MagicMock()
    router.model_group_alias = {}
    # Lookups in order: the wildcard hint, then each of the two team models.
    router.get_deployment_credentials_with_provider.side_effect = [
        None,
        None,
        team_deployment_credentials,
    ]

    http_request = MagicMock(spec=Request)
    http_request.headers = {}
    http_request.query_params = {"model": "openai/*"}

    caller = UserAPIKeyAuth(team_models=["openai/*", "team-openai"])

    result = await _update_request_data_with_model_routing_hint(
        data={"vector_store_id": "vs_123", "model": "openai/*"},
        request=http_request,
        llm_router=router,
        user_api_key_dict=caller,
    )

    assert result["api_key"] == "sk-team-openai"
    assert result["api_base"] == "https://api.openai.com/v1"
    assert result["model"] == "openai/gpt-4o-mini"
    assert "custom_llm_provider" not in result
    assert router.get_deployment_credentials_with_provider.call_count == 3
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_vector_store_file_list_authorizes_wildcard_query_param_before_credentials():
    """A wildcard hint the key may not use is rejected before any credential lookup."""
    from litellm.proxy.auth.auth_checks import ProxyException

    router = MagicMock()
    router.model_group_alias = {}

    http_request = MagicMock(spec=Request)
    http_request.headers = {}
    http_request.query_params = {"model": "openai/*"}

    # The team allows the wildcard, but the key itself is limited to another model.
    caller = UserAPIKeyAuth(
        models=["restricted-deployment"],
        team_models=["openai/*"],
    )

    with pytest.raises(ProxyException):
        await _update_request_data_with_model_routing_hint(
            data={"vector_store_id": "vs_123"},
            request=http_request,
            llm_router=router,
            user_api_key_dict=caller,
        )

    router.get_deployment_credentials_with_provider.assert_not_called()
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_vector_store_file_list_uses_single_team_model_for_router_routing():
    """With no credentials resolvable, the single real team model becomes ``model``."""
    router = MagicMock()
    router.get_model_access_groups.return_value = {}
    router.get_deployment_credentials_with_provider.return_value = None

    http_request = MagicMock(spec=Request)
    http_request.headers = {}
    http_request.query_params = {}

    # "all-proxy-models" is a placeholder, leaving "provider/*" as the only candidate.
    caller = UserAPIKeyAuth(
        team_id="team-123",
        team_models=["provider/*", "all-proxy-models"],
    )

    result = await _update_request_data_with_model_routing_hint(
        data={"vector_store_id": "vs_123"},
        request=http_request,
        llm_router=router,
        user_api_key_dict=caller,
    )

    assert result["model"] == "provider/*"
    assert "api_key" not in result
    assert "api_base" not in result
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_vector_store_file_list_authorizes_inferred_team_model():
    """A team model inferred by the fallback is still checked against the key."""
    from litellm.proxy.auth.auth_checks import ProxyException

    router = MagicMock()
    router.model_group_alias = {}
    router.get_deployment_credentials_with_provider.return_value = {
        "api_key": "sk-team-openai",
        "api_base": "https://api.openai.com/v1",
        "custom_llm_provider": "openai",
        "model": "openai/gpt-4o-mini",
    }

    http_request = MagicMock(spec=Request)
    http_request.headers = {}
    http_request.query_params = {}

    # The key's own model list does not include the team's deployment.
    caller = UserAPIKeyAuth(
        models=["restricted-deployment"],
        team_models=["team-openai"],
    )

    with pytest.raises(ProxyException):
        await _update_request_data_with_model_routing_hint(
            data={"vector_store_id": "vs_123"},
            request=http_request,
            llm_router=router,
            user_api_key_dict=caller,
        )
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_vector_store_file_list_does_not_guess_ambiguous_team_deployment():
    """Two OpenAI team deployments are ambiguous, so no credentials are injected."""
    first_deployment = {
        "api_key": "sk-team-openai-1",
        "custom_llm_provider": "openai",
        "model": "openai/gpt-4o-mini",
    }
    second_deployment = {
        "api_key": "sk-team-openai-2",
        "custom_llm_provider": "openai",
        "model": "openai/gpt-4.1-mini",
    }

    router = MagicMock()
    router.get_deployment_credentials_with_provider.side_effect = [
        first_deployment,
        second_deployment,
    ]

    http_request = MagicMock(spec=Request)
    http_request.headers = {}
    http_request.query_params = {}

    caller = UserAPIKeyAuth(team_models=["team-openai-1", "team-openai-2"])

    result = await _update_request_data_with_model_routing_hint(
        data={"vector_store_id": "vs_123"},
        request=http_request,
        llm_router=router,
        user_api_key_dict=caller,
    )

    assert "api_key" not in result
    assert "api_base" not in result
    assert router.get_deployment_credentials_with_provider.call_count == 2
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_vector_store_file_list_does_not_override_existing_credentials():
    """Credentials already present in the payload win over any model hint."""
    router = MagicMock()

    http_request = MagicMock(spec=Request)
    http_request.headers = {}
    http_request.query_params = {"model": "team-openai"}

    payload = {
        "vector_store_id": "vs_123",
        "api_key": "sk-explicit",
        "api_base": "https://example.com/v1",
    }

    result = await _update_request_data_with_model_routing_hint(
        data=payload,
        request=http_request,
        llm_router=router,
    )

    assert result["api_key"] == "sk-explicit"
    assert result["api_base"] == "https://example.com/v1"
    # The router is never consulted when credentials are already set.
    router.get_deployment_credentials_with_provider.assert_not_called()
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_vector_store_file_list_requires_explicit_openai_provider_for_team_fallback():
    """A team deployment with no determinable provider is not treated as OpenAI."""
    router = MagicMock()
    # No "custom_llm_provider" key and no "<provider>/" prefix on the model.
    router.get_deployment_credentials_with_provider.return_value = {
        "api_key": "sk-unknown-provider",
        "api_base": "https://example.com/v1",
        "model": "gpt-4o",
    }

    http_request = MagicMock(spec=Request)
    http_request.headers = {}
    http_request.query_params = {}

    caller = UserAPIKeyAuth(team_models=["team-deployment"])

    result = await _update_request_data_with_model_routing_hint(
        data={"vector_store_id": "vs_123"},
        request=http_request,
        llm_router=router,
        user_api_key_dict=caller,
    )

    assert "api_key" not in result
    assert "api_base" not in result
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_vector_store_file_list_authorizes_model_query_param_before_credentials():
    """A ``model`` query parameter outside the allowed models is rejected up front."""
    from litellm.proxy.auth.auth_checks import ProxyException

    router = MagicMock()
    router.model_group_alias = {}

    http_request = MagicMock(spec=Request)
    http_request.headers = {}
    http_request.query_params = {"model": "restricted-deployment"}

    caller = UserAPIKeyAuth(
        models=["allowed-deployment"],
        team_models=["allowed-deployment"],
    )

    with pytest.raises(ProxyException):
        await _update_request_data_with_model_routing_hint(
            data={"vector_store_id": "vs_123"},
            request=http_request,
            llm_router=router,
            user_api_key_dict=caller,
        )

    # Rejection must happen before any deployment credentials are looked up.
    router.get_deployment_credentials_with_provider.assert_not_called()
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_update_request_data_with_litellm_managed_vector_store_registry():
|
||||
"""
|
||||
|
||||
@ -106,6 +106,75 @@ async def test_vector_store_file_create_forces_path_id_over_body_id():
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_vector_store_file_list_resolves_managed_vector_store_before_team_fallback():
    """The managed store's own deployment is used, not the team fallback one."""
    import base64

    from litellm.proxy.vector_store_files_endpoints.endpoints import (
        vector_store_file_list,
    )

    seen_request_data = {}

    async def record_request_data(self, **kwargs):
        # Stand-in for the real processing step: remember what would be sent.
        seen_request_data.update(self.data)
        return {"ok": True}

    def credentials_for(model_id):
        return {
            "api_key": f"sk-{model_id}",
            "api_base": "https://api.openai.com/v1",
            "custom_llm_provider": "openai",
            "model": f"openai/{model_id}",
        }

    # Managed vector store id: URL-safe base64 of the descriptor, padding stripped.
    raw_vector_store_id = (
        "litellm_proxy:vector_store;"
        "unified_id,managed-vs;"
        "target_model_names,managed-deployment;"
        "provider_resource_id,vs_provider_native;"
        "model_id,managed-deployment"
    )
    encoded_descriptor = base64.urlsafe_b64encode(raw_vector_store_id.encode())
    vector_store_id = encoded_descriptor.decode().rstrip("=")

    request = _mock_request()
    request.method = "GET"
    request.query_params = {"limit": "10"}
    request.url.path = f"/v1/vector_stores/{vector_store_id}/files"

    llm_router = MagicMock()
    llm_router.get_deployment_credentials_with_provider.side_effect = credentials_for

    with (
        patch(
            "litellm.proxy.vector_store_files_endpoints.endpoints.assert_user_can_access_vector_store_id",
            new=AsyncMock(return_value=None),
        ),
        patch("litellm.proxy.proxy_server.llm_router", llm_router),
        patch(
            "litellm.proxy.vector_store_files_endpoints.endpoints.ProxyBaseLLMRequestProcessing.base_process_llm_request",
            new=record_request_data,
        ),
    ):
        response = await vector_store_file_list(
            vector_store_id=vector_store_id,
            request=request,
            fastapi_response=Response(),
            user_api_key_dict=UserAPIKeyAuth(team_models=["team-openai"]),
        )

    assert response == {"ok": True}
    assert seen_request_data["vector_store_id"] == "vs_provider_native"
    assert seen_request_data["api_key"] == "sk-managed-deployment"
    assert seen_request_data["model"] == "openai/managed-deployment"
    # Only the managed deployment is looked up; "team-openai" is never queried.
    llm_router.get_deployment_credentials_with_provider.assert_called_once_with(
        model_id="managed-deployment"
    )
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_vector_store_file_create_denies_other_team_path_store():
|
||||
from litellm.proxy.vector_store_files_endpoints.endpoints import (
|
||||
|
||||
Loading…
Reference in New Issue
Block a user