From fdade8a84ed2d5f06d18a558a2404aa84c1aa0e3 Mon Sep 17 00:00:00 2001 From: Shivam Rawat Date: Sat, 6 Jun 2026 12:36:05 -0700 Subject: [PATCH] Title: fix(proxy): resolve vector store file list credentials from team deployments (#29739) * fix(proxy): resolve vector store file list credentials from team deployments GET /v1/vector_stores/{id}/files now uses the same router credential routing as POST, including JWT team model hints and wildcard model selectors, so list requests no longer call OpenAI with Bearer None. Co-authored-by: Cursor * fix(proxy): authorize model hints and fix credential routing for vector store file list Resolves three review findings on the vector store file list path. Authorize user-controlled model hints (?model= query param and the x-litellm-model header) against the key's and team's allowed models via can_key_call_model / _can_object_call_model before any deployment credentials are resolved, closing a model access bypass where a normal key could file-list using a restricted deployment's provider credentials. Run the managed vector store registry resolution before the model routing hint so the managed store sets the routing model first; the hint resolver then selects credentials matching that model instead of a team fallback deployment, avoiding a credential/model mismatch across deployments. Skip team-fallback deployments whose provider cannot be determined instead of treating them as OpenAI, so a deployment without an explicit custom_llm_provider or "openai/" prefix no longer has its credentials injected. * fix(proxy): enforce vector store file model auth Ensure vector store file listing routes authorize explicit and inferred model routing before resolving deployment credentials. Co-authored-by: Cursor * fix(proxy): type guard vector store model hints Keep vector store model hint authorization typed to string-only values so static checks pass. Co-authored-by: Cursor --------- Co-authored-by: Cursor --- .../vector_store_files_endpoints/endpoints.py | 150 +++++++++ .../test_vector_store_endpoints.py | 306 ++++++++++++++++++ .../test_vector_store_tenant_guard.py | 69 ++++ 3 files changed, 525 insertions(+) diff --git a/litellm/proxy/vector_store_files_endpoints/endpoints.py b/litellm/proxy/vector_store_files_endpoints/endpoints.py index 346a847c5d..f6ceae3977 100644 --- a/litellm/proxy/vector_store_files_endpoints/endpoints.py +++ b/litellm/proxy/vector_store_files_endpoints/endpoints.py @@ -5,6 +5,7 @@ from fastapi.responses import ORJSONResponse import litellm from litellm.proxy._types import UserAPIKeyAuth +from litellm.proxy.auth.auth_checks import _can_object_call_model, can_key_call_model from litellm.proxy.auth.user_api_key_auth import user_api_key_auth from litellm.proxy.common_request_processing import ProxyBaseLLMRequestProcessing from litellm.proxy.common_utils.openai_endpoint_utils import ( @@ -191,6 +192,148 @@ def _replace_file_id_in_response(response, original_file_id: str): return response +async def _authorize_model_routing_hint( + *, + model: str, + llm_router: Optional["Router"], + user_api_key_dict: Optional[UserAPIKeyAuth], +) -> None: + if user_api_key_dict is None: + return + + key_models = getattr(user_api_key_dict, "models", None) + if not (isinstance(key_models, list) and "all-team-models" in key_models): + await can_key_call_model( + model=model, + llm_model_list=None, + valid_token=user_api_key_dict, + llm_router=llm_router, + ) + + team_models = getattr(user_api_key_dict, "team_models", None) + if isinstance(team_models, list) and len(team_models) > 0: + _can_object_call_model( + model=model, + llm_router=llm_router, + models=team_models, + team_model_aliases=user_api_key_dict.team_model_aliases, + team_id=user_api_key_dict.team_id, + object_type="team", + ) + + +async def _update_request_data_with_model_routing_hint( + data: Dict, + request: Request, + llm_router: Optional["Router"] = None, + user_api_key_dict: Optional[UserAPIKeyAuth] = None, +) -> Dict: + if data.get("api_key") is not None or data.get("api_base") is not None: + return data + + user_controlled_model_hint = request.query_params.get( + "model" + ) or request.headers.get("x-litellm-model") + model_hint = data.get("model") or user_controlled_model_hint + should_authorize_model_hint = ( + isinstance(model_hint, str) and model_hint == user_controlled_model_hint + ) + + should_route = False + credentials = None + if isinstance(model_hint, str) and "*" in model_hint: + if llm_router is not None: + if should_authorize_model_hint: + await _authorize_model_routing_hint( + model=model_hint, + llm_router=llm_router, + user_api_key_dict=user_api_key_dict, + ) + credentials = llm_router.get_deployment_credentials_with_provider( + model_id=model_hint + ) + should_route = credentials is not None + else: + if isinstance(model_hint, str) and should_authorize_model_hint: + await _authorize_model_routing_hint( + model=model_hint, + llm_router=llm_router, + user_api_key_dict=user_api_key_dict, + ) + ( + should_route, + _model_used, + _original_file_id, + credentials, + ) = handle_model_based_routing( + file_id="", + request=request, + llm_router=llm_router, + data=data, + check_file_id_encoding=False, + ) + + if should_route and credentials is not None: + prepare_data_with_credentials( + data=data, + credentials=credentials, + ) + return data + + if llm_router is None or user_api_key_dict is None: + return data + + team_models = getattr(user_api_key_dict, "team_models", None) or [] + if not isinstance(team_models, list): + return data + + model_names_to_check = [] + for model_name in team_models: + if not isinstance(model_name, str) or model_name in { + "all-team-models", + "all-proxy-models", + "no-default-models", + }: + continue + model_names_to_check.append(model_name) + + openai_credentials = None + for model_name in model_names_to_check: + credentials = llm_router.get_deployment_credentials_with_provider( + model_id=model_name + ) + if credentials is None: + continue + + provider = credentials.get("custom_llm_provider") + model = credentials.get("model") + if provider is None and isinstance(model, str) and "/" in model: + provider = model.split("/", 1)[0] + if provider != LlmProviders.OPENAI.value: + continue + + await _authorize_model_routing_hint( + model=model_name, + llm_router=llm_router, + user_api_key_dict=user_api_key_dict, + ) + if openai_credentials is not None: + return data + openai_credentials = credentials + + if openai_credentials is not None: + prepare_data_with_credentials(data=data, credentials=openai_credentials) + elif len(model_names_to_check) == 1: + await _authorize_model_routing_hint( + model=model_names_to_check[0], + llm_router=llm_router, + user_api_key_dict=user_api_key_dict, + ) + data["model"] = model_names_to_check[0] + + return data + + def _update_request_data_with_litellm_managed_vector_store_registry( data: Dict, vector_store_id: str, @@ -488,6 +631,13 @@ async def vector_store_file_list( should_lookup_registry=False, ) + data = await _update_request_data_with_model_routing_hint( + data=data, + request=request, + llm_router=llm_router, + user_api_key_dict=user_api_key_dict, + ) + provider_enum = await _resolve_provider(data=data, request=request) _maybe_check_permissions( diff --git a/tests/test_litellm/proxy/vector_store_endpoints/test_vector_store_endpoints.py b/tests/test_litellm/proxy/vector_store_endpoints/test_vector_store_endpoints.py index 882081b33c..1434dd6b1b 100644 --- a/tests/test_litellm/proxy/vector_store_endpoints/test_vector_store_endpoints.py +++ b/tests/test_litellm/proxy/vector_store_endpoints/test_vector_store_endpoints.py @@ -21,6 +21,9 @@ from litellm.proxy.vector_store_endpoints.endpoints import ( _update_request_data_with_litellm_managed_vector_store_registry, index_create, ) +from litellm.proxy.vector_store_files_endpoints.endpoints import ( + _update_request_data_with_model_routing_hint, +) from litellm.proxy.vector_store_endpoints.management_endpoints import ( _check_vector_store_access, _resolve_embedding_config, @@ -144,6 +147,309 @@ def test_router_vector_store_file_delete_passes_correct_args(): assert call_kwargs["custom_llm_provider"] == "openai" +@pytest.mark.asyncio +async def test_vector_store_file_list_resolves_credentials_from_model_query_param(): + request = MagicMock(spec=Request) + request.query_params = {"model": "team-openai"} + request.headers = {} + + llm_router = MagicMock() + llm_router.get_deployment_credentials_with_provider.return_value = { + "api_key": "sk-team-openai", + "api_base": "https://api.openai.com/v1", + "custom_llm_provider": "openai", + "model": "openai/gpt-4o-mini", + } + + data = { + "vector_store_id": "vs_123", + "limit": "20", + } + + result = await _update_request_data_with_model_routing_hint( + data=data, + request=request, + llm_router=llm_router, + ) + + assert result["api_key"] == "sk-team-openai" + assert result["api_base"] == "https://api.openai.com/v1" + assert result["model"] == "openai/gpt-4o-mini" + assert "custom_llm_provider" not in result + llm_router.get_deployment_credentials_with_provider.assert_called_once_with( + model_id="team-openai" + ) + + +@pytest.mark.asyncio +async def test_vector_store_file_list_resolves_single_openai_team_deployment(): + request = MagicMock(spec=Request) + request.query_params = {} + request.headers = {} + + llm_router = MagicMock() + llm_router.get_deployment_credentials_with_provider.return_value = { + "api_key": "sk-team-openai", + "api_base": "https://api.openai.com/v1", + "custom_llm_provider": "openai", + "model": "openai/gpt-4o-mini", + } + + data = {"vector_store_id": "vs_123"} + user_api_key_dict = UserAPIKeyAuth(team_models=["team-openai"]) + + result = await _update_request_data_with_model_routing_hint( + data=data, + request=request, + llm_router=llm_router, + user_api_key_dict=user_api_key_dict, + ) + + assert result["api_key"] == "sk-team-openai" + assert result["api_base"] == "https://api.openai.com/v1" + assert result["model"] == "openai/gpt-4o-mini" + assert "custom_llm_provider" not in result + llm_router.get_deployment_credentials_with_provider.assert_called_once_with( + model_id="team-openai" + ) + + +@pytest.mark.asyncio +async def test_vector_store_file_list_wildcard_model_hint_falls_back_to_team_deployment(): + request = MagicMock(spec=Request) + request.query_params = {"model": "openai/*"} + request.headers = {} + + llm_router = MagicMock() + llm_router.model_group_alias = {} + llm_router.get_deployment_credentials_with_provider.side_effect = [ + None, + None, + { + "api_key": "sk-team-openai", + "api_base": "https://api.openai.com/v1", + "custom_llm_provider": "openai", + "model": "openai/gpt-4o-mini", + }, + ] + + data = {"vector_store_id": "vs_123", "model": "openai/*"} + user_api_key_dict = UserAPIKeyAuth(team_models=["openai/*", "team-openai"]) + + result = await _update_request_data_with_model_routing_hint( + data=data, + request=request, + llm_router=llm_router, + user_api_key_dict=user_api_key_dict, + ) + + assert result["api_key"] == "sk-team-openai" + assert result["api_base"] == "https://api.openai.com/v1" + assert result["model"] == "openai/gpt-4o-mini" + assert "custom_llm_provider" not in result + assert llm_router.get_deployment_credentials_with_provider.call_count == 3 + + +@pytest.mark.asyncio +async def test_vector_store_file_list_authorizes_wildcard_query_param_before_credentials(): + from litellm.proxy.auth.auth_checks import ProxyException + + request = MagicMock(spec=Request) + request.query_params = {"model": "openai/*"} + request.headers = {} + + llm_router = MagicMock() + llm_router.model_group_alias = {} + data = {"vector_store_id": "vs_123"} + user_api_key_dict = UserAPIKeyAuth( + models=["restricted-deployment"], + team_models=["openai/*"], + ) + + with pytest.raises(ProxyException): + await _update_request_data_with_model_routing_hint( + data=data, + request=request, + llm_router=llm_router, + user_api_key_dict=user_api_key_dict, + ) + + llm_router.get_deployment_credentials_with_provider.assert_not_called() + + +@pytest.mark.asyncio +async def test_vector_store_file_list_uses_single_team_model_for_router_routing(): + request = MagicMock(spec=Request) + request.query_params = {} + request.headers = {} + + llm_router = MagicMock() + llm_router.get_model_access_groups.return_value = {} + llm_router.get_deployment_credentials_with_provider.return_value = None + + data = {"vector_store_id": "vs_123"} + user_api_key_dict = UserAPIKeyAuth( + team_id="team-123", + team_models=["provider/*", "all-proxy-models"], + ) + + result = await _update_request_data_with_model_routing_hint( + data=data, + request=request, + llm_router=llm_router, + user_api_key_dict=user_api_key_dict, + ) + + assert result["model"] == "provider/*" + assert "api_key" not in result + assert "api_base" not in result + + +@pytest.mark.asyncio +async def test_vector_store_file_list_authorizes_inferred_team_model(): + from litellm.proxy.auth.auth_checks import ProxyException + + request = MagicMock(spec=Request) + request.query_params = {} + request.headers = {} + + llm_router = MagicMock() + llm_router.model_group_alias = {} + llm_router.get_deployment_credentials_with_provider.return_value = { + "api_key": "sk-team-openai", + "api_base": "https://api.openai.com/v1", + "custom_llm_provider": "openai", + "model": "openai/gpt-4o-mini", + } + + data = {"vector_store_id": "vs_123"} + user_api_key_dict = UserAPIKeyAuth( + models=["restricted-deployment"], + team_models=["team-openai"], + ) + + with pytest.raises(ProxyException): + await _update_request_data_with_model_routing_hint( + data=data, + request=request, + llm_router=llm_router, + user_api_key_dict=user_api_key_dict, + ) + + +@pytest.mark.asyncio +async def test_vector_store_file_list_does_not_guess_ambiguous_team_deployment(): + request = MagicMock(spec=Request) + request.query_params = {} + request.headers = {} + + llm_router = MagicMock() + llm_router.get_deployment_credentials_with_provider.side_effect = [ + { + "api_key": "sk-team-openai-1", + "custom_llm_provider": "openai", + "model": "openai/gpt-4o-mini", + }, + { + "api_key": "sk-team-openai-2", + "custom_llm_provider": "openai", + "model": "openai/gpt-4.1-mini", + }, + ] + + data = {"vector_store_id": "vs_123"} + user_api_key_dict = UserAPIKeyAuth(team_models=["team-openai-1", "team-openai-2"]) + + result = await _update_request_data_with_model_routing_hint( + data=data, + request=request, + llm_router=llm_router, + user_api_key_dict=user_api_key_dict, + ) + + assert "api_key" not in result + assert "api_base" not in result + assert llm_router.get_deployment_credentials_with_provider.call_count == 2 + + +@pytest.mark.asyncio +async def test_vector_store_file_list_does_not_override_existing_credentials(): + request = MagicMock(spec=Request) + request.query_params = {"model": "team-openai"} + request.headers = {} + + llm_router = MagicMock() + data = { + "vector_store_id": "vs_123", + "api_key": "sk-explicit", + "api_base": "https://example.com/v1", + } + + result = await _update_request_data_with_model_routing_hint( + data=data, + request=request, + llm_router=llm_router, + ) + + assert result["api_key"] == "sk-explicit" + assert result["api_base"] == "https://example.com/v1" + llm_router.get_deployment_credentials_with_provider.assert_not_called() + + +@pytest.mark.asyncio +async def test_vector_store_file_list_requires_explicit_openai_provider_for_team_fallback(): + request = MagicMock(spec=Request) + request.query_params = {} + request.headers = {} + + llm_router = MagicMock() + llm_router.get_deployment_credentials_with_provider.return_value = { + "api_key": "sk-unknown-provider", + "api_base": "https://example.com/v1", + "model": "gpt-4o", + } + + data = {"vector_store_id": "vs_123"} + user_api_key_dict = UserAPIKeyAuth(team_models=["team-deployment"]) + + result = await _update_request_data_with_model_routing_hint( + data=data, + request=request, + llm_router=llm_router, + user_api_key_dict=user_api_key_dict, + ) + + assert "api_key" not in result + assert "api_base" not in result + + +@pytest.mark.asyncio +async def test_vector_store_file_list_authorizes_model_query_param_before_credentials(): + from litellm.proxy.auth.auth_checks import ProxyException + + request = MagicMock(spec=Request) + request.query_params = {"model": "restricted-deployment"} + request.headers = {} + + llm_router = MagicMock() + llm_router.model_group_alias = {} + data = {"vector_store_id": "vs_123"} + user_api_key_dict = UserAPIKeyAuth( + models=["allowed-deployment"], + team_models=["allowed-deployment"], + ) + + with pytest.raises(ProxyException): + await _update_request_data_with_model_routing_hint( + data=data, + request=request, + llm_router=llm_router, + user_api_key_dict=user_api_key_dict, + ) + + llm_router.get_deployment_credentials_with_provider.assert_not_called() + + @pytest.mark.asyncio async def test_update_request_data_with_litellm_managed_vector_store_registry(): """ diff --git a/tests/test_litellm/proxy/vector_store_endpoints/test_vector_store_tenant_guard.py b/tests/test_litellm/proxy/vector_store_endpoints/test_vector_store_tenant_guard.py index 48262afd36..b1bd7ccbf0 100644 --- a/tests/test_litellm/proxy/vector_store_endpoints/test_vector_store_tenant_guard.py +++ b/tests/test_litellm/proxy/vector_store_endpoints/test_vector_store_tenant_guard.py @@ -106,6 +106,75 @@ async def test_vector_store_file_create_forces_path_id_over_body_id(): ) +@pytest.mark.asyncio +async def test_vector_store_file_list_resolves_managed_vector_store_before_team_fallback(): + import base64 + + from litellm.proxy.vector_store_files_endpoints.endpoints import ( + vector_store_file_list, + ) + + captured_data = {} + + async def fake_base_process(self, **kwargs): + captured_data.update(self.data) + return {"ok": True} + + raw_vector_store_id = ( + "litellm_proxy:vector_store;" + "unified_id,managed-vs;" + "target_model_names,managed-deployment;" + "provider_resource_id,vs_provider_native;" + "model_id,managed-deployment" + ) + vector_store_id = ( + base64.urlsafe_b64encode(raw_vector_store_id.encode()).decode().rstrip("=") + ) + + request = _mock_request() + request.method = "GET" + request.query_params = {"limit": "10"} + request.url.path = f"/v1/vector_stores/{vector_store_id}/files" + + llm_router = MagicMock() + + def get_credentials(model_id): + return { + "api_key": f"sk-{model_id}", + "api_base": "https://api.openai.com/v1", + "custom_llm_provider": "openai", + "model": f"openai/{model_id}", + } + + llm_router.get_deployment_credentials_with_provider.side_effect = get_credentials + + with ( + patch( + "litellm.proxy.vector_store_files_endpoints.endpoints.assert_user_can_access_vector_store_id", + new=AsyncMock(return_value=None), + ), + patch("litellm.proxy.proxy_server.llm_router", llm_router), + patch( + "litellm.proxy.vector_store_files_endpoints.endpoints.ProxyBaseLLMRequestProcessing.base_process_llm_request", + new=fake_base_process, + ), + ): + response = await vector_store_file_list( + vector_store_id=vector_store_id, + request=request, + fastapi_response=Response(), + user_api_key_dict=UserAPIKeyAuth(team_models=["team-openai"]), + ) + + assert response == {"ok": True} + assert captured_data["vector_store_id"] == "vs_provider_native" + assert captured_data["api_key"] == "sk-managed-deployment" + assert captured_data["model"] == "openai/managed-deployment" + llm_router.get_deployment_credentials_with_provider.assert_called_once_with( + model_id="managed-deployment" + ) + + @pytest.mark.asyncio async def test_vector_store_file_create_denies_other_team_path_store(): from litellm.proxy.vector_store_files_endpoints.endpoints import (