diff --git a/enterprise/enterprise_hooks/__init__.py b/enterprise/enterprise_hooks/__init__.py index a24e885e52..e93c8c9150 100644 --- a/enterprise/enterprise_hooks/__init__.py +++ b/enterprise/enterprise_hooks/__init__.py @@ -1,9 +1,6 @@ from typing import Dict, Literal, Type, Union from litellm_enterprise.proxy.hooks.managed_files import _PROXY_LiteLLMManagedFiles -from litellm_enterprise.proxy.hooks.managed_vector_store_files import ( - _PROXY_LiteLLMManagedVectorStoreFiles, -) from litellm_enterprise.proxy.hooks.managed_vector_stores import ( _PROXY_LiteLLMManagedVectorStores, ) @@ -13,7 +10,6 @@ from litellm.integrations.custom_logger import CustomLogger ENTERPRISE_PROXY_HOOKS: Dict[str, Type[CustomLogger]] = { "managed_files": _PROXY_LiteLLMManagedFiles, "managed_vector_stores": _PROXY_LiteLLMManagedVectorStores, - "managed_vector_store_files": _PROXY_LiteLLMManagedVectorStoreFiles, } @@ -22,7 +18,6 @@ def get_enterprise_proxy_hook( Literal[ "managed_files", "managed_vector_stores", - "managed_vector_store_files", "max_parallel_requests", ], str, diff --git a/litellm/proxy/_types.py b/litellm/proxy/_types.py index 87ff4a66e0..fe60f958f0 100644 --- a/litellm/proxy/_types.py +++ b/litellm/proxy/_types.py @@ -3945,6 +3945,18 @@ class LiteLLM_ManagedObjectTable(LiteLLMPydanticObjectBase): file_object: Union[LiteLLMBatch, LiteLLMFineTuningJob, ResponsesAPIResponse] +class LiteLLM_ManagedVectorStoreTable(LiteLLMPydanticObjectBase): + """Table for managing vector stores with target_model_names support.""" + unified_resource_id: str + resource_object: Optional[Any] = None # VectorStoreCreateResponse + model_mappings: Dict[str, str] + flat_model_resource_ids: List[str] + created_by: Optional[str] + updated_by: Optional[str] + storage_backend: Optional[str] = None + storage_url: Optional[str] = None + + class EnterpriseLicenseData(TypedDict, total=False): expiration_date: str user_id: str diff --git a/litellm/proxy/vector_store_files_endpoints/endpoints.py b/litellm/proxy/vector_store_files_endpoints/endpoints.py index 6ac5cd5772..e12184c6ee 100644 --- a/litellm/proxy/vector_store_files_endpoints/endpoints.py +++ b/litellm/proxy/vector_store_files_endpoints/endpoints.py @@ -1,9 +1,9 @@ -from typing import Dict, Optional +from typing import TYPE_CHECKING, Dict, Optional -import litellm from fastapi import APIRouter, Depends, Request, Response from fastapi.responses import ORJSONResponse +import litellm from litellm.proxy._types import UserAPIKeyAuth from litellm.proxy.auth.user_api_key_auth import user_api_key_auth from litellm.proxy.common_request_processing import ProxyBaseLLMRequestProcessing @@ -17,13 +17,88 @@ from litellm.proxy.vector_store_endpoints.utils import ( ) from litellm.types.utils import LlmProviders +if TYPE_CHECKING: + from litellm.router import Router + router = APIRouter() def _update_request_data_with_litellm_managed_vector_store_registry( data: Dict, vector_store_id: str, + llm_router: Optional["Router"] = None, ) -> Dict: + """ + Update request data with model routing information from managed vector store. + + This function handles two types of vector stores: + 1. Legacy vector stores from registry (non-managed) + 2. Managed vector stores with unified IDs (requires decoding) + + For managed vector stores, this function: + - Decodes the unified vector store ID + - Extracts the model_id and provider resource ID + - Sets data["model"] so the router can use the correct deployment credentials + - Replaces the unified ID with the provider-specific ID + + Args: + data: Request data to update + vector_store_id: Vector store ID (can be unified or legacy) + llm_router: LiteLLM router for credential lookup (required for managed vector stores) + + Returns: + Updated request data with model routing information + """ + from litellm import verbose_logger + from litellm.llms.base_llm.managed_resources.utils import ( + is_base64_encoded_unified_id, + parse_unified_id, + ) + + # Check if this is a managed vector store ID (base64 encoded unified ID) + decoded_id = is_base64_encoded_unified_id(vector_store_id) + + if decoded_id: + # This is a managed vector store - decode and extract routing information + verbose_logger.debug( + f"Processing managed vector store ID: {vector_store_id}" + ) + + parsed_id = parse_unified_id(vector_store_id) + + if parsed_id: + model_id = parsed_id.get("model_id") + provider_resource_id = parsed_id.get("provider_resource_id") + target_model_names = parsed_id.get("target_model_names", []) + + verbose_logger.debug( + f"Decoded vector store - model_id: {model_id}, provider_resource_id: {provider_resource_id}, target_model_names: {target_model_names}" + ) + + # Set the model for routing - this tells the router which deployment to use + # The router will automatically get the credentials from the deployment + routing_model = None + if model_id: + routing_model = model_id + elif target_model_names and len(target_model_names) > 0: + routing_model = target_model_names[0] + + if routing_model: + data["model"] = routing_model + verbose_logger.info( + f"Routing vector store files operation to model: {routing_model}" + ) + + # Replace unified vector store ID with provider resource ID + if provider_resource_id: + data["vector_store_id"] = provider_resource_id + verbose_logger.debug( + f"Replaced unified vector store ID with provider resource ID: {provider_resource_id}" + ) + + return data + + # Legacy path: Check vector store registry for non-managed vector stores if litellm.vector_store_registry is not None: vector_store_to_run = ( litellm.vector_store_registry.get_litellm_managed_vector_store_from_registry( @@ -42,6 +117,7 @@ def _update_request_data_with_litellm_managed_vector_store_registry( if "litellm_params" in vector_store_to_run: litellm_params = vector_store_to_run.get("litellm_params", {}) or {} data.update(litellm_params) + return data @@ -129,7 +205,7 @@ async def vector_store_file_create( data["vector_store_id"] = vector_store_id data = _update_request_data_with_litellm_managed_vector_store_registry( - data=data, vector_store_id=vector_store_id + data=data, vector_store_id=vector_store_id, llm_router=llm_router ) provider_enum = await _resolve_provider(data=data, request=request) @@ -209,7 +285,7 @@ async def vector_store_file_list( data.update(query_params) data = _update_request_data_with_litellm_managed_vector_store_registry( - data=data, vector_store_id=vector_store_id + data=data, vector_store_id=vector_store_id, llm_router=llm_router ) provider_enum = await _resolve_provider(data=data, request=request) @@ -291,7 +367,7 @@ async def vector_store_file_retrieve( } data = _update_request_data_with_litellm_managed_vector_store_registry( - data=data, vector_store_id=vector_store_id + data=data, vector_store_id=vector_store_id, llm_router=llm_router ) provider_enum = await _resolve_provider(data=data, request=request) @@ -373,7 +449,7 @@ async def vector_store_file_content( } data = _update_request_data_with_litellm_managed_vector_store_registry( - data=data, vector_store_id=vector_store_id + data=data, vector_store_id=vector_store_id, llm_router=llm_router ) provider_enum = await _resolve_provider(data=data, request=request) @@ -455,7 +531,7 @@ async def vector_store_file_update( data["file_id"] = file_id data = _update_request_data_with_litellm_managed_vector_store_registry( - data=data, vector_store_id=vector_store_id + data=data, vector_store_id=vector_store_id, llm_router=llm_router ) provider_enum = await _resolve_provider(data=data, request=request) @@ -537,7 +613,7 @@ async def vector_store_file_delete( } data = _update_request_data_with_litellm_managed_vector_store_registry( - data=data, vector_store_id=vector_store_id + data=data, vector_store_id=vector_store_id, llm_router=llm_router ) provider_enum = await _resolve_provider(data=data, request=request)