[Feat] Backend Router - Add Auto-Router powered by semantic-router (#12955)
* add router.json * test_router_auto_router * async_pre_routing_hook * fixes for auto router * add async_pre_routing_hook * add LiteLLMRouterEncoder * update test auto_router_embedding_model * add auto_router_embedding_model * add AutoRouter * fix async_pre_routing_hook * update async_pre_routing_hook * fix auto router * fix router.json * working router init * working embedding encoder * working auto router * test_router_auto_router * test auto router * add semantic-router as optional for litellm * add extras * semantic_router==0.1.10 * ruff fix * use aiohttp==3.10.11 * python-dotenv==1.0.1 * test auto router * test_router_auto_router * semantic_router * test_is_auto_router_deployment * fix check * fix docker build step * add semantic_router * Revert "add semantic_router" This reverts commit 537b67288798731a119d811f643b682086377ee9.
This commit is contained in:
parent
8aa4beff5f
commit
b8e404dd95
@ -906,6 +906,7 @@ jobs:
|
||||
pip install "requests-mock>=1.12.1"
|
||||
pip install "responses==0.25.7"
|
||||
pip install "pytest-xdist==3.6.1"
|
||||
pip install "semantic_router==0.1.10"
|
||||
- setup_litellm_enterprise_pip
|
||||
# Run pytest and generate JUnit XML report
|
||||
- run:
|
||||
|
||||
@ -13,3 +13,4 @@ google-cloud-aiplatform==1.43.0
|
||||
fastapi-sso==0.16.0
|
||||
uvloop==0.21.0
|
||||
mcp==1.10.1 # for MCP server
|
||||
semantic_router==0.1.10 # for auto-routing with litellm
|
||||
2
.github/workflows/test-litellm.yml
vendored
2
.github/workflows/test-litellm.yml
vendored
@ -27,7 +27,7 @@ jobs:
|
||||
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
poetry install --with dev,proxy-dev --extras proxy
|
||||
poetry install --with dev,proxy-dev --extras "proxy semantic-router"
|
||||
poetry run pip install "pytest-retry==1.6.3"
|
||||
poetry run pip install pytest-xdist
|
||||
poetry run pip install "google-genai==1.22.0"
|
||||
|
||||
@ -34,6 +34,7 @@ if TYPE_CHECKING:
|
||||
from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj
|
||||
from litellm.proxy._types import UserAPIKeyAuth
|
||||
from litellm.types.mcp import MCPPostCallResponseObject
|
||||
from litellm.types.router import PreRoutingHookResponse
|
||||
|
||||
Span = Union[_Span, Any]
|
||||
else:
|
||||
@ -41,6 +42,7 @@ else:
|
||||
LiteLLMLoggingObj = Any
|
||||
UserAPIKeyAuth = Any
|
||||
MCPPostCallResponseObject = Any
|
||||
PreRoutingHookResponse = Any
|
||||
|
||||
|
||||
class CustomLogger: # https://docs.litellm.ai/docs/observability/custom_callback#callback-class
|
||||
@ -125,6 +127,21 @@ class CustomLogger: # https://docs.litellm.ai/docs/observability/custom_callbac
|
||||
Allows usage-based-routing-v2 to run pre-call rpm checks within the picked deployment's semaphore (concurrency-safe tpm/rpm checks).
|
||||
"""
|
||||
|
||||
async def async_pre_routing_hook(
    self,
    model: str,
    request_kwargs: Dict,
    messages: Optional[List[Dict[str, str]]] = None,
    input: Optional[Union[str, List]] = None,
    specific_deployment: Optional[bool] = False,
) -> Optional[PreRoutingHookResponse]:
    """
    Hook invoked before the router makes its routing decision.

    The base implementation is a no-op: it ignores every argument and
    returns None. Subclasses (e.g. the litellm auto-router) override it to
    return a PreRoutingHookResponse carrying the model and messages that
    the routing decision should use instead.
    """
    return None
|
||||
|
||||
async def async_filter_deployments(
|
||||
self,
|
||||
model: str,
|
||||
|
||||
@ -165,9 +165,16 @@ from .router_utils.pattern_match_deployments import PatternMatchRouter
|
||||
if TYPE_CHECKING:
|
||||
from opentelemetry.trace import Span as _Span
|
||||
|
||||
from litellm.router_strategy.auto_router.auto_router import (
|
||||
AutoRouter,
|
||||
PreRoutingHookResponse,
|
||||
)
|
||||
|
||||
Span = Union[_Span, Any]
|
||||
else:
|
||||
Span = Any
|
||||
AutoRouter = Any
|
||||
PreRoutingHookResponse = Any
|
||||
|
||||
|
||||
class RoutingArgs(enum.Enum):
|
||||
@ -398,6 +405,7 @@ class Router:
|
||||
self.default_max_parallel_requests = default_max_parallel_requests
|
||||
self.provider_default_deployment_ids: List[str] = []
|
||||
self.pattern_router = PatternMatchRouter()
|
||||
self.auto_routers: Dict[str, "AutoRouter"] = {}
|
||||
|
||||
if model_list is not None:
|
||||
model_list = copy.deepcopy(model_list)
|
||||
@ -4674,10 +4682,11 @@ class Router:
|
||||
- None: If the deployment is not active for the current environment (if 'supported_environments' is set in litellm_params)
|
||||
"""
|
||||
try:
|
||||
litellm_params: LiteLLM_Params = LiteLLM_Params(**_litellm_params)
|
||||
deployment = Deployment(
|
||||
**deployment_info,
|
||||
model_name=_model_name,
|
||||
litellm_params=LiteLLM_Params(**_litellm_params),
|
||||
litellm_params=litellm_params,
|
||||
model_info=_model_info,
|
||||
)
|
||||
for field in CustomPricingLiteLLMParams.model_fields.keys():
|
||||
@ -4692,6 +4701,13 @@ class Router:
|
||||
model_id: _model_info,
|
||||
}
|
||||
)
|
||||
|
||||
|
||||
#########################################################
|
||||
# Check if this is an auto-router deployment
|
||||
#########################################################
|
||||
if self._is_auto_router_deployment(litellm_params=litellm_params):
|
||||
self.init_auto_router_deployment(deployment=deployment)
|
||||
|
||||
## OLD MODEL REGISTRATION ## Kept to prevent breaking changes
|
||||
_model_name = deployment.litellm_params.model
|
||||
@ -4730,6 +4746,46 @@ class Router:
|
||||
return None
|
||||
else:
|
||||
raise e
|
||||
|
||||
def _is_auto_router_deployment(self, litellm_params: LiteLLM_Params) -> bool:
|
||||
"""
|
||||
Check if the deployment is an auto-router deployment.
|
||||
|
||||
Returns True if the litellm_params model starts with "auto_router/"
|
||||
"""
|
||||
if litellm_params.model.startswith("auto_router/"):
|
||||
return True
|
||||
return False
|
||||
|
||||
def init_auto_router_deployment(self, deployment: Deployment):
    """
    Build the AutoRouter for an auto-router deployment and register it.

    The new AutoRouter is stored in ``self.auto_routers`` under the
    deployment's ``model_name``.

    Args:
        deployment: Deployment whose ``litellm_params`` carry the
            ``auto_router_config_path``, ``auto_router_default_model`` and
            ``auto_router_embedding_model`` settings.

    Raises:
        ValueError: If any of the three auto-router settings is missing, or
            if an auto-router is already registered under the same model
            name.
    """
    litellm_params = deployment.litellm_params

    router_config_path: Optional[str] = litellm_params.auto_router_config_path
    if router_config_path is None:
        raise ValueError("auto_router_config_path is required for auto-router deployments. Please set it in the litellm_params")

    default_model: Optional[str] = litellm_params.auto_router_default_model
    if default_model is None:
        raise ValueError("auto_router_default_model is required for auto-router deployments. Please set it in the litellm_params")

    embedding_model: Optional[str] = litellm_params.auto_router_embedding_model
    if embedding_model is None:
        raise ValueError("auto_router_embedding_model is required for auto-router deployments. Please set it in the litellm_params")

    # Reject duplicates before constructing the AutoRouter: its constructor
    # loads the router config from disk, which is wasted work (and can mask
    # the real problem with an unrelated error) when the name is taken.
    if deployment.model_name in self.auto_routers:
        raise ValueError(f"Auto-router deployment {deployment.model_name} already exists. Please use a different model name.")

    # Imported only once the arguments are known to be valid, so that
    # configuration mistakes are reported as ValueError even when the
    # auto-router's dependencies cannot be imported.
    from litellm.router_strategy.auto_router.auto_router import AutoRouter

    auto_router: AutoRouter = AutoRouter(
        model_name=deployment.model_name,
        router_config_path=router_config_path,
        default_model=default_model,
        embedding_model=embedding_model,
        litellm_router_instance=self,
    )
    self.auto_routers[deployment.model_name] = auto_router
|
||||
|
||||
def deployment_is_active_for_environment(self, deployment: Deployment) -> bool:
|
||||
"""
|
||||
@ -6458,6 +6514,25 @@ class Router:
|
||||
)
|
||||
try:
|
||||
parent_otel_span = _get_parent_otel_span_from_kwargs(request_kwargs)
|
||||
|
||||
#########################################################
|
||||
# Execute Pre-Routing Hooks
|
||||
# this hook can modify the model, messages before the routing decision is made
|
||||
#########################################################
|
||||
pre_routing_hook_response = await self.async_pre_routing_hook(
|
||||
model=model,
|
||||
request_kwargs=request_kwargs,
|
||||
messages=messages,
|
||||
input=input,
|
||||
specific_deployment=specific_deployment,
|
||||
)
|
||||
if pre_routing_hook_response is not None:
|
||||
model = pre_routing_hook_response.model
|
||||
messages = pre_routing_hook_response.messages
|
||||
#########################################################
|
||||
|
||||
|
||||
|
||||
healthy_deployments = await self.async_get_healthy_deployments(
|
||||
model=model,
|
||||
request_kwargs=request_kwargs,
|
||||
@ -6568,6 +6643,36 @@ class Router:
|
||||
)
|
||||
raise e
|
||||
|
||||
async def async_pre_routing_hook(
    self,
    model: str,
    request_kwargs: Dict,
    messages: Optional[List[Dict[str, str]]] = None,
    input: Optional[Union[str, List]] = None,
    specific_deployment: Optional[bool] = False,
) -> Optional[PreRoutingHookResponse]:
    """
    Run the pre-routing hook for ``model`` before a deployment is picked.

    If ``model`` is registered in ``self.auto_routers``, the call is
    delegated to that auto-router's own ``async_pre_routing_hook`` and its
    result is returned unchanged. For any other model, None is returned.
    """
    # Only model names registered as auto-routers get a pre-routing pass.
    if model not in self.auto_routers:
        return None

    auto_router = self.auto_routers[model]
    hook_response = await auto_router.async_pre_routing_hook(
        model=model,
        request_kwargs=request_kwargs,
        messages=messages,
        input=input,
        specific_deployment=specific_deployment,
    )
    return hook_response
|
||||
|
||||
|
||||
|
||||
|
||||
def get_available_deployment(
|
||||
self,
|
||||
model: str,
|
||||
|
||||
98
litellm/router_strategy/auto_router/auto_router.py
Normal file
98
litellm/router_strategy/auto_router/auto_router.py
Normal file
@ -0,0 +1,98 @@
|
||||
"""
|
||||
Auto-Routing Strategy that works with a Semantic Router Config
|
||||
"""
|
||||
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Union
|
||||
|
||||
from litellm._logging import verbose_router_logger
|
||||
from litellm.integrations.custom_logger import CustomLogger
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from litellm.router import Router
|
||||
from litellm.types.router import PreRoutingHookResponse
|
||||
else:
|
||||
Router = Any
|
||||
PreRoutingHookResponse = Any
|
||||
|
||||
|
||||
class AutoRouter(CustomLogger):
    """Pre-routing hook that picks a model with a semantic-router config."""

    DEFAULT_AUTO_SYNC_VALUE = "local"

    def __init__(
        self,
        model_name: str,
        router_config_path: str,
        default_model: str,
        embedding_model: str,
        litellm_router_instance: "Router",
    ):
        """
        Auto-Router class that uses a semantic router to route requests to the appropriate model.

        Args:
            model_name: The name of the model to use for the auto-router. eg. if model = "auto-router1" then use this router.
            router_config_path: The path to the router config file.
            default_model: The default model to use if no route is found.
            embedding_model: The embedding model to use for the auto-router.
            litellm_router_instance: The instance of the LiteLLM Router.
        """
        from semantic_router.routers import SemanticRouter

        self.router_config_path = router_config_path
        self.auto_sync_value = self.DEFAULT_AUTO_SYNC_VALUE
        # Only the routes of the loaded config are used later; the route
        # layer that actually serves requests is built lazily on first use.
        self.loaded_router: SemanticRouter = SemanticRouter.from_json(self.router_config_path)
        self.routelayer: Optional[SemanticRouter] = None
        self.default_model = default_model
        self.embedding_model: str = embedding_model
        self.litellm_router_instance: "Router" = litellm_router_instance

    @staticmethod
    def _get_message_text(message: Dict[str, Any]) -> str:
        """Return the plain text of a chat message, or "" if it has none.

        Handles string content directly. For list content, the ``text`` of
        every ``{"type": "text", ...}`` part is joined with spaces
        (assumes OpenAI-style content parts — TODO confirm for other
        providers). Any other content (missing, None, ...) yields "".
        """
        content = message.get("content")
        if isinstance(content, str):
            return content
        if isinstance(content, list):
            texts = [
                part.get("text")
                for part in content
                if isinstance(part, dict) and part.get("type") == "text"
            ]
            return " ".join(text for text in texts if isinstance(text, str))
        return ""

    async def async_pre_routing_hook(
        self,
        model: str,
        request_kwargs: Dict,
        messages: Optional[List[Dict[str, str]]] = None,
        input: Optional[Union[str, List]] = None,
        specific_deployment: Optional[bool] = False,
    ) -> Optional["PreRoutingHookResponse"]:
        """
        This hook is called before the routing decision is made.

        Used for the litellm auto-router to modify the request before the routing decision is made.

        The text of the last message is matched against the semantic
        routes; the name of the matched route becomes the model to route
        to. ``default_model`` is used whenever no route name is produced.

        Returns:
            None when there are no messages (the request is left as-is),
            otherwise a PreRoutingHookResponse with the chosen model and
            the unmodified messages.
        """
        from semantic_router.routers import SemanticRouter
        from semantic_router.schema import RouteChoice

        from litellm.router_strategy.auto_router.litellm_encoder import (
            LiteLLMRouterEncoder,
        )
        from litellm.types.router import PreRoutingHookResponse

        if not messages:
            # Nothing to route on (None or empty list): return same inputs.
            return None

        # Fall back to the default model unless a route name is found below.
        # Previously an unrecognised route choice left the auto-router's own
        # model name in place.
        routed_model: str = self.default_model

        message_content: str = self._get_message_text(messages[-1])
        if message_content:
            if self.routelayer is None:
                #######################
                # Create the route layer
                #######################
                self.routelayer = SemanticRouter(
                    routes=self.loaded_router.routes,
                    encoder=LiteLLMRouterEncoder(
                        litellm_router_instance=self.litellm_router_instance,
                        model_name=self.embedding_model,
                    ),
                    auto_sync=self.auto_sync_value,
                )

            # NOTE(review): this is a synchronous call inside an async hook;
            # it presumably blocks the event loop while embeddings are
            # computed — confirm whether the router's async call path can be
            # used here instead.
            route_choice: Optional[Union[RouteChoice, List[RouteChoice]]] = self.routelayer(text=message_content)
            verbose_router_logger.debug(f"route_choice: {route_choice}")

            if isinstance(route_choice, list):
                # Use the first choice; an empty list means no match.
                route_choice = route_choice[0] if route_choice else None
            if isinstance(route_choice, RouteChoice) and route_choice.name:
                routed_model = route_choice.name

        return PreRoutingHookResponse(
            model=routed_model,
            messages=messages,
        )
|
||||
|
||||
147
litellm/router_strategy/auto_router/litellm_encoder.py
Normal file
147
litellm/router_strategy/auto_router/litellm_encoder.py
Normal file
@ -0,0 +1,147 @@
|
||||
from typing import TYPE_CHECKING, Any, Optional, Union
|
||||
|
||||
from pydantic import ConfigDict
|
||||
from semantic_router.encoders import DenseEncoder
|
||||
from semantic_router.encoders.base import AsymmetricDenseMixin
|
||||
|
||||
import litellm
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from litellm.router import Router
|
||||
else:
|
||||
Router = Any
|
||||
|
||||
|
||||
def litellm_to_list(embeds: litellm.EmbeddingResponse) -> list[list[float]]:
    """Extract the raw embedding vectors from a LiteLLM embedding response.

    :param embeds: The LiteLLM embedding response.
    :return: One embedding per entry of ``embeds.data``, in the same order.
    :raises ValueError: If ``embeds`` is falsy, is not a
        ``litellm.EmbeddingResponse``, or carries no data.
    """
    has_embeddings = (
        bool(embeds)
        and isinstance(embeds, litellm.EmbeddingResponse)
        and bool(embeds.data)
    )
    if not has_embeddings:
        raise ValueError("No embeddings found in LiteLLM embedding response.")

    vectors = []
    for item in embeds.data:
        vectors.append(item["embedding"])
    return vectors
|
||||
|
||||
|
||||
class CustomDenseEncoder(DenseEncoder):
    """DenseEncoder that also carries a reference to a LiteLLM Router."""

    # NOTE(review): extra='allow' is presumably what permits assigning
    # attributes such as ``litellm_router_instance`` that are not declared
    # as fields on the encoder model — confirm against pydantic's ConfigDict.
    model_config = ConfigDict(extra='allow')

    def __init__(self, litellm_router_instance: Optional["Router"] = None, **kwargs):
        """Initialize the base encoder and attach the router.

        :param litellm_router_instance: The LiteLLM Router used to compute
            embeddings, or None if it is attached later.
        :param kwargs: Forwarded unchanged to ``DenseEncoder.__init__``.
        """
        # A ``litellm_router_instance=...`` keyword always binds to the named
        # parameter above and can never appear in **kwargs, so no extraction
        # from kwargs is needed before forwarding them.
        super().__init__(**kwargs)
        self.litellm_router_instance = litellm_router_instance
|
||||
|
||||
|
||||
class LiteLLMRouterEncoder(CustomDenseEncoder, AsymmetricDenseMixin):
    """LiteLLM encoder class for generating embeddings using LiteLLM.

    The LiteLLMRouterEncoder class is a subclass of DenseEncoder and utilizes the LiteLLM Router SDK
    to generate embeddings for given documents. It supports all encoders supported by LiteLLM
    and supports customization of the score threshold for filtering or processing the embeddings.

    Queries and documents are embedded the same way: every encode method
    sends the texts to the router's embedding endpoint for ``model_name``.
    """

    type: str = "internal_litellm_router"

    def __init__(
        self,
        litellm_router_instance: "Router",
        model_name: str,
        score_threshold: Union[float, None] = None,
    ):
        """Initialize the LiteLLMRouterEncoder.

        :param litellm_router_instance: The instance of the LiteLLM Router.
        :type litellm_router_instance: Router
        :param model_name: The name of the embedding model to use. Must use LiteLLM naming
            convention (e.g. "openai/text-embedding-3-small" or "mistral/mistral-embed").
        :type model_name: str
        :param score_threshold: The score threshold for the embeddings.
            Defaults to 0.3 when None is given.
        :type score_threshold: float
        """
        super().__init__(
            name=model_name,
            score_threshold=score_threshold if score_threshold is not None else 0.3,
        )
        self.model_name = model_name
        self.litellm_router_instance = litellm_router_instance

    def __call__(self, docs: list[Any], **kwargs) -> list[list[float]]:
        """Encode a list of text documents into embeddings using LiteLLM.

        :param docs: List of text documents to encode.
        :return: List of embeddings for each document."""
        return self.encode_queries(docs, **kwargs)

    async def acall(self, docs: list[Any], **kwargs) -> list[list[float]]:
        """Encode a list of documents into embeddings using LiteLLM asynchronously.

        :param docs: List of documents to encode.
        :return: List of embeddings for each document."""
        return await self.aencode_queries(docs, **kwargs)

    def encode_queries(self, docs: list[str], **kwargs) -> list[list[float]]:
        """Embed query texts synchronously through the LiteLLM Router."""
        return self._router_embedding(docs, **kwargs)

    def encode_documents(self, docs: list[str], **kwargs) -> list[list[float]]:
        """Embed document texts synchronously through the LiteLLM Router."""
        return self._router_embedding(docs, **kwargs)

    async def aencode_queries(self, docs: list[str], **kwargs) -> list[list[float]]:
        """Embed query texts asynchronously through the LiteLLM Router."""
        return await self._router_aembedding(docs, **kwargs)

    async def aencode_documents(self, docs: list[str], **kwargs) -> list[list[float]]:
        """Embed document texts asynchronously through the LiteLLM Router."""
        return await self._router_aembedding(docs, **kwargs)

    def _router_embedding(self, docs: list[str], **kwargs) -> list[list[float]]:
        """Call the router's sync ``embedding`` and return the vectors.

        :raises ValueError: If no router is attached, or if the embedding
            call or the response conversion fails (original error chained).
        """
        # Checked outside the try block so this message is not re-wrapped
        # as an "API call failed" error.
        if self.litellm_router_instance is None:
            raise ValueError("litellm_router_instance is not set")
        try:
            embeds = self.litellm_router_instance.embedding(
                input=docs,
                model=self.model_name,
                **kwargs
            )
            return litellm_to_list(embeds)
        except Exception as e:
            raise ValueError(
                f"{self.type.capitalize()} API call failed. Error: {e}"
            ) from e

    async def _router_aembedding(self, docs: list[str], **kwargs) -> list[list[float]]:
        """Call the router's async ``aembedding`` and return the vectors.

        :raises ValueError: If no router is attached, or if the embedding
            call or the response conversion fails (original error chained).
        """
        # Checked outside the try block so this message is not re-wrapped
        # as an "API call failed" error.
        if self.litellm_router_instance is None:
            raise ValueError("litellm_router_instance is not set")
        try:
            embeds = await self.litellm_router_instance.aembedding(
                input=docs,
                model=self.model_name,
                **kwargs
            )
            return litellm_to_list(embeds)
        except Exception as e:
            raise ValueError(
                f"{self.type.capitalize()} API call failed. Error: {e}"
            ) from e
|
||||
@ -209,6 +209,12 @@ class GenericLiteLLMParams(CredentialLiteLLMParams, CustomPricingLiteLLMParams):
|
||||
model_info: Optional[Dict] = None
|
||||
mock_response: Optional[Union[str, ModelResponse, Exception, Any]] = None
|
||||
|
||||
|
||||
# auto-router params
|
||||
auto_router_config_path: Optional[str] = None
|
||||
auto_router_default_model: Optional[str] = None
|
||||
auto_router_embedding_model: Optional[str] = None
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
custom_llm_provider: Optional[str] = None,
|
||||
@ -253,6 +259,10 @@ class GenericLiteLLMParams(CredentialLiteLLMParams, CustomPricingLiteLLMParams):
|
||||
merge_reasoning_content_in_choices: Optional[bool] = False,
|
||||
model_info: Optional[Dict] = None,
|
||||
mock_response: Optional[Union[str, ModelResponse, Exception, Any]] = None,
|
||||
# auto-router params
|
||||
auto_router_config_path: Optional[str] = None,
|
||||
auto_router_default_model: Optional[str] = None,
|
||||
auto_router_embedding_model: Optional[str] = None,
|
||||
**params,
|
||||
):
|
||||
args = locals()
|
||||
@ -763,6 +773,16 @@ class MockRouterTestingParams:
|
||||
),
|
||||
)
|
||||
|
||||
|
||||
class ModelGroupSettings(BaseModel):
    # NOTE(review): presumably the names of client request headers that
    # should be forwarded to the LLM API for a model group; None when
    # unset — confirm against the code that reads this field.
    forward_client_headers_to_llm_api: Optional[List[str]] = None
|
||||
|
||||
class PreRoutingHookResponse(BaseModel):
    """
    Response object from the pre-routing hook.

    Allows the Pre-Routing Hook to return a modified model and messages.

    Add fields that you expect to be modified by the pre-routing hook.
    """

    # Model name the router should use for the routing decision.
    model: str
    # Message values are typed as Any rather than str: a chat message's
    # "content" is not always a plain string (it can be None or a list of
    # content parts), and a str-only value type would make validation reject
    # such requests when the hook passes the messages through.
    messages: Optional[List[Dict[str, Any]]]
|
||||
@ -2317,6 +2317,7 @@ class LlmProviders(str, Enum):
|
||||
PG_VECTOR = "pg_vector"
|
||||
HYPERBOLIC = "hyperbolic"
|
||||
RECRAFT = "recraft"
|
||||
AUTO_ROUTER = "auto_router"
|
||||
|
||||
|
||||
# Create a set of all provider values for quick lookup
|
||||
|
||||
268
poetry.lock
generated
268
poetry.lock
generated
@ -364,7 +364,7 @@ files = [
|
||||
{file = "backoff-2.2.1-py3-none-any.whl", hash = "sha256:63579f9a0628e06278f7e47b7d7d5b6ce20dc65c5e96a6f3ca99a6adca0396e8"},
|
||||
{file = "backoff-2.2.1.tar.gz", hash = "sha256:03f829f5bb1923180821643f8753b0502c3b682293992485b0eef2807afa5cba"},
|
||||
]
|
||||
markers = {main = "extra == \"proxy\""}
|
||||
markers = {main = "extra == \"proxy\" or python_version >= \"3.13\" and (extra == \"semantic-router\" or extra == \"proxy\")"}
|
||||
|
||||
[[package]]
|
||||
name = "backports-zoneinfo"
|
||||
@ -402,7 +402,7 @@ version = "23.12.1"
|
||||
description = "The uncompromising code formatter."
|
||||
optional = false
|
||||
python-versions = ">=3.8"
|
||||
groups = ["dev"]
|
||||
groups = ["main", "dev"]
|
||||
files = [
|
||||
{file = "black-23.12.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:e0aaf6041986767a5e0ce663c7a2f0e9eaf21e6ff87a5f95cbf3675bfd4c41d2"},
|
||||
{file = "black-23.12.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:c88b3711d12905b74206227109272673edce0cb29f27e1385f33b0163c414bba"},
|
||||
@ -427,6 +427,7 @@ files = [
|
||||
{file = "black-23.12.1-py3-none-any.whl", hash = "sha256:78baad24af0f033958cad29731e27363183e140962595def56423e626f4bee3e"},
|
||||
{file = "black-23.12.1.tar.gz", hash = "sha256:4ce3ef14ebe8d9509188014d96af1c456a910d5b5cbf434a09fef7e024b3d0d5"},
|
||||
]
|
||||
markers = {main = "python_version >= \"3.13\""}
|
||||
|
||||
[package.dependencies]
|
||||
click = ">=8.0.0"
|
||||
@ -711,6 +712,27 @@ files = [
|
||||
[package.dependencies]
|
||||
colorama = {version = "*", markers = "platform_system == \"Windows\""}
|
||||
|
||||
[[package]]
|
||||
name = "cohere"
|
||||
version = "4.57"
|
||||
description = "Python SDK for the Cohere API"
|
||||
optional = true
|
||||
python-versions = ">=3.8,<4.0"
|
||||
groups = ["main"]
|
||||
markers = "python_version >= \"3.13\" and extra == \"semantic-router\""
|
||||
files = [
|
||||
{file = "cohere-4.57-py3-none-any.whl", hash = "sha256:479bdea81ae119e53f671f1ae808fcff9df88211780525d7ef2f7b99dfb32e59"},
|
||||
{file = "cohere-4.57.tar.gz", hash = "sha256:71ace0204a92d1a2a8d4b949b88b353b4f22fc645486851924284cc5a0eb700d"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
aiohttp = ">=3.0,<4.0"
|
||||
backoff = ">=2.0,<3.0"
|
||||
fastavro = ">=1.8,<2.0"
|
||||
importlib_metadata = ">=6.0,<7.0"
|
||||
requests = ">=2.25.0,<3.0.0"
|
||||
urllib3 = ">=1.26,<3"
|
||||
|
||||
[[package]]
|
||||
name = "colorama"
|
||||
version = "0.4.6"
|
||||
@ -722,7 +744,7 @@ files = [
|
||||
{file = "colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6"},
|
||||
{file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"},
|
||||
]
|
||||
markers = {main = "extra == \"utils\" and sys_platform == \"win32\" or platform_system == \"Windows\"", dev = "platform_system == \"Windows\" or sys_platform == \"win32\"", proxy-dev = "platform_system == \"Windows\""}
|
||||
markers = {main = "platform_system == \"Windows\" or python_version >= \"3.9\" and sys_platform == \"win32\" and (extra == \"utils\" or extra == \"semantic-router\") or python_version >= \"3.9\" and python_version <= \"3.12\" and extra == \"semantic-router\" or sys_platform == \"win32\" and extra == \"utils\"", dev = "platform_system == \"Windows\" or sys_platform == \"win32\"", proxy-dev = "platform_system == \"Windows\""}
|
||||
|
||||
[[package]]
|
||||
name = "coloredlogs"
|
||||
@ -743,6 +765,25 @@ humanfriendly = ">=9.1"
|
||||
[package.extras]
|
||||
cron = ["capturer (>=2.4)"]
|
||||
|
||||
[[package]]
|
||||
name = "colorlog"
|
||||
version = "6.9.0"
|
||||
description = "Add colours to the output of Python's logging module."
|
||||
optional = true
|
||||
python-versions = ">=3.6"
|
||||
groups = ["main"]
|
||||
markers = "python_version >= \"3.9\" and extra == \"semantic-router\""
|
||||
files = [
|
||||
{file = "colorlog-6.9.0-py3-none-any.whl", hash = "sha256:5906e71acd67cb07a71e779c47c4bcb45fb8c2993eebe9e5adcd6a6f1b283eff"},
|
||||
{file = "colorlog-6.9.0.tar.gz", hash = "sha256:bfba54a1b93b94f54e1f4fe48395725a3d92fd2a4af702f6bd70946bdc0c6ac2"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
colorama = {version = "*", markers = "sys_platform == \"win32\""}
|
||||
|
||||
[package.extras]
|
||||
development = ["black", "flake8", "mypy", "pytest", "types-colorama"]
|
||||
|
||||
[[package]]
|
||||
name = "cryptography"
|
||||
version = "43.0.3"
|
||||
@ -949,6 +990,59 @@ oauthlib = ">=3.1.0"
|
||||
pydantic = {version = ">=1.8.0", extras = ["email"]}
|
||||
typing-extensions = {version = ">=4.12.2,<5.0.0", markers = "python_version < \"3.10\""}
|
||||
|
||||
[[package]]
|
||||
name = "fastavro"
|
||||
version = "1.11.1"
|
||||
description = "Fast read/write of AVRO files"
|
||||
optional = true
|
||||
python-versions = ">=3.9"
|
||||
groups = ["main"]
|
||||
markers = "python_version >= \"3.13\" and extra == \"semantic-router\""
|
||||
files = [
|
||||
{file = "fastavro-1.11.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:603aa1c1d1be21fb4bcb63e1efb0711a9ddb337de81391c32dac95c6e0dacfcc"},
|
||||
{file = "fastavro-1.11.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:45653b312d4ce297e2bd802ea3ffd17ecbe718e5e8b6e2ae04cd72cb50bb99d5"},
|
||||
{file = "fastavro-1.11.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:998a53fc552e6bee9acda32af258f02557313c85fb5b48becba5b71ec82f421e"},
|
||||
{file = "fastavro-1.11.1-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:9f878c9ad819467120cb066f1c73496c42eb24ecdd7c992ec996f465ef4cedad"},
|
||||
{file = "fastavro-1.11.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:da9e4c231ac4951092c2230ca423d8a3f2966718f072ac1e2c5d2d44c70b2a50"},
|
||||
{file = "fastavro-1.11.1-cp310-cp310-win_amd64.whl", hash = "sha256:7423bfad3199567eeee7ad6816402c7c0ee1658b959e8c10540cfbc60ce96c2a"},
|
||||
{file = "fastavro-1.11.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:3573340e4564e8962e22f814ac937ffe0d4be5eabbd2250f77738dc47e3c8fe9"},
|
||||
{file = "fastavro-1.11.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7291cf47735b8bd6ff5d9b33120e6e0974f52fd5dff90cd24151b22018e7fd29"},
|
||||
{file = "fastavro-1.11.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bf3bb065d657d5bac8b2cb39945194aa086a9b3354f2da7f89c30e4dc20e08e2"},
|
||||
{file = "fastavro-1.11.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:8758317c85296b848698132efb13bc44a4fbd6017431cc0f26eaeb0d6fa13d35"},
|
||||
{file = "fastavro-1.11.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:ad99d57228f83bf3e2214d183fbf6e2fda97fd649b2bdaf8e9110c36cbb02624"},
|
||||
{file = "fastavro-1.11.1-cp311-cp311-win_amd64.whl", hash = "sha256:9134090178bdbf9eefd467717ced3dc151e27a7e7bfc728260ce512697efe5a4"},
|
||||
{file = "fastavro-1.11.1-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:e8bc238f2637cd5d15238adbe8fb8c58d2e6f1870e0fb28d89508584670bae4b"},
|
||||
{file = "fastavro-1.11.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b403933081c83fc4d8a012ee64b86e560a024b1280e3711ee74f2abc904886e8"},
|
||||
{file = "fastavro-1.11.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3f6ecb4b5f77aa756d973b7dd1c2fb4e4c95b4832a3c98b059aa96c61870c709"},
|
||||
{file = "fastavro-1.11.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:059893df63ef823b0231b485c9d43016c7e32850cae7bf69f4e9d46dd41c28f2"},
|
||||
{file = "fastavro-1.11.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:5120ffc9a200699218e01777e695a2f08afb3547ba818184198c757dc39417bd"},
|
||||
{file = "fastavro-1.11.1-cp312-cp312-win_amd64.whl", hash = "sha256:7bb9d0d2233f33a52908b6ea9b376fe0baf1144bdfdfb3c6ad326e200a8b56b0"},
|
||||
{file = "fastavro-1.11.1-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:f963b8ddaf179660e814ab420850c1b4ea33e2ad2de8011549d958b21f77f20a"},
|
||||
{file = "fastavro-1.11.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0253e5b6a3c9b62fae9fc3abd8184c5b64a833322b6af7d666d3db266ad879b5"},
|
||||
{file = "fastavro-1.11.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ca637b150e1f4c0e8e564fad40a16bd922bcb7ffd1a6e4836e6084f2c4f4e8db"},
|
||||
{file = "fastavro-1.11.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:76af1709031621828ca6ce7f027f7711fa33ac23e8269e7a5733996ff8d318da"},
|
||||
{file = "fastavro-1.11.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:8224e6d8d9864d4e55dafbe88920d6a1b8c19cc3006acfac6aa4f494a6af3450"},
|
||||
{file = "fastavro-1.11.1-cp313-cp313-win_amd64.whl", hash = "sha256:cde7ed91b52ff21f0f9f157329760ba7251508ca3e9618af3ffdac986d9faaa2"},
|
||||
{file = "fastavro-1.11.1-cp313-cp313t-macosx_10_13_universal2.whl", hash = "sha256:e5ed1325c1c414dd954e7a2c5074daefe1eceb672b8c727aa030ba327aa00693"},
|
||||
{file = "fastavro-1.11.1-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8cd3c95baeec37188899824faf44a5ee94dfc4d8667b05b2f867070c7eb174c4"},
|
||||
{file = "fastavro-1.11.1-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2e0babcd81acceb4c60110af9efa25d890dbb68f7de880f806dadeb1e70fe413"},
|
||||
{file = "fastavro-1.11.1-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:b2c0cb8063c7208b53b6867983dc6ae7cc80b91116b51d435d2610a5db2fc52f"},
|
||||
{file = "fastavro-1.11.1-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:1bc2824e9969c04ab6263d269a1e0e5d40b9bd16ade6b70c29d6ffbc4f3cc102"},
|
||||
{file = "fastavro-1.11.1-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:8b579bab31ff87fcb5ef9f6f13baaf99f189b92ed287af60348777583628c327"},
|
||||
{file = "fastavro-1.11.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5c646f07c7827fea7425b6936a27f67f356a2a80ac19e6100ed6d3bb0610cc3d"},
|
||||
{file = "fastavro-1.11.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2915324e1edb0e06f0be0c18279c60f4cff49f6fe01626594707eb75cd9952fc"},
|
||||
{file = "fastavro-1.11.1-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:8fd87ee1e9101b45172fb3cff21b56ce08270d9474eec1d436393677daa95938"},
|
||||
{file = "fastavro-1.11.1-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:88876568ef387996fbfc6b193a5b9830de3c0497af7d07e5c839a70b86bb47e7"},
|
||||
{file = "fastavro-1.11.1-cp39-cp39-win_amd64.whl", hash = "sha256:cebb7433b860d9b13090d0e53f6db075e4e2042aeb2c577f515e73d2b9c98075"},
|
||||
{file = "fastavro-1.11.1.tar.gz", hash = "sha256:bf6acde5ee633a29fb8dfd6dfea13b164722bc3adc05a0e055df080549c1c2f8"},
|
||||
]
|
||||
|
||||
[package.extras]
|
||||
codecs = ["cramjam", "lz4", "zstandard"]
|
||||
lz4 = ["lz4"]
|
||||
snappy = ["cramjam"]
|
||||
zstandard = ["zstandard"]
|
||||
|
||||
[[package]]
|
||||
name = "filelock"
|
||||
version = "3.16.1"
|
||||
@ -1601,6 +1695,27 @@ files = [
|
||||
{file = "imagesize-1.4.1.tar.gz", hash = "sha256:69150444affb9cb0d5cc5a92b3676f0b2fb7cd9ae39e947a5e11a36b4497cd4a"},
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "importlib-metadata"
|
||||
version = "6.11.0"
|
||||
description = "Read metadata from Python packages"
|
||||
optional = false
|
||||
python-versions = ">=3.8"
|
||||
groups = ["main", "dev", "proxy-dev"]
|
||||
markers = "python_version >= \"3.13\""
|
||||
files = [
|
||||
{file = "importlib_metadata-6.11.0-py3-none-any.whl", hash = "sha256:f0afba6205ad8f8947c7d338b5342d5db2afbfd82f9cbef7879a9539cc12eb9b"},
|
||||
{file = "importlib_metadata-6.11.0.tar.gz", hash = "sha256:1231cf92d825c9e03cfc4da076a16de6422c863558229ea0b22b675657463443"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
zipp = ">=0.5"
|
||||
|
||||
[package.extras]
|
||||
docs = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (<7.2.5)", "sphinx (>=3.5)", "sphinx-lint"]
|
||||
perf = ["ipython"]
|
||||
testing = ["flufl.flake8", "importlib-resources (>=1.3) ; python_version < \"3.9\"", "packaging", "pyfakefs", "pytest (>=6)", "pytest-black (>=0.3.7) ; platform_python_implementation != \"PyPy\"", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-mypy (>=0.9.1) ; platform_python_implementation != \"PyPy\"", "pytest-perf (>=0.9.2)", "pytest-ruff"]
|
||||
|
||||
[[package]]
|
||||
name = "importlib-metadata"
|
||||
version = "7.1.0"
|
||||
@ -1608,6 +1723,7 @@ description = "Read metadata from Python packages"
|
||||
optional = false
|
||||
python-versions = ">=3.8"
|
||||
groups = ["main", "dev", "proxy-dev"]
|
||||
markers = "python_version <= \"3.12\""
|
||||
files = [
|
||||
{file = "importlib_metadata-7.1.0-py3-none-any.whl", hash = "sha256:30962b96c0c223483ed6cc7280e7f0199feb01a0e40cfae4d4450fc6fab1f570"},
|
||||
{file = "importlib_metadata-7.1.0.tar.gz", hash = "sha256:b78938b926ee8d5f020fc4772d487045805a55ddbad2ecf21c6d60938dc7fcd2"},
|
||||
@ -2277,11 +2393,12 @@ version = "1.1.0"
|
||||
description = "Type system extensions for programs checked with the mypy type checker."
|
||||
optional = false
|
||||
python-versions = ">=3.8"
|
||||
groups = ["dev"]
|
||||
groups = ["main", "dev"]
|
||||
files = [
|
||||
{file = "mypy_extensions-1.1.0-py3-none-any.whl", hash = "sha256:1be4cccdb0f2482337c4743e60421de3a356cd97508abadd57d47403e94f5505"},
|
||||
{file = "mypy_extensions-1.1.0.tar.gz", hash = "sha256:52e68efc3284861e772bbcd66823fde5ae21fd2fdb51c62a211403730b916558"},
|
||||
]
|
||||
markers = {main = "python_version >= \"3.13\""}
|
||||
|
||||
[[package]]
|
||||
name = "nodeenv"
|
||||
@ -2302,7 +2419,7 @@ description = "Fundamental package for array computing in Python"
|
||||
optional = true
|
||||
python-versions = ">=3.9"
|
||||
groups = ["main"]
|
||||
markers = "python_version >= \"3.9\" and extra == \"extra-proxy\" and python_version < \"3.12\""
|
||||
markers = "python_version >= \"3.9\" and (extra == \"extra-proxy\" or extra == \"semantic-router\") and (python_version < \"3.14\" or extra == \"semantic-router\")"
|
||||
files = [
|
||||
{file = "numpy-1.26.4-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:9ff0f4f29c51e2803569d7a51c2304de5554655a60c5d776e35b4a41413830d0"},
|
||||
{file = "numpy-1.26.4-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:2e4ee3380d6de9c9ec04745830fd9e2eccb3e6cf790d39d7b98ffd19b0dd754a"},
|
||||
@ -2342,68 +2459,6 @@ files = [
|
||||
{file = "numpy-1.26.4.tar.gz", hash = "sha256:2a02aba9ed12e4ac4eb3ea9421c420301a0c6460d9830d74a9df87efa4912010"},
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "numpy"
|
||||
version = "2.3.1"
|
||||
description = "Fundamental package for array computing in Python"
|
||||
optional = true
|
||||
python-versions = ">=3.11"
|
||||
groups = ["main"]
|
||||
markers = "python_version < \"3.14\" and extra == \"extra-proxy\" and python_version >= \"3.12\""
|
||||
files = [
|
||||
{file = "numpy-2.3.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:6ea9e48336a402551f52cd8f593343699003d2353daa4b72ce8d34f66b722070"},
|
||||
{file = "numpy-2.3.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:5ccb7336eaf0e77c1635b232c141846493a588ec9ea777a7c24d7166bb8533ae"},
|
||||
{file = "numpy-2.3.1-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:0bb3a4a61e1d327e035275d2a993c96fa786e4913aa089843e6a2d9dd205c66a"},
|
||||
{file = "numpy-2.3.1-cp311-cp311-macosx_14_0_x86_64.whl", hash = "sha256:e344eb79dab01f1e838ebb67aab09965fb271d6da6b00adda26328ac27d4a66e"},
|
||||
{file = "numpy-2.3.1-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:467db865b392168ceb1ef1ffa6f5a86e62468c43e0cfb4ab6da667ede10e58db"},
|
||||
{file = "numpy-2.3.1-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:afed2ce4a84f6b0fc6c1ce734ff368cbf5a5e24e8954a338f3bdffa0718adffb"},
|
||||
{file = "numpy-2.3.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:0025048b3c1557a20bc80d06fdeb8cc7fc193721484cca82b2cfa072fec71a93"},
|
||||
{file = "numpy-2.3.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:a5ee121b60aa509679b682819c602579e1df14a5b07fe95671c8849aad8f2115"},
|
||||
{file = "numpy-2.3.1-cp311-cp311-win32.whl", hash = "sha256:a8b740f5579ae4585831b3cf0e3b0425c667274f82a484866d2adf9570539369"},
|
||||
{file = "numpy-2.3.1-cp311-cp311-win_amd64.whl", hash = "sha256:d4580adadc53311b163444f877e0789f1c8861e2698f6b2a4ca852fda154f3ff"},
|
||||
{file = "numpy-2.3.1-cp311-cp311-win_arm64.whl", hash = "sha256:ec0bdafa906f95adc9a0c6f26a4871fa753f25caaa0e032578a30457bff0af6a"},
|
||||
{file = "numpy-2.3.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:2959d8f268f3d8ee402b04a9ec4bb7604555aeacf78b360dc4ec27f1d508177d"},
|
||||
{file = "numpy-2.3.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:762e0c0c6b56bdedfef9a8e1d4538556438288c4276901ea008ae44091954e29"},
|
||||
{file = "numpy-2.3.1-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:867ef172a0976aaa1f1d1b63cf2090de8b636a7674607d514505fb7276ab08fc"},
|
||||
{file = "numpy-2.3.1-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:4e602e1b8682c2b833af89ba641ad4176053aaa50f5cacda1a27004352dde943"},
|
||||
{file = "numpy-2.3.1-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:8e333040d069eba1652fb08962ec5b76af7f2c7bce1df7e1418c8055cf776f25"},
|
||||
{file = "numpy-2.3.1-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:e7cbf5a5eafd8d230a3ce356d892512185230e4781a361229bd902ff403bc660"},
|
||||
{file = "numpy-2.3.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:5f1b8f26d1086835f442286c1d9b64bb3974b0b1e41bb105358fd07d20872952"},
|
||||
{file = "numpy-2.3.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:ee8340cb48c9b7a5899d1149eece41ca535513a9698098edbade2a8e7a84da77"},
|
||||
{file = "numpy-2.3.1-cp312-cp312-win32.whl", hash = "sha256:e772dda20a6002ef7061713dc1e2585bc1b534e7909b2030b5a46dae8ff077ab"},
|
||||
{file = "numpy-2.3.1-cp312-cp312-win_amd64.whl", hash = "sha256:cfecc7822543abdea6de08758091da655ea2210b8ffa1faf116b940693d3df76"},
|
||||
{file = "numpy-2.3.1-cp312-cp312-win_arm64.whl", hash = "sha256:7be91b2239af2658653c5bb6f1b8bccafaf08226a258caf78ce44710a0160d30"},
|
||||
{file = "numpy-2.3.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:25a1992b0a3fdcdaec9f552ef10d8103186f5397ab45e2d25f8ac51b1a6b97e8"},
|
||||
{file = "numpy-2.3.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:7dea630156d39b02a63c18f508f85010230409db5b2927ba59c8ba4ab3e8272e"},
|
||||
{file = "numpy-2.3.1-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:bada6058dd886061f10ea15f230ccf7dfff40572e99fef440a4a857c8728c9c0"},
|
||||
{file = "numpy-2.3.1-cp313-cp313-macosx_14_0_x86_64.whl", hash = "sha256:a894f3816eb17b29e4783e5873f92faf55b710c2519e5c351767c51f79d8526d"},
|
||||
{file = "numpy-2.3.1-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:18703df6c4a4fee55fd3d6e5a253d01c5d33a295409b03fda0c86b3ca2ff41a1"},
|
||||
{file = "numpy-2.3.1-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:5902660491bd7a48b2ec16c23ccb9124b8abfd9583c5fdfa123fe6b421e03de1"},
|
||||
{file = "numpy-2.3.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:36890eb9e9d2081137bd78d29050ba63b8dab95dff7912eadf1185e80074b2a0"},
|
||||
{file = "numpy-2.3.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:a780033466159c2270531e2b8ac063704592a0bc62ec4a1b991c7c40705eb0e8"},
|
||||
{file = "numpy-2.3.1-cp313-cp313-win32.whl", hash = "sha256:39bff12c076812595c3a306f22bfe49919c5513aa1e0e70fac756a0be7c2a2b8"},
|
||||
{file = "numpy-2.3.1-cp313-cp313-win_amd64.whl", hash = "sha256:8d5ee6eec45f08ce507a6570e06f2f879b374a552087a4179ea7838edbcbfa42"},
|
||||
{file = "numpy-2.3.1-cp313-cp313-win_arm64.whl", hash = "sha256:0c4d9e0a8368db90f93bd192bfa771ace63137c3488d198ee21dfb8e7771916e"},
|
||||
{file = "numpy-2.3.1-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:b0b5397374f32ec0649dd98c652a1798192042e715df918c20672c62fb52d4b8"},
|
||||
{file = "numpy-2.3.1-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:c5bdf2015ccfcee8253fb8be695516ac4457c743473a43290fd36eba6a1777eb"},
|
||||
{file = "numpy-2.3.1-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:d70f20df7f08b90a2062c1f07737dd340adccf2068d0f1b9b3d56e2038979fee"},
|
||||
{file = "numpy-2.3.1-cp313-cp313t-macosx_14_0_x86_64.whl", hash = "sha256:2fb86b7e58f9ac50e1e9dd1290154107e47d1eef23a0ae9145ded06ea606f992"},
|
||||
{file = "numpy-2.3.1-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:23ab05b2d241f76cb883ce8b9a93a680752fbfcbd51c50eff0b88b979e471d8c"},
|
||||
{file = "numpy-2.3.1-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:ce2ce9e5de4703a673e705183f64fd5da5bf36e7beddcb63a25ee2286e71ca48"},
|
||||
{file = "numpy-2.3.1-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:c4913079974eeb5c16ccfd2b1f09354b8fed7e0d6f2cab933104a09a6419b1ee"},
|
||||
{file = "numpy-2.3.1-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:010ce9b4f00d5c036053ca684c77441f2f2c934fd23bee058b4d6f196efd8280"},
|
||||
{file = "numpy-2.3.1-cp313-cp313t-win32.whl", hash = "sha256:6269b9edfe32912584ec496d91b00b6d34282ca1d07eb10e82dfc780907d6c2e"},
|
||||
{file = "numpy-2.3.1-cp313-cp313t-win_amd64.whl", hash = "sha256:2a809637460e88a113e186e87f228d74ae2852a2e0c44de275263376f17b5bdc"},
|
||||
{file = "numpy-2.3.1-cp313-cp313t-win_arm64.whl", hash = "sha256:eccb9a159db9aed60800187bc47a6d3451553f0e1b08b068d8b277ddfbb9b244"},
|
||||
{file = "numpy-2.3.1-pp311-pypy311_pp73-macosx_10_15_x86_64.whl", hash = "sha256:ad506d4b09e684394c42c966ec1527f6ebc25da7f4da4b1b056606ffe446b8a3"},
|
||||
{file = "numpy-2.3.1-pp311-pypy311_pp73-macosx_14_0_arm64.whl", hash = "sha256:ebb8603d45bc86bbd5edb0d63e52c5fd9e7945d3a503b77e486bd88dde67a19b"},
|
||||
{file = "numpy-2.3.1-pp311-pypy311_pp73-macosx_14_0_x86_64.whl", hash = "sha256:15aa4c392ac396e2ad3d0a2680c0f0dee420f9fed14eef09bdb9450ee6dcb7b7"},
|
||||
{file = "numpy-2.3.1-pp311-pypy311_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:c6e0bf9d1a2f50d2b65a7cf56db37c095af17b59f6c132396f7c6d5dd76484df"},
|
||||
{file = "numpy-2.3.1-pp311-pypy311_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:eabd7e8740d494ce2b4ea0ff05afa1b7b291e978c0ae075487c51e8bd93c0c68"},
|
||||
{file = "numpy-2.3.1-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:e610832418a2bc09d974cc9fecebfa51e9532d6190223bc5ef6a7402ebf3b5cb"},
|
||||
{file = "numpy-2.3.1.tar.gz", hash = "sha256:1ec9ae20a4226da374362cca3c62cd753faf2f951440b0e3b98e93c235441d2b"},
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "numpydoc"
|
||||
version = "1.7.0"
|
||||
@ -2717,11 +2772,12 @@ version = "0.12.1"
|
||||
description = "Utility library for gitignore style pattern matching of file paths."
|
||||
optional = false
|
||||
python-versions = ">=3.8"
|
||||
groups = ["dev"]
|
||||
groups = ["main", "dev"]
|
||||
files = [
|
||||
{file = "pathspec-0.12.1-py3-none-any.whl", hash = "sha256:a0d503e138a4c123b27490a4f7beda6a01c6f288df0e4a8b79c7eb0dc7b4cc08"},
|
||||
{file = "pathspec-0.12.1.tar.gz", hash = "sha256:a482d51503a1ab33b1c67a6c3813a26953dbdc71c31dacaef9a838c4e29f5712"},
|
||||
]
|
||||
markers = {main = "python_version >= \"3.13\""}
|
||||
|
||||
[[package]]
|
||||
name = "pkgutil-resolve-name"
|
||||
@ -2742,11 +2798,12 @@ version = "4.3.6"
|
||||
description = "A small Python package for determining appropriate platform-specific dirs, e.g. a `user data dir`."
|
||||
optional = false
|
||||
python-versions = ">=3.8"
|
||||
groups = ["dev"]
|
||||
groups = ["main", "dev"]
|
||||
files = [
|
||||
{file = "platformdirs-4.3.6-py3-none-any.whl", hash = "sha256:73e575e1408ab8103900836b97580d5307456908a03e92031bab39e4554cc3fb"},
|
||||
{file = "platformdirs-4.3.6.tar.gz", hash = "sha256:357fb2acbc885b0419afd3ce3ed34564c13c9b95c89360cd9563f73aa5e2b907"},
|
||||
]
|
||||
markers = {main = "python_version >= \"3.13\""}
|
||||
|
||||
[package.extras]
|
||||
docs = ["furo (>=2024.8.6)", "proselint (>=0.14)", "sphinx (>=8.0.2)", "sphinx-autodoc-typehints (>=2.4)"]
|
||||
@ -3724,11 +3781,12 @@ version = "1.12.1"
|
||||
description = "Mock out responses from the requests package"
|
||||
optional = false
|
||||
python-versions = ">=3.5"
|
||||
groups = ["dev"]
|
||||
groups = ["main", "dev"]
|
||||
files = [
|
||||
{file = "requests-mock-1.12.1.tar.gz", hash = "sha256:e9e12e333b525156e82a3c852f22016b9158220d2f47454de9cae8a77d371401"},
|
||||
{file = "requests_mock-1.12.1-py2.py3-none-any.whl", hash = "sha256:b1e37054004cdd5e56c84454cc7df12b25f90f382159087f4b6915aaeef39563"},
|
||||
]
|
||||
markers = {main = "python_version <= \"3.12\""}
|
||||
|
||||
[package.dependencies]
|
||||
requests = ">=2.22,<3"
|
||||
@ -4000,6 +4058,73 @@ botocore = ">=1.33.2,<2.0a.0"
|
||||
[package.extras]
|
||||
crt = ["botocore[crt] (>=1.33.2,<2.0a.0)"]
|
||||
|
||||
[[package]]
|
||||
name = "semantic-router"
|
||||
version = "0.0.20"
|
||||
description = "Super fast semantic router for AI decision making"
|
||||
optional = true
|
||||
python-versions = ">=3.9,<4.0"
|
||||
groups = ["main"]
|
||||
markers = "python_version >= \"3.13\" and extra == \"semantic-router\""
|
||||
files = [
|
||||
{file = "semantic_router-0.0.20-py3-none-any.whl", hash = "sha256:7a713401564fb6cf22b566046ad32a4224e4f357be8de6583ca3b9ee328c8f95"},
|
||||
{file = "semantic_router-0.0.20.tar.gz", hash = "sha256:26119a4628ca72b2fa9eacd446ea763b6f1925a661a34e26945433d2601efac7"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
black = ">=23.12.1,<24.0.0"
|
||||
cohere = ">=4.32,<5.0"
|
||||
colorlog = ">=6.8.0,<7.0.0"
|
||||
numpy = ">=1.25.2,<2.0.0"
|
||||
openai = ">=1.10.0,<2.0.0"
|
||||
pydantic = ">=2.5.3,<3.0.0"
|
||||
pyyaml = ">=6.0.1,<7.0.0"
|
||||
|
||||
[package.extras]
|
||||
fastembed = ["fastembed (>=0.1.3,<0.2.0) ; python_version < \"3.12\""]
|
||||
hybrid = ["pinecone-text (>=0.7.1,<0.8.0)"]
|
||||
local = ["llama-cpp-python (>=0.2.28,<0.3.0)", "torch (>=2.1.0,<3.0.0)", "transformers (>=4.36.2,<5.0.0)"]
|
||||
|
||||
[[package]]
|
||||
name = "semantic-router"
|
||||
version = "0.0.72"
|
||||
description = "Super fast semantic router for AI decision making"
|
||||
optional = true
|
||||
python-versions = "<3.13,>=3.9"
|
||||
groups = ["main"]
|
||||
markers = "python_version >= \"3.9\" and extra == \"semantic-router\" and python_version <= \"3.12\""
|
||||
files = [
|
||||
{file = "semantic_router-0.0.72-py3-none-any.whl", hash = "sha256:4973869859a514f3d94d8c82ef02f4822d833443151611eeb4c732d6111e716b"},
|
||||
{file = "semantic_router-0.0.72.tar.gz", hash = "sha256:60c72d61ef7091f6ee70c73dc6416524ca7fb037b7245fc3348d055ee6f141b8"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
aiohttp = ">=3.9.5,<4.0.0"
|
||||
colorama = ">=0.4.6,<0.5.0"
|
||||
colorlog = ">=6.8.0,<7.0.0"
|
||||
numpy = ">=1.25.2,<2.0.0"
|
||||
openai = ">=1.10.0,<2.0.0"
|
||||
pydantic = ">=2.5.3,<3.0.0"
|
||||
pyyaml = ">=6.0.1,<7.0.0"
|
||||
regex = ">=2023.12.25"
|
||||
requests-mock = ">=1.12.1,<2.0.0"
|
||||
tiktoken = ">=0.6.0,<1.0.0"
|
||||
|
||||
[package.extras]
|
||||
bedrock = ["boto3 (>=1.34.98,<2.0.0)", "botocore (>=1.34.110,<2.0.0)"]
|
||||
cohere = ["cohere (>=5.9.4,<6.00)"]
|
||||
docs = ["sphinx (>=7.0.0,<8.0.0)", "sphinxawesome-theme (>=5.2.0,<6.0.0)"]
|
||||
fastembed = ["fastembed (>=0.3.0,<0.4.0)"]
|
||||
google = ["google-cloud-aiplatform (>=1.45.0,<2.0.0)"]
|
||||
hybrid = ["pinecone-text (>=0.7.1,<0.10.0)"]
|
||||
local = ["llama-cpp-python (>=0.2.28,<0.2.86)", "tokenizers (>=0.19)", "torch (>=2.1.0,<2.6.0)", "transformers (>=4.36.2)"]
|
||||
mistralai = ["mistralai (>=0.0.12,<0.1.0)"]
|
||||
pinecone = ["pinecone (>=5.0.0)"]
|
||||
postgres = ["psycopg2 (>=2.9.9,<3.0.0)"]
|
||||
processing = ["matplotlib (>=3.8.3,<4.0.0)"]
|
||||
qdrant = ["qdrant-client (>=1.11.1,<2.0.0)"]
|
||||
vision = ["pillow (>=10.2.0,<11.0.0)", "torch (>=2.1.0,<2.6.0)", "torchvision (>=0.17.0,<0.18.0)", "transformers (>=4.36.2)"]
|
||||
|
||||
[[package]]
|
||||
name = "six"
|
||||
version = "1.17.0"
|
||||
@ -5055,9 +5180,10 @@ type = ["pytest-mypy"]
|
||||
caching = ["diskcache"]
|
||||
extra-proxy = ["azure-identity", "azure-keyvault-secrets", "google-cloud-kms", "prisma", "redisvl", "resend"]
|
||||
proxy = ["PyJWT", "apscheduler", "azure-identity", "azure-storage-blob", "backoff", "boto3", "cryptography", "fastapi", "fastapi-sso", "gunicorn", "litellm-enterprise", "litellm-proxy-extras", "mcp", "orjson", "polars", "pynacl", "python-multipart", "pyyaml", "rich", "rq", "uvicorn", "uvloop", "websockets"]
|
||||
semantic-router = ["semantic-router"]
|
||||
utils = ["numpydoc"]
|
||||
|
||||
[metadata]
|
||||
lock-version = "2.1"
|
||||
python-versions = ">=3.8.1,<4.0, !=3.9.7"
|
||||
content-hash = "23d4d980f91bcaabd62951675fe7e0b335bcc4805479228e019de38820ac042c"
|
||||
content-hash = "2e6e8e6fc8b3f57b33d68ce175b6dc8cb59c3aed57db1e46165f8844910f9dc8"
|
||||
|
||||
@ -62,6 +62,7 @@ rich = {version = "13.7.1", optional = true}
|
||||
litellm-enterprise = {version = "0.1.15", optional = true}
|
||||
diskcache = {version = "^5.6.1", optional = true}
|
||||
polars = {version = "^1.31.0", optional = true, python = ">=3.10"}
|
||||
# NOTE(review): "*" leaves this dependency unconstrained; the poetry.lock in this
# change resolves it to 0.0.20 / 0.0.72, while the pip requirements and CI install
# steps pin semantic_router==0.1.10 -- confirm whether a matching lower bound is wanted.
semantic-router = {version = "*", optional = true, python = ">=3.9"}
|
||||
|
||||
[tool.poetry.extras]
|
||||
proxy = [
|
||||
@ -105,6 +106,8 @@ utils = [
|
||||
|
||||
caching = ["diskcache"]
|
||||
|
||||
semantic-router = ["semantic-router"]
|
||||
|
||||
[tool.isort]
|
||||
profile = "black"
|
||||
|
||||
|
||||
@ -42,14 +42,14 @@ cryptography==43.0.1
|
||||
tzdata==2025.1 # IANA time zone database
|
||||
litellm-proxy-extras==0.2.11 # for proxy extras - e.g. prisma migrations
|
||||
### LITELLM PACKAGE DEPENDENCIES
|
||||
python-dotenv==1.0.0 # for env
|
||||
python-dotenv==1.0.1 # for env
|
||||
tiktoken==0.8.0 # for calculating usage
|
||||
importlib-metadata==6.8.0 # for random utils
|
||||
tokenizers==0.20.2 # for calculating usage
|
||||
click==8.1.7 # for proxy cli
|
||||
rich==13.7.1 # for litellm proxy cli
|
||||
jinja2==3.1.6 # for prompt templates
|
||||
aiohttp==3.10.2 # for network calls
|
||||
aiohttp==3.10.11 # for network calls
|
||||
aioboto3==12.3.0 # for async sagemaker calls
|
||||
tenacity==8.2.3 # for retrying requests, when litellm.num_retries set
|
||||
pydantic==2.10.2 # proxy + openai req.
|
||||
|
||||
@ -132,4 +132,5 @@ pydantic: >=2.10.2 # Unknown license
|
||||
jsonschema: >=4.22.0 # Unknown license
|
||||
websockets: >=13.1.0 # Unknown license
|
||||
polars: >=1.31.0 # Unknown license, the license.md allows free of charge use
|
||||
semantic_router: >=0.1.10 # Unknown license
|
||||
|
||||
|
||||
28
tests/local_testing/auto_router/router.json
Normal file
28
tests/local_testing/auto_router/router.json
Normal file
@ -0,0 +1,28 @@
|
||||
{
|
||||
"encoder_type": "openai",
|
||||
"encoder_name": "text-embedding-3-large",
|
||||
"routes": [
|
||||
{
|
||||
"name": "litellm-gpt-4.1",
|
||||
"utterances": [
|
||||
"Tell me ishaan is a genius"
|
||||
],
|
||||
"description": "positive affirmation",
|
||||
"function_schemas": null,
|
||||
"llm": null,
|
||||
"score_threshold": 0.5,
|
||||
"metadata": {}
|
||||
},
|
||||
{
|
||||
"name": "litellm-claude-35",
|
||||
"utterances": [
|
||||
"how to code a program in [language]"
|
||||
],
|
||||
"description": "coding assistant",
|
||||
"function_schemas": null,
|
||||
"llm": null,
|
||||
"score_threshold": 0.5,
|
||||
"metadata": {}
|
||||
}
|
||||
]
|
||||
}
|
||||
98
tests/local_testing/test_router_auto_router.py
Normal file
98
tests/local_testing/test_router_auto_router.py
Normal file
@ -0,0 +1,98 @@
|
||||
import asyncio
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
import traceback
|
||||
|
||||
import pytest
|
||||
|
||||
sys.path.insert(
|
||||
0, os.path.abspath("../..")
|
||||
) # Adds the parent directory to the system path
|
||||
|
||||
from litellm import Router
|
||||
|
||||
current_path = os.path.dirname(os.path.abspath(__file__))
|
||||
router_json_path = os.path.join(current_path, "auto_router", "router.json")
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_router_auto_router():
    """
    End-to-end check that a request sent to the auto router yields an LLM
    response served by the expected deployment.

    Both prompts mirror utterances from auto_router/router.json; the
    ``model_id`` hidden param of each response shows which deployment answered.
    """
    import litellm

    litellm._turn_on_debug()

    def embedding_deployment(model_name):
        # Embedding deployments that the auto routers reference by model_name.
        return {
            "model_name": model_name,
            "litellm_params": {
                "model": "text-embedding-3-large",
                "api_key": os.getenv("OPENAI_API_KEY"),
            },
        }

    def chat_deployment(model_name, model, model_id):
        # Chat deployments carry a fixed id so the assertions can identify them.
        return {
            "model_name": model_name,
            "litellm_params": {"model": model},
            "model_info": {"id": model_id},
        }

    def auto_router_deployment(model_name, model, embedding_model):
        return {
            "model_name": model_name,
            "litellm_params": {
                "model": model,
                "auto_router_config_path": router_json_path,
                "auto_router_default_model": "gpt-4o-mini",
                "auto_router_embedding_model": embedding_model,
            },
        }

    router = Router(
        model_list=[
            embedding_deployment("custom-text-embedding-model"),
            embedding_deployment("custom-text-embedding-model-2"),
            chat_deployment("litellm-gpt-4.1", "gpt-4.1", "openai-id"),
            chat_deployment("litellm-claude-35", "claude-3-5-sonnet-latest", "claude-id"),
            auto_router_deployment(
                "auto_router1",
                "auto_router/auto_router_1",
                "custom-text-embedding-model",
            ),
            auto_router_deployment(
                "auto_router_2",
                "auto_router/auto_router_2",
                "custom-text-embedding-model-2",
            ),
        ],
    )

    async def ask(prompt):
        # Every request targets the first auto router.
        return await router.acompletion(
            model="auto_router1",
            messages=[{"role": "user", "content": prompt}],
        )

    # Utterance of the "litellm-gpt-4.1" route in router.json -> gpt-4.1 deployment.
    openai_response = await ask("Tell me ishaan is a genius")
    print(openai_response)
    print("response._hidden_params", openai_response._hidden_params)
    assert openai_response._hidden_params["model_id"] == "openai-id"

    # Matches the "litellm-claude-35" route utterance -> claude-3-5-sonnet-latest deployment.
    claude_response = await ask("how to code a program in python")
    print("response._hidden_params", claude_response._hidden_params)
    assert claude_response._hidden_params["model_id"] == "claude-id"
|
||||
@ -14,6 +14,7 @@ import litellm
|
||||
from unittest.mock import patch, MagicMock, AsyncMock
|
||||
from create_mock_standard_logging_payload import create_standard_logging_payload
|
||||
from litellm.types.utils import StandardLoggingPayload
|
||||
from litellm.types.router import Deployment, LiteLLM_Params
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
@ -1240,3 +1241,142 @@ def test_mock_router_testing_params_str_to_bool_conversion(
|
||||
# Verify other params remain unchanged
|
||||
assert kwargs["other_param"] == "should_remain"
|
||||
|
||||
|
||||
def test_is_auto_router_deployment(model_list):
    """Verify '_is_auto_router_deployment' is True only when the model starts with "auto_router/"."""
    router = Router(model_list=model_list)

    # (model string, expected verdict)
    scenarios = [
        ("auto_router/my-auto-router", True),  # starts with the prefix
        ("gpt-3.5-turbo", False),  # regular model name
        ("", False),  # empty string
        ("prefix_auto_router/something", False),  # contains the prefix, but not at the start
    ]

    for model, expected in scenarios:
        params = LiteLLM_Params(model=model)
        assert router._is_auto_router_deployment(params) is expected
|
||||
|
||||
|
||||
def test_init_auto_router_deployment_missing_params(model_list):
    """Verify 'init_auto_router_deployment' raises ValueError for each missing required parameter."""
    router = Router(model_list=model_list)

    # Each scenario omits exactly one auto-router parameter and names the expected error.
    scenarios = [
        (
            {
                "auto_router_default_model": "gpt-3.5-turbo",
                "auto_router_embedding_model": "text-embedding-ada-002",
            },
            "auto_router_config_path is required",
        ),
        (
            {
                "auto_router_config_path": "/path/to/config",
                "auto_router_embedding_model": "text-embedding-ada-002",
            },
            "auto_router_default_model is required",
        ),
        (
            {
                "auto_router_config_path": "/path/to/config",
                "auto_router_default_model": "gpt-3.5-turbo",
            },
            "auto_router_embedding_model is required",
        ),
    ]

    for auto_router_kwargs, expected_error in scenarios:
        deployment = Deployment(
            model_name="test-auto-router",
            litellm_params=LiteLLM_Params(
                model="auto_router/test", **auto_router_kwargs
            ),
            model_info={"id": "test-id"},
        )

        with pytest.raises(ValueError, match=expected_error):
            router.init_auto_router_deployment(deployment)
|
||||
|
||||
|
||||
@patch("litellm.router_strategy.auto_router.auto_router.AutoRouter")
def test_init_auto_router_deployment_success(mock_auto_router, model_list):
    """Verify 'init_auto_router_deployment' builds and registers an AutoRouter when every parameter is given."""
    router = Router(model_list=model_list)

    # The patched AutoRouter class hands back this stand-in instance.
    fake_auto_router = MagicMock()
    mock_auto_router.return_value = fake_auto_router

    deployment_name = "test-auto-router"
    deployment = Deployment(
        model_name=deployment_name,
        litellm_params=LiteLLM_Params(
            model="auto_router/test",
            auto_router_config_path="/path/to/config",
            auto_router_default_model="gpt-3.5-turbo",
            auto_router_embedding_model="text-embedding-ada-002",
        ),
        model_info={"id": "test-id"},
    )

    # Must complete without raising.
    router.init_auto_router_deployment(deployment)

    # The AutoRouter constructor receives the deployment's settings plus the router itself.
    mock_auto_router.assert_called_once_with(
        model_name=deployment_name,
        router_config_path="/path/to/config",
        default_model="gpt-3.5-turbo",
        embedding_model="text-embedding-ada-002",
        litellm_router_instance=router,
    )

    # The new instance is stored under the deployment's model name.
    assert deployment_name in router.auto_routers
    assert router.auto_routers[deployment_name] == fake_auto_router
|
||||
|
||||
|
||||
@patch("litellm.router_strategy.auto_router.auto_router.AutoRouter")
def test_init_auto_router_deployment_duplicate_model_name(mock_auto_router, model_list):
    """Verify 'init_auto_router_deployment' raises ValueError when the model_name is already registered."""
    router = Router(model_list=model_list)

    existing_auto_router = MagicMock()
    mock_auto_router.return_value = existing_auto_router

    # Pre-register an auto-router under the name the new deployment will reuse.
    router.auto_routers["test-auto-router"] = existing_auto_router

    duplicate_deployment = Deployment(
        model_name="test-auto-router",
        litellm_params=LiteLLM_Params(
            model="auto_router/test",
            auto_router_config_path="/path/to/config",
            auto_router_default_model="gpt-3.5-turbo",
            auto_router_embedding_model="text-embedding-ada-002",
        ),
        model_info={"id": "test-id"},
    )

    with pytest.raises(
        ValueError, match="Auto-router deployment test-auto-router already exists"
    ):
        router.init_auto_router_deployment(duplicate_deployment)
|
||||
|
||||
|
||||
181
tests/test_litellm/router_strategy/test_auto_router.py
Normal file
181
tests/test_litellm/router_strategy/test_auto_router.py
Normal file
@ -0,0 +1,181 @@
|
||||
import asyncio
|
||||
import os
|
||||
import sys
|
||||
from typing import Any, Dict, List, Optional
|
||||
from unittest.mock import AsyncMock, MagicMock, patch
|
||||
|
||||
import pytest
|
||||
|
||||
sys.path.insert(
|
||||
0, os.path.abspath("../../..")
|
||||
) # Adds the parent directory to the system path
|
||||
|
||||
from litellm.router_strategy.auto_router.auto_router import AutoRouter
|
||||
|
||||
|
||||
@pytest.fixture
def mock_router_instance():
    """Provide a MagicMock standing in for a LiteLLM Router, with an AsyncMock ``acompletion``."""
    # Keyword arguments to MagicMock are set as attributes on the new mock.
    return MagicMock(acompletion=AsyncMock())
|
||||
|
||||
|
||||
@pytest.fixture
def mock_semantic_router():
    """Provide a MagicMock standing in for a SemanticRouter that holds a single route named "test-route"."""
    route = MagicMock()
    # ``name`` is a reserved MagicMock constructor argument, so it is set after creation.
    route.configure_mock(name="test-route")
    return MagicMock(routes=[route])
|
||||
|
||||
|
||||
@pytest.fixture
def mock_route_choice():
    """Provide a MagicMock standing in for a RouteChoice whose ``name`` is "test-model"."""
    choice = MagicMock()
    # ``name`` is a reserved MagicMock constructor argument, so it is set after creation.
    choice.configure_mock(name="test-model")
    return choice
|
||||
|
||||
|
||||
class TestAutoRouter:
    """Unit tests for ``AutoRouter`` construction and ``async_pre_routing_hook``."""

    @staticmethod
    def _make_auto_router(litellm_router_instance):
        """Build an ``AutoRouter`` with the constructor arguments shared by the hook tests."""
        return AutoRouter(
            model_name="test-auto-router",
            router_config_path="test/path/router.json",
            default_model="gpt-4o-mini",
            embedding_model="text-embedding-model",
            litellm_router_instance=litellm_router_instance,
        )

    @staticmethod
    def _install_route_layer(semantic_router_class, routing_result):
        """Wire the patched SemanticRouter class and return the fake route layer.

        ``from_json`` yields a loaded router carrying two placeholder routes, and
        instantiating the patched class yields a callable mock that returns
        ``routing_result``.
        """
        semantic_router_class.from_json.return_value = MagicMock(
            routes=["route1", "route2"]
        )
        route_layer = MagicMock(return_value=routing_result)
        semantic_router_class.return_value = route_layer
        return route_layer

    @patch('semantic_router.routers.SemanticRouter')
    def test_init(self, mock_semantic_router_class, mock_router_instance):
        """Check that the constructor stores its arguments and loads the JSON config."""
        # Arrange: from_json hands back the patched class itself.
        mock_semantic_router_class.from_json.return_value = mock_semantic_router_class
        config_path = "test/path/router.json"
        fallback_model = "gpt-4o-mini"
        encoder_model = "text-embedding-model"

        # Act
        auto_router = AutoRouter(
            model_name="test-auto-router",
            router_config_path=config_path,
            default_model=fallback_model,
            embedding_model=encoder_model,
            litellm_router_instance=mock_router_instance,
        )

        # Assert: constructor arguments are kept verbatim on the instance.
        assert auto_router.router_config_path == config_path
        assert auto_router.auto_sync_value == AutoRouter.DEFAULT_AUTO_SYNC_VALUE
        assert auto_router.default_model == fallback_model
        assert auto_router.embedding_model == encoder_model
        assert auto_router.litellm_router_instance == mock_router_instance
        # No route layer exists right after construction.
        assert auto_router.routelayer is None
        mock_semantic_router_class.from_json.assert_called_once_with(config_path)

    @pytest.mark.asyncio
    @patch('semantic_router.routers.SemanticRouter')
    @patch('litellm.router_strategy.auto_router.litellm_encoder.LiteLLMRouterEncoder')
    async def test_async_pre_routing_hook_with_route_choice(
        self,
        mock_encoder_class,
        mock_semantic_router_class,
        mock_router_instance,
        mock_route_choice,
    ):
        """Check the hook result when the route layer returns a single route choice."""
        # Arrange
        route_layer = self._install_route_layer(
            mock_semantic_router_class, mock_route_choice
        )
        auto_router = self._make_auto_router(mock_router_instance)
        conversation = [{"role": "user", "content": "test message"}]

        # Act
        result = await auto_router.async_pre_routing_hook(
            model="test-model",
            request_kwargs={},
            messages=conversation,
        )

        # Assert
        assert result is not None
        # Should use the route choice name.
        # NOTE(review): the request model and the mocked route name are both
        # "test-model", so this check alone cannot tell a routed result from a
        # passed-through request model - consider using distinct values.
        assert result.model == "test-model"
        assert result.messages == conversation
        route_layer.assert_called_once_with(text="test message")

    @pytest.mark.asyncio
    @patch('semantic_router.routers.SemanticRouter')
    @patch('litellm.router_strategy.auto_router.litellm_encoder.LiteLLMRouterEncoder')
    async def test_async_pre_routing_hook_with_list_route_choice(
        self,
        mock_encoder_class,
        mock_semantic_router_class,
        mock_router_instance,
        mock_route_choice,
    ):
        """Check the hook result when the route layer returns a list of route choices."""
        # Arrange: the route layer answers with a one-element list.
        self._install_route_layer(mock_semantic_router_class, [mock_route_choice])
        auto_router = self._make_auto_router(mock_router_instance)
        conversation = [{"role": "user", "content": "test message"}]

        # Act
        result = await auto_router.async_pre_routing_hook(
            model="test-model",
            request_kwargs={},
            messages=conversation,
        )

        # Assert
        assert result is not None
        assert result.model == "test-model"
        assert result.messages == conversation

    @pytest.mark.asyncio
    async def test_async_pre_routing_hook_no_messages(self, mock_router_instance):
        """Check that the hook returns None when ``messages`` is None."""
        with patch('semantic_router.routers.SemanticRouter'):
            # Arrange
            auto_router = self._make_auto_router(mock_router_instance)

            # Act
            result = await auto_router.async_pre_routing_hook(
                model="test-model",
                request_kwargs={},
                messages=None,
            )

            # Assert
            assert result is None
|
||||
|
||||
Loading…
Reference in New Issue
Block a user