Split the monolithic LiteLLM proxy into independently scalable Kubernetes components to allow separate horizontal scaling of the LLM data plane and management API surfaces

- Add DatabaseURLSettings pydantic-settings model that assembles DATABASE_URL (and optional DATABASE_URL_READ_REPLICA) from discrete DATABASE_* env vars before Prisma initializes, supporting both IAM token auth (minting short-lived RDS tokens) and password auth; replaces the CLI-only path that componentized entrypoints bypass
- Add gateway component (port 4000) that trims the proxy route table to the LLM data-plane surface (chat, embeddings, completions, audio, realtime, provider passthroughs, health/metrics) via an allowlist applied inside the lifespan context so plugin-registered routes are captured
- Add backend component (port 4001) that exposes the management/admin surface (keys, users, teams, orgs, spend analytics, model management, SSO, audit logs) with a complementary allowlist
- Add ui component — Next.js static export served by nginx (port 3000) with RSC payload routing, asset prefix aliasing, and SPA fallback for dashboard routes
- Add migrations component with dedicated Dockerfile that runs prisma migrate deploy via a Helm pre-install/pre-upgrade Job, eliminating per-pod schema contention on the Prisma advisory lock
- Add Helm chart (helm/litellm) with separate Deployments, Services, HPAs, and ConfigMap for each component; shared _helpers.tpl emits DATABASE_*, IAM_TOKEN_DB_AUTH, REDIS_*, and DISABLE_SCHEMA_UPDATE env vars from chart values; ingress template routes traffic to the correct component by path prefix
- Add comprehensive tests for DatabaseURLSettings covering IAM auth, password auth, read replica fallbacks, operator-pinned URL preservation, and percent-encoding; add coverage test asserting gateway + backend allowlist union equals the full proxy route set
- Add pydantic-settings>=2.14.1 as a proxy extra dependency and update liccheck allowlist

Co-authored-by: Yassin Kortam <yassinkortam@g.ucla.edu>
315 lines
9.5 KiB
TOML
315 lines
9.5 KiB
TOML
[project]
name = "litellm"
version = "1.85.0"
description = "Library to easily interface with LLM API providers"
readme = "README.md"
requires-python = ">=3.10, <3.14"
license = "MIT"
license-files = ["LICENSE"]
authors = [
    { name = "BerriAI" },
]
dependencies = [
    # Ranges (not exact pins) so SDK consumers can coexist with their other
    # deps. Reproducibility for our Docker/CI comes from `uv.lock`.
    # When changing a floor, verify it installs + imports on every supported
    # Python with: `uv pip install --resolution=lowest-direct .`
    "fastuuid>=0.14.0,<1.0",
    "httpx>=0.28.0,<1.0",
    "openai>=2.20.0,<3.0.0",
    "python-dotenv>=1.0.0,<2.0",
    "tiktoken>=0.8.0,<1.0",
    "importlib-metadata>=8.0.0,<9.0",
    "tokenizers>=0.21.0,<1.0",
    "click>=8.0.0,<9.0",
    "jinja2>=3.1.6,<4.0",
    "aiohttp>=3.10,<4.0",
    "pydantic>=2.10.0,<3.0.0",
    "jsonschema>=4.0.0,<5.0",
]

[project.urls]
Homepage = "https://litellm.ai"
Repository = "https://github.com/BerriAI/litellm"
Documentation = "https://docs.litellm.ai"

# Dependencies pinned from the published `litellm[proxy]==1.83.0` resolution.
# Docker and CI should prefer `uv.lock` rather than maintaining parallel installers.
[project.optional-dependencies]
proxy = [
    "gunicorn==23.0.0",
    "uvicorn==0.33.0",
    "uvloop==0.21.0; sys_platform != 'win32'",
    "fastapi==0.124.4",
    "backoff==2.2.1",
    "pyyaml==6.0.3",
    "rq==2.7.0",
    "orjson==3.11.6",
    "apscheduler==3.11.2",
    "fastapi-sso==0.19.0",
    "PyJWT==2.12.0",
    "python-multipart==0.0.27",
    "cryptography==46.0.7",
    "pynacl==1.6.2",
    "websockets==15.0.1",
    "boto3==1.43.1",
    "azure-identity==1.25.2",
    "azure-storage-blob==12.28.0",
    "mcp==1.26.0",
    "litellm-proxy-extras==0.4.72",
    "litellm-enterprise==0.1.40",
    "RestrictedPython==8.1",
    "rich==13.9.4",
    "polars==1.38.1",
    "soundfile==0.12.1",
    "pyroscope-io==0.8.16; sys_platform != 'win32'",
    # Range rather than an exact pin. Capped below the next major so an
    # install without `uv.lock` cannot pick up a breaking 3.x release.
    "pydantic-settings>=2.14.1,<3.0.0",
]
extra_proxy = [
    "prisma==0.11.0",
    "azure-identity==1.25.2",
    "azure-keyvault-secrets==4.10.0",
    # Not in PyPI proxy extra.
    "google-cloud-kms==2.24.2",
    "google-cloud-iam==2.19.1",
    # Not in PyPI proxy extra.
    "resend==2.23.0",
    "redisvl==0.4.1; python_version < '3.14'",
    "a2a-sdk==0.3.24",
]
utils = [
    # Not in Docker or PyPI proxy extra.
    "numpydoc==1.8.0",
]
caching = ["diskcache==5.6.3"]
semantic-router = [
    "semantic-router==0.1.12; python_version < '3.14'",
    "aurelio-sdk==0.0.19; python_version < '3.14'",
]
mlflow = ["mlflow==3.11.1"]
grpc = [
    # Newest non-yanked release older than the 30-day cutoff.
    "grpcio==1.78.0",
]
stt-nvidia-riva = [
    # NVIDIA Riva STT provider (gRPC). These are imported lazily inside the
    # provider handler so litellm core remains usable without them.
    "nvidia-riva-client>=2.15.0",
    "soundfile>=0.12.1",
    "audioread>=3.0.1",
    "numpy>=1.26.0",
]
google = ["google-cloud-aiplatform==1.133.0"]
proxy-runtime = [
    # Historically bundled in the proxy Docker images via requirements.txt.
    # Keep these in a dedicated extra so uv-based images preserve the same
    # feature surface without forcing the base SDK install to grow.
    "google-cloud-aiplatform==1.133.0",
    "google-genai==1.37.0",
    "anthropic[vertex]==0.84.0",
    "grpcio==1.78.0",
    "prometheus-client==0.20.0",
    "langfuse==2.59.7",
    "opentelemetry-api==1.28.0",
    "opentelemetry-sdk==1.28.0",
    "opentelemetry-exporter-otlp==1.28.0",
    "ddtrace==2.19.0",
    "sentry-sdk==2.21.0",
    "mangum==0.17.0",
    "azure-ai-contentsafety==1.0.0",
    "azure-storage-file-datalake==12.20.0",
    "pypdf==6.10.2; python_version < '3.14'",
    "llm-sandbox==0.3.39",
    "detect-secrets==1.5.0",
]

[project.scripts]
litellm = "litellm:run_server"
litellm-proxy = "litellm.proxy.client.cli:cli"

[dependency-groups]
dev = [
    "diff-cover==9.7.2",
    "flake8==7.3.0",
    "black==24.10.0",
    "mypy==1.19.0",
    "pytest==9.0.3",
    "pytest-mock==3.15.1",
    "pytest-asyncio==1.3.0",
    "pytest-postgresql==7.0.2",
    # pytest-postgresql imports psycopg v3 during pytest startup. Keep the base
    # package and the binary wheel in the default dev environment so local
    # pytest works without requiring a system libpq install.
    "psycopg==3.3.3",
    "psycopg-binary==3.3.3",
    "pytest-xdist==3.8.0",
    "requests-mock==1.12.1",
    "responses==0.26.0",
    "respx==0.22.0",
    "ruff==0.15.3",
    "types-requests==2.32.4.20260107",
    "types-setuptools==75.8.0.20250225",
    "types-redis==4.6.0.20241004",
    "types-PyYAML==6.0.12.20250915",
    "opentelemetry-api==1.28.0",
    "opentelemetry-sdk==1.28.0",
    "opentelemetry-exporter-otlp==1.28.0",
    "langfuse==2.59.7",
    "fastapi-offline==1.7.6",
    "fakeredis==2.34.1",
    "pytest-rerunfailures==15.1",
    "pytest-cov==5.0.0",
    "parameterized==0.9.0",
    "openapi-core==0.22.0; python_version < '3.14'",
    "pytest-timeout==2.4.0",
    "vcrpy==8.1.1",
    "pytest-recording==0.13.4",
]
proxy-dev = [
    "prisma==0.11.0",
    "hypercorn==0.17.3",
    "prometheus-client==0.20.0",
    "opentelemetry-api==1.28.0",
    "opentelemetry-sdk==1.28.0",
    "opentelemetry-exporter-otlp==1.28.0",
    "azure-identity==1.25.2",
    "a2a-sdk==0.3.24",
]
ci = [
    # These are lazily imported at call sites; keep them out of core deps to
    # avoid bloating the base SDK install (google-generativeai pulls grpcio +
    # protobuf, Pillow is a compiled C extension).
    "tenacity==8.5.0",
    "google-generativeai==0.8.6",
    "Pillow==12.2.0",
    # Azure batch E2E tests still import psycopg2 directly.
    "psycopg2-binary==2.9.11",
    "pytest-codspeed==4.3.0",
    "pytest-retry==1.7.0",
    "pyarrow==22.0.0",
    "langchain==1.2.10",
    "lunary==1.4.36; python_version == '3.10'",
    "lunary==1.4.37; python_version >= '3.11'",
    "logfire==4.6.0",
    "traceloop-sdk==0.33.12",
    "detect-secrets==1.5.0",
    "PyGithub==2.8.1",
    "aiodynamo==24.7",
    "argon2-cffi==25.1.0",
    "assemblyai==0.52.4",
    "jsonlines==4.0.0",
    "anthropic==0.84.0",
    "blockbuster==1.5.26",
    "beautifulsoup4==4.14.3",
    "pylint==4.0.5",
    "pyright==1.1.408",
    "langchain-mcp-adapters==0.2.1",
    "langchain-openai==1.1.14",
    "langgraph==1.0.10",
    # langgraph-prebuilt 1.0.9 imports ExecutionInfo/ServerInfo from
    # langgraph.runtime, which is not exported until langgraph 1.1.0.
    # Pin to 1.0.8 so it pairs correctly with langgraph==1.0.10.
    "langgraph-prebuilt==1.0.8",
    "claude-agent-sdk==0.1.44",
]
healthcheck = [
    "httpx==0.28.1",
    "pyyaml==6.0.3",
]

[build-system]
requires = ["uv_build==0.11.8"]
build-backend = "uv_build"

[tool.uv]
default-groups = ["dev"]
required-version = ">=0.10.9"
exclude-newer = "3 days"

[tool.uv.sources]
litellm-proxy-extras = { workspace = true }
litellm-enterprise = { workspace = true }

[tool.uv.workspace]
members = ["enterprise", "litellm-proxy-extras"]

[tool.uv.build-backend]
module-root = ""
source-exclude = [
    "litellm/proxy/enterprise",
    "**/__pycache__",
    "**/__pycache__/**",
    "**/.mypy_cache",
    "**/.mypy_cache/**",
    "**/.pytest_cache",
    "**/.pytest_cache/**",
    "**/.ruff_cache",
    "**/.ruff_cache/**",
]

[tool.isort]
profile = "black"

[tool.commitizen]
# Must match `version` under [project].
version = "1.85.0"
version_files = [
    "pyproject.toml:^version",
]

[tool.mypy]
plugins = "pydantic.mypy"

[tool.pytest.ini_options]
asyncio_mode = "auto"
asyncio_default_fixture_loop_scope = "session"
markers = [
    "asyncio: mark test as an asyncio test",
    "limit_leaks: mark test with memory limit for leak detection (e.g., '40 MB')",
    "no_parallel: mark test to run sequentially (not in parallel) - typically for memory measurement tests",
]
filterwarnings = [
    # Suppress Pydantic serializer warnings from mock server responses (non-critical for memory tests)
    # These occur because the mock server returns a simplified response format
    "ignore:Pydantic serializer warnings:UserWarning",
    "ignore::UserWarning:pydantic.main",
    # Suppress pytest-asyncio event loop deprecation warning (handled automatically by pytest-asyncio)
    "ignore::DeprecationWarning:pytest_asyncio.plugin",
]

[tool.mutmut]
# Mutation-testing scope. Driven by the manually-triggered workflow at
# .github/workflows/mutation-test.yml. mutmut is not part of the project's
# default install; it is pulled in via `uv run --with mutmut==<version>` in CI.
# `also_copy = ["litellm/"]` is required because mutmut runs in a `mutants/`
# sandbox and the test conftest imports from across the litellm package.
paths_to_mutate = [
    "litellm/proxy/management_endpoints/",
]
tests_dir = [
    "tests/test_litellm/proxy/management_endpoints/",
]
also_copy = [
    "litellm/",
]
# Run the test suite once before mutation to gather line coverage, then skip
# mutating lines no test exercises. Those mutants would survive regardless
# (no test hits the line to kill them), so generating them wastes hours of CI.
# The score now reads as "mutation score over covered code" — pair with a
# line-coverage number when reporting.
mutate_only_covered_lines = true
# Disable rerun/parallel plugins for mutation runs:
# - pytest-retry triggers an `INTERNALERROR: no option named 'filtered_exceptions'`
#   when invoked via mutmut's in-process `pytest.main()` call.
# - rerunning a "failed" test on a mutant would mask which mutants are killed
#   vs. survive, so reruns are wrong for mutation testing regardless.
# - xdist is unnecessary inside mutmut (mutmut handles its own parallelism).
pytest_add_cli_args = [
    "-p", "no:retry",
    "-p", "no:rerunfailures",
    "-p", "no:xdist",
]

[tool.coverage.run]
source = ["litellm"]
relative_files = true