[project]
name = "litellm"
version = "1.89.0"
description = "Library to easily interface with LLM API providers"
readme = "README.md"
requires-python = ">=3.10, <3.14"
license = "MIT"
license-files = ["LICENSE"]
authors = [{ name = "BerriAI" }]
# Core runtime requirements. These are ranges rather than exact pins so that
# SDK consumers can resolve litellm alongside their other dependencies;
# reproducibility for our Docker/CI comes from `uv.lock` instead.
# When changing a floor, verify it installs + imports on every supported
# Python with: `uv pip install --resolution=lowest-direct .`
dependencies = [
    "fastuuid>=0.14.0,<1.0",
    "httpx>=0.28.0,<1.0",
    "openai>=2.20.0,<3.0.0",
    "python-dotenv>=1.0.0,<2.0",
    "tiktoken>=0.8.0,<1.0",
    "importlib-metadata>=8.0.0,<9.0",
    "tokenizers>=0.21.0,<1.0",
    "click>=8.0.0,<9.0",
    "jinja2>=3.1.6,<4.0",
    "aiohttp>=3.10,<4.0",
    "pydantic>=2.10.0,<3.0.0",
    "jsonschema>=4.0.0,<5.0",
]

[project.urls]
Homepage = "https://litellm.ai"
Repository = "https://github.com/BerriAI/litellm"
Documentation = "https://docs.litellm.ai"

# Optional extras follow the same policy as the core SDK above: compatible
# ranges, so downstream consumers can coexist with other packages and pick up
# security patches without forking. Reproducibility for our Docker/CI comes
# from `uv.lock` (images install via `uv sync --frozen`).
# Exceptions that stay exact-pinned:
#   - litellm's own sub-packages and the opentelemetry packages, which move in
#     lockstep;
#   - grpcio, which is supply-chain-pinned to a vetted, aged release.
[project.optional-dependencies]
proxy = [
    "gunicorn>=23.0.0,<24.0",
    "uvicorn>=0.33.0,<1.0",
    "granian>=2.7.4,<3.0",
    "uvloop>=0.21.0,<1.0; sys_platform != 'win32'",
    "fastapi>=0.136.3,<1.0",
    "starlette>=1.0.1,<2.0",
    "backoff>=2.2.1,<3.0",
    "pyyaml>=6.0.3,<7.0",
    "rq>=2.7.0,<3.0",
    "orjson>=3.11.6,<4.0",
    "apscheduler>=3.11.2,<4.0",
    "fastapi-sso>=0.19.0,<1.0",
    "PyJWT>=2.13.0,<3.0",
    "python-multipart>=0.0.27,<1.0",
    "cryptography>=46.0.7,<47.0",
    "pynacl>=1.6.2,<2.0",
    "websockets>=15.0.1,<16.0",
    "boto3>=1.43.1,<2.0",
    "azure-identity>=1.25.2,<2.0",
    "azure-storage-blob>=12.28.0,<13.0",
    "mcp>=1.26.0,<2.0",
    "litellm-proxy-extras==0.4.74",
    "litellm-enterprise==0.1.42",
    "RestrictedPython>=8.1,<9.0",
    "rich>=13.9.4,<14.0",
    "polars>=1.38.1,<2.0",
    "soundfile>=0.12.1,<1.0",
    "pyroscope-io>=0.8.16,<1.0; sys_platform != 'win32'",
    "pydantic-settings>=2.14.1,<3.0",
]
# Thin client install for the `lite` CLI on developer laptops. The CLI guards
# all of its heavy imports (fastapi, cryptography, ...), so the base SDK plus
# these three packages is enough; none of the server runtime from `proxy` is
# pulled in.
cli = [
    "rich>=13.9.4,<14.0",
    "pyyaml>=6.0.3,<7.0",
    "requests>=2.32.0,<3.0",
]
extra_proxy = [
    "prisma>=0.11.0,<1.0",
    "azure-identity>=1.25.2,<2.0",
    "azure-keyvault-secrets>=4.10.0,<5.0",
    # Not in PyPI proxy extra.
    "google-cloud-kms>=2.24.2,<3.0",
    "google-cloud-iam>=2.19.1,<3.0",
    # Not in PyPI proxy extra.
    "resend>=2.23.0,<3.0",
    "redisvl>=0.4.1,<1.0; python_version < '3.14'",
    "a2a-sdk>=0.3.24,<1.0",
]
utils = [
    # Not in Docker or PyPI proxy extra.
    "numpydoc>=1.8.0,<2.0",
]
caching = [
    "diskcache>=5.6.3,<6.0",
]
semantic-router = [
    "semantic-router>=0.1.15,<1.0; python_version < '3.14'",
    "aurelio-sdk>=0.0.19,<1.0; python_version < '3.14'",
]
mlflow = [
    "mlflow>=3.11.1,<4.0",
]
# Exact pin: newest non-yanked release older than the 30-day cutoff.
# NOTE(review): [tool.uv] exclude-newer below is "3 days" - confirm which
# cutoff "30-day" refers to.
grpc = [
    "grpcio==1.78.0",
]
# NVIDIA Riva STT provider (gRPC). The provider handler imports these lazily,
# so litellm core remains usable without them.
# NOTE(review): unlike the other extras, these entries have no upper bound -
# confirm that is intentional.
stt-nvidia-riva = [
    "nvidia-riva-client>=2.15.0",
    "soundfile>=0.12.1",
    "audioread>=3.0.1",
    "numpy>=1.26.0",
]
google = [
    "google-cloud-aiplatform>=1.133.0,<2.0",
]
# Historically bundled in the proxy Docker images via requirements.txt.
# Kept as a dedicated extra so uv-based images preserve the same feature
# surface without forcing the base SDK install to grow.
proxy-runtime = [
    "google-cloud-aiplatform>=1.133.0,<2.0",
    "google-genai>=1.37.0,<2.0",
    "anthropic[vertex]>=0.84.0,<1.0",
    "grpcio==1.78.0",
    "prometheus-client>=0.20.0,<1.0",
    "langfuse>=2.59.7,<3.0",
    "opentelemetry-api==1.28.0",
    "opentelemetry-sdk==1.28.0",
    "opentelemetry-exporter-otlp==1.28.0",
    "opentelemetry-instrumentation-fastapi==0.49b0",
    "ddtrace>=2.19.0,<3.0",
    "sentry-sdk>=2.21.0,<3.0",
    "mangum>=0.17.0,<1.0",
    "azure-ai-contentsafety>=1.0.0,<2.0",
    "azure-storage-file-datalake>=12.20.0,<13.0",
    "pypdf>=6.10.2,<7.0; python_version < '3.14'",
    "llm-sandbox>=0.3.39,<1.0",
    "detect-secrets>=1.5.0,<2.0",
]

[project.scripts]
litellm = "litellm:run_server"
lite = "litellm.proxy.client.cli:cli"
litellm-proxy = "litellm.proxy.client.cli:cli"

[dependency-groups]
dev = [
    "diff-cover==9.7.2",
    "flake8==7.3.0",
    "black==26.3.1",
    "mypy==1.19.0",
    "pytest==9.0.3",
    "pytest-mock==3.15.1",
    "pytest-asyncio==1.3.0",
    "pytest-postgresql==7.0.2",
    # pytest-postgresql imports psycopg v3 during pytest startup. Both the base
    # package and the binary wheel live in the default dev environment so that
    # local pytest works without a system libpq install.
    "psycopg==3.3.3",
    "psycopg-binary==3.3.3",
    "pytest-xdist==3.8.0",
    "requests-mock==1.12.1",
    "responses==0.26.0",
    "respx==0.22.0",
    "ruff==0.15.3",
    "types-requests==2.32.4.20260107",
    "types-setuptools==75.8.0.20250225",
    "types-redis==4.6.0.20241004",
    "types-PyYAML==6.0.12.20250915",
    "opentelemetry-api==1.28.0",
    "opentelemetry-sdk==1.28.0",
    "opentelemetry-exporter-otlp==1.28.0",
    "opentelemetry-instrumentation-fastapi==0.49b0",
    "langfuse==2.59.7",
    "fastapi-offline==1.7.6",
    "fakeredis==2.34.1",
    "pytest-rerunfailures==15.1",
    "pytest-cov==5.0.0",
    "parameterized==0.9.0",
    "openapi-core==0.22.0; python_version < '3.14'",
    "pytest-timeout==2.4.0",
    "vcrpy==8.1.1",
    "pytest-recording==0.13.4",
]
proxy-dev = [
    "prisma==0.11.0",
    "hypercorn==0.17.3",
    "prometheus-client==0.20.0",
    "opentelemetry-api==1.28.0",
    "opentelemetry-sdk==1.28.0",
    "opentelemetry-exporter-otlp==1.28.0",
    "opentelemetry-instrumentation-fastapi==0.49b0",
    "azure-identity==1.25.2",
    "a2a-sdk==0.3.24",
]
ci = [
    # Lazily imported at call sites; kept out of core deps to avoid bloating
    # the base SDK install (google-generativeai pulls grpcio + protobuf,
    # Pillow is a compiled C extension).
    "tenacity==8.5.0",
    "google-generativeai==0.8.6",
    "Pillow==12.2.0",
    # Azure batch E2E tests still import psycopg2 directly.
    "psycopg2-binary==2.9.11",
    "pytest-codspeed==4.3.0",
    "pytest-retry==1.7.0",
    "pyarrow==23.0.1",
    "langchain==1.2.10",
    "lunary==1.4.36; python_version == '3.10'",
    "lunary==1.4.37; python_version >= '3.11'",
    "logfire==4.6.0",
    "traceloop-sdk==0.33.12",
    "detect-secrets==1.5.0",
    "PyGithub==2.8.1",
    "aiodynamo==24.7",
    "argon2-cffi==25.1.0",
    "assemblyai==0.52.4",
    "jsonlines==4.0.0",
    "anthropic==0.84.0",
    "blockbuster==1.5.26",
    "beautifulsoup4==4.14.3",
    "pylint==4.0.5",
    "pyright==1.1.408",
    "langchain-mcp-adapters==0.2.1",
    "langchain-openai==1.1.14",
    "langgraph==1.0.10",
    # langgraph-prebuilt 1.0.9 imports ExecutionInfo/ServerInfo from
    # langgraph.runtime, which is not exported until langgraph 1.1.0.
    # Pinned to 1.0.8 so it pairs correctly with langgraph==1.0.10.
    "langgraph-prebuilt==1.0.8",
    "claude-agent-sdk==0.1.44",
]
healthcheck = [
    "httpx==0.28.1",
    "pyyaml==6.0.3",
]

[build-system]
requires = ["uv_build==0.11.8"]
build-backend = "uv_build"

[tool.uv]
default-groups = ["dev"]
required-version = ">=0.10.9"
exclude-newer = "3 days"

[tool.uv.sources]
litellm-proxy-extras = { workspace = true }
litellm-enterprise = { workspace = true }

[tool.uv.workspace]
members = ["enterprise", "litellm-proxy-extras"]

[tool.uv.build-backend]
module-root = ""
source-exclude = [
    "litellm/proxy/enterprise",
    "**/__pycache__",
    "**/__pycache__/**",
    "**/.mypy_cache",
    "**/.mypy_cache/**",
    "**/.pytest_cache",
    "**/.pytest_cache/**",
    "**/.ruff_cache",
    "**/.ruff_cache/**",
]

[tool.isort]
profile = "black"

[tool.commitizen]
version = "1.89.0"
version_files = ["pyproject.toml:^version"]

[tool.mypy]
plugins = "pydantic.mypy"

[tool.pytest.ini_options]
asyncio_mode = "auto"
asyncio_default_fixture_loop_scope = "session"
markers = [
    "asyncio: mark test as an asyncio test",
    "limit_leaks: mark test with memory limit for leak detection (e.g., '40 MB')",
    "no_parallel: mark test to run sequentially (not in parallel) - typically for memory measurement tests",
]
filterwarnings = [
    # Pydantic serializer warnings triggered by mock server responses: the
    # mock server returns a simplified response format, and the warnings are
    # non-critical for memory tests.
    "ignore:Pydantic serializer warnings:UserWarning",
    "ignore::UserWarning:pydantic.main",
    # pytest-asyncio event loop deprecation warning (handled automatically by
    # pytest-asyncio).
    "ignore::DeprecationWarning:pytest_asyncio.plugin",
]

[tool.mutmut]
# Mutation-testing scope, driven by the manually-triggered workflow at
# .github/workflows/mutation-test.yml. mutmut is not part of the project's
# default install; CI pulls it in via `uv run --with mutmut==<version>`.
paths_to_mutate = ["litellm/proxy/management_endpoints/"]
tests_dir = [
    "tests/test_litellm/proxy/management_endpoints/",
    "tests/proxy_behavior/management/",
]
# Copying the whole package is required because mutmut runs in a `mutants/`
# sandbox and the test conftest imports from across the litellm package.
also_copy = ["litellm/"]
# Run the test suite once before mutating to gather line coverage, then skip
# lines no test exercises. Mutants on those lines would survive regardless
# (no test hits the line to kill them), so generating them wastes hours of CI.
# The resulting score reads as "mutation score over covered code" - pair it
# with a line-coverage number when reporting.
mutate_only_covered_lines = true
# Rerun/parallel plugins are disabled for mutation runs:
# - pytest-retry triggers an `INTERNALERROR: no option named 'filtered_exceptions'`
#   when invoked via mutmut's in-process `pytest.main()` call.
# - rerunning a "failed" test on a mutant would mask which mutants are killed
#   vs. survive, so reruns are wrong for mutation testing regardless.
# - xdist is unnecessary inside mutmut (mutmut handles its own parallelism).
pytest_add_cli_args = [
    "-p",
    "no:retry",
    "-p",
    "no:rerunfailures",
    "-p",
    "no:xdist",
]

[tool.coverage.run]
source = ["litellm"]
relative_files = true