Split the monolithic LiteLLM proxy into independently scalable Kubernetes components to allow separate horizontal scaling of the LLM data plane and management API surfaces

- Add DatabaseURLSettings pydantic-settings model that assembles DATABASE_URL (and optional DATABASE_URL_READ_REPLICA) from discrete DATABASE_* env vars before Prisma initializes, supporting both IAM token auth (minting short-lived RDS tokens) and password auth; replaces the CLI-only path that componentized entrypoints bypass
- Add gateway component (port 4000) that trims the proxy route table to the LLM data-plane surface (chat, embeddings, completions, audio, realtime, provider passthroughs, health/metrics) via an allowlist applied inside the lifespan context so plugin-registered routes are captured
- Add backend component (port 4001) that exposes the management/admin surface (keys, users, teams, orgs, spend analytics, model management, SSO, audit logs) with a complementary allowlist
- Add ui component — Next.js static export served by nginx (port 3000) with RSC payload routing, asset prefix aliasing, and SPA fallback for dashboard routes
- Add migrations component with dedicated Dockerfile that runs prisma migrate deploy via a Helm pre-install/pre-upgrade Job, eliminating per-pod schema contention on the Prisma advisory lock
- Add Helm chart (helm/litellm) with separate Deployments, Services, HPAs, and ConfigMap for each component; shared _helpers.tpl emits DATABASE_*, IAM_TOKEN_DB_AUTH, REDIS_*, and DISABLE_SCHEMA_UPDATE env vars from chart values; ingress template routes traffic to the correct component by path prefix
- Add comprehensive tests for DatabaseURLSettings covering IAM auth, password auth, read replica fallbacks, operator-pinned URL preservation, and percent-encoding; add coverage test asserting gateway + backend allowlist union equals the full proxy route set
- Add pydantic-settings>=2.14.1 as a proxy extra dependency and update liccheck allowlist

Co-authored-by: Yassin Kortam <yassinkortam@g.ucla.edu>
84 lines
3.0 KiB
Docker
84 lines
3.0 KiB
Docker
# Every base image is pinned by digest so rebuilds resolve to the same bits.
# Each value can be overridden at build time with --build-arg.
ARG UV_IMAGE=ghcr.io/astral-sh/uv:0.11.7@sha256:240fb85ab0f263ef12f492d8476aa3a2e4e1e333f7d67fbdd923d00a506a516a
ARG LITELLM_BUILD_IMAGE=cgr.dev/chainguard/wolfi-base@sha256:31da6565f35af6401031c1d7aa91dc84ac76c5c48edd17fb90f0ed9e3173c7a9
ARG LITELLM_RUNTIME_IMAGE=cgr.dev/chainguard/wolfi-base@sha256:31da6565f35af6401031c1d7aa91dc84ac76c5c48edd17fb90f0ed9e3173c7a9

# Named handle on the uv image; the builder stage copies the uv/uvx binaries
# out of it instead of installing uv itself.
FROM ${UV_IMAGE} AS uvbin
# ---------- Builder ----------
# Resolves the locked dependency set into /app/.venv, installs the project and
# its workspace members, and generates the Prisma client. The runtime stage
# copies /app and /home/nonroot/.cache out of this stage.
FROM ${LITELLM_BUILD_IMAGE} AS builder

WORKDIR /app
USER root

COPY --from=uvbin /uv /uvx /usr/local/bin/

# Interpreter, compiler and headers needed while installing dependencies.
RUN apk add --no-cache \
        bash \
        gcc \
        libsndfile \
        openssl \
        openssl-dev \
        python3 \
        python3-dev

# UV_COMPILE_BYTECODE=1 precompiles .pyc at install time → faster cold start.
# UV_LINK_MODE=copy avoids hardlink warnings when uv installs from a
# BuildKit cache mount (different filesystem).
# UV_PYTHON_DOWNLOADS=0 forces uv to use the apk-installed CPython instead of
# silently pulling a managed interpreter.
ENV UV_PROJECT_ENVIRONMENT=/app/.venv \
    UV_LINK_MODE=copy \
    UV_COMPILE_BYTECODE=1 \
    UV_PYTHON_DOWNLOADS=0 \
    PATH="/app/.venv/bin:${PATH}"

# Stage 1 — install dependencies only.
# Only the manifests and lockfile are bind-mounted, so this layer stays cached
# until one of them changes.
RUN --mount=type=cache,target=/root/.cache/uv \
    --mount=type=bind,source=pyproject.toml,target=pyproject.toml \
    --mount=type=bind,source=uv.lock,target=uv.lock \
    --mount=type=bind,source=enterprise/pyproject.toml,target=enterprise/pyproject.toml \
    --mount=type=bind,source=litellm-proxy-extras/pyproject.toml,target=litellm-proxy-extras/pyproject.toml \
    uv sync --frozen --no-install-project --no-install-workspace --no-default-groups --no-editable \
        --extra proxy \
        --extra proxy-runtime \
        --extra extra_proxy \
        --extra semantic-router \
        --python python3

# Stage 2 — copy source and install the project + workspace members.
COPY . .

RUN --mount=type=cache,target=/root/.cache/uv \
    uv sync --frozen --no-default-groups --no-editable \
        --extra proxy \
        --extra proxy-runtime \
        --extra extra_proxy \
        --extra semantic-router \
        --python python3

# Prune tornado's bundled test package here, before /app is copied into the
# runtime image. Files removed in a later runtime layer would still ship in
# the layer that the COPY created. -prune stops find from descending into the
# directory that rm -rf is about to remove.
RUN find /app/.venv -type d -path "*/tornado/test" -prune -exec rm -rf {} \;

# Generate the Prisma client with HOME pointed at the runtime user's home, then
# hand the resulting cache directory to nonroot.
RUN mkdir -p /home/nonroot && \
    HOME=/home/nonroot prisma generate --schema=./schema.prisma && \
    chown -R nonroot:nonroot /home/nonroot/.cache
# ---------- Runtime ----------
# Final image: copies the prepared /app tree and Prisma cache from the builder
# stage and serves backend.main:app with uvicorn on port 4001.
FROM ${LITELLM_RUNTIME_IMAGE} AS runtime

# Root is needed only for the package install and cleanup below.
USER root

RUN apk add --no-cache \
        bash \
        libatomic \
        libsndfile \
        openssl \
        python3 \
        tzdata

# wolfi-base ships an unprivileged `nonroot` account (UID/GID 65532) with
# /home/nonroot. We run the backend as that user.
WORKDIR /app
ENV HOME=/home/nonroot \
    PATH="/app/.venv/bin:${PATH}" \
    PYTHONPATH="/app" \
    PYTHONDONTWRITEBYTECODE=1 \
    PYTHONUNBUFFERED=1

COPY --from=builder --chown=nonroot:nonroot /app /app
COPY --from=builder --chown=nonroot:nonroot /home/nonroot/.cache /home/nonroot/.cache

# Remove tornado's bundled test package from the venv. `find -delete` can only
# remove empty directories, so deleting files first and then the directory
# leaves tornado/test behind whenever nested subdirectories remain. Removing
# the pruned match with rm -rf takes the whole tree in one pass.
RUN find /app/.venv -type d -path "*/tornado/test" -prune -exec rm -rf {} \;

USER nonroot

EXPOSE 4001/tcp

# ENTRYPOINT fixes the server and app; CMD holds the default bind arguments so
# they can be overridden at run time.
ENTRYPOINT ["uvicorn", "backend.main:app"]
CMD ["--host", "0.0.0.0", "--port", "4001"]