feat: add componentized proxy deployment with gateway, backend, ui, and migrations (#27557)

Split the monolithic LiteLLM proxy into independently scalable Kubernetes components to allow separate horizontal scaling of the LLM data plane and management API surfaces - Add DatabaseURLSettings pydantic-settings model that assembles DATABASE_URL (and optional DATABASE_URL_READ_REPLICA) from discrete DATABASE_* env vars before Prisma initializes, supporting both IAM token auth (minting short-lived RDS tokens) and password auth; replaces the CLI-only path that componentized entrypoints bypass - Add gateway component (port 4000) that trims the proxy route table to the LLM data-plane surface (chat, embeddings, completions, audio, realtime, provider passthroughs, health/metrics) via an allowlist applied inside the lifespan context so plugin-registered routes are captured - Add backend component (port 4001) that exposes the management/admin surface (keys, users, teams, orgs, spend analytics, model management, SSO, audit logs) with a complementary allowlist - Add ui component — Next.js static export served by nginx (port 3000) with RSC payload routing, asset prefix aliasing, and SPA fallback for dashboard routes - Add migrations component with dedicated Dockerfile that runs prisma migrate deploy via a Helm pre-install/pre-upgrade Job, eliminating per-pod schema contention on the Prisma advisory lock - Add Helm chart (helm/litellm) with separate Deployments, Services, HPAs, and ConfigMap for each component; shared _helpers.tpl emits DATABASE_*, IAM_TOKEN_DB_AUTH, REDIS_*, and DISABLE_SCHEMA_UPDATE env vars from chart values; ingress template routes traffic to the correct component by path prefix - Add comprehensive tests for DatabaseURLSettings covering IAM auth, password auth, read replica fallbacks, operator-pinned URL preservation, and percent-encoding; add coverage test asserting gateway + backend allowlist union equals the full proxy route set - Add pydantic-settings>=2.14.1 as a proxy extra dependency and update liccheck allowlist Co-authored-by: Yassin Kortam 
<yassinkortam@g.ucla.edu>
2026-05-16 09:25:17 -07:00 · 2026-05-16 09:25:17 -07:00 · 014cb8fa9d
commit 014cb8fa9d
parent ec2f3aadb8
36 changed files with 3221 additions and 586 deletions
--- a/backend/Dockerfile
+++ b/backend/Dockerfile
@ -0,0 +1,83 @@
+# Backend (management/admin API) image.
+# A builder stage assembles the uv-managed virtualenv under /app/.venv and
+# generates the Prisma client; the runtime stage copies both across and runs
+# uvicorn on port 4001 as the unprivileged `nonroot` user.
+ARG LITELLM_BUILD_IMAGE=cgr.dev/chainguard/wolfi-base@sha256:31da6565f35af6401031c1d7aa91dc84ac76c5c48edd17fb90f0ed9e3173c7a9
+ARG LITELLM_RUNTIME_IMAGE=cgr.dev/chainguard/wolfi-base@sha256:31da6565f35af6401031c1d7aa91dc84ac76c5c48edd17fb90f0ed9e3173c7a9
+ARG UV_IMAGE=ghcr.io/astral-sh/uv:0.11.7@sha256:240fb85ab0f263ef12f492d8476aa3a2e4e1e333f7d67fbdd923d00a506a516a
+
+FROM $UV_IMAGE AS uvbin
+
+# ---------- Builder ----------
+FROM $LITELLM_BUILD_IMAGE AS builder
+
+WORKDIR /app
+USER root
+
+COPY --from=uvbin /uv /uvx /usr/local/bin/
+
+RUN apk add --no-cache bash gcc python3 python3-dev openssl openssl-dev libsndfile
+
+# UV_COMPILE_BYTECODE=1   precompiles .pyc at install time → faster cold start.
+# UV_LINK_MODE=copy       avoids hardlink warnings when uv installs from a
+#                         BuildKit cache mount (different filesystem).
+# UV_PYTHON_DOWNLOADS=0   force uv to use the apk-installed CPython instead of
+#                         silently pulling a managed interpreter.
+ENV UV_PROJECT_ENVIRONMENT=/app/.venv \
+    UV_LINK_MODE=copy \
+    UV_COMPILE_BYTECODE=1 \
+    UV_PYTHON_DOWNLOADS=0 \
+    PATH="/app/.venv/bin:${PATH}"
+
+# Stage 1 — install dependencies only.
+RUN --mount=type=cache,target=/root/.cache/uv \
+    --mount=type=bind,source=pyproject.toml,target=pyproject.toml \
+    --mount=type=bind,source=uv.lock,target=uv.lock \
+    --mount=type=bind,source=enterprise/pyproject.toml,target=enterprise/pyproject.toml \
+    --mount=type=bind,source=litellm-proxy-extras/pyproject.toml,target=litellm-proxy-extras/pyproject.toml \
+    uv sync --frozen --no-install-project --no-install-workspace --no-default-groups --no-editable \
+        --extra proxy \
+        --extra proxy-runtime \
+        --extra extra_proxy \
+        --extra semantic-router \
+        --python python3
+
+# Stage 2 — copy source and install the project + workspace members.
+COPY . .
+
+RUN --mount=type=cache,target=/root/.cache/uv \
+    uv sync --frozen --no-default-groups --no-editable \
+        --extra proxy \
+        --extra proxy-runtime \
+        --extra extra_proxy \
+        --extra semantic-router \
+        --python python3
+
+# Strip tornado's bundled test suite in the builder so it never reaches a
+# runtime layer; removing it after the runtime COPY would only hide the files
+# behind a later layer. `-prune -exec rm -rf` removes the whole tree, including
+# nested sub-directories that `find -delete` cannot remove while non-empty.
+RUN find /app/.venv -type d -path "*/tornado/test" -prune -exec rm -rf {} \;
+
+# Generate the Prisma client with HOME pointed at /home/nonroot so anything it
+# writes under ~/.cache ends up in the directory the runtime stage copies.
+RUN mkdir -p /home/nonroot && \
+    HOME=/home/nonroot prisma generate --schema=./schema.prisma && \
+    chown -R nonroot:nonroot /home/nonroot/.cache
+
+# ---------- Runtime ----------
+FROM $LITELLM_RUNTIME_IMAGE AS runtime
+
+USER root
+
+RUN apk add --no-cache bash openssl tzdata python3 libsndfile libatomic
+
+# wolfi-base ships an unprivileged `nonroot` account (UID/GID 65532) with
+# /home/nonroot. We run the backend as that user.
+WORKDIR /app
+ENV HOME=/home/nonroot \
+    PATH="/app/.venv/bin:${PATH}" \
+    PYTHONPATH="/app" \
+    PYTHONDONTWRITEBYTECODE=1 \
+    PYTHONUNBUFFERED=1
+
+COPY --from=builder --chown=nonroot:nonroot /app /app
+COPY --from=builder --chown=nonroot:nonroot /home/nonroot/.cache /home/nonroot/.cache
+
+USER nonroot
+
+EXPOSE 4001/tcp
+
+# ENTRYPOINT pins the server and app; CMD supplies the default bind arguments,
+# which can be replaced at `docker run` time.
+ENTRYPOINT ["uvicorn", "backend.main:app"]
+CMD ["--host", "0.0.0.0", "--port", "4001"]
--- a/backend/main.py
+++ b/backend/main.py
@ -0,0 +1,51 @@
+"""UI backend entrypoint.
+
+Reuses the existing FastAPI app from `litellm.proxy.proxy_server` and trims its
+route table to just the management/admin surface used by the dashboard. Purely
+additive — no existing module is modified.
+
+Run with:
+    uvicorn backend.main:app --host 0.0.0.0 --port 4001
+"""
+
+from contextlib import asynccontextmanager
+
+from fastapi.routing import Mount
+
+# See gateway/main.py for why we assemble DATABASE_URL(s) here before
+# importing proxy_server.
+from litellm.proxy.db.db_url_settings import DatabaseURLSettings
+
+DatabaseURLSettings.from_env().apply_to_env()
+
+from litellm.proxy.proxy_server import app
+
+from backend.routes.allowlist import BACKEND_EXACT_PATHS, BACKEND_PATH_PREFIXES
+
+
+def _is_backend_route(route) -> bool:
+    """Keep the route on the backend if its path is in the management surface."""
+    path = getattr(route, "path", None)
+    if path is None:
+        return False
+    if isinstance(route, Mount):
+        # Static UI mounts are served by the dedicated UI container, not here.
+        return False
+    if path in BACKEND_EXACT_PATHS:
+        return True
+    return any(path.startswith(prefix) for prefix in BACKEND_PATH_PREFIXES)
+
+
+# See gateway/main.py for why the trim runs inside the lifespan instead of at
+# module scope.
+_proxy_lifespan = app.router.lifespan_context
+
+
+@asynccontextmanager
+async def _backend_lifespan(app_):
+    async with _proxy_lifespan(app_):
+        app_.router.routes = [r for r in app_.router.routes if _is_backend_route(r)]
+        yield
+
+
+app.router.lifespan_context = _backend_lifespan
--- a/backend/routes/__init__.py
+++ b/backend/routes/__init__.py
--- a/backend/routes/allowlist.py
+++ b/backend/routes/allowlist.py
@ -0,0 +1,135 @@
+"""Path allowlist for the UI backend (control plane) component.
+
+The backend exposes management/admin endpoints consumed by the UI: keys, users,
+teams, orgs, customers, budgets, tags, workflows, model management, spend &
+analytics, settings (router/cache/cost-tracking/fallbacks), SSO/onboarding,
+audit logs, debug, enterprise admin, and UI bootstrap helpers (logo, favicon,
+.well-known config).
+
+Anything LLM data-plane is dropped — those run on the gateway component.
+"""
+
+# Route-path prefixes kept on the backend. backend/main.py keeps a route when
+# its `path` attribute starts with any entry, so an entry without a trailing
+# slash (e.g. "/login") also matches longer paths that merely begin with it.
+# Entries such as "/{mcp_server_name}/" are compared literally, like any other.
+BACKEND_PATH_PREFIXES: tuple[str, ...] = (
+    # Identity / access
+    "/key/",
+    "/v2/key/",
+    "/user/",
+    "/v2/user/",
+    "/team/",
+    "/v2/team/",
+    "/organization/",
+    "/customer/",
+    "/end_user/",
+    "/sso/",
+    "/login",
+    "/v2/login",
+    "/v3/login",
+    "/logout",
+    "/token",
+    "/onboarding/",
+    "/audit",
+    "/oauth/",
+    "/invitation/",
+    "/jwt/",
+    # Models & routing config
+    "/model/",
+    "/v1/model/info",
+    "/v2/model/",
+    "/model_group",
+    "/model_access_group/",
+    "/model_hub/",
+    "/v1/access_group",
+    "/access_group/",
+    "/router/",
+    "/router_settings",
+    "/adaptive_router/",
+    "/fallback",
+    "/fallbacks",
+    "/cache_settings",
+    "/cost_tracking",
+    "/cost/",
+    "/credentials",
+    "/credential",
+    "/provider/budgets",
+    # Tools / agents (registry & policy admin)
+    "/v1/tool/",
+    "/v1/agents",
+    # Guardrails admin
+    "/v2/guardrails/",
+    # MCP server admin + BYOK OAuth flow (UI-initiated) + dynamic per-server endpoints
+    "/v1/mcp/",
+    "/test/",
+    "/{mcp_server_name}/",
+    # Budgets / tags / workflows / memory mgmt
+    "/budget/",
+    "/tag/",
+    "/workflow/",
+    "/v1/workflows/",
+    "/project/",
+    "/memory/",
+    "/mcp/",
+    # Spend / analytics
+    "/spend/",
+    "/analytics/",
+    "/global/",
+    "/user_agent",
+    "/usage/",
+    "/daily/",
+    # CloudZero cost-export admin (init / settings / export / dry-run / delete)
+    "/cloudzero/",
+    # Caching admin
+    "/cache/",
+    "/caching/",
+    # Callbacks / hooks
+    "/active/callbacks",
+    "/callbacks",
+    "/team_callback",
+    # Alerting / email / IP allowlist
+    "/alerting/",
+    "/email/",
+    "/add/allowed_ip",
+    "/delete/allowed_ip",
+    "/get/",
+    # Enterprise admin
+    "/enterprise/",
+    # Debug / config / profiling
+    "/debug/",
+    "/config/",
+    "/memory-usage-in-mem-cache",
+    "/otel-spans",
+    "/lazy/",
+    "/in_product_nudges",
+    # Admin reload / schedule
+    "/reload/",
+    "/schedule/",
+    "/settings",
+    "/update/",
+    "/upload/",
+    # Dev / admin utilities
+    "/utils/",
+    # UI bootstrap helpers (assets the dashboard fetches)
+    "/get_logo_url",
+    "/get_image",
+    "/get_favicon",
+    "/.well-known/",
+    "/litellm/.well-known/",
+    "/ui_discovery/",
+    "/ui-config",
+    "/sso_settings",
+    "/public/",
+    "/robots.txt",
+    # Health (k8s probes)
+    "/health",
+)
+
+# Paths kept on the backend only when the route path matches exactly.
+# NOTE(review): "/fallback/login" already matches the "/fallback" prefix in
+# BACKEND_PATH_PREFIXES, so this entry looks redundant — confirm before removing.
+BACKEND_EXACT_PATHS: frozenset[str] = frozenset(
+    {
+        "/",
+        "/routes",
+        "/openapi.json",
+        "/docs",
+        "/docs/oauth2-redirect",
+        "/redoc",
+        "/fallback/login",
+    }
+)
--- a/gateway/Dockerfile
+++ b/gateway/Dockerfile
@ -0,0 +1,83 @@
+# Gateway (LLM data-plane) image.
+# A builder stage assembles the uv-managed virtualenv under /app/.venv and
+# generates the Prisma client; the runtime stage copies both across and runs
+# uvicorn on port 4000 as the unprivileged `nonroot` user.
+ARG LITELLM_BUILD_IMAGE=cgr.dev/chainguard/wolfi-base@sha256:31da6565f35af6401031c1d7aa91dc84ac76c5c48edd17fb90f0ed9e3173c7a9
+ARG LITELLM_RUNTIME_IMAGE=cgr.dev/chainguard/wolfi-base@sha256:31da6565f35af6401031c1d7aa91dc84ac76c5c48edd17fb90f0ed9e3173c7a9
+ARG UV_IMAGE=ghcr.io/astral-sh/uv:0.11.7@sha256:240fb85ab0f263ef12f492d8476aa3a2e4e1e333f7d67fbdd923d00a506a516a
+
+FROM $UV_IMAGE AS uvbin
+
+# ---------- Builder ----------
+FROM $LITELLM_BUILD_IMAGE AS builder
+
+WORKDIR /app
+USER root
+
+COPY --from=uvbin /uv /uvx /usr/local/bin/
+
+RUN apk add --no-cache bash gcc python3 python3-dev openssl openssl-dev libsndfile
+
+# UV_COMPILE_BYTECODE=1   precompiles .pyc at install time → faster cold start.
+# UV_LINK_MODE=copy       avoids hardlink warnings when uv installs from a
+#                         BuildKit cache mount (different filesystem).
+# UV_PYTHON_DOWNLOADS=0   force uv to use the apk-installed CPython instead of
+#                         silently pulling a managed interpreter.
+ENV UV_PROJECT_ENVIRONMENT=/app/.venv \
+    UV_LINK_MODE=copy \
+    UV_COMPILE_BYTECODE=1 \
+    UV_PYTHON_DOWNLOADS=0 \
+    PATH="/app/.venv/bin:${PATH}"
+
+# Stage 1 — install dependencies only.
+RUN --mount=type=cache,target=/root/.cache/uv \
+    --mount=type=bind,source=pyproject.toml,target=pyproject.toml \
+    --mount=type=bind,source=uv.lock,target=uv.lock \
+    --mount=type=bind,source=enterprise/pyproject.toml,target=enterprise/pyproject.toml \
+    --mount=type=bind,source=litellm-proxy-extras/pyproject.toml,target=litellm-proxy-extras/pyproject.toml \
+    uv sync --frozen --no-install-project --no-install-workspace --no-default-groups --no-editable \
+        --extra proxy \
+        --extra proxy-runtime \
+        --extra extra_proxy \
+        --extra semantic-router \
+        --python python3
+
+# Stage 2 — copy source and install the project + workspace members.
+COPY . .
+
+RUN --mount=type=cache,target=/root/.cache/uv \
+    uv sync --frozen --no-default-groups --no-editable \
+        --extra proxy \
+        --extra proxy-runtime \
+        --extra extra_proxy \
+        --extra semantic-router \
+        --python python3
+
+# Strip tornado's bundled test suite in the builder so it never reaches a
+# runtime layer; removing it after the runtime COPY would only hide the files
+# behind a later layer. `-prune -exec rm -rf` removes the whole tree, including
+# nested sub-directories that `find -delete` cannot remove while non-empty.
+RUN find /app/.venv -type d -path "*/tornado/test" -prune -exec rm -rf {} \;
+
+# Generate the Prisma client with HOME pointed at /home/nonroot so anything it
+# writes under ~/.cache ends up in the directory the runtime stage copies.
+RUN mkdir -p /home/nonroot && \
+    HOME=/home/nonroot prisma generate --schema=./schema.prisma && \
+    chown -R nonroot:nonroot /home/nonroot/.cache
+
+# ---------- Runtime ----------
+FROM $LITELLM_RUNTIME_IMAGE AS runtime
+
+USER root
+
+RUN apk add --no-cache bash openssl tzdata python3 libsndfile libatomic
+
+# wolfi-base ships an unprivileged `nonroot` account (UID/GID 65532) with
+# /home/nonroot. We run the proxy as that user.
+WORKDIR /app
+ENV HOME=/home/nonroot \
+    PATH="/app/.venv/bin:${PATH}" \
+    PYTHONPATH="/app" \
+    PYTHONDONTWRITEBYTECODE=1 \
+    PYTHONUNBUFFERED=1
+
+COPY --from=builder --chown=nonroot:nonroot /app /app
+COPY --from=builder --chown=nonroot:nonroot /home/nonroot/.cache /home/nonroot/.cache
+
+USER nonroot
+
+EXPOSE 4000/tcp
+
+# The `sh -c` wrapper lets NUM_WORKERS (default 1) be expanded at container
+# start; `exec` replaces the shell with uvicorn, and the trailing "--" fills $0
+# so the CMD (or `docker run`) arguments arrive as "$@".
+ENTRYPOINT ["sh", "-c", "exec uvicorn gateway.main:app --workers \"${NUM_WORKERS:-1}\" \"$@\"", "--"]
+CMD ["--host", "0.0.0.0", "--port", "4000"]
--- a/gateway/main.py
+++ b/gateway/main.py
@ -0,0 +1,59 @@
+"""Gateway entrypoint.
+
+Reuses the existing FastAPI app from `litellm.proxy.proxy_server` and trims its
+route table to just the LLM data-plane surface. The trim is purely additive —
+no existing module is modified, the full app continues to work via the legacy
+entrypoint (`litellm.proxy.proxy_server:app`).
+
+Run with:
+    uvicorn gateway.main:app --host 0.0.0.0 --port 4000
+"""
+
+from contextlib import asynccontextmanager
+
+from fastapi.routing import Mount
+
+# Assemble DATABASE_URL (+ DATABASE_URL_READ_REPLICA) from the discrete
+# DATABASE_* env vars before proxy_server imports spin up Prisma. Handles
+# both IAM (mint a token) and password auth, writer and reader. The standard
+# CLI flow does this in proxy_cli.py; we bypass proxy_cli by uvicorn'ing the
+# app directly, so without this Prisma initializes with the placeholder URL
+# and every DB-needing endpoint returns "Database not connected".
+from litellm.proxy.db.db_url_settings import DatabaseURLSettings
+
+DatabaseURLSettings.from_env().apply_to_env()
+
+from litellm.proxy.proxy_server import app
+
+from gateway.routes.allowlist import GATEWAY_EXACT_PATHS, GATEWAY_PATH_PREFIXES
+
+
+def _is_gateway_route(route) -> bool:
+    """Keep the route on the gateway if its path is in the LLM data-plane surface."""
+    path = getattr(route, "path", None)
+    if path is None:
+        return False
+    if isinstance(route, Mount):
+        # Gateway never serves the static UI or its asset bundles.
+        return False
+    if path in GATEWAY_EXACT_PATHS:
+        return True
+    return any(path.startswith(prefix) for prefix in GATEWAY_PATH_PREFIXES)
+
+
+# Wrap proxy_server's existing lifespan so the route trim runs *after* its
+# startup hooks (and any plugin code those hooks load) have had a chance to
+# register routes. A module-load filter would miss routes added during
+# startup; running inside the lifespan, after the inner __aenter__, catches
+# them while still completing before uvicorn opens the listener.
+_proxy_lifespan = app.router.lifespan_context
+
+
+@asynccontextmanager
+async def _gateway_lifespan(app_):
+    async with _proxy_lifespan(app_):
+        app_.router.routes = [r for r in app_.router.routes if _is_gateway_route(r)]
+        yield
+
+
+app.router.lifespan_context = _gateway_lifespan
--- a/gateway/routes/__init__.py
+++ b/gateway/routes/__init__.py
--- a/gateway/routes/allowlist.py
+++ b/gateway/routes/allowlist.py
@ -0,0 +1,121 @@
+"""Path allowlist for the gateway component.
+
+The gateway exposes the LLM data-plane surface: chat/completions, embeddings,
+audio, batches, files, fine-tuning, rerank, ocr, rag, video, search, image,
+responses, vector stores, passthrough providers, realtime websockets, MCP
+tool-call endpoints, and operational endpoints (/health, /metrics).
+
+Any path not listed here is dropped from the gateway process so management/UI
+endpoints don't ride on the same pods.
+
+Versioned data-plane paths are enumerated explicitly rather than allowing a
+blanket `/v1/` or `/v2/` prefix — those broad prefixes would otherwise also
+match management routes like `/v1/access_group`, `/v1/tool/{tool_name}/logs`,
+`/v2/key/info`, etc.
+"""
+
+# Route-path prefixes kept on the gateway. gateway/main.py keeps a route when
+# its `path` attribute starts with any entry, so an entry without a trailing
+# slash (e.g. "/v1/video") also matches longer paths that begin with it
+# (e.g. "/v1/videos"). Template entries such as "/{provider}/" are compared
+# literally, like any other.
+GATEWAY_PATH_PREFIXES: tuple[str, ...] = (
+    # OpenAI-compatible data-plane surface (versioned + unversioned)
+    "/v1/chat/",
+    "/chat/",
+    "/v1/completions",
+    "/completions",
+    "/v1/embeddings",
+    "/embeddings",
+    "/v1/moderations",
+    "/moderations",
+    "/v1/audio/",
+    "/audio/",
+    "/v1/images/",
+    "/images/",
+    "/v1/files",
+    "/files",
+    "/v1/batches",
+    "/batches",
+    "/v1/fine_tuning/",
+    "/fine_tuning/",
+    "/v1/fine-tuning/",
+    "/fine-tuning/",
+    "/v1/responses",
+    "/responses",
+    "/v1/threads",
+    "/threads",
+    "/v1/assistants",
+    "/assistants",
+    "/v1/vector_stores",
+    "/vector_stores",
+    "/v1/indexes",
+    "/v1/models",
+    "/models",
+    "/openai/",
+    "/engines/",
+    # Anthropic / agentic data-plane surface
+    "/v1/messages",
+    "/messages",
+    "/v1/skills",
+    "/v1/a2a/",
+    # LiteLLM-native LLM surface
+    "/v1/rerank",
+    "/v2/rerank",
+    "/rerank",
+    "/v1/ocr",
+    "/ocr",
+    "/v1/rag/",
+    "/rag/",
+    "/v1/video",
+    "/v1/videos",
+    "/video/",
+    "/videos",
+    "/v1/search",
+    "/search",
+    "/v1/containers",
+    "/containers",
+    "/v1/evals",
+    "/v1/memory",
+    "/queue/chat/",
+    # Google data plane (v1beta is the Google AI Studio version)
+    "/v1beta/",
+    "/interactions",
+    # Provider passthrough
+    "/anthropic/",
+    "/azure/",
+    "/azure_ai/",
+    "/aws/",
+    "/bedrock/",
+    "/cohere/",
+    "/gemini/",
+    "/google/",
+    "/vertex_ai/",
+    "/vertex-ai/",
+    "/assemblyai/",
+    "/eu.assemblyai/",
+    "/langfuse/",
+    "/vllm/",
+    "/mistral/",
+    "/groq/",
+    "/voyage/",
+    "/cursor/",
+    "/milvus/",
+    "/openai_passthrough/",
+    # Dynamic provider / toolset passthrough (path templates)
+    "/{provider}/",
+    "/toolset/",
+    # Realtime / streaming
+    "/v1/realtime",
+    "/realtime",
+    # Health & ops
+    "/health",
+    "/metrics",
+)
+
+# Paths kept on the gateway only when the route path matches exactly; unlike
+# the prefix tuple, "/test" here does not admit "/test/..." sub-paths.
+GATEWAY_EXACT_PATHS: frozenset[str] = frozenset(
+    {
+        "/",
+        "/routes",
+        "/openapi.json",
+        "/docs",
+        "/docs/oauth2-redirect",
+        "/redoc",
+        "/test",
+    }
+)
--- a/helm/litellm/Chart.yaml
+++ b/helm/litellm/Chart.yaml
@ -0,0 +1,8 @@
+apiVersion: v2
+name: litellm
+description: LiteLLM componentized — gateway, UI backend, and UI as separate services
+type: application
+# Version of the chart itself; it also feeds the helm.sh/chart label.
+version: 0.1.0
+# The backend Deployment template falls back to this value as the image tag
+# when backend.image.tag is unset.
+appVersion: "0.1.0"
+annotations:
+  org.opencontainers.image.source: "https://github.com/BerriAI/litellm"
--- a/helm/litellm/templates/NOTES.txt
+++ b/helm/litellm/templates/NOTES.txt
@ -0,0 +1,49 @@
+LiteLLM componentized — release {{ .Release.Name }} in namespace {{ .Release.Namespace }}.
+
+Components:
+{{- if .Values.gateway.enabled }}
+  - gateway  : Service {{ include "litellm.gateway.fullname" . }} on port {{ .Values.gateway.service.port }}
+{{- end }}
+{{- if .Values.backend.enabled }}
+  - backend  : Service {{ include "litellm.backend.fullname" . }} on port {{ .Values.backend.service.port }}
+{{- end }}
+{{- if .Values.ui.enabled }}
+  - ui       : Service {{ include "litellm.ui.fullname" . }} on port {{ .Values.ui.service.port }}
+{{- end }}
+
+Port-forward examples:
+{{- /* Only print a command for components that are enabled, matching the Components list above. */}}
+{{- if .Values.gateway.enabled }}
+  kubectl -n {{ .Release.Namespace }} port-forward svc/{{ include "litellm.gateway.fullname" . }} {{ .Values.gateway.service.port }}
+{{- end }}
+{{- if .Values.backend.enabled }}
+  kubectl -n {{ .Release.Namespace }} port-forward svc/{{ include "litellm.backend.fullname" . }} {{ .Values.backend.service.port }}
+{{- end }}
+{{- if .Values.ui.enabled }}
+  kubectl -n {{ .Release.Namespace }} port-forward svc/{{ include "litellm.ui.fullname" . }} {{ .Values.ui.service.port }}
+{{- end }}
+
+Reminders:
+  - Sensitive values come from Secret references only. Before installing, set:
+      - masterKey.secretName                                   (Secret with the proxy master key)
+      - database.writer.{host,port,dbname}                     (writer connection pieces)
+      - database.writer.passwordSecret.{name,usernameKey,passwordKey}
+                                                               (Secret holding the writer DB username + password)
+      - database.writer.useIAMAuth: true                       (optional — chart sets IAM_TOKEN_DB_AUTH=true and
+                                                                omits DATABASE_PASSWORD / DATABASE_URL so the proxy
+                                                                mints the URL from an IAM token at startup)
+      - database.reader.host                                   (optional — enables read-replica routing; reader
+                                                                .passwordSecret.name is required when set, unless
+                                                                .useIAMAuth is true)
+      - database.reader.useIAMAuth: true                       (optional, requires database.writer.useIAMAuth: true —
+                                                                chart emits DATABASE_*_READ_REPLICA env vars and
+                                                                omits DATABASE_PASSWORD_READ_REPLICA /
+                                                                DATABASE_URL_READ_REPLICA so the proxy mints the
+                                                                reader URL from an IAM token at startup)
+      - redis.passwordSecret.name                              (optional — set when redis.host is provided and the
+                                                                cache requires auth)
+      - redis.cluster: true                                    (optional — chart sets REDIS_CLUSTER_NODES from
+                                                                redis.host / redis.port so the proxy's Cache()
+                                                                constructs a RedisClusterCache; the cluster client
+                                                                discovers remaining nodes from CLUSTER SLOTS)
+  - Per-component extras (gateway / backend / ui):
+      - {component}.extraEnv / envConfigMaps / envSecrets      (the latter two are lists of resource names →
+                                                                envFrom configMapRef / secretRef)
+      - {component}.logLevel                                   (renders as LITELLM_LOG)
+      - gateway.config.proxy_config                            (rendered into a ConfigMap and mounted at
+                                                                /app/config/config.yaml; gateway reads it via
+                                                                CONFIG_FILE_PATH)
+  - Set ingress.enabled=true to dispatch / → ui, gateway data-plane prefixes → gateway, and the catch-all → backend.
--- a/helm/litellm/templates/_helpers.tpl
+++ b/helm/litellm/templates/_helpers.tpl
@ -0,0 +1,245 @@
+{{/*
+Common naming + label helpers shared by gateway, backend, and ui templates.
+*/}}
+
+{{/*
+Short chart name: `nameOverride` when set, otherwise the chart name, cut to 63
+characters with any trailing "-" removed.
+*/}}
+{{- define "litellm.name" -}}
+{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" -}}
+{{- end -}}
+
+{{/*
+Base resource name: `fullnameOverride` when set, otherwise
+"<release name>-<chart name or nameOverride>". Either way the result is cut to
+63 characters and a trailing "-" is removed.
+*/}}
+{{- define "litellm.fullname" -}}
+{{- $full := .Values.fullnameOverride -}}
+{{- if not $full -}}
+{{- $full = printf "%s-%s" .Release.Name (default .Chart.Name .Values.nameOverride) -}}
+{{- end -}}
+{{- $full | trunc 63 | trimSuffix "-" -}}
+{{- end -}}
+
+{{/*
+Gateway resource name. The base fullname is cut to 55 characters before the
+8-character "-gateway" suffix is appended, so the result stays within 63
+characters and the suffix is never truncated away.
+*/}}
+{{- define "litellm.gateway.fullname" -}}
+{{- printf "%s-gateway" (include "litellm.fullname" . | trunc 55 | trimSuffix "-") -}}
+{{- end -}}
+
+{{/*
+Backend resource name. The base fullname is cut to 55 characters before the
+8-character "-backend" suffix is appended, so the result stays within 63
+characters and the suffix is never truncated away.
+*/}}
+{{- define "litellm.backend.fullname" -}}
+{{- printf "%s-backend" (include "litellm.fullname" . | trunc 55 | trimSuffix "-") -}}
+{{- end -}}
+
+{{/*
+UI resource name. The base fullname is cut to 60 characters before the
+3-character "-ui" suffix is appended, so the result stays within 63
+characters and the suffix is never truncated away.
+*/}}
+{{- define "litellm.ui.fullname" -}}
+{{- printf "%s-ui" (include "litellm.fullname" . | trunc 60 | trimSuffix "-") -}}
+{{- end -}}
+
+{{/*
+Labels stamped on every resource's metadata. The helm.sh/chart value replaces
+"+" (not allowed in label values) with "_" and is cut to 63 characters, the
+Kubernetes label-value limit, so a long chart version cannot produce an
+invalid label.
+*/}}
+{{- define "litellm.commonLabels" -}}
+app.kubernetes.io/name: {{ include "litellm.name" . }}
+app.kubernetes.io/instance: {{ .Release.Name }}
+app.kubernetes.io/managed-by: {{ .Release.Service }}
+helm.sh/chart: {{ printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }}
+{{- end -}}
+
+{{/*
+Per-component selector labels — used in both Service selectors and Deployment matchLabels.
+*/}}
+{{/* Gateway selector: chart name + release instance + component=gateway. */}}
+{{- define "litellm.gateway.selectorLabels" -}}
+app.kubernetes.io/name: {{ include "litellm.name" . }}
+app.kubernetes.io/instance: {{ .Release.Name }}
+app.kubernetes.io/component: gateway
+{{- end -}}
+
+{{/*
+Backend selector: chart name + release instance + component=backend. The
+backend Deployment renders it under both spec.selector.matchLabels and the pod
+template labels.
+*/}}
+{{- define "litellm.backend.selectorLabels" -}}
+app.kubernetes.io/name: {{ include "litellm.name" . }}
+app.kubernetes.io/instance: {{ .Release.Name }}
+app.kubernetes.io/component: backend
+{{- end -}}
+
+{{/* UI selector: chart name + release instance + component=ui. */}}
+{{- define "litellm.ui.selectorLabels" -}}
+app.kubernetes.io/name: {{ include "litellm.name" . }}
+app.kubernetes.io/instance: {{ .Release.Name }}
+app.kubernetes.io/component: ui
+{{- end -}}
+
+{{/*
+Shared ServiceAccount name used by all three component Deployments. When
+`serviceAccount.create` is true and `serviceAccount.name` is empty, default
+to the chart fullname. When `create` is false, fall back to the provided
+name or the namespace's `default` SA.
+*/}}
+{{- define "litellm.serviceAccountName" -}}
+{{- /* Pick the fallback first, then let an explicit serviceAccount.name win. */ -}}
+{{- $fallback := "default" -}}
+{{- if .Values.serviceAccount.create -}}
+{{- $fallback = include "litellm.fullname" . -}}
+{{- end -}}
+{{- default $fallback .Values.serviceAccount.name -}}
+{{- end -}}
+
+{{/*
+Master-key + database + redis env block — shared by gateway, backend, and the
+migrations Job.
+
+Invoke with a dict: `(dict "root" $ "component" .Values.gateway)`. `root` is
+the chart context (needed for .Values), `component` selects which component's
+`extraEnv` / `logLevel` to render.
+
+Sensitive values (master key, DB username + password, Redis password) come
+only from referenced Secrets; the chart never accepts inline values for them.
+
+The chart never assembles DATABASE_URL itself. It emits only the discrete
+DATABASE_HOST/PORT/USER/NAME/SCHEMA (+ DATABASE_PASSWORD for password auth)
+vars; the proxy's entrypoint (DatabaseURLSettings in
+litellm/proxy/db/db_url_settings.py) builds the URL from them and
+percent-encodes the credentials. Assembling the URL here via Kubernetes
+`$(VAR)` substitution would embed the raw secret value, corrupting the URL
+whenever the password contains a URL-reserved character (@, /, ?, %, +,
+...) — as AWS RDS auto-generated passwords routinely do.
+
+When `database.writer.useIAMAuth: true`, the chart injects
+IAM_TOKEN_DB_AUTH=true and omits DATABASE_PASSWORD — the entrypoint mints
+the URL from DATABASE_HOST/PORT/USER/NAME plus a short-lived IAM token
+instead of a static password.
+
+The read replica is opt-in via `database.reader.host`. The chart emits
+DATABASE_HOST_READ_REPLICA / DATABASE_PORT_READ_REPLICA /
+DATABASE_NAME_READ_REPLICA (+ DATABASE_SCHEMA_READ_REPLICA) for both auth
+modes, plus DATABASE_USER_READ_REPLICA / DATABASE_PASSWORD_READ_REPLICA for
+password auth. When `database.reader.useIAMAuth: true` it omits
+DATABASE_PASSWORD_READ_REPLICA and the entrypoint mints the reader URL the
+same way. Reader IAM only takes effect when the writer also uses IAM auth
+(the proxy gates URL minting on IAM_TOKEN_DB_AUTH, which only the writer
+sets).
+*/}}
+{{- define "litellm.serverEnv" -}}
+{{- $root := .root -}}
+{{- $component := .component -}}
+- name: LITELLM_MASTER_KEY
+  valueFrom:
+    secretKeyRef:
+      name: {{ required "masterKey.secretName is required (the chart no longer accepts an inline master key)" $root.Values.masterKey.secretName }}
+      key: {{ $root.Values.masterKey.secretKey | default "master-key" }}
+{{- if $component.logLevel }}
+- name: LITELLM_LOG
+  value: {{ $component.logLevel | quote }}
+{{- end }}
+{{- with $root.Values.database.writer }}
+- name: DATABASE_HOST
+  value: {{ required "database.writer.host is required" .host | quote }}
+- name: DATABASE_PORT
+  value: {{ .port | default 5432 | quote }}
+- name: DATABASE_USER
+  valueFrom:
+    secretKeyRef:
+      name: {{ required "database.writer.passwordSecret.name is required" .passwordSecret.name }}
+      key: {{ .passwordSecret.usernameKey | default "username" }}
+- name: DATABASE_NAME
+  value: {{ required "database.writer.dbname is required" .dbname | quote }}
+{{- if .schema }}
+- name: DATABASE_SCHEMA
+  value: {{ .schema | quote }}
+{{- end }}
+{{- if .useIAMAuth }}
+- name: IAM_TOKEN_DB_AUTH
+  value: "true"
+{{- else }}
+- name: DATABASE_PASSWORD
+  valueFrom:
+    secretKeyRef:
+      name: {{ .passwordSecret.name }}
+      key: {{ .passwordSecret.passwordKey | default "password" }}
+{{- end }}
+{{- end }}
+{{- with $root.Values.database.reader }}
+{{- if .host }}
+{{- if and .useIAMAuth (not $root.Values.database.writer.useIAMAuth) }}
+{{- fail "database.reader.useIAMAuth requires database.writer.useIAMAuth: true (the proxy gates IAM URL minting on IAM_TOKEN_DB_AUTH, which is only set by the writer)" }}
+{{- end }}
+- name: DATABASE_HOST_READ_REPLICA
+  value: {{ .host | quote }}
+- name: DATABASE_PORT_READ_REPLICA
+  value: {{ .port | default 5432 | quote }}
+- name: DATABASE_NAME_READ_REPLICA
+  value: {{ required "database.reader.dbname is required when database.reader.host is set" .dbname | quote }}
+{{- if .schema }}
+- name: DATABASE_SCHEMA_READ_REPLICA
+  value: {{ .schema | quote }}
+{{- end }}
+{{- if .useIAMAuth }}
+{{- if .passwordSecret.name }}
+- name: DATABASE_USER_READ_REPLICA
+  valueFrom:
+    secretKeyRef:
+      name: {{ .passwordSecret.name }}
+      key: {{ .passwordSecret.usernameKey | default "username" }}
+{{- end }}
+{{- else }}
+{{- if not .passwordSecret.name }}
+{{- fail "database.reader.passwordSecret.name is required when database.reader.host is set" }}
+{{- end }}
+- name: DATABASE_USER_READ_REPLICA
+  valueFrom:
+    secretKeyRef:
+      name: {{ .passwordSecret.name }}
+      key: {{ .passwordSecret.usernameKey | default "username" }}
+- name: DATABASE_PASSWORD_READ_REPLICA
+  valueFrom:
+    secretKeyRef:
+      name: {{ .passwordSecret.name }}
+      key: {{ .passwordSecret.passwordKey | default "password" }}
+{{- end }}
+{{- end }}
+{{- end }}
+{{/*
+The migrations Job (helm.sh/hook: pre-upgrade) is the single owner of
+`prisma migrate deploy`. Without this, every gateway/backend pod also runs
+Prisma schema-update on startup and contends with the Job — and with each
+other — for Prisma's Postgres advisory lock on the writer, which makes the
+Job's `migrate deploy` intermittently block until its per-attempt timeout
+and retry-exhaust. The Job's entrypoint (migrations/run.py) does not import
+proxy_server and never reads DISABLE_SCHEMA_UPDATE, so emitting it here is a
+harmless no-op for the Job and authoritative for the app pods.
+*/}}
+- name: DISABLE_SCHEMA_UPDATE
+  value: "true"
+{{- if $root.Values.redis.host }}
+- name: REDIS_HOST
+  value: {{ $root.Values.redis.host | quote }}
+{{- /* Fall back to the standard Redis port when redis.port is unset, mirroring the DATABASE_PORT default. */}}
+- name: REDIS_PORT
+  value: {{ $root.Values.redis.port | default 6379 | quote }}
+{{- if $root.Values.redis.passwordSecret.name }}
+- name: REDIS_PASSWORD
+  valueFrom:
+    secretKeyRef:
+      name: {{ $root.Values.redis.passwordSecret.name }}
+      key: {{ $root.Values.redis.passwordSecret.passwordKey | default "password" }}
+{{- end }}
+{{- if $root.Values.redis.cluster }}
+{{/* The proxy's Cache() reads REDIS_CLUSTER_NODES as JSON and constructs a
+     RedisClusterCache when it's set (litellm/caching/caching.py:169-192).
+     We seed with the single configured endpoint — the cluster client
+     discovers the remaining nodes from CLUSTER SLOTS at startup. */}}
+- name: REDIS_CLUSTER_NODES
+  value: {{ printf "[{\"host\":%q,\"port\":%v}]" $root.Values.redis.host (int ($root.Values.redis.port | default 6379)) | quote }}
+{{- end }}
+{{- end }}
+{{- with $component.extraEnv }}
+{{ toYaml . }}
+{{- end }}
+{{- end -}}
+
+{{/*
+Renders `envFrom:` block for a component's `envConfigMaps` / `envSecrets`
+lists. Each entry is a resource name; the chart wires the whole ConfigMap /
+Secret into the container's env via configMapRef / secretRef.
+
+Invoke with just the component dict, e.g. `.Values.gateway`. Emits nothing
+when both lists are empty so the container spec stays clean.
+*/}}
+{{- define "litellm.envFrom" -}}
+{{- $configMaps := .envConfigMaps -}}
+{{- $secrets := .envSecrets -}}
+{{- if or $configMaps $secrets }}
+envFrom:
+{{- range $name := $configMaps }}
+  - configMapRef:
+      name: {{ $name }}
+{{- end }}
+{{- range $name := $secrets }}
+  - secretRef:
+      name: {{ $name }}
+{{- end }}
+{{- end }}
+{{- end -}}
--- a/helm/litellm/templates/backend/deployment.yaml
+++ b/helm/litellm/templates/backend/deployment.yaml
@ -0,0 +1,60 @@
+{{/*
+Deployment for the backend component (management / admin API, container
+port 4001).
+
+`spec.replicas` is rendered only when the backend HPA is disabled AND
+`backend.replicaCount` is set. With the HPA enabled the field is omitted so
+the autoscaler stays the sole owner of the replica count; with
+`replicaCount` unset the rendered manifest is unchanged.
+*/}}
+{{- if .Values.backend.enabled }}
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: {{ include "litellm.backend.fullname" . }}
+  labels:
+    {{- include "litellm.commonLabels" . | nindent 4 }}
+    app.kubernetes.io/component: backend
+spec:
+  {{- if and (not .Values.backend.hpa.enabled) (hasKey .Values.backend "replicaCount") }}
+  replicas: {{ .Values.backend.replicaCount }}
+  {{- end }}
+  selector:
+    matchLabels:
+      {{- include "litellm.backend.selectorLabels" . | nindent 6 }}
+  template:
+    metadata:
+      {{- with .Values.backend.podAnnotations }}
+      annotations:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      labels:
+        {{- include "litellm.backend.selectorLabels" . | nindent 8 }}
+    spec:
+      serviceAccountName: {{ include "litellm.serviceAccountName" . }}
+      {{- with .Values.imagePullSecrets }}
+      imagePullSecrets:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      containers:
+        - name: backend
+          image: "{{ .Values.backend.image.repository }}:{{ .Values.backend.image.tag | default .Chart.AppVersion }}"
+          imagePullPolicy: {{ .Values.backend.image.pullPolicy }}
+          ports:
+            - name: http
+              containerPort: 4001
+              protocol: TCP
+          env:
+            {{- include "litellm.serverEnv" (dict "root" $ "component" .Values.backend) | nindent 12 }}
+          {{- include "litellm.envFrom" .Values.backend | nindent 10 }}
+          {{- with .Values.backend.livenessProbe }}
+          livenessProbe:
+            {{- toYaml . | nindent 12 }}
+          {{- end }}
+          {{- with .Values.backend.readinessProbe }}
+          readinessProbe:
+            {{- toYaml . | nindent 12 }}
+          {{- end }}
+          resources:
+            {{- toYaml .Values.backend.resources | nindent 12 }}
+      {{- with .Values.backend.nodeSelector }}
+      nodeSelector:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      {{- with .Values.backend.affinity }}
+      affinity:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      {{- with .Values.backend.tolerations }}
+      tolerations:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+{{- end }}
--- a/helm/litellm/templates/backend/hpa.yaml
+++ b/helm/litellm/templates/backend/hpa.yaml
@ -0,0 +1,33 @@
+{{/*
+HorizontalPodAutoscaler for the backend Deployment. Rendered only when both
+the backend and its HPA are enabled; each Resource metric is emitted only if
+its target utilization is set in `backend.hpa`.
+*/}}
+{{- if and .Values.backend.enabled .Values.backend.hpa.enabled }}
+{{- $hpa := .Values.backend.hpa }}
+{{- $fullname := include "litellm.backend.fullname" . }}
+apiVersion: autoscaling/v2
+kind: HorizontalPodAutoscaler
+metadata:
+  name: {{ $fullname }}
+  labels:
+    {{- include "litellm.commonLabels" . | nindent 4 }}
+    app.kubernetes.io/component: backend
+spec:
+  scaleTargetRef:
+    apiVersion: apps/v1
+    kind: Deployment
+    name: {{ $fullname }}
+  minReplicas: {{ $hpa.minReplicas }}
+  maxReplicas: {{ $hpa.maxReplicas }}
+  metrics:
+    {{- with $hpa.targetCPUUtilizationPercentage }}
+    - type: Resource
+      resource:
+        name: cpu
+        target:
+          type: Utilization
+          averageUtilization: {{ . }}
+    {{- end }}
+    {{- with $hpa.targetMemoryUtilizationPercentage }}
+    - type: Resource
+      resource:
+        name: memory
+        target:
+          type: Utilization
+          averageUtilization: {{ . }}
+    {{- end }}
+{{- end }}
--- a/helm/litellm/templates/backend/service.yaml
+++ b/helm/litellm/templates/backend/service.yaml
@ -0,0 +1,18 @@
+{{/*
+Service fronting the backend pods. Type and port come from
+`backend.service`; traffic is forwarded to the container's named `http` port.
+*/}}
+{{- if .Values.backend.enabled }}
+{{- $svc := .Values.backend.service }}
+apiVersion: v1
+kind: Service
+metadata:
+  name: {{ include "litellm.backend.fullname" . }}
+  labels:
+    {{- include "litellm.commonLabels" . | nindent 4 }}
+    app.kubernetes.io/component: backend
+spec:
+  type: {{ $svc.type }}
+  selector:
+    {{- include "litellm.backend.selectorLabels" . | nindent 4 }}
+  ports:
+    - name: http
+      port: {{ $svc.port }}
+      targetPort: http
+      protocol: TCP
+{{- end }}
--- a/helm/litellm/templates/gateway/configmap.yaml
+++ b/helm/litellm/templates/gateway/configmap.yaml
@ -0,0 +1,9 @@
+{{/*
+ConfigMap carrying the gateway's proxy config as `config.yaml`.
+
+Rendered only when the gateway itself is enabled and `gateway.config.create`
+is true, so a disabled gateway does not leave an orphaned ConfigMap behind.
+Carries the same common / component labels as the other gateway resources.
+*/}}
+{{- if and .Values.gateway.enabled .Values.gateway.config.create }}
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: {{ include "litellm.gateway.fullname" . }}-config
+  labels:
+    {{- include "litellm.commonLabels" . | nindent 4 }}
+    app.kubernetes.io/component: gateway
+data:
+  config.yaml: |
+{{ .Values.gateway.config.proxy_config | toYaml | indent 6 }}
+{{- end }}
--- a/helm/litellm/templates/gateway/deployment.yaml
+++ b/helm/litellm/templates/gateway/deployment.yaml
@ -0,0 +1,83 @@
+{{/*
+Deployment for the gateway component (LLM data plane, container port 4000).
+
+* Pod `annotations:` is rendered only when there is something to put in it
+  (a config checksum and/or `gateway.podAnnotations`), so the key never
+  renders as a bare null.
+* `checksum/config` hashes the rendered gateway ConfigMap so a config change
+  alters the pod template and rolls the pods.
+* `spec.replicas` is rendered only when the gateway HPA is disabled AND
+  `gateway.replicaCount` is set; otherwise the field is omitted and the
+  rendered manifest is unchanged.
+*/}}
+{{- if .Values.gateway.enabled }}
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: {{ include "litellm.gateway.fullname" . }}
+  labels:
+    {{- include "litellm.commonLabels" . | nindent 4 }}
+    app.kubernetes.io/component: gateway
+spec:
+  {{- if and (not .Values.gateway.hpa.enabled) (hasKey .Values.gateway "replicaCount") }}
+  replicas: {{ .Values.gateway.replicaCount }}
+  {{- end }}
+  selector:
+    matchLabels:
+      {{- include "litellm.gateway.selectorLabels" . | nindent 6 }}
+  template:
+    metadata:
+      {{- if or .Values.gateway.config.create .Values.gateway.podAnnotations }}
+      annotations:
+        {{- if .Values.gateway.config.create }}
+        checksum/config: {{ include (print $.Template.BasePath "/gateway/configmap.yaml") . | sha256sum }}
+        {{- end }}
+        {{- with .Values.gateway.podAnnotations }}
+        {{- toYaml . | nindent 8 }}
+        {{- end }}
+      {{- end }}
+      labels:
+        {{- include "litellm.gateway.selectorLabels" . | nindent 8 }}
+    spec:
+      serviceAccountName: {{ include "litellm.serviceAccountName" . }}
+      {{- with .Values.imagePullSecrets }}
+      imagePullSecrets:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      containers:
+        - name: gateway
+          image: "{{ .Values.gateway.image.repository }}:{{ .Values.gateway.image.tag | default .Chart.AppVersion }}"
+          imagePullPolicy: {{ .Values.gateway.image.pullPolicy }}
+          ports:
+            - name: http
+              containerPort: 4000
+              protocol: TCP
+          env:
+            {{- include "litellm.serverEnv" (dict "root" $ "component" .Values.gateway) | nindent 12 }}
+            {{- if .Values.gateway.config.create }}
+            - name: CONFIG_FILE_PATH
+              value: /app/config/config.yaml
+            {{- end }}
+            {{- if .Values.gateway.numWorkers }}
+            - name: NUM_WORKERS
+              value: {{ .Values.gateway.numWorkers | quote }}
+            {{- end }}
+          {{- include "litellm.envFrom" .Values.gateway | nindent 10 }}
+          {{- if .Values.gateway.config.create }}
+          volumeMounts:
+            - name: gateway-config
+              mountPath: /app/config/config.yaml
+              subPath: config.yaml
+          {{- end }}
+          {{- with .Values.gateway.livenessProbe }}
+          livenessProbe:
+            {{- toYaml . | nindent 12 }}
+          {{- end }}
+          {{- with .Values.gateway.readinessProbe }}
+          readinessProbe:
+            {{- toYaml . | nindent 12 }}
+          {{- end }}
+          resources:
+            {{- toYaml .Values.gateway.resources | nindent 12 }}
+      {{- if .Values.gateway.config.create }}
+      volumes:
+        - name: gateway-config
+          configMap:
+            name: {{ include "litellm.gateway.fullname" . }}-config
+      {{- end }}
+      {{- with .Values.gateway.nodeSelector }}
+      nodeSelector:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      {{- with .Values.gateway.affinity }}
+      affinity:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      {{- with .Values.gateway.tolerations }}
+      tolerations:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+{{- end }}
--- a/helm/litellm/templates/gateway/hpa.yaml
+++ b/helm/litellm/templates/gateway/hpa.yaml
@ -0,0 +1,33 @@
+{{/*
+HorizontalPodAutoscaler for the gateway Deployment. Rendered only when both
+the gateway and its HPA are enabled; each Resource metric is emitted only if
+its target utilization is set in `gateway.hpa`.
+*/}}
+{{- if and .Values.gateway.enabled .Values.gateway.hpa.enabled }}
+{{- $hpa := .Values.gateway.hpa }}
+{{- $fullname := include "litellm.gateway.fullname" . }}
+apiVersion: autoscaling/v2
+kind: HorizontalPodAutoscaler
+metadata:
+  name: {{ $fullname }}
+  labels:
+    {{- include "litellm.commonLabels" . | nindent 4 }}
+    app.kubernetes.io/component: gateway
+spec:
+  scaleTargetRef:
+    apiVersion: apps/v1
+    kind: Deployment
+    name: {{ $fullname }}
+  minReplicas: {{ $hpa.minReplicas }}
+  maxReplicas: {{ $hpa.maxReplicas }}
+  metrics:
+    {{- with $hpa.targetCPUUtilizationPercentage }}
+    - type: Resource
+      resource:
+        name: cpu
+        target:
+          type: Utilization
+          averageUtilization: {{ . }}
+    {{- end }}
+    {{- with $hpa.targetMemoryUtilizationPercentage }}
+    - type: Resource
+      resource:
+        name: memory
+        target:
+          type: Utilization
+          averageUtilization: {{ . }}
+    {{- end }}
+{{- end }}
--- a/helm/litellm/templates/gateway/service.yaml
+++ b/helm/litellm/templates/gateway/service.yaml
@ -0,0 +1,18 @@
+{{/*
+Service fronting the gateway pods. Type and port come from
+`gateway.service`; traffic is forwarded to the container's named `http` port.
+*/}}
+{{- if .Values.gateway.enabled }}
+{{- $svc := .Values.gateway.service }}
+apiVersion: v1
+kind: Service
+metadata:
+  name: {{ include "litellm.gateway.fullname" . }}
+  labels:
+    {{- include "litellm.commonLabels" . | nindent 4 }}
+    app.kubernetes.io/component: gateway
+spec:
+  type: {{ $svc.type }}
+  selector:
+    {{- include "litellm.gateway.selectorLabels" . | nindent 4 }}
+  ports:
+    - name: http
+      port: {{ $svc.port }}
+      targetPort: http
+      protocol: TCP
+{{- end }}
--- a/helm/litellm/templates/ingress.yaml
+++ b/helm/litellm/templates/ingress.yaml
@ -0,0 +1,153 @@
+{{/*
+Single Ingress fanning out to the ui, gateway and backend Services by path.
+
+Each component's group of paths is rendered only when that component is
+enabled. The component Services are themselves gated on the same `enabled`
+flags, so this keeps the Ingress from referencing a Service the chart did
+not render.
+*/}}
+{{- if .Values.ingress.enabled -}}
+{{- $gatewayName := include "litellm.gateway.fullname" . -}}
+{{- $backendName := include "litellm.backend.fullname" . -}}
+{{- $uiName := include "litellm.ui.fullname" . -}}
+{{- $gatewayPort := .Values.gateway.service.port -}}
+{{- $backendPort := .Values.backend.service.port -}}
+{{- $uiPort := .Values.ui.service.port -}}
+{{/*
+  Gateway data-plane prefixes — must mirror gateway/routes/allowlist.py.
+  Versioned paths are listed explicitly to avoid routing management routes
+  (e.g. /v1/access_group, /v2/key/info, /v1/tool/*, /v1/agents, /v1/workflows,
+  /v2/user/info, /v2/team/list, /v2/model/info, /v2/login, /v2/guardrails/*,
+  /v1/mcp/*) onto the gateway via a broad /v1 or /v2 prefix.
+*/}}
+{{- $gatewayPrefixes := list
+    "/v1/chat" "/chat" "/v1/completions" "/completions" "/v1/embeddings" "/embeddings"
+    "/v1/moderations" "/moderations" "/v1/audio" "/audio" "/v1/images" "/images"
+    "/v1/files" "/files" "/v1/batches" "/batches" "/v1/fine_tuning" "/fine_tuning"
+    "/v1/fine-tuning" "/fine-tuning" "/v1/responses" "/responses" "/v1/threads" "/threads"
+    "/v1/assistants" "/assistants" "/v1/vector_stores" "/vector_stores" "/v1/indexes"
+    "/v1/models" "/models" "/openai" "/engines"
+    "/v1/messages" "/messages" "/v1/skills" "/v1/a2a"
+    "/v1/rerank" "/v2/rerank" "/rerank" "/v1/ocr" "/ocr" "/v1/rag" "/rag"
+    "/v1/video" "/v1/videos" "/video" "/videos" "/v1/search" "/search"
+    "/v1/containers" "/containers" "/v1/evals" "/v1/memory" "/queue/chat"
+    "/v1beta" "/interactions"
+    "/anthropic" "/azure" "/azure_ai" "/aws" "/bedrock" "/cohere" "/gemini" "/google"
+    "/vertex_ai" "/vertex-ai" "/assemblyai" "/eu.assemblyai" "/langfuse" "/vllm"
+    "/mistral" "/groq" "/voyage" "/cursor" "/milvus" "/openai_passthrough"
+    "/toolset"
+    "/v1/realtime" "/realtime"
+    "/health" "/metrics"
+-}}
+{{/*
+  /test is gateway-only as an EXACT path (GATEWAY_EXACT_PATHS), but its
+  children /test/connection and /test/tools/list are MCP-server management
+  endpoints kept only on the backend ("/test/" in BACKEND_PATH_PREFIXES).
+  A Prefix match here would route /test/* to the gateway, which trims those
+  routes at startup -> 404. So /test is rendered as a standalone Exact path
+  and /test/* falls through to the backend catch-all.
+*/}}
+apiVersion: networking.k8s.io/v1
+kind: Ingress
+metadata:
+  name: {{ include "litellm.fullname" . }}
+  labels:
+    {{- include "litellm.commonLabels" . | nindent 4 }}
+  {{- with .Values.ingress.annotations }}
+  annotations:
+    {{- toYaml . | nindent 4 }}
+  {{- end }}
+spec:
+  {{- with .Values.ingress.className }}
+  ingressClassName: {{ . | quote }}
+  {{- end }}
+  {{- with .Values.ingress.tls }}
+  tls:
+    {{- toYaml . | nindent 4 }}
+  {{- end }}
+  rules:
+    - {{- with .Values.ingress.host }}
+      host: {{ . | quote }}
+      {{- end }}
+      http:
+        paths:
+          {{- if .Values.ui.enabled }}
+          # --- UI (Next.js static export) ---
+          - path: /
+            pathType: Exact
+            backend:
+              service:
+                name: {{ $uiName }}
+                port:
+                  number: {{ $uiPort }}
+          - path: /favicon.ico
+            pathType: Exact
+            backend:
+              service:
+                name: {{ $uiName }}
+                port:
+                  number: {{ $uiPort }}
+          - path: /litellm-asset-prefix
+            pathType: Prefix
+            backend:
+              service:
+                name: {{ $uiName }}
+                port:
+                  number: {{ $uiPort }}
+          - path: /_next
+            pathType: Prefix
+            backend:
+              service:
+                name: {{ $uiName }}
+                port:
+                  number: {{ $uiPort }}
+          # /ui/* is where the Next.js SPA serves its login + dashboard
+          # routes (e.g. /ui/login). Without this, /ui/* falls into the
+          # catch-all → backend → 404.
+          - path: /ui
+            pathType: Prefix
+            backend:
+              service:
+                name: {{ $uiName }}
+                port:
+                  number: {{ $uiPort }}
+          # Next.js App Router (output: "export", basePath: "") emits the
+          # RSC/flight payload for every route as a ROOT-level <route>.txt
+          # (/index.txt, /teams.txt, /__next._tree.txt, ...). The client
+          # router fetches these on every soft navigation / prefetch as
+          # <route>.txt?_rsc=<hash> (the query string is irrelevant to path
+          # matching). They are not under /ui, /_next, or
+          # /litellm-asset-prefix, so without this rule they fall to the
+          # backend catch-all → 404 → client-side navigation never settles
+          # and the login flow spins in an infinite redirect loop
+          # (/ ⇄ /ui/login). ui/nginx.conf already serves *.txt from the
+          # export; this rule only routes the request to it. Needs an
+          # ingress controller whose ImplementationSpecific path is a
+          # wildcard pattern (AWS ALB: `*` = 0+ chars); this chart targets
+          # the AWS Load Balancer Controller.
+          - path: /*.txt
+            pathType: ImplementationSpecific
+            backend:
+              service:
+                name: {{ $uiName }}
+                port:
+                  number: {{ $uiPort }}
+          {{- end }}
+          {{- if .Values.gateway.enabled }}
+          # --- Gateway data plane ---
+          # Exact /test only (see the $gatewayPrefixes comment above);
+          # /test/* MCP management endpoints fall to the backend catch-all.
+          - path: /test
+            pathType: Exact
+            backend:
+              service:
+                name: {{ $gatewayName }}
+                port:
+                  number: {{ $gatewayPort }}
+          {{- range $gatewayPrefixes }}
+          - path: {{ . }}
+            pathType: Prefix
+            backend:
+              service:
+                name: {{ $gatewayName }}
+                port:
+                  number: {{ $gatewayPort }}
+          {{- end }}
+          {{- end }}
+          {{- if .Values.backend.enabled }}
+          # --- Catch-all → backend (management API: /key/*, /user/*, /team/*, ...) ---
+          - path: /
+            pathType: Prefix
+            backend:
+              service:
+                name: {{ $backendName }}
+                port:
+                  number: {{ $backendPort }}
+          {{- end }}
+{{- end }}
--- a/helm/litellm/templates/migrations-job.yaml
+++ b/helm/litellm/templates/migrations-job.yaml
@ -0,0 +1,46 @@
+{{- if .Values.migrationJob.enabled -}}
+# Pre-install / pre-upgrade hook that runs `prisma migrate deploy` against
+# the writer database before the gateway and backend Deployments are rolled
+# out. Required because the gateway and backend both spin up Prisma at
+# startup and assume the LiteLLM schema (LiteLLM_Config,
+# LiteLLM_VerificationToken, LiteLLM_SpendLogs, ...) already exists.
+#
+# Running this pre-upgrade closes the window where new application pods would
+# otherwise serve traffic against the previous release's unmigrated schema.
+#
+# NOTE(review): the Job runs under the chart's shared ServiceAccount. The
+# ServiceAccount template carries no hook annotations, so with
+# `serviceAccount.create: true` it may not exist yet when this pre-install
+# hook runs on a first install — confirm.
+apiVersion: batch/v1
+kind: Job
+metadata:
+  name: {{ include "litellm.fullname" . }}-migrations
+  labels:
+    {{- include "litellm.commonLabels" . | nindent 4 }}
+    app.kubernetes.io/component: migrations
+  annotations:
+    # Run as a Helm hook before release resources are installed / upgraded.
+    helm.sh/hook: pre-install,pre-upgrade
+    # Delete the previous run's Job before creating this one, so the fixed
+    # Job name above can be reused on every install / upgrade.
+    helm.sh/hook-delete-policy: before-hook-creation
+    helm.sh/hook-weight: "0"
+spec:
+  backoffLimit: {{ .Values.migrationJob.backoffLimit }}
+  ttlSecondsAfterFinished: {{ .Values.migrationJob.ttlSecondsAfterFinished }}
+  template:
+    metadata:
+      labels:
+        {{- include "litellm.commonLabels" . | nindent 8 }}
+        app.kubernetes.io/component: migrations
+    spec:
+      # Never restart a failed container in place; retries are governed by
+      # the Job's backoffLimit.
+      restartPolicy: Never
+      serviceAccountName: {{ include "litellm.serviceAccountName" . }}
+      {{- with .Values.imagePullSecrets }}
+      imagePullSecrets:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      containers:
+        - name: prisma-migrations
+          image: "{{ .Values.migrationJob.image.repository }}:{{ .Values.migrationJob.image.tag | default .Chart.AppVersion }}"
+          imagePullPolicy: {{ .Values.migrationJob.image.pullPolicy }}
+          env:
+            # Shared server env helper (the same one the gateway and backend
+            # Deployments include); it also appends `migrationJob.extraEnv`.
+            {{- include "litellm.serverEnv" (dict "root" $ "component" .Values.migrationJob) | nindent 12 }}
+          {{- with .Values.migrationJob.resources }}
+          resources:
+            {{- toYaml . | nindent 12 }}
+          {{- end }}
+{{- end }}
--- a/helm/litellm/templates/serviceaccount.yaml
+++ b/helm/litellm/templates/serviceaccount.yaml
@ -0,0 +1,13 @@
+{{/*
+Optional ServiceAccount, rendered only when `serviceAccount.create` is true.
+Its name comes from the `litellm.serviceAccountName` helper; annotations and
+the token automount flag come straight from `serviceAccount` values.
+*/}}
+{{- $sa := .Values.serviceAccount }}
+{{- if $sa.create -}}
+apiVersion: v1
+kind: ServiceAccount
+metadata:
+  name: {{ include "litellm.serviceAccountName" . }}
+  labels:
+    {{- include "litellm.commonLabels" . | nindent 4 }}
+  {{- with $sa.annotations }}
+  annotations:
+    {{- toYaml . | nindent 4 }}
+  {{- end }}
+automountServiceAccountToken: {{ $sa.automount }}
+{{- end }}
--- a/helm/litellm/templates/ui/deployment.yaml
+++ b/helm/litellm/templates/ui/deployment.yaml
@ -0,0 +1,70 @@
+{{/*
+Deployment for the ui component (static dashboard, container port 3000).
+
+* `env:` is rendered only when at least one of `ui.logLevel`,
+  `ui.backendUrl` or `ui.extraEnv` is set, so the key never renders as a
+  bare null.
+* `spec.replicas` is rendered only when the ui HPA is disabled AND
+  `ui.replicaCount` is set; otherwise the field is omitted and the rendered
+  manifest is unchanged.
+*/}}
+{{- if .Values.ui.enabled }}
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: {{ include "litellm.ui.fullname" . }}
+  labels:
+    {{- include "litellm.commonLabels" . | nindent 4 }}
+    app.kubernetes.io/component: ui
+spec:
+  {{- if and (not .Values.ui.hpa.enabled) (hasKey .Values.ui "replicaCount") }}
+  replicas: {{ .Values.ui.replicaCount }}
+  {{- end }}
+  selector:
+    matchLabels:
+      {{- include "litellm.ui.selectorLabels" . | nindent 6 }}
+  template:
+    metadata:
+      {{- with .Values.ui.podAnnotations }}
+      annotations:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      labels:
+        {{- include "litellm.ui.selectorLabels" . | nindent 8 }}
+    spec:
+      serviceAccountName: {{ include "litellm.serviceAccountName" . }}
+      {{- with .Values.imagePullSecrets }}
+      imagePullSecrets:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      containers:
+        - name: ui
+          image: "{{ .Values.ui.image.repository }}:{{ .Values.ui.image.tag | default .Chart.AppVersion }}"
+          imagePullPolicy: {{ .Values.ui.image.pullPolicy }}
+          ports:
+            - name: http
+              containerPort: 3000
+              protocol: TCP
+          {{- if or .Values.ui.logLevel .Values.ui.backendUrl .Values.ui.extraEnv }}
+          env:
+            {{- if .Values.ui.logLevel }}
+            - name: LITELLM_LOG
+              value: {{ .Values.ui.logLevel | quote }}
+            {{- end }}
+            {{- if .Values.ui.backendUrl }}
+            - name: LITELLM_BACKEND_URL
+              value: {{ .Values.ui.backendUrl | quote }}
+            {{- end }}
+            {{- with .Values.ui.extraEnv }}
+            {{- toYaml . | nindent 12 }}
+            {{- end }}
+          {{- end }}
+          {{- include "litellm.envFrom" .Values.ui | nindent 10 }}
+          {{- with .Values.ui.livenessProbe }}
+          livenessProbe:
+            {{- toYaml . | nindent 12 }}
+          {{- end }}
+          {{- with .Values.ui.readinessProbe }}
+          readinessProbe:
+            {{- toYaml . | nindent 12 }}
+          {{- end }}
+          resources:
+            {{- toYaml .Values.ui.resources | nindent 12 }}
+      {{- with .Values.ui.nodeSelector }}
+      nodeSelector:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      {{- with .Values.ui.affinity }}
+      affinity:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      {{- with .Values.ui.tolerations }}
+      tolerations:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+{{- end }}
--- a/helm/litellm/templates/ui/hpa.yaml
+++ b/helm/litellm/templates/ui/hpa.yaml
@ -0,0 +1,33 @@
+{{/*
+HorizontalPodAutoscaler for the ui Deployment. Rendered only when both the
+ui and its HPA are enabled; each Resource metric is emitted only if its
+target utilization is set in `ui.hpa`.
+*/}}
+{{- if and .Values.ui.enabled .Values.ui.hpa.enabled }}
+{{- $hpa := .Values.ui.hpa }}
+{{- $fullname := include "litellm.ui.fullname" . }}
+apiVersion: autoscaling/v2
+kind: HorizontalPodAutoscaler
+metadata:
+  name: {{ $fullname }}
+  labels:
+    {{- include "litellm.commonLabels" . | nindent 4 }}
+    app.kubernetes.io/component: ui
+spec:
+  scaleTargetRef:
+    apiVersion: apps/v1
+    kind: Deployment
+    name: {{ $fullname }}
+  minReplicas: {{ $hpa.minReplicas }}
+  maxReplicas: {{ $hpa.maxReplicas }}
+  metrics:
+    {{- with $hpa.targetCPUUtilizationPercentage }}
+    - type: Resource
+      resource:
+        name: cpu
+        target:
+          type: Utilization
+          averageUtilization: {{ . }}
+    {{- end }}
+    {{- with $hpa.targetMemoryUtilizationPercentage }}
+    - type: Resource
+      resource:
+        name: memory
+        target:
+          type: Utilization
+          averageUtilization: {{ . }}
+    {{- end }}
+{{- end }}
--- a/helm/litellm/templates/ui/service.yaml
+++ b/helm/litellm/templates/ui/service.yaml
@ -0,0 +1,18 @@
+{{/*
+Service fronting the ui pods. Type and port come from `ui.service`; traffic
+is forwarded to the container's named `http` port.
+*/}}
+{{- if .Values.ui.enabled }}
+{{- $svc := .Values.ui.service }}
+apiVersion: v1
+kind: Service
+metadata:
+  name: {{ include "litellm.ui.fullname" . }}
+  labels:
+    {{- include "litellm.commonLabels" . | nindent 4 }}
+    app.kubernetes.io/component: ui
+spec:
+  type: {{ $svc.type }}
+  selector:
+    {{- include "litellm.ui.selectorLabels" . | nindent 4 }}
+  ports:
+    - name: http
+      port: {{ $svc.port }}
+      targetPort: http
+      protocol: TCP
+{{- end }}
--- a/helm/litellm/values.yaml
+++ b/helm/litellm/values.yaml
@ -0,0 +1,225 @@
+# LiteLLM helm chart values
+
+nameOverride: ""
+fullnameOverride: ""
+
+imagePullSecrets: []
+
+# Optional Ingress wiring the three component Services behind a single L7
+# entrypoint. Required when serving the static UI bundle over the network.
+ingress:
+  enabled: false
+  className: ""
+  annotations: {}
+  host: ""        # optional; if set, becomes the rule's host
+  tls: []
+
+# Shared ServiceAccount used by all three component Deployments and the
+# migration Job. Set `create: true` to have the chart provision it (e.g. when
+# wiring an EKS Pod Identity association by SA name). Set `name` to use an
+# existing SA (chart-created or out-of-band). When both are empty / false,
+# pods run with the namespace's `default` SA.
+serviceAccount:
+  create: false
+  automount: true
+  annotations: {}
+  name: ""
+
+# Pre-install / pre-upgrade Helm hook that runs `prisma migrate deploy`
+# against the writer database, creating the LiteLLM schema (tables that
+# gateway + backend assume exist at startup: LiteLLM_Config,
+# LiteLLM_VerificationToken, LiteLLM_SpendLogs, ...). Disable if your
+# pipeline runs migrations out-of-band.
+#
+# Uses a dedicated `litellm-migrations` image (prisma CLI + the migration
+# files from `litellm-proxy-extras`) instead of the backend image, so the
+# Job doesn't drag in the rest of the proxy and doesn't run `prisma
+# generate` — the migration engine doesn't need the generated client.
+migrationJob:
+  enabled: true
+  backoffLimit: 4
+  ttlSecondsAfterFinished: 120
+  resources: {}
+  image:
+    repository: ghcr.io/berriai/litellm-migrations
+    tag: ""              # defaults to .Chart.AppVersion
+    pullPolicy: IfNotPresent
+  # Extra env appended to the migration container. The migration entrypoint
+  # uses the v2 resolver by default (no diff-and-force recovery — avoids the
+  # schema thrashing seen during rolling deploys). To opt back into the v1
+  # resolver, append `- name: USE_V2_MIGRATION_RESOLVER` / `value: "false"`.
+  extraEnv: []
+
+# Required: a master key used by gateway + backend to mint/verify proxy tokens.
+# Must reference an existing Secret.
+masterKey:
+  secretName: litellm-master-key-secret              # name of a Secret containing the master key
+  secretKey: master-key
+
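+# External Postgres connection. `writer` is the primary (read/write)
+# endpoint; the optional `reader` below adds read-replica routing.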
+database:
+  writer:
+    host: ""
+    port: 5432
+    dbname: ""
+    schema: ""
+    useIAMAuth: false
+    passwordSecret:
+      name: litellm-writer-secret
+      usernameKey: username
+      passwordKey: password
+
+  # Optional read-replica routing. When `reader.host` is set, the proxy routes
+  # reads (find_*, count, group_by, query_raw/_first) to this endpoint while
+  # writes stay on the writer. Leave `reader.host` empty to disable.
+  reader:
+    host: ""
+    port: 5432
+    dbname: ""
+    schema: ""
+    useIAMAuth: false
+    passwordSecret:
+      name: litellm-reader-secret
+      usernameKey: username
+      passwordKey: password
+
+# Optional Redis (caching, rate limiting). Leave host empty to disable.
+#
+# Set `cluster: true` for Redis Cluster mode (e.g. AWS ElastiCache Cluster,
+# self-hosted Redis Cluster). The chart emits REDIS_CLUSTER_NODES from
+# `host` / `port` as the single seed; the cluster client discovers the
+# remaining nodes from CLUSTER SLOTS at startup.
+redis:
+  cluster: false
+  host: ""
+  port: 6379
+  passwordSecret:
+    name: "" # Leave empty for auth-less Redis
+    passwordKey: password
+
+# ---------- gateway (LLM data plane) ----------
+gateway:
+  enabled: true
+  logLevel: INFO
+  # Number of uvicorn worker processes per gateway pod. Sets NUM_WORKERS,
+  # consumed by the gateway image entrypoint. Default is 1.
+  numWorkers: 1
+  extraEnv: [] # Add extra environment variables to the gateway
+  envConfigMaps: [] # Add extra environment variables to the gateway from config maps
+  envSecrets: [] # Add extra environment variables to the gateway from secrets
+  config:
+    create: true
+    proxy_config: {}
+  image:
+    repository: ghcr.io/berriai/litellm-gateway
+    tag: ""              # defaults to .Chart.AppVersion
+    pullPolicy: IfNotPresent
+  service:
+    type: ClusterIP
+    port: 4000
+  resources:
+    requests:
+      cpu: "1"
+      memory: 4Gi
+    limits:
+      cpu: "2"
+      memory: 4Gi
+  livenessProbe:
+    httpGet: { path: /health/liveliness, port: http }
+    initialDelaySeconds: 10
+    periodSeconds: 15
+  readinessProbe:
+    httpGet: { path: /health/readiness, port: http }
+    initialDelaySeconds: 5
+    periodSeconds: 10
+  hpa:
+    enabled: true
+    minReplicas: 1
+    maxReplicas: 10
+    targetCPUUtilizationPercentage: 70
+    targetMemoryUtilizationPercentage: 80
+  podAnnotations: {}
+  nodeSelector: {}
+  tolerations: []
+  affinity: {}
+
+# ---------- backend (UI / management API) ----------
+backend:
+  enabled: true
+  logLevel: INFO
+  extraEnv: []
+  envConfigMaps: []
+  envSecrets: []
+  image:
+    repository: ghcr.io/berriai/litellm-backend
+    tag: ""
+    pullPolicy: IfNotPresent
+  service:
+    type: ClusterIP
+    port: 4001
+  resources:
+    requests:
+      cpu: "1"
+      memory: 4Gi
+    limits:
+      cpu: "2"
+      memory: 4Gi
+  livenessProbe:
+    httpGet: { path: /health/liveliness, port: http }
+    initialDelaySeconds: 10
+    periodSeconds: 15
+  readinessProbe:
+    httpGet: { path: /health/readiness, port: http }
+    initialDelaySeconds: 5
+    periodSeconds: 10
+  hpa:
+    enabled: true
+    minReplicas: 1
+    maxReplicas: 4
+    targetCPUUtilizationPercentage: 70
+  podAnnotations: {}
+  nodeSelector: {}
+  tolerations: []
+  affinity: {}
+
+# ---------- ui (Next.js static dashboard) ----------
+ui:
+  enabled: true
+  logLevel: INFO
+  extraEnv: []
+  envConfigMaps: []
+  envSecrets: []
+  image:
+    repository: ghcr.io/berriai/litellm-ui
+    tag: ""
+    pullPolicy: IfNotPresent
+  service:
+    type: ClusterIP
+    port: 3000
+  # The dashboard expects to know where to reach the backend API. Set this to
+  # the externally-routable URL (typically the ingress host + /api or similar).
+  backendUrl: ""
+  resources:
+    requests:
+      cpu: 500m
+      memory: 500Mi
+    limits:
+      cpu: "1"
+      memory: 1Gi
+  livenessProbe:
+    httpGet: { path: /, port: http }
+    initialDelaySeconds: 5
+    periodSeconds: 20
+  readinessProbe:
+    httpGet: { path: /, port: http }
+    initialDelaySeconds: 2
+    periodSeconds: 10
+  hpa:
+    enabled: false
+    minReplicas: 1
+    maxReplicas: 3
+    targetCPUUtilizationPercentage: 80
+  podAnnotations: {}
+  nodeSelector: {}
+  tolerations: []
+  affinity: {}
--- a/license_cache.json
+++ b/license_cache.json
--- a/litellm/proxy/db/db_url_settings.py
+++ b/litellm/proxy/db/db_url_settings.py
@ -0,0 +1,263 @@
+"""Assemble DATABASE_URL (+ optional DATABASE_URL_READ_REPLICA) from env.
+
+The CLI (`proxy_cli.py`) assembles ``DATABASE_URL`` from discrete
+``DATABASE_*`` env vars before Prisma initializes. The componentized
+entrypoints (gateway / backend / migrations) bypass the CLI by uvicorn'ing
+the app directly, so they call ``DatabaseURLSettings.from_env().apply_to_env()``
+to do the same thing before importing ``proxy_server``.
+
+The env var names this module reads are exactly the ones emitted by the
+``helm/litellm`` chart's ``litellm.serverEnv`` block
+(``helm/litellm/templates/_helpers.tpl``). Both auth styles and both
+endpoints are covered:
+
+  * IAM auth (``IAM_TOKEN_DB_AUTH`` truthy): mint a short-lived RDS IAM
+    token and embed it as the password. The writer URL is always
+    (re)written because the token is freshly minted on every startup. The
+    chart omits ``DATABASE_PASSWORD`` in this mode.
+  * Password auth: build a percent-encoded URL from ``DATABASE_PASSWORD``.
+    The chart emits the discrete ``DATABASE_*`` fields (never a
+    pre-assembled URL), so URL-reserved characters in the password survive
+    instead of corrupting the URL. A pre-existing ``DATABASE_URL`` — e.g.
+    one an operator pinned via ``extraEnv`` — is left untouched and wins.
+
+The read replica is opt-in via ``DATABASE_HOST_READ_REPLICA`` and never
+clobbers a pre-existing ``DATABASE_URL_READ_REPLICA``, so an IAM writer can
+run alongside a password-auth reader (or a precomputed reader URL). Reader
+IAM is gated on the single global ``IAM_TOKEN_DB_AUTH`` flag — the chart
+only emits the reader IAM env vars when the writer also uses IAM auth.
+Reader-side fields fall back to the writer's user / name / schema / port /
+password when their ``*_READ_REPLICA`` counterpart is unset.
+"""
+
+import os
+import urllib.parse
+from typing import Optional, cast
+
+from pydantic import AliasChoices, Field
+from pydantic_settings import BaseSettings, SettingsConfigDict
+
+# Imported as a module (not `from ... import generate_iam_auth_token`) so the
+# AWS-touching token mint stays patchable at its canonical location in tests.
+from litellm.proxy.auth import rds_iam_token
+
+_IAM_ENV_KEY = "IAM_TOKEN_DB_AUTH"
+_DEFAULT_PG_PORT = "5432"
+
+
+class DatabaseURLSettings(BaseSettings):
+    """Discrete ``DATABASE_*`` env vars, loaded once at process start.
+
+    Field names are internal; ``validation_alias`` pins each one to the exact
+    env var the helm chart emits. ``DATABASE_USER`` doubles as
+    ``DATABASE_USERNAME`` for parity with ``construct_database_url_from_env_vars``.
+    """
+
+    model_config = SettingsConfigDict(case_sensitive=False, extra="ignore")
+
+    iam_token_db_auth: bool = Field(default=False, validation_alias=_IAM_ENV_KEY)
+
+    # Writer
+    database_url: Optional[str] = Field(default=None, validation_alias="DATABASE_URL")
+    database_host: Optional[str] = Field(default=None, validation_alias="DATABASE_HOST")
+    database_port: str = Field(
+        default=_DEFAULT_PG_PORT, validation_alias="DATABASE_PORT"
+    )
+    database_user: Optional[str] = Field(
+        default=None,
+        validation_alias=AliasChoices("DATABASE_USER", "DATABASE_USERNAME"),
+    )
+    database_name: Optional[str] = Field(default=None, validation_alias="DATABASE_NAME")
+    database_schema: Optional[str] = Field(
+        default=None, validation_alias="DATABASE_SCHEMA"
+    )
+    database_password: Optional[str] = Field(
+        default=None, validation_alias="DATABASE_PASSWORD"
+    )
+
+    # Read replica
+    database_url_read_replica: Optional[str] = Field(
+        default=None, validation_alias="DATABASE_URL_READ_REPLICA"
+    )
+    database_host_read_replica: Optional[str] = Field(
+        default=None, validation_alias="DATABASE_HOST_READ_REPLICA"
+    )
+    database_port_read_replica: Optional[str] = Field(
+        default=None, validation_alias="DATABASE_PORT_READ_REPLICA"
+    )
+    database_user_read_replica: Optional[str] = Field(
+        default=None,
+        validation_alias=AliasChoices(
+            "DATABASE_USER_READ_REPLICA", "DATABASE_USERNAME_READ_REPLICA"
+        ),
+    )
+    database_name_read_replica: Optional[str] = Field(
+        default=None, validation_alias="DATABASE_NAME_READ_REPLICA"
+    )
+    database_schema_read_replica: Optional[str] = Field(
+        default=None, validation_alias="DATABASE_SCHEMA_READ_REPLICA"
+    )
+    database_password_read_replica: Optional[str] = Field(
+        default=None, validation_alias="DATABASE_PASSWORD_READ_REPLICA"
+    )
+
+    @classmethod
+    def from_env(cls) -> "DatabaseURLSettings":
+        """Load the settings from ``os.environ`` (read at call time)."""
+        return cls()
+
+    def build_writer_url(self) -> Optional[str]:
+        """Return the writer URL to set, or ``None`` to leave it as-is.
+
+        Raises ``RuntimeError`` (naming the offending vars) when IAM auth is
+        enabled but a required field is missing — the proxy cannot recover
+        from this and a clear startup error beats a Prisma connect failure.
+        """
+        if self.iam_token_db_auth:
+            missing = [
+                env
+                for env, val in (
+                    ("DATABASE_HOST", self.database_host),
+                    ("DATABASE_USER", self.database_user),
+                    ("DATABASE_NAME", self.database_name),
+                )
+                if not val
+            ]
+            if missing:
+                raise RuntimeError(
+                    "IAM_TOKEN_DB_AUTH is enabled but required DB env var(s) "
+                    f"are unset: {', '.join(missing)}. Set them so the writer "
+                    "DATABASE_URL can be assembled with a minted IAM token."
+                )
+            host = cast(str, self.database_host)
+            user = cast(str, self.database_user)
+            name = cast(str, self.database_name)
+            # IAM token is already URL-quoted by generate_iam_auth_token;
+            # user/name embedded raw (parity with proxy_cli.py / IAMEndpoint).
+            token = rds_iam_token.generate_iam_auth_token(
+                db_host=host, db_port=self.database_port, db_user=user
+            )
+            url = f"postgresql://{user}:{token}@{host}:{self.database_port}/{name}"
+            if self.database_schema:
+                url += f"?schema={self.database_schema}"
+            return url
+
+        # Password auth: an operator-pinned DATABASE_URL always wins.
+        if self.database_url:
+            return None
+        if self.database_host and self.database_user and self.database_name:
+            return self._password_url(
+                user=self.database_user,
+                password=self.database_password,
+                host=self.database_host,
+                port=self.database_port,
+                name=self.database_name,
+                schema=self.database_schema,
+            )
+        return None
+
+    def build_reader_url(self) -> Optional[str]:
+        """Return the read-replica URL to set, or ``None`` to leave it as-is.
+
+        Opt-in via ``DATABASE_HOST_READ_REPLICA``; never clobbers a
+        pre-existing ``DATABASE_URL_READ_REPLICA``. Reader fields fall back
+        to the writer's values.
+        """
+        if not self.database_host_read_replica:
+            return None  # reader is opt-in
+        if self.database_url_read_replica:
+            return None  # never clobber an operator-supplied reader URL
+
+        host = self.database_host_read_replica
+        port = self.database_port_read_replica or self.database_port
+        user = self.database_user_read_replica or self.database_user
+        name = self.database_name_read_replica or self.database_name
+        schema = self.database_schema_read_replica or self.database_schema
+        password = self.database_password_read_replica or self.database_password
+
+        if self.iam_token_db_auth:
+            missing = [
+                env
+                for env, val in (
+                    ("DATABASE_USER[_READ_REPLICA]", user),
+                    ("DATABASE_NAME[_READ_REPLICA]", name),
+                )
+                if not val
+            ]
+            if missing:
+                raise RuntimeError(
+                    "IAM_TOKEN_DB_AUTH is enabled and DATABASE_HOST_READ_REPLICA "
+                    "is set, but the reader could not resolve: "
+                    f"{', '.join(missing)} (no *_READ_REPLICA value and no "
+                    "writer fallback). Set the reader fields or the writer "
+                    "defaults."
+                )
+            user = cast(str, user)
+            name = cast(str, name)
+            token = rds_iam_token.generate_iam_auth_token(
+                db_host=host, db_port=port, db_user=user
+            )
+            url = f"postgresql://{user}:{token}@{host}:{port}/{name}"
+            if schema:
+                url += f"?schema={schema}"
+            return url
+
+        if user and name:
+            return self._password_url(
+                user=user,
+                password=password,
+                host=host,
+                port=port,
+                name=name,
+                schema=schema,
+            )
+        return None
+
+    @staticmethod
+    def _password_url(
+        *,
+        user: str,
+        password: Optional[str],
+        host: str,
+        port: str,
+        name: str,
+        schema: Optional[str],
+    ) -> str:
+        """Percent-encode credentials into a ``postgresql://`` URL.
+
+        Parity with ``construct_database_url_from_env_vars`` in
+        ``proxy/utils.py``; ``password`` may be empty for a passwordless URL.
+        """
+        quote = urllib.parse.quote_plus
+        user_p = quote(user)
+        name_p = quote(name)
+        if password:
+            url = f"postgresql://{user_p}:{quote(password)}@{host}:{port}/{name_p}"
+        else:
+            url = f"postgresql://{user_p}@{host}:{port}/{name_p}"
+        if schema:
+            url += f"?schema={schema}"
+        return url
+
+    def apply_to_env(self) -> bool:
+        """Write the assembled URL(s) into ``os.environ``.
+
+        Returns True iff this call set ``DATABASE_URL`` (IAM mint, or
+        password auth that assembled a fresh URL). False means there was
+        nothing to do — an operator-pinned URL, or no discrete fields.
+        """
+        wrote_writer = False
+        writer_url = self.build_writer_url()
+        if writer_url is not None:
+            os.environ["DATABASE_URL"] = writer_url
+            if self.iam_token_db_auth:
+                # Normalize the toggle so downstream readers (PrismaWrapper's
+                # IAM refresh) reliably see IAM on, regardless of spelling.
+                os.environ[_IAM_ENV_KEY] = "True"
+            wrote_writer = True
+
+        reader_url = self.build_reader_url()
+        if reader_url is not None:
+            os.environ["DATABASE_URL_READ_REPLICA"] = reader_url
+
+        return wrote_writer
--- a/migrations/Dockerfile
+++ b/migrations/Dockerfile
@ -0,0 +1,98 @@
+# Base images are pinned by digest and can be overridden with --build-arg.
+# ARGs declared before the first FROM are only usable in FROM lines.
+ARG LITELLM_BUILD_IMAGE=cgr.dev/chainguard/wolfi-base@sha256:31da6565f35af6401031c1d7aa91dc84ac76c5c48edd17fb90f0ed9e3173c7a9
+ARG LITELLM_RUNTIME_IMAGE=cgr.dev/chainguard/wolfi-base@sha256:31da6565f35af6401031c1d7aa91dc84ac76c5c48edd17fb90f0ed9e3173c7a9
+ARG UV_IMAGE=ghcr.io/astral-sh/uv:0.11.7@sha256:240fb85ab0f263ef12f492d8476aa3a2e4e1e333f7d67fbdd923d00a506a516a
+
+# Used only as a source for the uv / uvx binaries copied into the builder.
+FROM $UV_IMAGE AS uvbin
+
+# ---------- Builder ----------
+#
+# Minimal install for `prisma migrate deploy`. We deliberately skip the heavy
+# `proxy-runtime` (otel, sentry, ddtrace, pypdf, google-genai, anthropic-vertex,
+# ...) and `semantic-router` extras that the gateway/backend pull in — the
+# migration engine doesn't need them. We DO install `--extra proxy` so that
+# `migrations/run.py` can import `DatabaseURLSettings` from
+# `litellm.proxy.db.db_url_settings`: that module needs `pydantic-settings`
+# (declared in the `proxy` extra) and `litellm.proxy.auth.rds_iam_token` for
+# the `IAM_TOKEN_DB_AUTH=true` token mint. It is the same helper the gateway
+# and backend entrypoints use to assemble `DATABASE_URL` at pod startup. And
+# `--extra extra_proxy` provides the `prisma` CLI + the secret-manager
+# backends `litellm.secret_managers.main` lazily imports.
+#
+# `prisma generate` runs once at BUILD time to (a) install the Node-based
+# Prisma CLI into the binary cache and (b) download the migration / query
+# engine binaries. The Python client it also produces is unused by this
+# image's runtime entrypoint — that's fine, it's a few hundred KB and the
+# alternative (`prisma py fetch`) doesn't reliably trigger engine downloads
+# under nodeenv. Crucially we do NOT run `prisma generate` at RUNTIME; the
+# old migration job did, on every pod start, which is the wasteful behaviour
+# the componentization is fixing.
+FROM $LITELLM_BUILD_IMAGE AS builder
+
+WORKDIR /app
+USER root
+
+COPY --from=uvbin /uv /uvx /usr/local/bin/
+
+RUN apk add --no-cache bash gcc python3 python3-dev openssl openssl-dev libsndfile
+
+# The virtualenv lives at /app/.venv so the runtime stage picks it up when it
+# copies /app; UV_PYTHON_DOWNLOADS=0 keeps uv on the apk-installed python3.
+ENV UV_PROJECT_ENVIRONMENT=/app/.venv \
+    UV_LINK_MODE=copy \
+    UV_COMPILE_BYTECODE=1 \
+    UV_PYTHON_DOWNLOADS=0 \
+    PATH="/app/.venv/bin:${PATH}"
+
+# Stage 1 — install third-party deps only (cached by pyproject.toml/uv.lock).
+# The manifests are bind-mounted rather than copied, so this layer depends
+# only on their content and not on the rest of the source tree.
+RUN --mount=type=cache,target=/root/.cache/uv \
+    --mount=type=bind,source=pyproject.toml,target=pyproject.toml \
+    --mount=type=bind,source=uv.lock,target=uv.lock \
+    --mount=type=bind,source=enterprise/pyproject.toml,target=enterprise/pyproject.toml \
+    --mount=type=bind,source=litellm-proxy-extras/pyproject.toml,target=litellm-proxy-extras/pyproject.toml \
+    uv sync --frozen --no-install-project --no-install-workspace --no-default-groups --no-editable \
+        --extra proxy \
+        --extra extra_proxy \
+        --python python3
+
+# Stage 2 — copy source and install the project + workspace members.
+COPY . .
+
+RUN --mount=type=cache,target=/root/.cache/uv \
+    uv sync --frozen --no-default-groups --no-editable \
+        --extra proxy \
+        --extra extra_proxy \
+        --python python3
+
+# Place the job entrypoint at the path the runtime ENTRYPOINT expects.
+COPY migrations/run.py /app/run.py
+
+# Pre-warm the Prisma binary cache so the Job pod doesn't reach the
+# internet on first start. This matches what the backend Dockerfile does:
+# `prisma generate` runs nodeenv (downloads Node), installs the prisma npm
+# CLI, downloads the engine binaries for each `binaryTarget` in
+# schema.prisma, AND emits the generated Python client. We don't need the
+# client at runtime — the migration job invokes `prisma migrate deploy`
+# via subprocess — but having it cached is harmless and the alternative
+# (`prisma py fetch`) doesn't reliably trigger engine downloads.
+#
+# HOME is overridden so the cache lands under /home/nonroot/.cache, the
+# directory the runtime stage copies. NOTE(review): the chown below only
+# affects this builder stage; the runtime stage re-applies ownership with
+# `COPY --chown`.
+RUN mkdir -p /home/nonroot && \
+    HOME=/home/nonroot prisma generate --schema=./schema.prisma && \
+    chown -R nonroot:nonroot /home/nonroot/.cache
+
+# ---------- Runtime ----------
+# No compiler or -dev packages here: everything that needed building was
+# installed in the builder stage and is copied over below.
+FROM $LITELLM_RUNTIME_IMAGE AS runtime
+
+# Root is needed only for the package install; the stage drops to `nonroot`
+# before the entrypoint.
+USER root
+
+RUN apk add --no-cache bash openssl tzdata python3 libsndfile libatomic
+
+# wolfi-base ships an unprivileged `nonroot` account (UID/GID 65532). The
+# Prisma engine binaries are dynamically linked against libssl/libcrypto, so
+# openssl stays in the runtime layer.
+WORKDIR /app
+# HOME matches the HOME used for `prisma generate` in the builder, i.e. the
+# parent of the `.cache` directory copied below.
+ENV HOME=/home/nonroot \
+    PATH="/app/.venv/bin:${PATH}" \
+    PYTHONPATH="/app" \
+    PYTHONDONTWRITEBYTECODE=1 \
+    PYTHONUNBUFFERED=1
+
+# Bring over the whole /app tree (virtualenv, source checkout, run.py) and the
+# pre-warmed Prisma cache, both re-owned to nonroot.
+COPY --from=builder --chown=nonroot:nonroot /app /app
+COPY --from=builder --chown=nonroot:nonroot /home/nonroot/.cache /home/nonroot/.cache
+
+USER nonroot
+
+# Exec form: python3 runs as PID 1 and its exit code (0 success, 1 failure,
+# see migrations/run.py) becomes the container's exit status.
+ENTRYPOINT ["python3", "/app/run.py"]
--- a/migrations/run.py
+++ b/migrations/run.py
@ -0,0 +1,67 @@
+"""Entrypoint for the migrations Job container.
+
+Runs `prisma migrate deploy` against the LiteLLM writer database using the
+recovery logic in `litellm_proxy_extras.ProxyExtrasDBManager.setup_database`
+(P3005 baseline + P3009/P3018 idempotent-error handling, retries, etc.).
+
+Env vars:
+  DATABASE_URL                  required unless it can be assembled at
+                                startup from the discrete DATABASE_* vars
+                                (password auth) or minted from an IAM token
+                                (`IAM_TOKEN_DB_AUTH=true`)
+  DIRECT_URL                    optional — used by `migrate diff` when the
+                                primary URL is a pooler (e.g. Neon -pooler)
+  USE_V2_MIGRATION_RESOLVER     "false" → fall back to the v1 resolver
+                                (legacy diff-and-force recovery). Defaults
+                                to "true": the v2 resolver avoids the schema
+                                thrashing seen during rolling deploys when
+                                two LiteLLM versions contend for the same DB.
+  USE_PRISMA_DB_PUSH            "true" → use `prisma db push` instead of
+                                `migrate deploy`. Default false.
+"""
+
+import os
+import sys
+
+from litellm.proxy.db.db_url_settings import DatabaseURLSettings
+from litellm_proxy_extras._logging import logger
+from litellm_proxy_extras.utils import ProxyExtrasDBManager, str_to_bool
+
+
+def main() -> int:
+    # Assemble DATABASE_URL from the discrete DATABASE_* env vars, matching
+    # the gateway/backend startup path (IAM mint or password auth). Leaves an
+    # operator-pinned DATABASE_URL untouched.
+    DatabaseURLSettings.from_env().apply_to_env()
+
+    if not os.getenv("DATABASE_URL"):
+        logger.error(
+            "DATABASE_URL is not set and could not be assembled from the "
+            "DATABASE_* env vars — cannot run migrations."
+        )
+        return 1
+
+    # v2 is the safer default for componentized deploys: it skips the
+    # diff-and-force recovery from v1 that caused schema thrashing during
+    # rolling deploys. Set USE_V2_MIGRATION_RESOLVER=false to opt back into v1.
+    use_v2 = str_to_bool(os.getenv("USE_V2_MIGRATION_RESOLVER", "true"))
+    use_db_push = str_to_bool(os.getenv("USE_PRISMA_DB_PUSH"))
+
+    logger.info(
+        "Starting prisma migration job (use_migrate=%s, use_v2_resolver=%s)",
+        not use_db_push,
+        use_v2,
+    )
+    ok = ProxyExtrasDBManager.setup_database(
+        use_migrate=not use_db_push,
+        use_v2_resolver=use_v2,
+    )
+    if not ok:
+        logger.error("Migration job failed after retries.")
+        return 1
+    logger.info("Migration job completed successfully.")
+    return 0
+
+
+if __name__ == "__main__":
+    sys.exit(main())
--- a/pyproject.toml
+++ b/pyproject.toml
@ -63,6 +63,7 @@ proxy = [
    "polars==1.38.1",
    "soundfile==0.12.1",
    "pyroscope-io==0.8.16; sys_platform != 'win32'",
+    "pydantic-settings>=2.14.1",
 ]
 extra_proxy = [
    "prisma==0.11.0",
--- a/tests/code_coverage_tests/liccheck.ini
+++ b/tests/code_coverage_tests/liccheck.ini
@ -90,6 +90,7 @@ jinja2: >=3.1.4 # BSD 3-Clause License
 litellm-proxy-extras: >=0.1.1 # MIT License
 litellm-enterprise: >=0.1.1 # LiteLLM Enterprise License
 a2a-sdk: >=0.3.22 # Apache 2.0 license
+pydantic-settings: >=2.14.1 # MIT License manually verified (uses PEP 639 License-Expression: MIT, not the legacy License field, so liccheck reports it as unknown)
 anyio: >=4.5.0 # Unknown license
 httpx-aiohttp: >=0.1.4 # Unknown license
 backoff: >=2.2.1 # Unknown license
--- a/tests/test_litellm/proxy/db/test_db_url_settings.py
+++ b/tests/test_litellm/proxy/db/test_db_url_settings.py
@ -0,0 +1,271 @@
+"""Tests for ``DatabaseURLSettings``.
+
+The model assembles ``DATABASE_URL`` (and optionally
+``DATABASE_URL_READ_REPLICA``) from the discrete ``DATABASE_*`` env vars
+emitted by the ``helm/litellm`` chart, before Prisma initializes. It covers
+both IAM auth (mint a short-lived token) and password auth, for both the
+writer and the read replica.
+
+The reader URL is opt-in via ``DATABASE_HOST_READ_REPLICA`` and must not
+clobber a pre-existing ``DATABASE_URL_READ_REPLICA``. A pre-existing
+``DATABASE_URL`` (password auth) is likewise left untouched.
+"""
+
+import os
+from unittest.mock import patch
+
+import pytest
+
+from litellm.proxy.db.db_url_settings import DatabaseURLSettings
+
+
+def _apply() -> bool:
+    """Run the production call path: load from env, write to env."""
+    return DatabaseURLSettings.from_env().apply_to_env()
+
+
+@pytest.fixture(autouse=True)
+def _scrub_db_env(monkeypatch):
+    """Remove every env var the model reads so tests start from a clean slate."""
+    for var in (
+        "IAM_TOKEN_DB_AUTH",
+        "DATABASE_URL",
+        "DATABASE_URL_READ_REPLICA",
+        "DATABASE_HOST",
+        "DATABASE_PORT",
+        "DATABASE_USER",
+        "DATABASE_USERNAME",
+        "DATABASE_NAME",
+        "DATABASE_SCHEMA",
+        "DATABASE_PASSWORD",
+        "DATABASE_HOST_READ_REPLICA",
+        "DATABASE_PORT_READ_REPLICA",
+        "DATABASE_USER_READ_REPLICA",
+        "DATABASE_USERNAME_READ_REPLICA",
+        "DATABASE_NAME_READ_REPLICA",
+        "DATABASE_SCHEMA_READ_REPLICA",
+        "DATABASE_PASSWORD_READ_REPLICA",
+    ):
+        monkeypatch.delenv(var, raising=False)
+
+
+def _stub_iam_token(token: str = "FAKE_TOKEN"):
+    """Patch the AWS-touching token mint so tests don't need boto3 / network."""
+    return patch(
+        "litellm.proxy.auth.rds_iam_token.generate_iam_auth_token",
+        return_value=token,
+    )
+
+
+# ---------------------------------------------------------------------------
+# IAM auth
+# ---------------------------------------------------------------------------
+
+
+def test_returns_false_when_nothing_configured(monkeypatch):
+    """No env mutation, no error — just a False return."""
+    assert _apply() is False
+    assert "DATABASE_URL" not in os.environ
+
+
+def test_assembles_writer_url_when_iam_enabled(monkeypatch):
+    monkeypatch.setenv("IAM_TOKEN_DB_AUTH", "true")
+    monkeypatch.setenv("DATABASE_HOST", "writer.example.com")
+    monkeypatch.setenv("DATABASE_USER", "litellm")
+    monkeypatch.setenv("DATABASE_NAME", "litellm_db")
+
+    with _stub_iam_token("WRITER_TOKEN"):
+        assert _apply() is True
+
+    assert (
+        os.environ["DATABASE_URL"]
+        == "postgresql://litellm:WRITER_TOKEN@writer.example.com:5432/litellm_db"
+    )
+    # Reader was never configured, so it must not have been set.
+    assert "DATABASE_URL_READ_REPLICA" not in os.environ
+
+
+def test_missing_writer_envs_raises(monkeypatch):
+    monkeypatch.setenv("IAM_TOKEN_DB_AUTH", "true")
+    # DATABASE_HOST intentionally unset.
+    monkeypatch.setenv("DATABASE_USER", "litellm")
+    monkeypatch.setenv("DATABASE_NAME", "litellm_db")
+
+    with pytest.raises(RuntimeError, match="DATABASE_HOST"):
+        _apply()
+
+
+def test_reader_url_assembled_when_host_set_and_url_unset(monkeypatch):
+    monkeypatch.setenv("IAM_TOKEN_DB_AUTH", "true")
+    monkeypatch.setenv("DATABASE_HOST", "writer.example.com")
+    monkeypatch.setenv("DATABASE_USER", "litellm")
+    monkeypatch.setenv("DATABASE_NAME", "litellm_db")
+    monkeypatch.setenv("DATABASE_HOST_READ_REPLICA", "reader.example.com")
+
+    with _stub_iam_token("READER_TOKEN"):
+        _apply()
+
+    assert (
+        os.environ["DATABASE_URL_READ_REPLICA"]
+        == "postgresql://litellm:READER_TOKEN@reader.example.com:5432/litellm_db"
+    )
+
+
+def test_reader_url_not_clobbered_when_already_set(monkeypatch):
+    """If the operator pinned DATABASE_URL_READ_REPLICA (e.g. a non-IAM
+    reader), the model must leave it untouched even though
+    DATABASE_HOST_READ_REPLICA is also set."""
+    monkeypatch.setenv("IAM_TOKEN_DB_AUTH", "true")
+    monkeypatch.setenv("DATABASE_HOST", "writer.example.com")
+    monkeypatch.setenv("DATABASE_USER", "litellm")
+    monkeypatch.setenv("DATABASE_NAME", "litellm_db")
+    monkeypatch.setenv("DATABASE_HOST_READ_REPLICA", "reader.example.com")
+    monkeypatch.setenv(
+        "DATABASE_URL_READ_REPLICA",
+        "postgresql://app:secret@reader.example.com:5432/litellm_db",
+    )
+
+    with _stub_iam_token("READER_TOKEN"):
+        _apply()
+
+    assert (
+        os.environ["DATABASE_URL_READ_REPLICA"]
+        == "postgresql://app:secret@reader.example.com:5432/litellm_db"
+    )
+
+
+def test_reader_url_skipped_when_host_unset(monkeypatch):
+    monkeypatch.setenv("IAM_TOKEN_DB_AUTH", "true")
+    monkeypatch.setenv("DATABASE_HOST", "writer.example.com")
+    monkeypatch.setenv("DATABASE_USER", "litellm")
+    monkeypatch.setenv("DATABASE_NAME", "litellm_db")
+
+    with _stub_iam_token("WRITER_TOKEN"):
+        _apply()
+
+    assert "DATABASE_URL_READ_REPLICA" not in os.environ
+
+
+def test_reader_field_fallbacks_default_to_writer_values(monkeypatch):
+    """When *_READ_REPLICA fields are unset (other than host), they fall
+    back to the writer's user / name / schema."""
+    monkeypatch.setenv("IAM_TOKEN_DB_AUTH", "true")
+    monkeypatch.setenv("DATABASE_HOST", "writer.example.com")
+    monkeypatch.setenv("DATABASE_USER", "litellm")
+    monkeypatch.setenv("DATABASE_NAME", "litellm_db")
+    monkeypatch.setenv("DATABASE_SCHEMA", "public")
+    monkeypatch.setenv("DATABASE_HOST_READ_REPLICA", "reader.example.com")
+
+    with _stub_iam_token("READER_TOKEN"):
+        _apply()
+
+    assert (
+        os.environ["DATABASE_URL_READ_REPLICA"]
+        == "postgresql://litellm:READER_TOKEN@reader.example.com:5432/litellm_db?schema=public"
+    )
+
+
+# ---------------------------------------------------------------------------
+# Password auth
+# ---------------------------------------------------------------------------
+
+
+def test_assembles_writer_url_from_password(monkeypatch):
+    monkeypatch.setenv("DATABASE_HOST", "writer.example.com")
+    monkeypatch.setenv("DATABASE_USER", "litellm")
+    monkeypatch.setenv("DATABASE_NAME", "litellm_db")
+    monkeypatch.setenv("DATABASE_PASSWORD", "s3cr3t")
+
+    assert _apply() is True
+    assert (
+        os.environ["DATABASE_URL"]
+        == "postgresql://litellm:s3cr3t@writer.example.com:5432/litellm_db"
+    )
+
+
+def test_writer_password_is_percent_encoded(monkeypatch):
+    monkeypatch.setenv("DATABASE_HOST", "writer.example.com")
+    monkeypatch.setenv("DATABASE_USER", "litellm")
+    monkeypatch.setenv("DATABASE_NAME", "litellm_db")
+    monkeypatch.setenv("DATABASE_PASSWORD", "p@ss/w:rd")
+
+    assert _apply() is True
+    assert (
+        os.environ["DATABASE_URL"]
+        == "postgresql://litellm:p%40ss%2Fw%3Ard@writer.example.com:5432/litellm_db"
+    )
+
+
+def test_writer_url_not_clobbered_when_already_set(monkeypatch):
+    """An operator-pinned DATABASE_URL (e.g. helm's $(VAR) assembly) always
+    wins over the discrete fields."""
+    monkeypatch.setenv(
+        "DATABASE_URL", "postgresql://pinned:url@db.example.com:5432/litellm_db"
+    )
+    monkeypatch.setenv("DATABASE_HOST", "writer.example.com")
+    monkeypatch.setenv("DATABASE_USER", "litellm")
+    monkeypatch.setenv("DATABASE_NAME", "litellm_db")
+    monkeypatch.setenv("DATABASE_PASSWORD", "s3cr3t")
+
+    assert _apply() is False
+    assert (
+        os.environ["DATABASE_URL"]
+        == "postgresql://pinned:url@db.example.com:5432/litellm_db"
+    )
+
+
+def test_writer_url_passwordless(monkeypatch):
+    monkeypatch.setenv("DATABASE_HOST", "writer.example.com")
+    monkeypatch.setenv("DATABASE_USER", "litellm")
+    monkeypatch.setenv("DATABASE_NAME", "litellm_db")
+
+    assert _apply() is True
+    assert (
+        os.environ["DATABASE_URL"]
+        == "postgresql://litellm@writer.example.com:5432/litellm_db"
+    )
+
+
+def test_database_username_alias(monkeypatch):
+    """DATABASE_USERNAME is accepted as an alias for DATABASE_USER (parity
+    with construct_database_url_from_env_vars)."""
+    monkeypatch.setenv("DATABASE_HOST", "writer.example.com")
+    monkeypatch.setenv("DATABASE_USERNAME", "litellm")
+    monkeypatch.setenv("DATABASE_NAME", "litellm_db")
+    monkeypatch.setenv("DATABASE_PASSWORD", "s3cr3t")
+
+    assert _apply() is True
+    assert (
+        os.environ["DATABASE_URL"]
+        == "postgresql://litellm:s3cr3t@writer.example.com:5432/litellm_db"
+    )
+
+
+def test_password_reader_falls_back_to_writer_password(monkeypatch):
+    monkeypatch.setenv("DATABASE_HOST", "writer.example.com")
+    monkeypatch.setenv("DATABASE_USER", "litellm")
+    monkeypatch.setenv("DATABASE_NAME", "litellm_db")
+    monkeypatch.setenv("DATABASE_PASSWORD", "s3cr3t")
+    monkeypatch.setenv("DATABASE_HOST_READ_REPLICA", "reader.example.com")
+
+    assert _apply() is True
+    assert (
+        os.environ["DATABASE_URL_READ_REPLICA"]
+        == "postgresql://litellm:s3cr3t@reader.example.com:5432/litellm_db"
+    )
+
+
+def test_password_reader_uses_own_credentials(monkeypatch):
+    monkeypatch.setenv("DATABASE_HOST", "writer.example.com")
+    monkeypatch.setenv("DATABASE_USER", "litellm")
+    monkeypatch.setenv("DATABASE_NAME", "litellm_db")
+    monkeypatch.setenv("DATABASE_PASSWORD", "s3cr3t")
+    monkeypatch.setenv("DATABASE_HOST_READ_REPLICA", "reader.example.com")
+    monkeypatch.setenv("DATABASE_USER_READ_REPLICA", "litellm_ro")
+    monkeypatch.setenv("DATABASE_PASSWORD_READ_REPLICA", "ro_pw")
+
+    assert _apply() is True
+    assert (
+        os.environ["DATABASE_URL_READ_REPLICA"]
+        == "postgresql://litellm_ro:ro_pw@reader.example.com:5432/litellm_db"
+    )
--- a/tests/test_litellm/proxy/test_component_allowlists.py
+++ b/tests/test_litellm/proxy/test_component_allowlists.py
@ -0,0 +1,76 @@
+"""Coverage test for the gateway / backend component allowlists.
+
+The componentization scaffold splits the proxy FastAPI app into two runtime
+components by trimming the route table inside a wrapped lifespan context:
+
+  gateway.main  -> only paths matched by gateway/routes/allowlist.py
+  backend.main  -> only paths matched by backend/routes/allowlist.py
+
+If either allowlist drops a path that was reachable on the monolithic app,
+clients hitting that path on the corresponding pod get a 404. This test
+guarantees that the union of the two trimmed route sets equals the full set
+of routes on the proxy app — i.e. no endpoint is dropped on the floor.
+
+The test reproduces the same predicate that ``gateway/main.py`` and
+``backend/main.py`` use, without importing them. The component modules wrap
+the shared ``app.router.lifespan_context``; importing them in the test process
+would chain wrappers and corrupt the snapshot.
+"""
+
+import os
+import sys
+
+# Importing ``litellm.proxy.proxy_server`` runs its module-level setup, which
+# reads ``DATABASE_URL`` (Prisma) and ``LITELLM_MASTER_KEY``. Tier-zero CI
+# runners don't set these. We pin throwaway values before the import so the
+# test never depends on a live database or master key.
+os.environ.setdefault("DATABASE_URL", "sqlite:///:memory:")
+os.environ.setdefault("LITELLM_MASTER_KEY", "sk-test-component-allowlist")
+
+from fastapi.routing import Mount
+
+# gateway/ and backend/ live at the repo root, not inside litellm/.
+_REPO_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..", ".."))
+if _REPO_ROOT not in sys.path:
+    sys.path.insert(0, _REPO_ROOT)
+
+from backend.routes.allowlist import BACKEND_EXACT_PATHS, BACKEND_PATH_PREFIXES
+from gateway.routes.allowlist import GATEWAY_EXACT_PATHS, GATEWAY_PATH_PREFIXES
+from litellm.proxy.proxy_server import app
+
+
+def _component_paths(routes, exact_paths, path_prefixes) -> set[str]:
+    """Reproduce ``gateway.main._is_gateway_route`` / ``backend.main._is_backend_route``."""
+    out: set[str] = set()
+    for route in routes:
+        if isinstance(route, Mount):
+            continue
+        path = getattr(route, "path", None)
+        if path is None:
+            continue
+        if path in exact_paths or any(path.startswith(p) for p in path_prefixes):
+            out.add(path)
+    return out
+
+
+def test_gateway_plus_backend_covers_full_app():
+    """Every route on the proxy app must be served by gateway or backend."""
+    all_paths = {
+        getattr(r, "path")
+        for r in app.router.routes
+        if not isinstance(r, Mount) and getattr(r, "path", None) is not None
+    }
+    gateway_paths = _component_paths(
+        app.router.routes, GATEWAY_EXACT_PATHS, GATEWAY_PATH_PREFIXES
+    )
+    backend_paths = _component_paths(
+        app.router.routes, BACKEND_EXACT_PATHS, BACKEND_PATH_PREFIXES
+    )
+
+    uncovered = all_paths - (gateway_paths | backend_paths)
+
+    assert not uncovered, (
+        f"{len(uncovered)} route(s) are not exposed on either component. "
+        f"Update gateway/routes/allowlist.py or backend/routes/allowlist.py to cover:\n  "
+        + "\n  ".join(sorted(uncovered))
+    )
--- a/ui/Dockerfile
+++ b/ui/Dockerfile
@ -0,0 +1,42 @@
+# syntax=docker/dockerfile:1.7
+
+# UI container — Next.js static export served by nginx.
+
+ARG NGINX_VERSION=1.27-alpine
+ARG NODE_VERSION=20.18-alpine3.20
+
+# ---------- builder ----------
+FROM node:${NODE_VERSION} AS builder
+
+WORKDIR /app
+
+# Quiet npm/Next.js: no telemetry, funding notices or audit requests.
+ENV NEXT_TELEMETRY_DISABLED=1 npm_config_audit=false npm_config_fund=false
+
+# Install from the manifests alone first; the dependency layer is then
+# reused until package.json or package-lock.json changes.
+COPY ui/litellm-dashboard/package.json ui/litellm-dashboard/package-lock.json /app/
+RUN --mount=type=cache,target=/root/.npm npm ci --prefer-offline
+
+# Bring in the dashboard sources and build; the runtime stage copies
+# the result from /app/out.
+COPY ui/litellm-dashboard/ /app/
+RUN npm run build
+
+# ---------- runtime ----------
+FROM nginx:${NGINX_VERSION} AS runtime
+
+# Drop the upstream default :80 server (we own the config) and give the
+# image's unprivileged nginx account its pid file and temp directories.
+RUN rm -f /etc/nginx/conf.d/default.conf && touch /var/run/nginx.pid \
+    && chown -R nginx:nginx /var/cache/nginx /var/run/nginx.pid
+
+# Static export → web root; routing rules live in ui/nginx.conf.
+COPY --from=builder /app/out /usr/share/nginx/html
+COPY ui/nginx.conf /etc/nginx/nginx.conf
+
+# Port 3000 is unprivileged; numeric uid/gid (image's nginx) for runAsNonRoot.
+USER 101:101
+EXPOSE 3000/tcp
+# nginx as PID 1 in foreground; respects SIGTERM, so no tini/dumb-init needed.
+CMD ["nginx", "-g", "daemon off;"]
--- a/ui/nginx.conf
+++ b/ui/nginx.conf
@ -0,0 +1,99 @@
+worker_processes auto;
+events { worker_connections 1024; }
+
+http {
+  include       /etc/nginx/mime.types;
+  default_type  application/octet-stream;
+  sendfile      on;
+  tcp_nopush    on;
+  keepalive_timeout 65;
+
+  gzip on;
+  gzip_comp_level 4;
+  gzip_min_length 1024;
+  gzip_proxied   any;
+  # text/html is implicit (listing it only warns); text/plain = RSC *.txt.
+  gzip_types
+    application/javascript
+    application/json
+    text/css
+    text/plain
+    image/svg+xml
+    font/woff font/woff2;
+
+  server {
+    listen 3000 default_server;
+    server_name _;
+    root /usr/share/nginx/html;
+
+    # next.config.mjs sets assetPrefix=/litellm-asset-prefix, which makes
+    # the built HTML reference /litellm-asset-prefix/_next/... — but the
+    # static export only emits files under /_next/. Map the prefix to
+    # the real tree at request time instead of duplicating the directory
+    # at build time. NB: alias rewrites the location prefix, so
+    # /litellm-asset-prefix/_next/foo.js → /usr/share/nginx/html/_next/foo.js.
+    location /litellm-asset-prefix/_next/ {
+      alias /usr/share/nginx/html/_next/;
+      expires 1y;
+      add_header Cache-Control "public, immutable";
+    }
+
+    # Content-hashed asset bundles — cache forever.
+    location /_next/ {
+      try_files $uri =404;
+      expires 1y;
+      add_header Cache-Control "public, immutable";
+    }
+    location /assets/ {
+      try_files $uri =404;
+      expires 1y;
+      add_header Cache-Control "public, immutable";
+    }
+    location = /favicon.ico {
+      try_files $uri =404;
+      expires 1d;
+    }
+
+    # Probe target — doesn't depend on disk.
+    location = /healthz { default_type text/plain; return 200 "ok\n"; }
+
+    # Next.js App Router (output: "export") emits an RSC/flight payload
+    # as <route>.txt next to <route>.html, plus __next.*.txt segment
+    # data. The client router fetches these on soft navigation/prefetch
+    # (?_rsc=<hash>) — the query string is irrelevant, files resolve by
+    # $uri. These MUST be served from the export: if they fall through
+    # to the catch-all 404 below, client-side navigation never settles
+    # and the login flow spins in an infinite redirect loop
+    # (/ ⇄ /ui/login). Keep this BEFORE the /ui/ regex — ^/ui/(.+)$ is
+    # also a regex and nginx takes the first matching one, so a stray
+    # /ui/<page>.txt would otherwise be rewritten to HTML and break RSC
+    # for nested routes. A genuinely missing payload must 404 (the
+    # router degrades to a hard navigation); never fall back to HTML.
+    location ~ \.txt$ {
+      try_files $uri =404;
+    }
+
+    # /ui[/<path>] — the dashboard's JS hardcodes URLs under this prefix
+    # (router.replace("/ui"), buildLoginUrlWithReturn("/ui/login"), ...).
+    # Mirror what FastAPI StaticFiles(mount="/ui") did in the monolithic
+    # proxy_server: an exact file in the export wins (/ui/assets/x.svg →
+    # out/assets/x.svg), then out/<page>.html, then out/<page>/index.html,
+    # and finally the SPA fallback out/index.html for client-side routes.
+    location = /ui  { try_files /index.html =404; }
+    location = /ui/ { try_files /index.html =404; }
+    location ~ ^/ui/(.+)$ {
+      try_files /$1 /$1.html /$1/index.html /index.html =404;
+    }
+
+    # `/` is handy for direct-debug port-forwards.
+    location = / { try_files /index.html =404; }
+
+    # Anything else (API calls etc.) returns 404 from the UI's
+    # perspective. A reverse proxy in front of this image routes the
+    # API surface (/v1, /key, /.well-known/litellm-ui-config, ...) to
+    # gateway/backend before requests get here; if something slips
+    # through, fall through to a 404 instead of accidentally serving
+    # HTML and confusing a JSON-expecting caller.
+    location / { return 404; }
+  }
+}
--- a/uv.lock
+++ b/uv.lock