litellm/deploy/charts/litellm-helm/templates/deployment.yaml
Yassin Kortam b5d3a5fc85
feat: add read-replica routing for Prisma DB via DATABASE_URL_READ_REPLICA (#27493)
- Introduce RoutingPrismaWrapper that transparently routes read operations (find_*, count, group_by, query_raw, query_first) to a reader endpoint while writes remain on the writer, enabling Aurora-style reader/writer endpoint splits
- Add IAMEndpoint dataclass and parse_iam_endpoint_from_url() to capture static connection fields from a reader URL so only the IAM token needs to rotate, avoiding the need for separate DATABASE_HOST_READ_REPLICA/etc. env vars
- Enhance PrismaWrapper with per-instance knobs (db_url_env_var, iam_endpoint, recreate_uses_datasource, log_prefix) so writer and reader wrappers are independent: the reader writes its fresh URL to DATABASE_URL_READ_REPLICA and passes datasource override to Prisma since Prisma only auto-reads DATABASE_URL
- Fix deadlock in PrismaWrapper.__getattr__: when called from inside a running event loop, schedule the token refresh as a background task instead of blocking with run_coroutine_threadsafe + future.result(), which would deadlock the loop thread waiting for a coroutine that needs the loop to run
- Fix botocore crash when DATABASE_PORT is unset by defaulting to "5432" in both proxy_cli.py and PrismaWrapper.get_rds_iam_token(); passing None caused botocore to embed the literal string "None" in the presigned URL
- Implement graceful reader degradation: reader connect/recreate failures are non-fatal; wrapper sets _reader_unavailable=True and silently routes reads to the writer to keep the proxy serving traffic during transient reader outages
- Add PrismaClient.writer_db property so the reconnect smoke-test always validates the writer engine specifically; query_raw on the routing wrapper would route to the reader and not verify the newly-recreated writer
- Expose DATABASE_URL_READ_REPLICA in Helm chart (values.yaml + deployment.yaml) via both plain value and secret key reference, and document the field in docker-compose.yml
- Add 887-line test suite covering routing logic, IAM token refresh paths, reader degradation scenarios, datasource override behavior, and the deadlock regression

Co-authored-by: Yassin Kortam <yassinkortam@g.ucla.edu>
2026-05-08 21:05:50 -07:00

275 lines
11 KiB
YAML

{{- /*
  Deployment for the LiteLLM proxy.
  Name and labels come from the chart's "litellm.*" named templates;
  extra annotations/labels are taken from .Values.deploymentAnnotations and
  .Values.deploymentLabels.
*/ -}}
apiVersion: apps/v1
kind: Deployment
metadata:
  annotations:
    {{- toYaml .Values.deploymentAnnotations | nindent 4 }}
  name: {{ include "litellm.fullname" . }}
  labels:
    {{- include "litellm.labels" . | nindent 4 }}
    {{- with .Values.deploymentLabels }}
    {{- toYaml . | nindent 4 }}
    {{- end }}
spec:
  {{- /* replicas is only rendered when neither .Values.keda.enabled nor .Values.autoscaling.enabled is set. */}}
  {{- if and (not .Values.keda.enabled) (not .Values.autoscaling.enabled) }}
  replicas: {{ .Values.replicaCount }}
  {{- end }}
  {{- with .Values.strategy }}
  strategy:
    {{- toYaml . | nindent 4 }}
  {{- end }}
  selector:
    matchLabels:
      {{- include "litellm.selectorLabels" . | nindent 6 }}
  {{- with .Values.deploymentMinReadySeconds }}
  minReadySeconds: {{ . }}
  {{- end }}
  template:
    metadata:
      annotations:
        {{- /* Hash of the rendered ConfigMap template, so a config change also changes the pod template. */}}
        {{- if .Values.proxyConfigMap.create }}
        checksum/config: {{ include (print $.Template.BasePath "/configmap-litellm.yaml") . | sha256sum }}
        {{- end }}
        {{- with .Values.podAnnotations }}
        {{- toYaml . | nindent 8 }}
        {{- end }}
      labels:
        {{- include "litellm.labels" . | nindent 8 }}
        {{- with .Values.podLabels }}
        {{- toYaml . | nindent 8 }}
        {{- end }}
    spec:
      {{- with .Values.imagePullSecrets }}
      imagePullSecrets:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      serviceAccountName: {{ include "litellm.serviceAccountName" . }}
      securityContext:
        {{- toYaml .Values.podSecurityContext | nindent 8 }}
      {{- /* extraInitContainers is passed through tpl, so its values may themselves contain template expressions. */}}
      {{- with .Values.extraInitContainers }}
      initContainers:
        {{- tpl (toYaml .) $ | nindent 8 }}
      {{- end }}
      containers:
        - name: {{ include "litellm.name" . }}
          securityContext:
            {{- toYaml .Values.securityContext | nindent 12 }}
          image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default (printf "main-%s" .Chart.AppVersion) }}"
          imagePullPolicy: {{ .Values.image.pullPolicy }}
          env:
            - name: HOST
              value: {{ .Values.listen | default "0.0.0.0" | quote }}
            - name: PORT
              value: {{ .Values.service.port | quote }}
            {{- /*
              Database connection settings. Two mutually exclusive modes:
                - db.deployStandalone: credentials from the "<fullname>-dbcredentials"
                  Secret, host "<release>-postgresql", database "litellm".
                - db.useExisting: credentials (and optionally the host) from
                  .Values.db.secret, plus DATABASE_NAME and DATABASE_URL from values.
              Env values are quoted so they always render as YAML strings.
            */}}
            {{- if .Values.db.deployStandalone }}
            - name: DATABASE_USERNAME
              valueFrom:
                secretKeyRef:
                  name: {{ include "litellm.fullname" . }}-dbcredentials
                  key: username
            - name: DATABASE_PASSWORD
              valueFrom:
                secretKeyRef:
                  name: {{ include "litellm.fullname" . }}-dbcredentials
                  key: password
            - name: DATABASE_HOST
              value: {{ printf "%s-postgresql" .Release.Name | quote }}
            - name: DATABASE_NAME
              value: litellm
            {{- else if .Values.db.useExisting }}
            - name: DATABASE_USERNAME
              valueFrom:
                secretKeyRef:
                  name: {{ .Values.db.secret.name }}
                  key: {{ .Values.db.secret.usernameKey }}
            - name: DATABASE_PASSWORD
              valueFrom:
                secretKeyRef:
                  name: {{ .Values.db.secret.name }}
                  key: {{ .Values.db.secret.passwordKey }}
            - name: DATABASE_HOST
              {{- /* Prefer the host stored in the Secret; otherwise use the plain db.endpoint value. */}}
              {{- if .Values.db.secret.endpointKey }}
              valueFrom:
                secretKeyRef:
                  name: {{ .Values.db.secret.name }}
                  key: {{ .Values.db.secret.endpointKey }}
              {{- else }}
              value: {{ .Values.db.endpoint | quote }}
              {{- end }}
            - name: DATABASE_NAME
              value: {{ .Values.db.database | quote }}
            - name: DATABASE_URL
              value: {{ .Values.db.url | quote }}
            {{- end }}
            {{- /*
              Optional read-replica URL. A Secret key reference (only with
              db.useExisting) takes precedence over the plain db.readReplicaUrl value.
            */}}
            {{- if and .Values.db.useExisting .Values.db.secret.readReplicaUrlKey }}
            - name: DATABASE_URL_READ_REPLICA
              valueFrom:
                secretKeyRef:
                  name: {{ .Values.db.secret.name }}
                  key: {{ .Values.db.secret.readReplicaUrlKey }}
            {{- else if .Values.db.readReplicaUrl }}
            - name: DATABASE_URL_READ_REPLICA
              value: {{ .Values.db.readReplicaUrl | quote }}
            {{- end }}
            {{- /* Master key Secret: defaults to "<fullname>-masterkey" / key "masterkey" unless overridden. */}}
            - name: PROXY_MASTER_KEY
              valueFrom:
                secretKeyRef:
                  name: {{ .Values.masterkeySecretName | default (printf "%s-masterkey" (include "litellm.fullname" .)) }}
                  key: {{ .Values.masterkeySecretKey | default "masterkey" }}
            {{- if .Values.redis.enabled }}
            - name: REDIS_HOST
              value: {{ include "litellm.redis.serviceName" . | quote }}
            - name: REDIS_PORT
              value: {{ include "litellm.redis.port" . | quote }}
            - name: REDIS_PASSWORD
              valueFrom:
                secretKeyRef:
                  name: {{ include "redis.secretName" .Subcharts.redis }}
                  key: {{ include "redis.secretPasswordKey" .Subcharts.redis }}
            {{- end }}
            {{- /*
              Inject LITELLM_LOG only when envVars does not already define it.
            */}}
            {{- if and .Values.logLevel (not (hasKey (default dict .Values.envVars) "LITELLM_LOG")) }}
            - name: LITELLM_LOG
              value: {{ .Values.logLevel | quote }}
            {{- end }}
            {{- /* Plain key/value pairs from .Values.envVars; every value is rendered as a quoted string. */}}
            {{- range $key, $val := .Values.envVars }}
            - name: {{ $key }}
              value: {{ $val | quote }}
            {{- end }}
            {{- /* extraEnvVars is emitted verbatim, so entries may use valueFrom and other EnvVar fields. */}}
            {{- with .Values.extraEnvVars }}
            {{- toYaml . | nindent 12 }}
            {{- end }}
            {{- if .Values.migrationJob.enabled }}
            # Schema updates are owned by the dedicated migrations Job; skip
            # the proxy's startup `prisma db push` so N replicas don't race
            # one DB on every rollout. Placed last (after envVars and
            # extraEnvVars) so this override can't be silently shadowed by a
            # user-supplied DISABLE_SCHEMA_UPDATE under last-wins duplicate-env
            # semantics — same pattern the migrations Job uses.
            - name: DISABLE_SCHEMA_UPDATE
              value: "true"
            {{- end }}
          envFrom:
            {{- range .Values.environmentSecrets }}
            - secretRef:
                name: {{ . }}
            {{- end }}
            {{- range .Values.environmentConfigMaps }}
            - configMapRef:
                name: {{ . }}
            {{- end }}
          {{- with .Values.command }}
          command:
            {{- toYaml . | nindent 12 }}
          {{- end }}
          {{- /* User-supplied args replace the defaults entirely; otherwise point at the mounted config file. */}}
          {{- if .Values.args }}
          args:
            {{- toYaml .Values.args | nindent 12 }}
          {{- else }}
          args:
            - --config
            - /etc/litellm/config.yaml
            {{- if .Values.numWorkers }}
            - --num_workers
            - {{ .Values.numWorkers | quote }}
            {{- end }}
          {{- end }}
          ports:
            - name: http
              containerPort: {{ .Values.service.port }}
              protocol: TCP
          {{- /* All three probes issue an HTTP GET against the named "http" port; paths and timings come from values. */}}
          livenessProbe:
            httpGet:
              path: {{ .Values.livenessProbe.path | quote }}
              port: "http"
            initialDelaySeconds: {{ .Values.livenessProbe.initialDelaySeconds }}
            periodSeconds: {{ .Values.livenessProbe.periodSeconds }}
            timeoutSeconds: {{ .Values.livenessProbe.timeoutSeconds }}
            successThreshold: {{ .Values.livenessProbe.successThreshold }}
            failureThreshold: {{ .Values.livenessProbe.failureThreshold }}
          readinessProbe:
            httpGet:
              path: {{ .Values.readinessProbe.path | quote }}
              port: "http"
            initialDelaySeconds: {{ .Values.readinessProbe.initialDelaySeconds }}
            periodSeconds: {{ .Values.readinessProbe.periodSeconds }}
            timeoutSeconds: {{ .Values.readinessProbe.timeoutSeconds }}
            successThreshold: {{ .Values.readinessProbe.successThreshold }}
            failureThreshold: {{ .Values.readinessProbe.failureThreshold }}
          startupProbe:
            httpGet:
              path: {{ .Values.startupProbe.path | quote }}
              port: "http"
            initialDelaySeconds: {{ .Values.startupProbe.initialDelaySeconds }}
            periodSeconds: {{ .Values.startupProbe.periodSeconds }}
            timeoutSeconds: {{ .Values.startupProbe.timeoutSeconds }}
            successThreshold: {{ .Values.startupProbe.successThreshold }}
            failureThreshold: {{ .Values.startupProbe.failureThreshold }}
          resources:
            {{- toYaml .Values.resources | nindent 12 }}
          volumeMounts:
            - name: litellm-config
              mountPath: /etc/litellm/config.yaml
              subPath: config.yaml
            {{- /* With a read-only root filesystem, mount emptyDir volumes over /tmp, /.cache and /.npm. */}}
            {{- if .Values.securityContext.readOnlyRootFilesystem }}
            - name: tmp
              mountPath: /tmp
            - name: cache
              mountPath: /.cache
            - name: npm
              mountPath: /.npm
            {{- end }}
            {{- with .Values.volumeMounts }}
            {{- toYaml . | nindent 12 }}
            {{- end }}
          {{- with .Values.lifecycle }}
          lifecycle:
            {{- toYaml . | nindent 12 }}
          {{- end }}
        {{- /* Additional containers are appended to the containers list and rendered through tpl. */}}
        {{- with .Values.extraContainers }}
        {{- tpl (toYaml .) $ | nindent 8 }}
        {{- end }}
      volumes:
        {{- /* emptyDir volumes backing the mounts added when readOnlyRootFilesystem is set. */}}
        {{- if .Values.securityContext.readOnlyRootFilesystem }}
        - name: tmp
          emptyDir:
            sizeLimit: 500Mi
        - name: cache
          emptyDir:
            sizeLimit: 500Mi
        - name: npm
          emptyDir:
            sizeLimit: 500Mi
        {{- end }}
        - name: litellm-config
          configMap:
            {{- /* Use the chart-managed ConfigMap when proxyConfigMap.create is set, else the named existing one. */}}
            {{- if .Values.proxyConfigMap.create }}
            name: {{ include "litellm.fullname" . }}-config
            {{- else }}
            name: {{ .Values.proxyConfigMap.name }}
            {{- end }}
            items:
              - key: {{ .Values.proxyConfigMap.key | default "config.yaml" }}
                path: "config.yaml"
        {{- with .Values.volumes }}
        {{- toYaml . | nindent 8 }}
        {{- end }}
      {{- with .Values.nodeSelector }}
      nodeSelector:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- with .Values.affinity }}
      affinity:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- with .Values.tolerations }}
      tolerations:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      terminationGracePeriodSeconds: {{ .Values.terminationGracePeriodSeconds | default 90 }}
      {{- with .Values.topologySpreadConstraints }}
      topologySpreadConstraints:
        {{- toYaml . | nindent 8 }}
      {{- end }}