litellm/deploy/charts/litellm-helm/values.yaml
Yassin Kortam b5d3a5fc85
feat: add read-replica routing for Prisma DB via DATABASE_URL_READ_REPLICA (#27493)
- Introduce RoutingPrismaWrapper that transparently routes read operations (find_*, count, group_by, query_raw, query_first) to a reader endpoint while writes remain on the writer, enabling Aurora-style reader/writer endpoint splits
- Add IAMEndpoint dataclass and parse_iam_endpoint_from_url() to capture static connection fields from a reader URL so only the IAM token needs to rotate, avoiding the need for separate DATABASE_HOST_READ_REPLICA/etc. env vars
- Enhance PrismaWrapper with per-instance knobs (db_url_env_var, iam_endpoint, recreate_uses_datasource, log_prefix) so writer and reader wrappers are independent: the reader writes its fresh URL to DATABASE_URL_READ_REPLICA and passes datasource override to Prisma since Prisma only auto-reads DATABASE_URL
- Fix deadlock in PrismaWrapper.__getattr__: when called from inside a running event loop, schedule the token refresh as a background task instead of blocking with run_coroutine_threadsafe + future.result(), which would deadlock the loop thread waiting for a coroutine that needs the loop to run
- Fix botocore crash when DATABASE_PORT is unset by defaulting to "5432" in both proxy_cli.py and PrismaWrapper.get_rds_iam_token(); passing None caused botocore to embed the literal string "None" in the presigned URL
- Implement graceful reader degradation: reader connect/recreate failures are non-fatal; wrapper sets _reader_unavailable=True and silently routes reads to the writer to keep the proxy serving traffic during transient reader outages
- Add PrismaClient.writer_db property so the reconnect smoke-test always validates the writer engine specifically; query_raw on the routing wrapper would route to the reader and not verify the newly-recreated writer
- Expose DATABASE_URL_READ_REPLICA in Helm chart (values.yaml + deployment.yaml) via both plain value and secret key reference, and document the field in docker-compose.yml
- Add 887-line test suite covering routing logic, IAM token refresh paths, reader degradation scenarios, datasource override behavior, and the deadlock regression

Co-authored-by: Yassin Kortam <yassinkortam@g.ucla.edu>
2026-05-08 21:05:50 -07:00

406 lines
12 KiB
YAML

# Default values for litellm.
# This is a YAML-formatted file.
# Declare variables to be passed into your templates.
# Replica count for the litellm deployment.
# NOTE(review): presumably not applied when autoscaling or keda manage the
# replica count — confirm in the deployment template.
replicaCount: 1
# Optional; uncomment to set it. NOTE(review): presumably the number of proxy
# worker processes per pod — confirm against the deployment template.
# numWorkers: 2
# Container image used for the litellm proxy.
image:
  # Use "ghcr.io/berriai/litellm-database" for optimized image with database
  repository: ghcr.io/berriai/litellm-database
  pullPolicy: Always
  # Overrides the image tag whose default is the chart appVersion.
  # tag: "main-latest"
  tag: ""
# Image pull secrets; empty by default.
imagePullSecrets: []
# Name overrides. NOTE(review): presumably consumed by the chart's name and
# fullname template helpers — confirm in _helpers.tpl.
nameOverride: "litellm"
fullnameOverride: ""
# Service account settings. The child keys must be nested under
# `serviceAccount`; at top level they collide with other sections' `create`,
# `annotations` and `name` keys.
serviceAccount:
  # Specifies whether a service account should be created
  create: false
  # Automatically mount a ServiceAccount's API credentials?
  automount: true
  # Annotations to add to the service account
  annotations: {}
  # The name of the service account to use.
  # If not set and create is true, a name is generated using the fullname template
  name: ""
# annotations for litellm deployment
deploymentAnnotations: {}
# NOTE(review): presumably extra labels for the litellm deployment — confirm
# in the deployment template.
deploymentLabels: {}
# NOTE(review): presumably maps to the Deployment's minReadySeconds — confirm
# in the deployment template.
deploymentMinReadySeconds: 0
# annotations for litellm pods
podAnnotations: {}
# NOTE(review): presumably extra labels for the litellm pods — confirm in the
# deployment template.
podLabels: {}
# -- Deployment strategy configuration
# Example:
#   type: RollingUpdate
#   rollingUpdate:
#     maxUnavailable: 0
#     maxSurge: 1
strategy: {}
# NOTE(review): presumably the pod's termination grace period in seconds —
# confirm in the deployment template.
terminationGracePeriodSeconds: 90
# Topology spread constraints; empty by default.
# The empty list is written inline: a flow collection on a continuation line
# must be indented deeper than its key, which is easy to lose.
# Example entry:
topologySpreadConstraints: []
# - maxSkew: 1
#   topologyKey: kubernetes.io/hostname
#   whenUnsatisfiable: DoNotSchedule
#   labelSelector:
#     matchLabels:
#       app: litellm
# At the time of writing, the litellm docker image requires write access to the
# filesystem on startup so that prisma can install some dependencies.
podSecurityContext: {}
# Empty by default (written inline so the value cannot be detached from its
# key). Example settings:
securityContext: {}
# capabilities:
#   drop:
#     - ALL
# readOnlyRootFilesystem: false
# runAsNonRoot: true
# runAsUser: 1000
# A list of Kubernetes Secret objects that will be exported to the LiteLLM proxy
# pod as environment variables. These secrets can then be referenced in the
# configuration file (or "litellm" ConfigMap) with `os.environ/<Env Var Name>`
environmentSecrets: []
# - litellm-env-secret
# A list of Kubernetes ConfigMap objects that will be exported to the LiteLLM proxy
# pod as environment variables. The ConfigMap kv-pairs can then be referenced in the
# configuration file (or "litellm" ConfigMap) with `os.environ/<Env Var Name>`
environmentConfigMaps: []
# - litellm-env-configmap
# Service settings for the litellm proxy.
service:
  type: ClusterIP
  port: 4000
  # If service type is `LoadBalancer` you can
  # optionally specify loadBalancerClass
  # loadBalancerClass: tailscale
# Probes for LiteLLM gateway container
# Each probe's settings must be nested under its own key; at top level the
# three probes' identical keys (`path`, `periodSeconds`, ...) are duplicates.
livenessProbe:
  path: /health/liveliness
  initialDelaySeconds: 0
  periodSeconds: 15
  timeoutSeconds: 5
  successThreshold: 1
  failureThreshold: 5
# Readiness probe for the LiteLLM gateway container (settings nested under the
# probe key so they do not collide with the other probes' keys).
readinessProbe:
  path: /health/readiness
  initialDelaySeconds: 0
  periodSeconds: 10
  timeoutSeconds: 5
  successThreshold: 1
  failureThreshold: 3
# Startup probe for the LiteLLM gateway container. It uses the same path as
# the readiness probe with a higher failureThreshold (30 vs 3).
startupProbe:
  path: /health/readiness
  initialDelaySeconds: 0
  periodSeconds: 10
  timeoutSeconds: 5
  successThreshold: 1
  failureThreshold: 30
# Ingress settings (disabled by default).
ingress:
  enabled: false
  className: "nginx"
  labels: {}
  annotations: {}
  # kubernetes.io/ingress.class: nginx
  # kubernetes.io/tls-acme: "true"
  hosts:
    - host: api.example.local
      paths:
        - path: /
          pathType: ImplementationSpecific
  tls: []
  # - secretName: chart-example-tls
  #   hosts:
  #     - chart-example.local
# NOTE(review): presumably uncommenting `masterkey` supplies the master key as
# a plain value — confirm in the templates; prefer a secret for real deployments.
# masterkey: changeit
# if set, use this secret for the master key; otherwise, autogenerate a new one
masterkeySecretName: ""
# if set, use this secret key for the master key; otherwise, use the default key
masterkeySecretKey: ""
# Source of the proxy's config.yaml: a ConfigMap created by this chart
# (default) or an existing one.
proxyConfigMap:
  # when true, creates a new configmap
  create: true
  # if create is false and name is set, use existing ConfigMap
  # create: false
  # name: ""
  # key: "config.yaml"
# The elements within proxy_config are rendered as config.yaml for the proxy
# Examples: https://github.com/BerriAI/litellm/tree/main/litellm/proxy/example_config_yaml
# Reference: https://docs.litellm.ai/docs/proxy/configs
proxy_config:
  model_list:
    # At least one model must exist for the proxy to start.
    - model_name: gpt-3.5-turbo
      litellm_params:
        model: gpt-3.5-turbo
        api_key: eXaMpLeOnLy
    - model_name: fake-openai-endpoint
      litellm_params:
        model: openai/fake
        api_key: fake-key
        api_base: https://exampleopenaiendpoint-production.up.railway.app/
  general_settings:
    master_key: os.environ/PROXY_MASTER_KEY
# We usually recommend not to specify default resources and to leave this as a conscious
# choice for the user. This also increases chances charts run on environments with little
# resources, such as Minikube. If you do want to specify resources, uncomment the following
# lines, adjust them as necessary, and remove the curly braces after 'resources:'.
resources: {}
# limits:
#   cpu: 100m
#   memory: 128Mi
# requests:
#   cpu: 100m
#   memory: 128Mi
# Autoscaling settings (disabled by default). Per the keda section's own
# comment, this is mutually exclusive with keda autoscaling.
autoscaling:
  enabled: false
  minReplicas: 1
  maxReplicas: 100
  targetCPUUtilizationPercentage: 80
  # targetMemoryUtilizationPercentage: 80
# Autoscaling with keda is mutually exclusive with hpa
keda:
  enabled: false
  minReplicas: 1
  maxReplicas: 100
  pollingInterval: 30
  cooldownPeriod: 300
  # fallback:
  #   failureThreshold: 3
  #   replicas: 11
  restoreToOriginalReplicaCount: false
  scaledObject:
    annotations: {}
  # Scaling triggers; empty by default. Example:
  triggers: []
  # - type: prometheus
  #   metadata:
  #     serverAddress: http://<prometheus-host>:9090
  #     metricName: http_requests_total
  #     threshold: '100'
  #     query: sum(rate(http_requests_total{deployment="my-deployment"}[2m]))
  # Scaling behavior; empty by default. Example:
  behavior: {}
  # scaleDown:
  #   stabilizationWindowSeconds: 300
  #   policies:
  #     - type: Pods
  #       value: 1
  #       periodSeconds: 180
  # scaleUp:
  #   stabilizationWindowSeconds: 300
  #   policies:
  #     - type: Pods
  #       value: 2
  #       periodSeconds: 60
# Additional volumes on the output Deployment definition.
volumes: []
# - name: foo
#   secret:
#     secretName: mysecret
#     optional: false
# Additional volumeMounts on the output Deployment definition.
volumeMounts: []
# - name: foo
#   mountPath: "/etc/foo"
#   readOnly: true
# Scheduling settings; all empty by default.
# NOTE(review): presumably passed through to the pod spec — confirm in the
# deployment template.
nodeSelector: {}
tolerations: []
affinity: {}
# Database connection settings. Nesting matters here: the comments below refer
# to `db.endpoint`, `db.url`, `db.readReplicaUrl`, `db.deployStandalone` and
# `db.secret.readReplicaUrlKey`.
db:
  # Use an existing postgres server/cluster
  useExisting: false
  # How to connect to the existing postgres server/cluster
  endpoint: localhost
  database: litellm
  url: postgresql://$(DATABASE_USERNAME):$(DATABASE_PASSWORD)@$(DATABASE_HOST)/$(DATABASE_NAME)
  secret:
    name: postgres
    usernameKey: username
    passwordKey: password
    # Optional: when set, DATABASE_HOST will be sourced from this secret key instead of db.endpoint
    endpointKey: ""
    # Optional: when set, DATABASE_URL_READ_REPLICA will be sourced from this
    # secret key instead of db.readReplicaUrl. Prefer this over the plain
    # value: read-replica URLs typically embed credentials, and a value
    # written to db.readReplicaUrl ends up visible in the rendered pod spec
    # and the Helm release secret.
    readReplicaUrlKey: ""
  # Optional read-replica routing. When set, the proxy sends read-only
  # queries (find_*, count, group_by, query_raw/_first) to this URL while
  # writes continue to go to db.url. Useful for Aurora-style clusters with
  # separate reader/writer endpoints. Leave empty to keep single-DB behavior.
  # When IAM_TOKEN_DB_AUTH is enabled, the reader URL is auto-refreshed
  # alongside the writer (host/port/user/db are parsed from this URL once
  # at startup; only the IAM token rotates).
  #
  # If the URL embeds credentials, prefer db.secret.readReplicaUrlKey over
  # this field — the plain value is rendered into the pod spec and the
  # Helm release secret. This field is intended for credential-less URLs
  # only (e.g. when IAM_TOKEN_DB_AUTH supplies the token at runtime).
  readReplicaUrl: ""
  # Use the Stackgres Helm chart to deploy an instance of a Stackgres cluster.
  # The Stackgres Operator must already be installed within the target
  # Kubernetes cluster.
  # TODO: Stackgres deployment currently unsupported
  useStackgresOperator: false
  # Use the Postgres Helm chart to create a single node, stand alone postgres
  # instance. See the "postgresql" top level key for additional configuration.
  deployStandalone: true
# Lifecycle hooks for the LiteLLM container
# Example:
# lifecycle:
#   preStop:
#     exec:
#       command: ["/bin/sh", "-c", "sleep 10"]
lifecycle: {}
# Settings for Bitnami postgresql chart (if db.deployStandalone is true, ignored
# otherwise)
postgresql:
  architecture: standalone
  auth:
    username: litellm
    database: litellm
    # You should override these on the helm command line with
    # `--set postgresql.auth.postgres-password=<some good password>,postgresql.auth.password=<some good password>`
    password: NoTaGrEaTpAsSwOrD
    postgres-password: NoTaGrEaTpAsSwOrD
    # A secret is created by this chart (litellm-helm) with the credentials that
    # the new Postgres instance should use.
    # existingSecret: ""
    # secretKeys:
    #   userPasswordKey: password
# requires cache: true in config file
# either enable this or pass a secret for REDIS_HOST, REDIS_PORT, REDIS_PASSWORD or REDIS_URL
# with cache: true to use existing redis instance
redis:
  enabled: false
  architecture: standalone
# Prisma migration job settings
migrationJob:
  enabled: true  # Enable or disable the schema migration Job
  retries: 3  # Number of retries for the Job in case of failure
  backoffLimit: 4  # Backoff limit for Job restarts
  disableSchemaUpdate: false  # Skip schema migrations for specific environments. When True, the job will exit with code 0.
  # Optional service account for the migration job.
  # Only used when migrationJob.hooks.helm.enabled=true and serviceAccount.create=true.
  # In that case, pre-install/pre-upgrade hooks run before normal resources, so this defaults to "default".
  serviceAccountName: ""
  annotations: {}
  ttlSecondsAfterFinished: 120
  resources: {}
  # requests:
  #   cpu: 100m
  #   memory: 100Mi
  extraContainers: []
  extraInitContainers: []
  # Hook configuration
  hooks:
    argocd:
      enabled: true
    helm:
      enabled: false
# Log level for the litellm proxy (sets LITELLM_LOG in the deployment env).
# Rendered as a direct `env:` entry, which in Kubernetes takes precedence over
# any `envFrom:` source. If you currently source LITELLM_LOG from an
# environmentSecret or environmentConfigMap, set `logLevel: ""` here to
# disable injection — otherwise this value silently overrides your secret /
# configmap entry.
#
# Setting LITELLM_LOG inside `envVars:` below also wins: the template skips
# this injection entirely when envVars already defines LITELLM_LOG.
logLevel: INFO
# Additional environment variables to be added to the deployment as a map of key-value pairs
# Example entry (nested under envVars):
envVars: {}
#   USE_DDTRACE: "true"
# Additional environment variables to be added to the deployment as a list of k8s env vars
# The default is an empty list (not a map) so its type matches the documented
# shape and the example below.
extraEnvVars: []
# - name: EXTRA_ENV_VAR
#   value: EXTRA_ENV_VAR_VALUE
# if you want to override the container command, you can do so here
# (a Kubernetes container `command` is a list of strings, so the default is
# an empty list)
command: []
# if you want to override the container args, you can do so here
# (a Kubernetes container `args` is a list of strings, so the default is an
# empty list)
args: []
# Additional Kubernetes resources to deploy with litellm
extraResources: []
# - apiVersion: v1
#   kind: ConfigMap
#   metadata:
#     name: my-extra-config
#   data:
#     foo: bar
# Pod Disruption Budget
pdb:
  enabled: false
  # Set exactly one of the following. If both are set, minAvailable takes precedence.
  minAvailable: null  # e.g. "50%" or 1
  maxUnavailable: null  # e.g. 1 or "20%"
  annotations: {}
  labels: {}
# ServiceMonitor settings (disabled by default).
serviceMonitor:
  enabled: false
  labels: {}
  # test: test
  annotations: {}
  # kubernetes.io/test: test
  interval: 15s
  scrapeTimeout: 10s
  relabelings: []
  # - targetLabel: __meta_kubernetes_pod_node_name
  #   replacement: $1
  #   action: replace
  namespaceSelector:
    matchNames: []
    # - test-namespace