# litellm/deploy/charts/litellm-helm/values.yaml

# Default values for litellm.
# This is a YAML-formatted file.
# Declare variables to be passed into your templates.

replicaCount: 1
# numWorkers: 2

image:
  # Use "ghcr.io/berriai/litellm-database" for optimized image with database
  repository: ghcr.io/berriai/litellm-database
  # NOTE: with `Always` the kubelet queries the registry every time it starts
  # a container; `IfNotPresent` avoids that when `tag` pins a fixed version.
  pullPolicy: Always
  # Overrides the image tag whose default is the chart appVersion.
  # Leave empty ("") to keep that default.
  # Example: tag: "latest"
  tag: ""

imagePullSecrets: []
nameOverride: "litellm"
fullnameOverride: ""

serviceAccount:
  # Specifies whether a service account should be created
  create: false
  # Automatically mount a ServiceAccount's API credentials?
  automount: true
  # Annotations to add to the service account
  annotations: {}
  # The name of the service account to use.
  # If not set and create is true, a name is generated using the fullname template
  name: ""

# Annotations and labels for the litellm deployment
deploymentAnnotations: {}
deploymentLabels: {}
# NOTE(review): presumably rendered as the Deployment's minReadySeconds —
# confirm against the deployment template.
deploymentMinReadySeconds: 0

# Annotations and labels for litellm pods
podAnnotations: {}
podLabels: {}

# -- Deployment strategy configuration
# Example:
#   type: RollingUpdate
#   rollingUpdate:
#     maxUnavailable: 0
#     maxSurge: 1
strategy: {}

terminationGracePeriodSeconds: 90

# Example:
#   - maxSkew: 1
#     topologyKey: kubernetes.io/hostname
#     whenUnsatisfiable: DoNotSchedule
#     labelSelector:
#       matchLabels:
#         app: litellm
topologySpreadConstraints: []

# At the time of writing, the litellm docker image needs write access to the
# filesystem on startup so that prisma can install some dependencies.
podSecurityContext: {}

# Example securityContext:
#   capabilities:
#     drop:
#       - ALL
#   readOnlyRootFilesystem: false
#   runAsNonRoot: true
#   runAsUser: 1000
securityContext: {}

# Names of Kubernetes Secret objects that will be exported to the LiteLLM
# proxy pod as environment variables. These secrets can then be referenced in
# the configuration file (or "litellm" ConfigMap) with
# `os.environ/<Env Var Name>`.
# Example:
#   - litellm-env-secret
environmentSecrets: []

# Names of Kubernetes ConfigMap objects that will be exported to the LiteLLM
# proxy pod as environment variables. The ConfigMap kv-pairs can then be
# referenced in the configuration file (or "litellm" ConfigMap) with
# `os.environ/<Env Var Name>`.
# Example:
#   - litellm-env-configmap
environmentConfigMaps: []

service:
  type: ClusterIP
  port: 4000
  # If service type is `LoadBalancer` you can
  # optionally specify loadBalancerClass
  # loadBalancerClass: tailscale

# Probes for LiteLLM gateway container
# NOTE(review): the liveness path is spelled `liveliness`; presumably this
# matches the route served by the proxy — confirm before "correcting" it.
livenessProbe:
  path: /health/liveliness
  initialDelaySeconds: 0
  periodSeconds: 15
  timeoutSeconds: 5
  successThreshold: 1
  failureThreshold: 5

readinessProbe:
  path: /health/readiness
  initialDelaySeconds: 0
  periodSeconds: 10
  timeoutSeconds: 5
  successThreshold: 1
  failureThreshold: 3

# Assuming these values map 1:1 onto the Kubernetes probe fields, the
# container gets up to failureThreshold x periodSeconds = 30 x 10s = 300s to
# pass its first check before it is restarted.
startupProbe:
  path: /health/readiness
  initialDelaySeconds: 0
  periodSeconds: 10
  timeoutSeconds: 5
  successThreshold: 1
  failureThreshold: 30

ingress:
  enabled: false
  className: "nginx"
  labels: {}
  # Example annotations:
  #   kubernetes.io/ingress.class: nginx
  #   kubernetes.io/tls-acme: "true"
  annotations: {}
  hosts:
    - host: api.example.local
      paths:
        - path: /
          pathType: ImplementationSpecific
  # Example TLS entry:
  #   - secretName: chart-example-tls
  #     hosts:
  #       - chart-example.local
  tls: []

# masterkey: changeit

# If set, use this secret for the master key; otherwise, autogenerate a new one.
masterkeySecretName: ""

# If set, use this key of the secret for the master key; otherwise, use the
# default key.
masterkeySecretKey: ""

proxyConfigMap:
  # When true, creates a new ConfigMap.
  create: true
  # If create is false and name is set, the existing ConfigMap is used.
  # Example:
  #   create: false
  #   name: ""
  #   key: "config.yaml"

# The elements within proxy_config are rendered as config.yaml for the proxy
#  Examples: https://github.com/BerriAI/litellm/tree/main/litellm/proxy/example_config_yaml
#  Reference: https://docs.litellm.ai/docs/proxy/configs
proxy_config:
  model_list:
    # At least one model must exist for the proxy to start.
    # The api_key values below are example placeholders, not working
    # credentials; replace them, e.g. with an `os.environ/<Env Var Name>`
    # reference to a variable exported via environmentSecrets.
    - model_name: gpt-3.5-turbo
      litellm_params:
        model: gpt-3.5-turbo
        api_key: eXaMpLeOnLy
    - model_name: fake-openai-endpoint
      litellm_params:
        model: openai/fake
        api_key: fake-key
        api_base: https://exampleopenaiendpoint-production.up.railway.app/
  general_settings:
    # NOTE(review): PROXY_MASTER_KEY is presumably injected by the chart from
    # the master key secret — confirm against the deployment template.
    master_key: os.environ/PROXY_MASTER_KEY

# We usually recommend not to specify default resources and to leave this as a
# conscious choice for the user. This also increases chances charts run on
# environments with little resources, such as Minikube. If you do want to
# specify resources, remove the curly braces after 'resources:' and add the
# settings you need, for example:
#   limits:
#     cpu: 100m
#     memory: 128Mi
#   requests:
#     cpu: 100m
#     memory: 128Mi
resources: {}

autoscaling:
  enabled: false
  minReplicas: 1
  maxReplicas: 100
  targetCPUUtilizationPercentage: 80
  # targetMemoryUtilizationPercentage: 80
  # behavior: {}

# Autoscaling with keda is mutually exclusive with hpa
keda:
  enabled: false
  minReplicas: 1
  maxReplicas: 100
  pollingInterval: 30
  cooldownPeriod: 300
  # Example fallback:
  #   fallback:
  #     failureThreshold: 3
  #     replicas: 11
  restoreToOriginalReplicaCount: false
  scaledObject:
    annotations: {}
  # Example trigger:
  #   - type: prometheus
  #     metadata:
  #       serverAddress: http://<prometheus-host>:9090
  #       metricName: http_requests_total
  #       threshold: '100'
  #       query: sum(rate(http_requests_total{deployment="my-deployment"}[2m]))
  triggers: []
  # Example behavior:
  #   scaleDown:
  #     stabilizationWindowSeconds: 300
  #     policies:
  #     - type: Pods
  #       value: 1
  #       periodSeconds: 180
  #   scaleUp:
  #     stabilizationWindowSeconds: 300
  #     policies:
  #     - type: Pods
  #       value: 2
  #       periodSeconds: 60
  behavior: {}

# Additional volumes on the output Deployment definition.
# Example:
#   - name: foo
#     secret:
#       secretName: mysecret
#       optional: false
volumes: []

# Additional volumeMounts on the output Deployment definition.
# Example:
#   - name: foo
#     mountPath: "/etc/foo"
#     readOnly: true
volumeMounts: []

nodeSelector: {}

tolerations: []

affinity: {}

db:
  # Use an existing postgres server/cluster
  useExisting: false

  # How to connect to the existing postgres server/cluster
  endpoint: localhost
  database: litellm
  # NOTE(review): `$(VAR)` is the Kubernetes syntax for referencing other
  # environment variables of the same container; the DATABASE_* variables are
  # presumably defined by the deployment template — confirm.
  url: postgresql://$(DATABASE_USERNAME):$(DATABASE_PASSWORD)@$(DATABASE_HOST)/$(DATABASE_NAME)
  secret:
    name: postgres
    usernameKey: username
    passwordKey: password
    # Optional: when set, DATABASE_HOST will be sourced from this secret key instead of db.endpoint
    endpointKey: ""
    # Optional: when set, DATABASE_URL_READ_REPLICA will be sourced from this
    # secret key instead of db.readReplicaUrl. Prefer this over the plain
    # value: read-replica URLs typically embed credentials, and a value
    # written to db.readReplicaUrl ends up visible in the rendered pod spec
    # and the Helm release secret.
    readReplicaUrlKey: ""

  # Optional read-replica routing. When set, the proxy sends read-only
  # queries (find_*, count, group_by, query_raw/_first) to this URL while
  # writes continue to go to db.url. Useful for Aurora-style clusters with
  # separate reader/writer endpoints. Leave empty to keep single-DB behavior.
  # When IAM_TOKEN_DB_AUTH is enabled, the reader URL is auto-refreshed
  # alongside the writer (host/port/user/db are parsed from this URL once
  # at startup; only the IAM token rotates).
  #
  # If the URL embeds credentials, prefer db.secret.readReplicaUrlKey over
  # this field — the plain value is rendered into the pod spec and the
  # Helm release secret. This field is intended for credential-less URLs
  # only (e.g. when IAM_TOKEN_DB_AUTH supplies the token at runtime).
  readReplicaUrl: ""

  # Use the Stackgres Helm chart to deploy an instance of a Stackgres cluster.
  #  The Stackgres Operator must already be installed within the target
  #  Kubernetes cluster.
  # TODO: Stackgres deployment currently unsupported
  useStackgresOperator: false

  # Use the Postgres Helm chart to create a single node, stand alone postgres
  #  instance.  See the "postgresql" top level key for additional configuration.
  deployStandalone: true

# Lifecycle hooks for the LiteLLM container
#
# Prefer the native /health/drain preStop hook over a fixed `sleep`: it marks
# the pod NotReady and blocks only until in-flight requests actually finish
# (bounded by GRACEFUL_SHUTDOWN_TIMEOUT, default 30s), instead of always
# waiting the worst-case duration. The drain runs once (the preStop hook and
# the SIGTERM handler share it), so set terminationGracePeriodSeconds a few
# seconds above GRACEFUL_SHUTDOWN_TIMEOUT to leave room for teardown before
# SIGKILL.
#
# /health/drain is off by default; enable it with
# general_settings.enable_drain_endpoint: true. The kubelet calls preStop
# hooks without proxy credentials, so when the health port is reachable from
# other pods (the common case) also set
# general_settings.drain_endpoint_token (or the DRAIN_ENDPOINT_TOKEN env
# var) and send the same value on the X-Drain-Token header from the hook.
# Calls with a missing or wrong token get a 401 and have no side effect.
# Example:
# lifecycle:
#   preStop:
#     httpGet:
#       path: /health/drain
#       port: 4000
#       httpHeaders:
#         - name: X-Drain-Token
#           value: <same value as drain_endpoint_token>
lifecycle: {}

# Settings for Bitnami postgresql chart (if db.deployStandalone is true, ignored
#  otherwise)
postgresql:
  architecture: standalone
  auth:
    username: litellm
    database: litellm

    # SECURITY: the two passwords below are publicly known placeholders.
    # You should override these on the helm command line with
    #  `--set postgresql.auth.postgres-password=<some good password>,postgresql.auth.password=<some good password>`
    password: NoTaGrEaTpAsSwOrD
    postgres-password: NoTaGrEaTpAsSwOrD

    # A secret is created by this chart (litellm-helm) with the credentials that
    #  the new Postgres instance should use.
    # existingSecret: ""
    # secretKeys:
    #   userPasswordKey: password

# Requires `cache: true` in the config file.
# Either enable this, or pass a secret providing REDIS_HOST, REDIS_PORT,
# REDIS_PASSWORD or REDIS_URL (together with `cache: true`) to use an
# existing redis instance.
redis:
  enabled: false
  architecture: standalone

# Prisma migration job settings
migrationJob:
  # Enable or disable the schema migration Job
  enabled: true
  # Number of retries for the Job in case of failure
  retries: 3
  # Backoff limit for Job restarts
  backoffLimit: 4
  # Skip schema migrations for specific environments. When true, the job
  # will exit with code 0.
  disableSchemaUpdate: false
  # Optional service account for the migration job.
  # Only used when migrationJob.hooks.helm.enabled=true and serviceAccount.create=true.
  # In that case, pre-install/pre-upgrade hooks run before normal resources, so this defaults to "default".
  serviceAccountName: ""
  annotations: {}
  ttlSecondsAfterFinished: 120
  # Example:
  #   requests:
  #     cpu: 100m
  #     memory: 100Mi
  resources: {}
  extraContainers: []
  extraInitContainers: []

  # Hook configuration
  hooks:
    argocd:
      enabled: true
    helm:
      enabled: false

# Log level for the litellm proxy (sets LITELLM_LOG in the deployment env).
# Rendered as a direct `env:` entry, which in Kubernetes takes precedence over
# any `envFrom:` source. If you currently source LITELLM_LOG from an
# environmentSecret or environmentConfigMap, set `logLevel: ""` here to
# disable injection — otherwise this value silently overrides your secret /
# configmap entry.
#
# Setting LITELLM_LOG inside `envVars:` below also wins: the template skips
# this injection entirely when envVars already defines LITELLM_LOG.
logLevel: INFO

# Additional environment variables to be added to the deployment as a map of key-value pairs
# Example:
#   USE_DDTRACE: "true"
envVars: {}

# Additional environment variables to be added to the deployment as a list of k8s env vars
# (the default is an empty list so that it has the same type as user overrides)
# Example:
#   - name: EXTRA_ENV_VAR
#     value: EXTRA_ENV_VAR_VALUE
extraEnvVars: []

# if you want to override the container command, you can do so here
# (a list of strings, as in the Kubernetes container spec)
command: []
# if you want to override the container args, you can do so here
# (a list of strings, as in the Kubernetes container spec)
args: []

# Additional Kubernetes resources to deploy with litellm
# Example:
#   - apiVersion: v1
#     kind: ConfigMap
#     metadata:
#       name: my-extra-config
#     data:
#       foo: bar
extraResources: []

# Pod Disruption Budget
pdb:
  enabled: false
  # Set exactly one of the following. If both are set, minAvailable takes precedence.
  minAvailable: null # e.g. "50%" or 1
  maxUnavailable: null # e.g. 1 or "20%"
  annotations: {}
  labels: {}

serviceMonitor:
  enabled: false
  # Example label:
  #   test: test
  labels: {}
  # Example annotation:
  #   kubernetes.io/test: test
  annotations: {}
  interval: 15s
  scrapeTimeout: 10s
  # Example relabeling:
  #   - targetLabel: __meta_kubernetes_pod_node_name
  #     replacement: $1
  #     action: replace
  relabelings: []
  namespaceSelector:
    # Example:
    #   - test-namespace
    matchNames: []