# Default values for litellm.
# This is a YAML-formatted file.
# Declare variables to be passed into your templates.
|
|
|
|
# Number of replicas (consumed by the chart templates, which are not shown here).
replicaCount: 1
# Optional worker count; commented out, so it is unset by default.
# numWorkers: 2
|
|
|
|
image:
  # "ghcr.io/berriai/litellm-database" is the image optimized for use with a
  # database.
  repository: ghcr.io/berriai/litellm-database
  pullPolicy: Always
  # Tag override; when left empty, the chart appVersion is the default.
  # tag: "latest"
  tag: ""
|
|
|
|
# Image pull secrets and chart naming overrides.
imagePullSecrets: []
nameOverride: "litellm"
fullnameOverride: ""
|
|
|
|
serviceAccount:
  # Whether a service account should be created.
  create: false
  # Whether the ServiceAccount's API credentials are mounted automatically.
  automount: true
  # Extra annotations for the service account.
  annotations: {}
  # Name of the service account to use. When empty and `create` is true, a
  # name is generated using the fullname template.
  name: ""
|
|
|
|
# Metadata for the litellm deployment.
deploymentAnnotations: {}
deploymentLabels: {}
deploymentMinReadySeconds: 0

# Metadata for the litellm pods.
podAnnotations: {}
podLabels: {}
|
|
|
|
# -- Deployment strategy configuration.
# Example:
#   strategy:
#     type: RollingUpdate
#     rollingUpdate:
#       maxUnavailable: 0
#       maxSurge: 1
strategy: {}
|
|
|
|
terminationGracePeriodSeconds: 90

# Empty by default. Example entry:
#   - maxSkew: 1
#     topologyKey: kubernetes.io/hostname
#     whenUnsatisfiable: DoNotSchedule
#     labelSelector:
#       matchLabels:
#         app: litellm
topologySpreadConstraints: []
|
|
|
|
# The litellm docker image currently requires write access to the filesystem
# on startup so that prisma can install some dependencies.
podSecurityContext: {}

# Example securityContext values:
#   capabilities:
#     drop:
#       - ALL
#   readOnlyRootFilesystem: false
#   runAsNonRoot: true
#   runAsUser: 1000
securityContext: {}
|
|
|
|
# Names of Kubernetes Secret objects to export to the LiteLLM proxy pod as
# environment variables. The exported values can be referenced from the
# configuration file (or the "litellm" ConfigMap) as
# `os.environ/<Env Var Name>`.
# Example:
#   - litellm-env-secret
environmentSecrets: []
|
|
|
|
# Names of Kubernetes ConfigMap objects to export to the LiteLLM proxy pod as
# environment variables. Their key/value pairs can be referenced from the
# configuration file (or the "litellm" ConfigMap) as
# `os.environ/<Env Var Name>`.
# Example:
#   - litellm-env-configmap
environmentConfigMaps: []
|
|
|
|
service:
  type: ClusterIP
  port: 4000
  # When the service type is `LoadBalancer`, a loadBalancerClass may
  # optionally be specified:
  # loadBalancerClass: tailscale
|
|
|
|
# Liveness, readiness and startup probes for the LiteLLM gateway container.
livenessProbe:
  path: /health/liveliness
  initialDelaySeconds: 0
  periodSeconds: 15
  timeoutSeconds: 5
  successThreshold: 1
  failureThreshold: 5

readinessProbe:
  path: /health/readiness
  initialDelaySeconds: 0
  periodSeconds: 10
  timeoutSeconds: 5
  successThreshold: 1
  failureThreshold: 3

startupProbe:
  path: /health/readiness
  initialDelaySeconds: 0
  periodSeconds: 10
  timeoutSeconds: 5
  successThreshold: 1
  failureThreshold: 30
|
|
|
|
ingress:
  enabled: false
  className: "nginx"
  labels: {}
  # Example annotations:
  #   kubernetes.io/ingress.class: nginx
  #   kubernetes.io/tls-acme: "true"
  annotations: {}
  hosts:
    - host: api.example.local
      paths:
        - path: /
          pathType: ImplementationSpecific
  # Example TLS entry:
  #   - secretName: chart-example-tls
  #     hosts:
  #       - chart-example.local
  tls: []
|
|
|
|
# masterkey: changeit

# Name of the secret to use for the master key. When empty, a new master key
# is autogenerated.
masterkeySecretName: ""

# Secret key to use for the master key. When empty, the default key is used.
masterkeySecretKey: ""
|
|
|
|
proxyConfigMap:
  # When true, a new ConfigMap is created.
  create: true
  # To use an existing ConfigMap instead, set create to false and provide its
  # name:
  # create: false
  # name: ""
  # key: "config.yaml"
|
|
|
|
# Everything under proxy_config is rendered as the proxy's config.yaml.
# Examples: https://github.com/BerriAI/litellm/tree/main/litellm/proxy/example_config_yaml
# Reference: https://docs.litellm.ai/docs/proxy/configs
proxy_config:
  model_list:
    # The proxy needs at least one model in order to start.
    - model_name: gpt-3.5-turbo
      litellm_params:
        model: gpt-3.5-turbo
        api_key: eXaMpLeOnLy
    - model_name: fake-openai-endpoint
      litellm_params:
        model: openai/fake
        api_key: fake-key
        api_base: https://exampleopenaiendpoint-production.up.railway.app/
  general_settings:
    master_key: os.environ/PROXY_MASTER_KEY
|
|
|
|
# Default resources are intentionally not specified; this is left as a
# conscious choice for the user. It also increases the chances that the chart
# runs on environments with little resources, such as Minikube. To specify
# resources, replace the `{}` with a block such as:
#   limits:
#     cpu: 100m
#     memory: 128Mi
#   requests:
#     cpu: 100m
#     memory: 128Mi
resources: {}
|
|
|
|
autoscaling:
  enabled: false
  minReplicas: 1
  maxReplicas: 100
  targetCPUUtilizationPercentage: 80
  # Optional settings, unset by default:
  # targetMemoryUtilizationPercentage: 80
  # behavior: {}
|
|
|
|
# KEDA-based autoscaling; mutually exclusive with hpa.
keda:
  enabled: false
  minReplicas: 1
  maxReplicas: 100
  pollingInterval: 30
  cooldownPeriod: 300
  # fallback:
  #   failureThreshold: 3
  #   replicas: 11
  restoreToOriginalReplicaCount: false
  scaledObject:
    annotations: {}
  # Example trigger:
  #   - type: prometheus
  #     metadata:
  #       serverAddress: http://<prometheus-host>:9090
  #       metricName: http_requests_total
  #       threshold: '100'
  #       query: sum(rate(http_requests_total{deployment="my-deployment"}[2m]))
  triggers: []
  # Example behavior:
  #   scaleDown:
  #     stabilizationWindowSeconds: 300
  #     policies:
  #       - type: Pods
  #         value: 1
  #         periodSeconds: 180
  #   scaleUp:
  #     stabilizationWindowSeconds: 300
  #     policies:
  #       - type: Pods
  #         value: 2
  #         periodSeconds: 60
  behavior: {}
|
|
|
|
# Extra volumes for the output Deployment definition.
# Example:
#   - name: foo
#     secret:
#       secretName: mysecret
#       optional: false
volumes: []

# Extra volumeMounts for the output Deployment definition.
# Example:
#   - name: foo
#     mountPath: "/etc/foo"
#     readOnly: true
volumeMounts: []
|
|
|
|
# Pod scheduling settings; all empty by default.
nodeSelector: {}
tolerations: []
affinity: {}
|
|
|
|
db:
  # Set to true to use an existing postgres server/cluster.
  useExisting: false

  # Connection details for the existing postgres server/cluster.
  endpoint: localhost
  database: litellm
  url: postgresql://$(DATABASE_USERNAME):$(DATABASE_PASSWORD)@$(DATABASE_HOST)/$(DATABASE_NAME)
  secret:
    name: postgres
    usernameKey: username
    passwordKey: password
    # Optional: when set, DATABASE_HOST is sourced from this secret key
    # instead of db.endpoint.
    endpointKey: ""
    # Optional: when set, DATABASE_URL_READ_REPLICA is sourced from this
    # secret key instead of db.readReplicaUrl. Prefer this over the plain
    # value: read-replica URLs typically embed credentials, and a value
    # written to db.readReplicaUrl ends up visible in the rendered pod spec
    # and the Helm release secret.
    readReplicaUrlKey: ""

  # Optional read-replica routing. When set, the proxy sends read-only
  # queries (find_*, count, group_by, query_raw/_first) to this URL, while
  # writes continue to go to db.url. This is useful for Aurora-style clusters
  # with separate reader/writer endpoints. Leave empty to keep single-DB
  # behavior. When IAM_TOKEN_DB_AUTH is enabled, the reader URL is
  # auto-refreshed alongside the writer (host/port/user/db are parsed from
  # this URL once at startup; only the IAM token rotates).
  #
  # If the URL embeds credentials, prefer db.secret.readReplicaUrlKey over
  # this field: the plain value is rendered into the pod spec and the Helm
  # release secret. This field is intended for credential-less URLs only
  # (e.g. when IAM_TOKEN_DB_AUTH supplies the token at runtime).
  readReplicaUrl: ""

  # Deploy an instance of a Stackgres cluster using the Stackgres Helm chart.
  # The Stackgres Operator must already be installed within the target
  # Kubernetes cluster.
  # TODO: Stackgres deployment currently unsupported
  useStackgresOperator: false

  # Create a single-node, standalone postgres instance using the Postgres
  # Helm chart. See the "postgresql" top-level key for additional
  # configuration.
  deployStandalone: true
|
|
|
|
# Lifecycle hooks for the LiteLLM container.
#
# Prefer the native /health/drain preStop hook over a fixed `sleep`. It marks
# the pod NotReady and blocks only until in-flight requests actually finish
# (bounded by GRACEFUL_SHUTDOWN_TIMEOUT, default 30s), instead of always
# waiting the worst-case duration. The drain runs once (the preStop hook and
# the SIGTERM handler share it), so set terminationGracePeriodSeconds a few
# seconds above GRACEFUL_SHUTDOWN_TIMEOUT to leave room for teardown before
# SIGKILL.
#
# /health/drain is off by default; enable it with
# general_settings.enable_drain_endpoint: true. The kubelet calls preStop
# hooks without proxy credentials, so when the health port is reachable from
# other pods (the common case) also set general_settings.drain_endpoint_token
# (or the DRAIN_ENDPOINT_TOKEN env var) and send the same value in the
# X-Drain-Token header from the hook. Calls with a missing or wrong token get
# a 401 and have no side effect.
#
# Example:
#   lifecycle:
#     preStop:
#       httpGet:
#         path: /health/drain
#         port: 4000
#         httpHeaders:
#           - name: X-Drain-Token
#             value: <same value as drain_endpoint_token>
lifecycle: {}
|
|
|
|
# Settings for the Bitnami postgresql chart. Used only when
# db.deployStandalone is true; ignored otherwise.
postgresql:
  architecture: standalone
  auth:
    username: litellm
    database: litellm

    # These placeholder passwords should be overridden on the helm command
    # line with
    # `--set postgresql.auth.postgres-password=<some good password>,postgresql.auth.password=<some good password>`
    password: NoTaGrEaTpAsSwOrD
    postgres-password: NoTaGrEaTpAsSwOrD

    # This chart (litellm-helm) creates a secret with the credentials that
    # the new Postgres instance should use.
    # existingSecret: ""
    # secretKeys:
    #   userPasswordKey: password
|
|
|
|
# Requires `cache: true` in the config file. Either enable this, or (to use an
# existing redis instance) pass a secret for REDIS_HOST, REDIS_PORT,
# REDIS_PASSWORD or REDIS_URL together with `cache: true`.
redis:
  enabled: false
  architecture: standalone
|
|
|
|
# Settings for the Prisma migration job.
migrationJob:
  # Enable or disable the schema migration Job.
  enabled: true
  # Number of retries for the Job in case of failure.
  retries: 3
  # Backoff limit for Job restarts.
  backoffLimit: 4
  # Skip schema migrations for specific environments. When True, the job will
  # exit with code 0.
  disableSchemaUpdate: false
  # Optional service account for the migration job.
  # Only used when migrationJob.hooks.helm.enabled=true and
  # serviceAccount.create=true. In that case, pre-install/pre-upgrade hooks
  # run before normal resources, so this defaults to "default".
  serviceAccountName: ""
  annotations: {}
  ttlSecondsAfterFinished: 120
  # Example:
  #   requests:
  #     cpu: 100m
  #     memory: 100Mi
  resources: {}
  extraContainers: []
  extraInitContainers: []

  # Hook configuration.
  hooks:
    argocd:
      enabled: true
    helm:
      enabled: false
|
|
|
|
# Log level for the litellm proxy; sets LITELLM_LOG in the deployment env.
# It is rendered as a direct `env:` entry, which in Kubernetes takes
# precedence over any `envFrom:` source. If LITELLM_LOG currently comes from
# an environmentSecret or environmentConfigMap, set `logLevel: ""` here to
# disable the injection; otherwise this value silently overrides the secret /
# configmap entry.
#
# Defining LITELLM_LOG inside `envVars:` also wins: the template skips this
# injection entirely when envVars already defines LITELLM_LOG.
logLevel: INFO
|
|
|
|
# Additional environment variables to be added to the deployment, as a map of
# key-value pairs.
# Example:
#   USE_DDTRACE: "true"
envVars: {}

# Additional environment variables to be added to the deployment, as a list of
# k8s env vars. The default is an empty list (not an empty map) so that it has
# the same type as the value users supply; overriding a map default with a
# list makes Helm emit a "cannot overwrite table with non table" warning.
# Example:
#   - name: EXTRA_ENV_VAR
#     value: EXTRA_ENV_VAR_VALUE
extraEnvVars: []

# Override for the container command. Kubernetes expects a list of strings
# here, hence the empty-list default.
# NOTE(review): an empty list is assumed to be treated the same as the
# previous empty map (i.e. "not set") by the templates - confirm against the
# deployment template.
command: []

# Override for the container args. Same typing rationale as `command`.
args: []
|
|
# Extra Kubernetes resources to deploy alongside litellm.
# Example:
#   - apiVersion: v1
#     kind: ConfigMap
#     metadata:
#       name: my-extra-config
#     data:
#       foo: bar
extraResources: []
|
|
# Pod Disruption Budget.
pdb:
  enabled: false
  # Set exactly one of minAvailable / maxUnavailable. If both are set,
  # minAvailable takes precedence.
  # minAvailable example values: "50%" or 1
  minAvailable: null
  # maxUnavailable example values: 1 or "20%"
  maxUnavailable: null
  annotations: {}
  labels: {}
|
|
|
|
serviceMonitor:
  enabled: false
  # Example label:
  #   test: test
  labels: {}
  # Example annotation:
  #   kubernetes.io/test: test
  annotations: {}
  interval: 15s
  scrapeTimeout: 10s
  # Example relabeling:
  #   - targetLabel: __meta_kubernetes_pod_node_name
  #     replacement: $1
  #     action: replace
  relabelings: []
  namespaceSelector:
    # Example:
    #   - test-namespace
    matchNames: []
|