Merge branch 'main' into fix-schema-drift

This commit is contained in:
ishaan-berri 2026-03-31 13:13:10 -07:00 committed by GitHub
commit a8e002dbf6
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
1196 changed files with 45533 additions and 17821 deletions

File diff suppressed because it is too large Load Diff

View File

@ -1,36 +0,0 @@
{
"permissions": {
"allow": [
"Bash(git show:*)",
"Bash(git worktree add:*)",
"Read(//Users/krrishdholakia/Documents/litellm/**)",
"Read(//Users/krrishdholakia/Documents/litellm-claude-code-guardrails/litellm/types/**)",
"Read(//Users/krrishdholakia/Documents/litellm-claude-code-guardrails/**)",
"Read(//Users/krrishdholakia/Documents/litellm-claude-code-guardrails/litellm/**)",
"Bash(python:*)",
"Bash(python -c \"\nimport sys; sys.path.insert\\(0, ''.''\\)\nfrom litellm.proxy.guardrails.guardrail_hooks.claude_code.guardrail import ClaudeCodeGuardrail, HOSTED_TOOL_PREFIXES\nprint\\(''HOSTED_TOOL_PREFIXES:'', HOSTED_TOOL_PREFIXES\\)\nprint\\(''ClaudeCodeGuardrail imported OK''\\)\n\")",
"Read(//Users/krrishdholakia/Documents/litellm-mcp-jwt-groups/litellm/proxy/**)",
"Read(//Users/krrishdholakia/Documents/litellm-mcp-jwt-groups/**)",
"Bash(poetry run pytest:*)",
"Bash(git add:*)",
"Bash(git commit:*)",
"Bash(poetry run python:*)",
"Bash(poetry run pip:*)",
"Bash(git reset:*)",
"Bash(git cherry-pick:*)",
"Bash(git checkout:*)",
"Read(//Users/krrishdholakia/Documents/litellm/litellm/proxy/guardrails/guardrail_hooks/**)",
"Read(//Users/krrishdholakia/Documents/**)",
"Bash(git -C /Users/krrishdholakia/Documents/litellm-mcp-user-permissions worktree list)",
"Bash(ls:*)"
],
"additionalDirectories": [
"/Users/krrishdholakia/Documents/litellm-mcp-group-plan/plan",
"/Users/krrishdholakia/Documents/litellm-claude-code-guardrails/litellm/proxy/guardrails/guardrail_hooks/claude_code",
"/Users/krrishdholakia/Documents/litellm-claude-code-guardrails/litellm/types",
"/Users/krrishdholakia/Documents/litellm-claude-code-guardrails",
"/Users/krrishdholakia/Documents/litellm-mcp-jwt-groups/litellm/proxy",
"/Users/krrishdholakia/Documents/litellm-mcp-jwt-groups/tests/test_litellm/proxy/auth"
]
}
}

View File

@ -1,7 +1,7 @@
blank_issues_enabled: true
contact_links:
- name: Schedule Demo
url: https://calendly.com/d/cx9p-5yf-2nm/litellm-introductions
url: https://enterprise.litellm.ai/demo
about: Speak directly with Krrish and Ishaan, the founders, to discuss issues, share feedback, or explore improvements for LiteLLM
- name: Discord
url: https://discord.com/invite/wuPM9dRgDw

View File

@ -41,32 +41,54 @@ runs:
using: composite
steps:
- name: Helm | Setup
uses: azure/setup-helm@v4
uses: azure/setup-helm@1a275c3b69536ee54be43f2070a358922e12c8d4 # v4.3.1
with:
version: v3.20.0
- name: Helm | Login
shell: bash
run: echo ${{ inputs.registry_password }} | helm registry login -u ${{ inputs.registry_username }} --password-stdin ${{ inputs.registry }}
env:
REGISTRY_PASSWORD: ${{ inputs.registry_password }}
REGISTRY_USERNAME: ${{ inputs.registry_username }}
REGISTRY: ${{ inputs.registry }}
run: echo "$REGISTRY_PASSWORD" | helm registry login -u "$REGISTRY_USERNAME" --password-stdin "$REGISTRY"
- name: Helm | Dependency
if: inputs.update_dependencies == 'true'
shell: bash
run: helm dependency update ${{ inputs.path == null && format('{0}/{1}', 'charts', inputs.name) || inputs.path }}
env:
CHART_PATH: ${{ inputs.path == null && format('{0}/{1}', 'charts', inputs.name) || inputs.path }}
run: helm dependency update "$CHART_PATH"
- name: Helm | Package
shell: bash
run: helm package ${{ inputs.path == null && format('{0}/{1}', 'charts', inputs.name) || inputs.path }} --version ${{ inputs.tag }} --app-version ${{ inputs.app_version }}
env:
CHART_PATH: ${{ inputs.path == null && format('{0}/{1}', 'charts', inputs.name) || inputs.path }}
TAG: ${{ inputs.tag }}
APP_VERSION: ${{ inputs.app_version }}
run: helm package "$CHART_PATH" --version "$TAG" --app-version "$APP_VERSION"
- name: Helm | Push
shell: bash
run: helm push ${{ inputs.name }}-${{ inputs.tag }}.tgz oci://${{ inputs.registry }}/${{ inputs.repository }}
env:
NAME: ${{ inputs.name }}
TAG: ${{ inputs.tag }}
REGISTRY: ${{ inputs.registry }}
REPOSITORY: ${{ inputs.repository }}
run: helm push "${NAME}-${TAG}.tgz" "oci://${REGISTRY}/${REPOSITORY}"
- name: Helm | Logout
shell: bash
run: helm registry logout ${{ inputs.registry }}
env:
REGISTRY: ${{ inputs.registry }}
run: helm registry logout "$REGISTRY"
- name: Helm | Output
id: output
shell: bash
run: echo "image=${{ inputs.registry }}/${{ inputs.repository }}/${{ inputs.name }}:${{ inputs.tag }}" >> $GITHUB_OUTPUT
env:
REGISTRY: ${{ inputs.registry }}
REPOSITORY: ${{ inputs.repository }}
NAME: ${{ inputs.name }}
TAG: ${{ inputs.tag }}
run: echo "image=${REGISTRY}/${REPOSITORY}/${NAME}:${TAG}" >> $GITHUB_OUTPUT

View File

@ -1,22 +1,21 @@
name: "LiteLLM CodeQL config"
# Use security-extended suite instead of security-and-quality to avoid
# result sets > 2 GiB on this codebase that cause fatal OOM failures.
queries:
- uses: security-extended
- uses: security-and-quality
# These two queries are security queries included in security-extended that
# individually produce result sets > 2 GiB on this codebase, causing fatal
# OOM failures. Exclude them as a safety net until CI confirms they no longer
# OOM; drop these exclusions in a follow-up once verified.
# Known OOM queries on large Python codebases:
# CodeQL builds a full data flow graph in memory. These two queries trace
# sensitive data through every log call / regex pattern, causing combinatorial
# path explosion on codebases with extensive logging like LiteLLM (>2 GiB
# result sets). This is a known CodeQL scaling limitation, not a code issue.
# Re-test periodically as CodeQL improves or the codebase refactors logging.
query-filters:
- exclude:
id: py/clear-text-logging-sensitive-data # CWE-312 — > 2 GiB result set
id: py/clear-text-logging-sensitive-data # CWE-312
- exclude:
id: py/polynomial-redos # CWE-730 — > 2 GiB result set
id: py/polynomial-redos # CWE-730
paths-ignore:
- tests
- docs
- "**/*.md"
- litellm/proxy/_experimental/out

View File

@ -4,6 +4,9 @@ updates:
directory: "/"
schedule:
interval: "daily"
cooldown:
default-days: 7
semver-major-days: 14
groups:
github-actions:
patterns:

96
.github/workflows/_test-unit-base.yml vendored Normal file
View File

@ -0,0 +1,96 @@
# Reusable workflow: installs the project with Poetry, generates the Prisma
# client, and runs pytest (pytest-xdist + reruns) on the caller-supplied path(s).
name: _Unit Test Base (Reusable)

on:
  workflow_call:
    inputs:
      test-path:
        description: "Pytest path(s) to run"
        required: true
        type: string
      workers:
        description: "Number of pytest-xdist workers"
        required: false
        type: number
        default: 2
      reruns:
        description: "Number of reruns for flaky tests"
        required: false
        type: number
        default: 2
      timeout-minutes:
        description: "Job timeout in minutes"
        required: false
        type: number
        default: 20
      max-failures:
        description: "Stop after this many failures"
        required: false
        type: number
        default: 10

permissions:
  contents: read

jobs:
  run:
    name: Run tests
    runs-on: ubuntu-latest
    timeout-minutes: ${{ inputs.timeout-minutes }}
    steps:
      - uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955  # v4.3.0
        with:
          persist-credentials: false

      - name: Set up Python
        # The id exposes the resolved interpreter version to the cache key below.
        id: setup-python
        uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065  # v5.6.0
        with:
          python-version: "3.12"

      - name: Install Poetry
        run: pip install 'poetry==2.3.2'

      - name: Cache Poetry dependencies
        uses: actions/cache@0057852bfaa89a56745cba8c7296529d2fc39830  # v4.3.0
        with:
          path: |
            ~/.cache/pypoetry
            ~/.cache/pip
            .venv
          # .venv is bound to the interpreter that created it, so the exact
          # Python version is part of the key: a runner-image patch bump must
          # not restore a virtualenv built for a different interpreter.
          key: ${{ runner.os }}-py${{ steps.setup-python.outputs.python-version }}-poetry-${{ hashFiles('poetry.lock') }}
          restore-keys: |
            ${{ runner.os }}-py${{ steps.setup-python.outputs.python-version }}-poetry-

      - name: Install dependencies
        run: |
          poetry config virtualenvs.in-project true
          poetry install --with dev,proxy-dev --extras "proxy semantic-router"
          poetry run pip install google-genai==1.22.0 \
            google-cloud-aiplatform==1.115.0 fastapi-offline==1.7.3 python-multipart==0.0.22 openapi-core==0.23.0

      - name: Setup litellm-enterprise
        run: |
          poetry run pip install --force-reinstall --no-deps -e enterprise/

      - name: Generate Prisma client
        env:
          PRISMA_BINARY_CACHE_DIR: ${{ runner.temp }}/prisma-cache
        run: |
          poetry run pip install nodejs-wheel-binaries==24.13.1
          poetry run prisma generate --schema litellm/proxy/schema.prisma

      - name: Run tests
        # Inputs are passed through env rather than interpolated into the
        # script, so their values are never parsed as shell syntax.
        env:
          TEST_PATH: ${{ inputs.test-path }}
          MAX_FAILURES: ${{ inputs.max-failures }}
          WORKERS: ${{ inputs.workers }}
          RERUNS: ${{ inputs.reruns }}
        run: |
          # TEST_PATH is deliberately unquoted so that several space-separated
          # paths split into separate pytest arguments; ":?" aborts if it is empty.
          poetry run pytest ${TEST_PATH:?} \
            --tb=short -vv \
            --maxfail="${MAX_FAILURES}" \
            -n "${WORKERS}" \
            --reruns "${RERUNS}" \
            --reruns-delay 1 \
            --dist=loadscope \
            --durations=20

View File

@ -0,0 +1,164 @@
# Reusable workflow: installs the project with Poetry and runs pytest,
# optionally exposing Redis credentials and a Postgres database to the tests.
name: _Unit Test Services Base (Reusable)

on:
  workflow_call:
    inputs:
      test-path:
        description: "Pytest path(s) to run"
        required: true
        type: string
      workers:
        description: "Number of pytest-xdist workers (0 = no parallelism)"
        required: false
        type: number
        default: 2
      reruns:
        description: "Number of reruns for flaky tests"
        required: false
        type: number
        default: 2
      timeout-minutes:
        description: "Job timeout in minutes"
        required: false
        type: number
        default: 20
      max-failures:
        description: "Stop after this many failures"
        required: false
        type: number
        default: 10
      enable-redis:
        description: "Pass Redis Cloud credentials to tests via REDIS_HOST/PORT/PASSWORD env vars"
        required: false
        type: boolean
        default: false
      enable-postgres:
        description: "Start a local Postgres service container and run Prisma migrations"
        required: false
        type: boolean
        default: false
    secrets:
      REDIS_HOST:
        required: false
      REDIS_PORT:
        required: false
      REDIS_PASSWORD:
        required: false
      DATABASE_URL:
        required: false
      POSTGRES_USER:
        required: false
      POSTGRES_PASSWORD:
        required: false

permissions:
  contents: read

jobs:
  run:
    name: Run tests
    runs-on: ubuntu-latest
    timeout-minutes: ${{ inputs.timeout-minutes }}
    # Environment is derived from the enable-* flags, not caller-controllable.
    # This prevents callers from passing arbitrary environment names to bypass secret scoping.
    # Note: Postgres service container always starts (GHA limitation), so any Redis job
    # also needs Postgres secrets → uses integration-redis-postgres, not integration-redis.
    environment: >-
      ${{
        inputs.enable-redis && 'integration-redis-postgres' ||
        inputs.enable-postgres && 'integration-postgres' ||
        ''
      }}
    services:
      # NOTE(review): this service is configured from POSTGRES_USER /
      # POSTGRES_PASSWORD even when both enable-* flags are false; confirm those
      # secrets resolve to usable values for such callers.
      postgres:
        image: postgres@sha256:705a5d5b5836f3fcba0d02c4d281e6a7dd9ed2dd4078640f08a1e1e9896e097d  # postgres:14
        env:
          POSTGRES_USER: ${{ secrets.POSTGRES_USER }}
          POSTGRES_PASSWORD: ${{ secrets.POSTGRES_PASSWORD }}
          POSTGRES_DB: litellm_test
        ports:
          - 5432:5432
        options: >-
          --health-cmd "pg_isready"
          --health-interval 10s
          --health-timeout 5s
          --health-retries 5
    steps:
      - uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955  # v4.3.0
        with:
          persist-credentials: false

      - name: Set up Python
        # The id exposes the resolved interpreter version to the cache key below.
        id: setup-python
        uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065  # v5.6.0
        with:
          python-version: "3.12"

      - name: Install Poetry
        run: pip install 'poetry==2.3.2'

      - name: Cache Poetry dependencies
        uses: actions/cache@0057852bfaa89a56745cba8c7296529d2fc39830  # v4.3.0
        with:
          path: |
            ~/.cache/pypoetry
            ~/.cache/pip
            .venv
          # .venv is bound to the interpreter that created it, so the exact
          # Python version is part of the key: a runner-image patch bump must
          # not restore a virtualenv built for a different interpreter.
          key: ${{ runner.os }}-py${{ steps.setup-python.outputs.python-version }}-poetry-services-${{ hashFiles('poetry.lock') }}
          restore-keys: |
            ${{ runner.os }}-py${{ steps.setup-python.outputs.python-version }}-poetry-services-

      - name: Install dependencies
        run: |
          poetry config virtualenvs.in-project true
          poetry install --with dev,proxy-dev --extras "proxy semantic-router"
          poetry run pip install google-genai==1.22.0 \
            google-cloud-aiplatform==1.115.0 fastapi-offline==1.7.3 python-multipart==0.0.22 openapi-core==0.23.0

      - name: Setup litellm-enterprise
        run: |
          poetry run pip install --force-reinstall --no-deps -e enterprise/

      - name: Generate Prisma client
        env:
          PRISMA_BINARY_CACHE_DIR: ${{ runner.temp }}/prisma-cache
        run: |
          poetry run pip install nodejs-wheel-binaries==24.13.1
          poetry run prisma generate --schema litellm/proxy/schema.prisma

      - name: Run Prisma migrations
        if: ${{ inputs.enable-postgres }}
        env:
          DATABASE_URL: ${{ secrets.DATABASE_URL }}
        run: |
          poetry run prisma db push --schema litellm/proxy/schema.prisma --accept-data-loss

      - name: Run tests
        # Service credentials are only exported when the matching enable-* flag
        # is set; otherwise the variable is the empty string.
        env:
          TEST_PATH: ${{ inputs.test-path }}
          MAX_FAILURES: ${{ inputs.max-failures }}
          WORKERS: ${{ inputs.workers }}
          RERUNS: ${{ inputs.reruns }}
          DATABASE_URL: ${{ inputs.enable-postgres && secrets.DATABASE_URL || '' }}
          REDIS_HOST: ${{ inputs.enable-redis && secrets.REDIS_HOST || '' }}
          REDIS_PORT: ${{ inputs.enable-redis && secrets.REDIS_PORT || '' }}
          REDIS_PASSWORD: ${{ inputs.enable-redis && secrets.REDIS_PASSWORD || '' }}
        run: |
          # TEST_PATH is deliberately unquoted so that several space-separated
          # paths split into separate pytest arguments; ":?" aborts if it is empty.
          if [ "${WORKERS}" = "0" ]; then
            # workers=0: run serially, without the xdist flags.
            poetry run pytest ${TEST_PATH:?} \
              --tb=short -vv \
              --maxfail="${MAX_FAILURES}" \
              --reruns "${RERUNS}" \
              --reruns-delay 1 \
              --durations=20
          else
            poetry run pytest ${TEST_PATH:?} \
              --tb=short -vv \
              --maxfail="${MAX_FAILURES}" \
              -n "${WORKERS}" \
              --reruns "${RERUNS}" \
              --reruns-delay 1 \
              --dist=loadscope \
              --durations=20
          fi

View File

@ -2,18 +2,24 @@ name: Updates model_prices_and_context_window.json and Create Pull Request
on:
schedule:
- cron: "0 0 * * 0" # Run every Sunday at midnight
- cron: "0 0 * * 0" # Run every Sunday at midnight
#- cron: "0 0 * * *" # Run daily at midnight
permissions:
contents: write
pull-requests: write
jobs:
auto_update_price_and_context_window:
if: github.repository == 'BerriAI/litellm'
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # v4.3.0
with:
persist-credentials: false
- name: Install Dependencies
run: |
pip install aiohttp
pip install 'aiohttp==3.13.3'
- name: Update JSON Data
run: |
python ".github/workflows/auto_update_price_and_context_window_file.py"
@ -26,4 +32,4 @@ jobs:
--head auto-update-price-and-context-window-$(date +'%Y-%m-%d') \
--base main
env:
GH_TOKEN: ${{ secrets.GH_TOKEN }}
GH_TOKEN: ${{ secrets.GH_TOKEN }}

58
.github/workflows/check-schema-sync.yml vendored Normal file
View File

@ -0,0 +1,58 @@
# Fails a pull request when a copy of schema.prisma is a symlink or no longer
# matches the root schema.prisma.
name: Check Schema Sync

on:
  pull_request:
    paths:
      - "schema.prisma"
      - "litellm/proxy/schema.prisma"
      - "litellm-proxy-extras/litellm_proxy_extras/schema.prisma"

permissions:
  contents: read

jobs:
  check-sync:
    name: Verify schema.prisma copies match root
    runs-on: ubuntu-latest
    timeout-minutes: 5
    steps:
      - name: Checkout PR
        uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955  # v4.3.0
        with:
          persist-credentials: false

      - name: Reject symlinked schema files
        run: |
          for schema in \
            schema.prisma \
            litellm/proxy/schema.prisma \
            litellm-proxy-extras/litellm_proxy_extras/schema.prisma
          do
            # Regular files pass; the first symlink found fails the step.
            [ -L "$schema" ] || continue
            echo "::error file=$schema::$schema is a symlink, which is not allowed"
            exit 1
          done

      - name: Check all schemas match root
        run: |
          out_of_sync=0
          # Compare every copy so all drifted files are reported in one run.
          for copy in \
            litellm/proxy/schema.prisma \
            litellm-proxy-extras/litellm_proxy_extras/schema.prisma
          do
            if ! diff schema.prisma "$copy"; then
              echo "::error file=$copy::$copy differs from root schema.prisma"
              out_of_sync=1
            fi
          done

          if [ "$out_of_sync" -eq 0 ]; then
            echo "All schema copies are in sync with root."
            exit 0
          fi

          echo ""
          echo "Schema files are out of sync."
          echo "The root schema.prisma is the source of truth."
          echo ""
          echo "To fix, run from the repo root:"
          echo " cp schema.prisma litellm/proxy/schema.prisma"
          echo " cp schema.prisma litellm-proxy-extras/litellm_proxy_extras/schema.prisma"
          exit 1

View File

@ -12,7 +12,7 @@ jobs:
contents: read
steps:
- name: Check for potential duplicates
uses: wow-actions/potential-duplicates@v1
uses: wow-actions/potential-duplicates@4d4ea0352e0383859279938e255179dd1dbb67b5 # v1.1.0
with:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
label: potential-duplicate
@ -30,13 +30,14 @@ jobs:
- name: Checkout close script
if: github.event.action == 'opened'
uses: actions/checkout@v4
uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # v4.3.0
with:
sparse-checkout: .github/scripts
persist-credentials: false
- name: Set up Python
if: github.event.action == 'opened'
uses: actions/setup-python@v5
uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5.6.0
with:
python-version: "3.11"

View File

@ -6,8 +6,8 @@ on:
pull_request:
branches: [main]
schedule:
# Run weekly on Sundays at 04:00 UTC
- cron: "0 4 * * 0"
# Run daily at 04:00 UTC
- cron: "0 4 * * *"
concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
@ -15,6 +15,7 @@ concurrency:
jobs:
analyze:
if: github.event_name != 'schedule' || github.repository == 'BerriAI/litellm'
name: Analyze (${{ matrix.language }})
runs-on: ubuntu-latest
timeout-minutes: 30
@ -37,16 +38,18 @@ jobs:
steps:
- name: Checkout repository
uses: actions/checkout@v4
uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # v4.3.0
with:
persist-credentials: false
- name: Initialize CodeQL
uses: github/codeql-action/init@v3
uses: github/codeql-action/init@ebcb5b36ded6beda4ceefea6a8bc4cc885255bb3 # v3
with:
languages: ${{ matrix.language }}
build-mode: ${{ matrix.build-mode }}
config-file: ./.github/codeql/codeql-config.yml
- name: Perform CodeQL Analysis
uses: github/codeql-action/analyze@v3
uses: github/codeql-action/analyze@ebcb5b36ded6beda4ceefea6a8bc4cc885255bb3 # v3
with:
category: "/language:${{ matrix.language }}"

View File

@ -25,10 +25,12 @@ jobs:
timeout-minutes: 15
steps:
- uses: actions/checkout@v4
- uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # v4.3.0
with:
persist-credentials: false
- name: Set up Python
uses: actions/setup-python@v5
uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5.6.0
with:
python-version: "3.12"
@ -38,7 +40,7 @@ jobs:
pip install pytest pytest-codspeed==4.3.0
- name: Run benchmarks
uses: CodSpeedHQ/action@v4
uses: CodSpeedHQ/action@1c8ae4843586d3ba879736b7f6b7b0c990757fab # v4.12.1
with:
mode: simulation
run: pytest tests/benchmarks/ --codspeed

View File

@ -2,18 +2,22 @@ name: Create Daily Staging Branch
on:
schedule:
- cron: '0 0,12 * * *' # Runs every 12 hours at midnight and noon UTC
workflow_dispatch: # Allow manual trigger
- cron: "0 0,12 * * *" # Runs every 12 hours at midnight and noon UTC
workflow_dispatch: # Allow manual trigger
jobs:
create-staging-branch:
if: github.repository == 'BerriAI/litellm'
runs-on: ubuntu-latest
permissions:
contents: write
steps:
- name: Checkout repository
uses: actions/checkout@v3
uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # v4.3.0
with:
fetch-depth: 0
persist-credentials: false
- name: Create daily staging branch
env:
@ -43,13 +47,17 @@ jobs:
fi
create-internal-dev-branch:
if: github.repository == 'BerriAI/litellm'
runs-on: ubuntu-latest
permissions:
contents: write
steps:
- name: Checkout repository
uses: actions/checkout@v3
uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # v4.3.0
with:
fetch-depth: 0
persist-credentials: false
- name: Create internal dev branch
env:

View File

@ -1,444 +0,0 @@
# This workflow is triggered by an API call when there is a new PyPI release of LiteLLM.
# It is started manually via workflow_dispatch with three inputs: tag, release_type, and commit_hash.
name: Build, Publish LiteLLM Docker Image. New Release
on:
workflow_dispatch:
inputs:
tag:
description: "The tag version you want to build"
required: true
release_type:
description: "The release type you want to build. Can be 'latest', 'stable', 'dev', 'rc'"
type: string
default: "latest"
commit_hash:
description: "Commit hash"
required: true
# Defines three custom environment variables for the workflow: the container registry domain, the name of the Docker image this workflow builds, and the name of the Helm chart it publishes.
env:
REGISTRY: ghcr.io
IMAGE_NAME: ${{ github.repository }}
CHART_NAME: litellm-helm
# The jobs below print the inputs, push images to Docker Hub and to the registry above, run the observatory tests, publish the Helm chart, and create the GitHub release.
jobs:
# Print the commit hash, tag, and release type this run was dispatched with.
print:
  runs-on: ubuntu-latest
  steps:
    # Inputs are passed through env instead of being interpolated into the
    # script, so their contents are never parsed as shell syntax.
    - env:
        COMMIT_HASH: ${{ github.event.inputs.commit_hash }}
        TAG: ${{ github.event.inputs.tag }}
        RELEASE_TYPE: ${{ github.event.inputs.release_type }}
      run: |
        echo "Commit hash: ${COMMIT_HASH}"
        echo "Tag: ${TAG}"
        echo "Release type: ${RELEASE_TYPE}"
# Builds four images from the dispatched commit and pushes them to Docker Hub:
# litellm, litellm-database, litellm-spend_logs, and litellm-non_root.
# Each is tagged with the `tag` input, falling back to 'latest' when it is empty.
# Only runs in the BerriAI/litellm repository.
# NOTE(review): the actions in this job are referenced by mutable tags (@v3/@v4/@v5),
# whereas the other image jobs in this file pin commit SHAs.
docker-hub-deploy:
if: github.repository == 'BerriAI/litellm'
runs-on: ubuntu-latest
steps:
-
name: Checkout
uses: actions/checkout@v4
with:
ref: ${{ github.event.inputs.commit_hash }}
-
name: Set up QEMU
uses: docker/setup-qemu-action@v3
-
name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
-
name: Login to Docker Hub
uses: docker/login-action@v3
with:
username: ${{ secrets.DOCKERHUB_USERNAME }}
password: ${{ secrets.DOCKERHUB_TOKEN }}
# Main image: no `file` is given, so the action's default Dockerfile in the context is used.
-
name: Build and push
uses: docker/build-push-action@v5
with:
context: .
push: true
tags: litellm/litellm:${{ github.event.inputs.tag || 'latest' }}
-
name: Build and push litellm-database image
uses: docker/build-push-action@v5
with:
context: .
push: true
file: ./docker/Dockerfile.database
tags: litellm/litellm-database:${{ github.event.inputs.tag || 'latest' }}
-
name: Build and push litellm-spend-logs image
uses: docker/build-push-action@v5
with:
context: .
push: true
file: ./litellm-js/spend-logs/Dockerfile
tags: litellm/litellm-spend_logs:${{ github.event.inputs.tag || 'latest' }}
-
name: Build and push litellm-non_root image
uses: docker/build-push-action@v5
with:
context: .
push: true
file: ./docker/Dockerfile.non_root
tags: litellm/litellm-non_root:${{ github.event.inputs.tag || 'latest' }}
# Builds the main image at the dispatched commit and pushes it to the registry in env.REGISTRY
# under tags derived from the `tag` and `release_type` inputs.
build-and-push-image:
runs-on: ubuntu-latest
# Sets the permissions granted to the `GITHUB_TOKEN` for the actions in this job.
permissions:
contents: read
packages: write
steps:
- name: Checkout repository
uses: actions/checkout@v4
with:
ref: ${{ github.event.inputs.commit_hash }}
# Uses the `docker/login-action` action to log in to the container registry using the account and password that will publish the packages. Once published, the packages are scoped to the account defined here.
- name: Log in to the Container registry
uses: docker/login-action@65b78e6e13532edd9afa3aa52ac7964289d1a9c1
with:
registry: ${{ env.REGISTRY }}
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}
# This step uses [docker/metadata-action](https://github.com/docker/metadata-action#about) to extract tags and labels that will be applied to the specified image. The `id` "meta" allows the output of this step to be referenced in a subsequent step. The `images` value provides the base name for the tags and labels.
- name: Extract metadata (tags, labels) for Docker
id: meta
uses: docker/metadata-action@9ec57ed1fcdbf14dcef7dfbe97b2010124a938b7
with:
images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
# Configure multi platform Docker builds
- name: Set up QEMU
uses: docker/setup-qemu-action@e0e4588fad221d38ee467c0bffd91115366dc0c5
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@edfb0fe6204400c56fbfd3feba3fe9ad1adfa345
# This step uses the `docker/build-push-action` action to build the image, based on your repository's `Dockerfile`. If the build succeeds, it pushes the image to GitHub Packages.
# It uses the `context` parameter to define the build's context as the set of files located in the specified path. For more information, see "[Usage](https://github.com/docker/build-push-action#usage)" in the README of the `docker/build-push-action` repository.
# It uses the `tags` and `labels` parameters to tag and label the image with the output from the "meta" step.
# The extra tags (main-<tag>, main-stable, <tag>) are only emitted for 'stable' / 'rc' release types; otherwise the expression yields an empty string.
# NOTE(review): the tag entries below mix trailing commas and bare newlines as separators; confirm the action parses the list as intended.
- name: Build and push Docker image
uses: docker/build-push-action@4976231911ebf5f32aad765192d35f942aa48cb8
with:
context: .
push: true
tags: |
${{ steps.meta.outputs.tags }}-${{ github.event.inputs.tag || 'latest' }},
${{ steps.meta.outputs.tags }}-${{ github.event.inputs.release_type }}
${{ (github.event.inputs.release_type == 'stable' || github.event.inputs.release_type == 'rc') && format('{0}/berriai/litellm:main-{1}', env.REGISTRY, github.event.inputs.tag) || '' }},
${{ github.event.inputs.release_type == 'stable' && format('{0}/berriai/litellm:main-stable', env.REGISTRY) || '' }},
${{ (github.event.inputs.release_type == 'stable' || github.event.inputs.release_type == 'rc') && format('{0}/berriai/litellm:{1}', env.REGISTRY, github.event.inputs.tag) || '' }},
labels: ${{ steps.meta.outputs.labels }}
platforms: local,linux/amd64,linux/arm64,linux/arm64/v8
# Builds the "-ee" image at the dispatched commit and pushes it to the registry in env.REGISTRY.
# NOTE(review): `file: Dockerfile` points at the root Dockerfile; confirm the -ee image is meant to be built from it.
build-and-push-image-ee:
runs-on: ubuntu-latest
# Needs package write access to push the image with the GITHUB_TOKEN.
permissions:
contents: read
packages: write
steps:
- name: Checkout repository
uses: actions/checkout@v4
with:
ref: ${{ github.event.inputs.commit_hash }}
- name: Log in to the Container registry
uses: docker/login-action@65b78e6e13532edd9afa3aa52ac7964289d1a9c1
with:
registry: ${{ env.REGISTRY }}
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}
# The "meta-ee" outputs provide the base tags and labels used by the build step below.
- name: Extract metadata (tags, labels) for EE Dockerfile
id: meta-ee
uses: docker/metadata-action@9ec57ed1fcdbf14dcef7dfbe97b2010124a938b7
with:
images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}-ee
# Configure multi platform Docker builds
- name: Set up QEMU
uses: docker/setup-qemu-action@e0e4588fad221d38ee467c0bffd91115366dc0c5
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@edfb0fe6204400c56fbfd3feba3fe9ad1adfa345
# main-<tag> is added for 'stable' / 'rc' releases and main-stable for 'stable' only; otherwise those entries are empty.
- name: Build and push EE Docker image
uses: docker/build-push-action@f2a1d5e99d037542a71f64918e516c093c6f3fc4
with:
context: .
file: Dockerfile
push: true
tags: |
${{ steps.meta-ee.outputs.tags }}-${{ github.event.inputs.tag || 'latest' }},
${{ steps.meta-ee.outputs.tags }}-${{ github.event.inputs.release_type }}
${{ (github.event.inputs.release_type == 'stable' || github.event.inputs.release_type == 'rc') && format('{0}/berriai/litellm-ee:main-{1}', env.REGISTRY, github.event.inputs.tag) || '' }},
${{ github.event.inputs.release_type == 'stable' && format('{0}/berriai/litellm-ee:main-stable', env.REGISTRY) || '' }}
labels: ${{ steps.meta-ee.outputs.labels }}
platforms: local,linux/amd64,linux/arm64,linux/arm64/v8
# Builds the "-database" image from ./docker/Dockerfile.database at the dispatched commit
# and pushes it to the registry in env.REGISTRY.
build-and-push-image-database:
runs-on: ubuntu-latest
# Needs package write access to push the image with the GITHUB_TOKEN.
permissions:
contents: read
packages: write
steps:
- name: Checkout repository
uses: actions/checkout@v4
with:
ref: ${{ github.event.inputs.commit_hash }}
- name: Log in to the Container registry
uses: docker/login-action@65b78e6e13532edd9afa3aa52ac7964289d1a9c1
with:
registry: ${{ env.REGISTRY }}
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}
# The "meta-database" outputs provide the base tags and labels used by the build step below.
- name: Extract metadata (tags, labels) for database Dockerfile
id: meta-database
uses: docker/metadata-action@9ec57ed1fcdbf14dcef7dfbe97b2010124a938b7
with:
images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}-database
# Configure multi platform Docker builds
- name: Set up QEMU
uses: docker/setup-qemu-action@e0e4588fad221d38ee467c0bffd91115366dc0c5
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@edfb0fe6204400c56fbfd3feba3fe9ad1adfa345
# main-<tag> is added for 'stable' / 'rc' releases and main-stable for 'stable' only; otherwise those entries are empty.
- name: Build and push Database Docker image
uses: docker/build-push-action@f2a1d5e99d037542a71f64918e516c093c6f3fc4
with:
context: .
file: ./docker/Dockerfile.database
push: true
tags: |
${{ steps.meta-database.outputs.tags }}-${{ github.event.inputs.tag || 'latest' }},
${{ steps.meta-database.outputs.tags }}-${{ github.event.inputs.release_type }}
${{ (github.event.inputs.release_type == 'stable' || github.event.inputs.release_type == 'rc') && format('{0}/berriai/litellm-database:main-{1}', env.REGISTRY, github.event.inputs.tag) || '' }},
${{ github.event.inputs.release_type == 'stable' && format('{0}/berriai/litellm-database:main-stable', env.REGISTRY) || '' }}
labels: ${{ steps.meta-database.outputs.labels }}
platforms: local,linux/amd64,linux/arm64,linux/arm64/v8
# Builds the "-non_root" image from ./docker/Dockerfile.non_root at the dispatched commit
# and pushes it to the registry in env.REGISTRY.
build-and-push-image-non_root:
runs-on: ubuntu-latest
# Needs package write access to push the image with the GITHUB_TOKEN.
permissions:
contents: read
packages: write
steps:
- name: Checkout repository
uses: actions/checkout@v4
with:
ref: ${{ github.event.inputs.commit_hash }}
- name: Log in to the Container registry
uses: docker/login-action@65b78e6e13532edd9afa3aa52ac7964289d1a9c1
with:
registry: ${{ env.REGISTRY }}
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}
# The "meta-non_root" outputs provide the base tags and labels used by the build step below.
- name: Extract metadata (tags, labels) for non_root Dockerfile
id: meta-non_root
uses: docker/metadata-action@9ec57ed1fcdbf14dcef7dfbe97b2010124a938b7
with:
images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}-non_root
# Configure multi platform Docker builds
- name: Set up QEMU
uses: docker/setup-qemu-action@e0e4588fad221d38ee467c0bffd91115366dc0c5
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@edfb0fe6204400c56fbfd3feba3fe9ad1adfa345
# main-<tag> is added for 'stable' / 'rc' releases and main-stable for 'stable' only; otherwise those entries are empty.
- name: Build and push non_root Docker image
uses: docker/build-push-action@f2a1d5e99d037542a71f64918e516c093c6f3fc4
with:
context: .
file: ./docker/Dockerfile.non_root
push: true
tags: |
${{ steps.meta-non_root.outputs.tags }}-${{ github.event.inputs.tag || 'latest' }},
${{ steps.meta-non_root.outputs.tags }}-${{ github.event.inputs.release_type }}
${{ (github.event.inputs.release_type == 'stable' || github.event.inputs.release_type == 'rc') && format('{0}/berriai/litellm-non_root:main-{1}', env.REGISTRY, github.event.inputs.tag) || '' }},
${{ github.event.inputs.release_type == 'stable' && format('{0}/berriai/litellm-non_root:main-stable', env.REGISTRY) || '' }}
labels: ${{ steps.meta-non_root.outputs.labels }}
platforms: local,linux/amd64,linux/arm64,linux/arm64/v8
# Builds the "-spend_logs" image from ./litellm-js/spend-logs/Dockerfile at the
# dispatched commit and pushes it to the registry in env.REGISTRY.
build-and-push-image-spend-logs:
  runs-on: ubuntu-latest
  # Needs package write access to push the image with the GITHUB_TOKEN.
  permissions:
    contents: read
    packages: write
  steps:
    - name: Checkout repository
      uses: actions/checkout@v4
      with:
        ref: ${{ github.event.inputs.commit_hash }}

    - name: Log in to the Container registry
      uses: docker/login-action@65b78e6e13532edd9afa3aa52ac7964289d1a9c1
      with:
        registry: ${{ env.REGISTRY }}
        username: ${{ github.actor }}
        password: ${{ secrets.GITHUB_TOKEN }}

    # The "meta-spend-logs" outputs provide the base tags used by the build step below.
    - name: Extract metadata (tags, labels) for spend-logs Dockerfile
      id: meta-spend-logs
      uses: docker/metadata-action@9ec57ed1fcdbf14dcef7dfbe97b2010124a938b7
      with:
        images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}-spend_logs

    # Configure multi platform Docker builds
    - name: Set up QEMU
      uses: docker/setup-qemu-action@e0e4588fad221d38ee467c0bffd91115366dc0c5

    - name: Set up Docker Buildx
      uses: docker/setup-buildx-action@edfb0fe6204400c56fbfd3feba3fe9ad1adfa345

    # Step name corrected: this step builds the spend-logs image, not the
    # database image.
    # NOTE(review): unlike the other image jobs in this file, no `labels:` from
    # the metadata step are applied here; confirm whether that is intentional.
    - name: Build and push spend-logs Docker image
      uses: docker/build-push-action@f2a1d5e99d037542a71f64918e516c093c6f3fc4
      with:
        context: .
        file: ./litellm-js/spend-logs/Dockerfile
        push: true
        tags: |
          ${{ steps.meta-spend-logs.outputs.tags }}-${{ github.event.inputs.tag || 'latest' }},
          ${{ steps.meta-spend-logs.outputs.tags }}-${{ github.event.inputs.release_type }}
          ${{ (github.event.inputs.release_type == 'stable' || github.event.inputs.release_type == 'rc') && format('{0}/berriai/litellm-spend_logs:main-{1}', env.REGISTRY, github.event.inputs.tag) || '' }},
          ${{ github.event.inputs.release_type == 'stable' && format('{0}/berriai/litellm-spend_logs:main-stable', env.REGISTRY) || '' }}
        platforms: local,linux/amd64,linux/arm64,linux/arm64/v8
# Calls the reusable run_observatory_tests.yml workflow for 'rc' and 'stable' releases,
# after the Docker Hub images have been pushed. All caller secrets are inherited.
run-observatory-tests:
if: github.event.inputs.release_type == 'rc' || github.event.inputs.release_type == 'stable'
needs: [docker-hub-deploy]
uses: ./.github/workflows/run_observatory_tests.yml
with:
tag: ${{ github.event.inputs.tag }}
commit_hash: ${{ github.event.inputs.commit_hash }}
secrets: inherit
# Packages the Helm chart in deploy/charts/<CHART_NAME> and pushes it to the OCI
# registry via the local helm-oci-chart-releaser action. Skipped for 'dev'
# releases; waits for the Docker Hub, main, and database image jobs.
build-and-push-helm-chart:
  if: github.event.inputs.release_type != 'dev'
  needs: [docker-hub-deploy, build-and-push-image, build-and-push-image-database]
  runs-on: ubuntu-latest
  steps:
    - name: Checkout repository
      uses: actions/checkout@v4
      with:
        fetch-depth: 0

    - name: Log in to the Container registry
      uses: docker/login-action@65b78e6e13532edd9afa3aa52ac7964289d1a9c1
      with:
        registry: ${{ env.REGISTRY }}
        username: ${{ github.actor }}
        password: ${{ secrets.GITHUB_TOKEN }}

    # Exports REPO_OWNER (the lower-cased repository owner) for later steps.
    # The owner is passed through env instead of being interpolated into the script.
    - name: lowercase github.repository_owner
      env:
        OWNER: ${{ github.repository_owner }}
      run: |
        echo "REPO_OWNER=$(printf '%s' "$OWNER" | tr '[:upper:]' '[:lower:]')" >> "$GITHUB_ENV"

    # Sync Helm chart version with LiteLLM release version (1-1 versioning)
    # This allows users to easily map Helm chart versions to LiteLLM versions
    # See: https://codefresh.io/docs/docs/ci-cd-guides/helm-best-practices/
    - name: Calculate chart and app versions
      id: chart_version
      shell: bash
      # Dispatch inputs are passed through env so that their contents are
      # never parsed as shell syntax.
      env:
        INPUT_TAG: ${{ github.event.inputs.tag }}
        RELEASE_TYPE: ${{ github.event.inputs.release_type }}
      run: |
        # Chart version = LiteLLM version without 'v' prefix (Helm semver convention)
        # v1.81.0 -> 1.81.0, v1.81.0.rc.1 -> 1.81.0.rc.1
        CHART_VERSION="${INPUT_TAG#v}"
        # Add suffix for 'latest' releases (rc already has suffix in tag)
        if [ "$RELEASE_TYPE" = "latest" ]; then
          CHART_VERSION="${CHART_VERSION}-latest"
        fi
        # App version = Docker tag (keeps 'v' prefix to match Docker image tags)
        APP_VERSION="${INPUT_TAG}"
        echo "version=${CHART_VERSION}" | tee -a "$GITHUB_OUTPUT"
        echo "app_version=${APP_VERSION}" | tee -a "$GITHUB_OUTPUT"

    - uses: ./.github/actions/helm-oci-chart-releaser
      with:
        name: ${{ env.CHART_NAME }}
        repository: ${{ env.REPO_OWNER }}
        tag: ${{ steps.chart_version.outputs.version }}
        app_version: ${{ steps.chart_version.outputs.app_version }}
        path: deploy/charts/${{ env.CHART_NAME }}
        registry: ${{ env.REGISTRY }}
        registry_username: ${{ github.actor }}
        registry_password: ${{ secrets.GITHUB_TOKEN }}
        update_dependencies: true
# Creates the GitHub release for the dispatched tag at the dispatched commit,
# reads back the generated release notes, and posts them to a Discord webhook.
release:
  name: "New LiteLLM Release"
  needs: [docker-hub-deploy, build-and-push-image, build-and-push-image-database]
  permissions:
    contents: write
  runs-on: "ubuntu-latest"
  steps:
    # Dispatch inputs are passed through env in the steps below rather than
    # interpolated into scripts, so their contents are never parsed as shell
    # or JavaScript source.
    - name: Display version
      env:
        TAG: ${{ github.event.inputs.tag }}
      run: echo "Current version is ${TAG}"

    # Exports RELEASE_TAG to every later step of this job.
    - name: "Set Release Tag"
      env:
        TAG: ${{ github.event.inputs.tag }}
      run: echo "RELEASE_TAG=${TAG}" >> "$GITHUB_ENV"

    - name: Display release tag
      run: echo "RELEASE_TAG is $RELEASE_TAG"

    # Exports RELEASE_ID and RELEASE_UPLOAD_URL for the following steps.
    - name: "Create release"
      uses: "actions/github-script@v6"
      env:
        COMMIT_HASH: ${{ github.event.inputs.commit_hash }}
      with:
        github-token: "${{ secrets.GITHUB_TOKEN }}"
        script: |
          const commitHash = process.env.COMMIT_HASH;
          console.log("Commit Hash:", commitHash); // Add this line for debugging
          try {
            const response = await github.rest.repos.createRelease({
              draft: false,
              generate_release_notes: true,
              target_commitish: commitHash,
              name: process.env.RELEASE_TAG,
              owner: context.repo.owner,
              prerelease: false,
              repo: context.repo.repo,
              tag_name: process.env.RELEASE_TAG,
            });
            core.exportVariable('RELEASE_ID', response.data.id);
            core.exportVariable('RELEASE_UPLOAD_URL', response.data.upload_url);
          } catch (error) {
            core.setFailed(error.message);
          }

    # Returns the release body JSON-escaped with its outer quotes stripped, so
    # it can be spliced into the JSON payload of the Discord step.
    - name: Fetch Release Notes
      id: release-notes
      uses: actions/github-script@v6
      with:
        github-token: "${{ secrets.GITHUB_TOKEN }}"
        script: |
          try {
            const response = await github.rest.repos.getRelease({
              owner: context.repo.owner,
              repo: context.repo.repo,
              release_id: process.env.RELEASE_ID,
            });
            const formattedBody = JSON.stringify(response.data.body).slice(1, -1);
            return formattedBody;
          } catch (error) {
            core.setFailed(error.message);
          }
      env:
        RELEASE_ID: ${{ env.RELEASE_ID }}

    - name: Github Releases To Discord
      env:
        WEBHOOK_URL: ${{ secrets.WEBHOOK_URL }}
        # Previously misspelled as REALEASE_TAG, which the script never read;
        # the script only worked because RELEASE_TAG was also in GITHUB_ENV.
        RELEASE_TAG: ${{ env.RELEASE_TAG }}
        RELEASE_NOTES: ${{ steps.release-notes.outputs.result }}
      run: |
        curl -H "Content-Type: application/json" -X POST -d '{
          "content": "New LiteLLM release '"${RELEASE_TAG}"'",
          "username": "Release Changelog",
          "avatar_url": "https://cdn.discordapp.com/avatars/487431320314576937/bd64361e4ba6313d561d54e78c9e7171.png",
          "embeds": [
            {
              "title": "Changelog for LiteLLM '"${RELEASE_TAG}"'",
              "description": "'"${RELEASE_NOTES}"'",
              "color": 2105893
            }
          ]
        }' "$WEBHOOK_URL"

View File

@ -1,67 +0,0 @@
# Standalone workflow to publish LiteLLM Helm Chart
# Note: The main ghcr_deploy.yml workflow also publishes the Helm chart as part of a full release
name: Build, Publish LiteLLM Helm Chart. New Release
on:
  workflow_dispatch:
    inputs:
      tag:
        description: "LiteLLM version tag (e.g., v1.81.0)"
        required: true
# Workflow-wide environment: the registry host, the repository name, and the
# repository owner. REPO_OWNER is overwritten with a lower-cased copy by the
# "lowercase github.repository_owner" step below.
env:
  REGISTRY: ghcr.io
  IMAGE_NAME: ${{ github.repository }}
  REPO_OWNER: ${{ github.repository_owner }}
# Single job: lint the chart, then package and push it to the registry.
jobs:
  build-and-push-helm-chart:
    runs-on: ubuntu-latest
    # Least privilege: read the repository, write packages to GHCR.
    permissions:
      contents: read
      packages: write
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
      - name: Log in to the Container registry
        uses: docker/login-action@65b78e6e13532edd9afa3aa52ac7964289d1a9c1
        with:
          registry: ${{ env.REGISTRY }}
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}
      - name: lowercase github.repository_owner
        run: |
          echo "REPO_OWNER=$(echo ${{ github.repository_owner }} | tr '[:upper:]' '[:lower:]')" >> "${GITHUB_ENV}"
      # Sync Helm chart version with LiteLLM release version (1-1 versioning)
      - name: Calculate chart and app versions
        id: chart_version
        shell: bash
        env:
          # Passed via the environment so the input text is never expanded
          # into the script source.
          INPUT_TAG: ${{ github.event.inputs.tag }}
        run: |
          # Chart version = LiteLLM version without 'v' prefix
          # v1.81.0 -> 1.81.0
          CHART_VERSION="${INPUT_TAG#v}"
          # App version = Docker tag (keeps 'v' prefix)
          APP_VERSION="${INPUT_TAG}"
          echo "version=${CHART_VERSION}" | tee -a "$GITHUB_OUTPUT"
          echo "app_version=${APP_VERSION}" | tee -a "$GITHUB_OUTPUT"
      - name: Lint helm chart
        run: helm lint deploy/charts/litellm-helm
      - uses: ./.github/actions/helm-oci-chart-releaser
        with:
          name: litellm-helm
          repository: ${{ env.REPO_OWNER }}
          tag: ${{ steps.chart_version.outputs.version }}
          app_version: ${{ steps.chart_version.outputs.app_version }}
          path: deploy/charts/litellm-helm
          registry: ${{ env.REGISTRY }}
          registry_username: ${{ github.actor }}
          registry_password: ${{ secrets.GITHUB_TOKEN }}
          update_dependencies: true

View File

@ -6,22 +6,36 @@ on:
branches:
- main
permissions:
contents: read
jobs:
unit-test:
runs-on: ubuntu-latest
steps:
- name: Checkout
uses: actions/checkout@v2
uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # v4.3.0
with:
persist-credentials: false
- name: Set up Helm 3.11.1
uses: azure/setup-helm@v1
uses: azure/setup-helm@1a275c3b69536ee54be43f2070a358922e12c8d4 # v4.3.1
with:
version: '3.11.1'
version: "3.11.1"
- name: Install Helm Unit Test Plugin
run: |
helm plugin install https://github.com/helm-unittest/helm-unittest --version v0.4.4
# Integrity guard for the helm-unittest plugin: reads the HEAD commit of the
# plugin's git checkout under $(helm env HELM_PLUGINS) and fails the job unless
# it equals the pinned EXPECTED_SHA. The value compared is a git commit id,
# even though the error message calls it a checksum.
# NOTE(review): EXPECTED_SHA is presumably the commit behind the plugin tag
# installed by the preceding step - confirm, and bump both together.
- name: Verify Helm Unit Test Plugin integrity
run: |
EXPECTED_SHA="e251ba198448629678ff2168e1a469249d998155"
PLUGIN_DIR="$(helm env HELM_PLUGINS)/helm-unittest"
ACTUAL_SHA="$(git -C "$PLUGIN_DIR" rev-parse HEAD)"
if [ "$ACTUAL_SHA" != "$EXPECTED_SHA" ]; then
echo "::error::Helm unittest plugin checksum mismatch! Expected $EXPECTED_SHA but got $ACTUAL_SHA"
exit 1
fi
echo "Helm unittest plugin integrity verified: $ACTUAL_SHA"
- name: Run unit tests
run:
helm unittest -f 'tests/*.yaml' deploy/charts/litellm-helm
run: helm unittest -f 'tests/*.yaml' deploy/charts/litellm-helm

View File

@ -1,139 +0,0 @@
import csv
import os
from github import Github
def _parse_response_time(raw_value):
    """Convert a response-time cell such as "120", " 120 " or "120ms" to a float."""
    return float(raw_value.strip().rstrip("ms"))


def interpret_results(csv_file):
    """Render a load-test stats CSV as a Markdown table with a Status column.

    A row is marked "Passed ✅" when its median response time is below 300,
    its average response time is below 300 and fewer than 5% of its requests
    failed; otherwise it is marked "Failed ❌".

    Args:
        csv_file: Path to a CSV file with (at least) the columns Name,
            Median Response Time, Average Response Time, Requests/s,
            Failures/s, Request Count, Failure Count, Min Response Time and
            Max Response Time.

    Returns:
        The Markdown table as a single string (header, separator, one line per
        CSV row). The table is also printed to stdout.
    """
    with open(csv_file, newline="") as csvfile:
        rows = list(csv.DictReader(csvfile))

    # Add a new column "Status"
    for row in rows:
        # Both time columns go through the same parser; previously the average
        # column only tolerated a trailing "s", so "150ms" raised ValueError.
        median_response_time = _parse_response_time(row["Median Response Time"])
        average_response_time = _parse_response_time(row["Average Response Time"])

        request_count = int(row["Request Count"])
        failure_count = int(row["Failure Count"])
        # A row without any requests has no failures to report; guard the
        # division instead of raising ZeroDivisionError.
        if request_count:
            failure_percent = round((failure_count / request_count) * 100, 2)
        else:
            failure_percent = 0.0

        # Determine status based on conditions
        if (
            median_response_time < 300
            and average_response_time < 300
            and failure_percent < 5
        ):
            row["Status"] = "Passed ✅"
        else:
            row["Status"] = "Failed ❌"

    # Construct Markdown table header
    table_lines = [
        "| Name | Status | Median Response Time (ms) | Average Response Time (ms) | Requests/s | Failures/s | Request Count | Failure Count | Min Response Time (ms) | Max Response Time (ms) |",
        "| --- | --- | --- | --- | --- | --- | --- | --- | --- | --- |",
    ]

    # Construct Markdown table rows
    for row in rows:
        table_lines.append(
            f"| {row['Name']} | {row['Status']} | {row['Median Response Time']} | {row['Average Response Time']} | {row['Requests/s']} | {row['Failures/s']} | {row['Request Count']} | {row['Failure Count']} | {row['Min Response Time']} | {row['Max Response Time']} |"
        )

    markdown_table = "\n".join(table_lines)
    print("markdown table: ", markdown_table)
    return markdown_table
# Returns the Markdown snippet (a "Docker Run LiteLLM Proxy" heading followed
# by a fenced `docker run` command) for a stable release. The image tag is
# `litellm_stable_release_branch-<release_version>`.
def _get_docker_run_command_stable_release(release_version):
return f"""
\n\n
## Docker Run LiteLLM Proxy
```
docker run \\
-e STORE_MODEL_IN_DB=True \\
-p 4000:4000 \\
ghcr.io/berriai/litellm:litellm_stable_release_branch-{release_version}
```
"""
# Returns the Markdown snippet (a "Docker Run LiteLLM Proxy" heading followed
# by a fenced `docker run` command) for a regular release. The image tag is
# `main-<release_version>`.
def _get_docker_run_command(release_version):
return f"""
\n\n
## Docker Run LiteLLM Proxy
```
docker run \\
-e STORE_MODEL_IN_DB=True \\
-p 4000:4000 \\
ghcr.io/berriai/litellm:main-{release_version}
```
"""
def get_docker_run_command(release_version):
    """Return the `docker run` Markdown snippet matching the release flavour.

    Versions whose name contains "stable" get the stable-release snippet; all
    others get the regular one.
    """
    build_snippet = (
        _get_docker_run_command_stable_release
        if "stable" in release_version
        else _get_docker_run_command
    )
    return build_snippet(release_version)
def main():
    """Append the docker-run snippet and load-test table to the latest release.

    Reads `load_test_stats.csv`, renders it with `interpret_results`, and
    rewrites the body of the latest BerriAI/litellm GitHub release using the
    token in the GITHUB_TOKEN environment variable.
    """
    # The original script switched itself off with a bare `return` directly
    # under the `__main__` guard. At module level that is a SyntaxError, so the
    # file could not even be parsed. The kill switch is kept, but as a legal
    # early return; flip the flag to re-enable the release update.
    release_update_enabled = False
    if not release_update_enabled:
        return

    csv_file = "load_test_stats.csv"  # Change this to the path of your CSV file
    markdown_table = interpret_results(csv_file)

    # Update release body with interpreted results
    github_token = os.getenv("GITHUB_TOKEN")
    g = Github(github_token)
    repo = g.get_repo(
        "BerriAI/litellm"
    )  # Replace with your repository's username and name
    latest_release = repo.get_latest_release()
    print("got latest release: ", latest_release)
    print(latest_release.title)
    print(latest_release.tag_name)

    release_version = latest_release.title

    print("latest release body: ", latest_release.body)
    print("markdown table: ", markdown_table)

    # A release without a description may have no body; treat it as empty
    # instead of failing on the `in` check below.
    existing_release_body = latest_release.body or ""
    # check if "Load Test LiteLLM Proxy Results" exists
    if "Load Test LiteLLM Proxy Results" in existing_release_body:
        # find the "Load Test LiteLLM Proxy Results" section and delete it
        start_index = existing_release_body.find("Load Test LiteLLM Proxy Results")
        existing_release_body = existing_release_body[:start_index]

    docker_run_command = get_docker_run_command(release_version)
    print("docker run command: ", docker_run_command)

    new_release_body = (
        existing_release_body
        + docker_run_command
        + "\n\n"
        + "### Don't want to maintain your internal proxy? get in touch 🎉"
        + "\nHosted Proxy Alpha: https://calendly.com/d/cx9p-5yf-2nm/litellm-introductions"
        + "\n\n"
        + "## Load Test LiteLLM Proxy Results"
        + "\n\n"
        + markdown_table
    )
    print("new release body: ", new_release_body)
    try:
        latest_release.update_release(
            name=latest_release.tag_name,
            message=new_release_body,
        )
    except Exception as e:
        # Best effort: a failed release update is reported but does not fail
        # the run.
        print(e)


if __name__ == "__main__":
    main()

View File

@ -2,8 +2,8 @@ name: Issue Keyword Labeler
on:
issues:
types:
- opened
types:
- opened
jobs:
scan-and-label:
@ -13,7 +13,9 @@ jobs:
contents: read
steps:
- name: Checkout code
uses: actions/checkout@v4
uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # v4.3.0
with:
persist-credentials: false
- name: Scan for provider keywords
id: scan
@ -24,7 +26,7 @@ jobs:
- name: Ensure label exists
if: steps.scan.outputs.found == 'true'
uses: actions/github-script@v7
uses: actions/github-script@f28e40c7f34bde8b3046d885e986cb6290c5673b # v7.1.0
with:
github-token: ${{ secrets.GITHUB_TOKEN }}
script: |
@ -51,7 +53,7 @@ jobs:
- name: Add label to the issue
if: steps.scan.outputs.found == 'true'
uses: actions/github-script@v7
uses: actions/github-script@f28e40c7f34bde8b3046d885e986cb6290c5673b # v7.1.0
with:
github-token: ${{ secrets.GITHUB_TOKEN }}
script: |
@ -61,4 +63,3 @@ jobs:
issue_number: context.issue.number,
labels: ['llm translation']
});

View File

@ -12,7 +12,7 @@ jobs:
issues: write
steps:
- name: Add component labels
uses: actions/github-script@v7
uses: actions/github-script@f28e40c7f34bde8b3046d885e986cb6290c5673b # v7.1.0
with:
github-token: ${{ secrets.GITHUB_TOKEN }}
script: |

View File

@ -4,38 +4,41 @@ on:
workflow_dispatch:
inputs:
release_candidate_tag:
description: 'Release candidate tag/version'
description: "Release candidate tag/version"
required: true
type: string
push:
tags:
- 'v*-rc*' # Triggers on release candidate tags like v1.0.0-rc1
- "v*-rc*" # Triggers on release candidate tags like v1.0.0-rc1
permissions:
contents: read
jobs:
run-llm-translation-tests:
runs-on: ubuntu-latest
timeout-minutes: 90
steps:
- name: Checkout code
uses: actions/checkout@v4
uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # v4.3.0
with:
persist-credentials: false
ref: ${{ github.event.inputs.release_candidate_tag || github.ref }}
- name: Set up Python
uses: actions/setup-python@v5
uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5.6.0
with:
python-version: '3.11'
python-version: "3.11"
- name: Install Poetry
uses: snok/install-poetry@v1
with:
version: latest
virtualenvs-create: true
virtualenvs-in-project: true
- name: Cache Poetry dependencies
uses: actions/cache@v3
run: |
pip install 'poetry==2.3.2'
poetry config virtualenvs.create true
poetry config virtualenvs.in-project true
- name: Restore Poetry dependencies cache
uses: actions/cache/restore@0057852bfaa89a56745cba8c7296529d2fc39830 # v4.0.0
with:
path: |
~/.cache/pypoetry
@ -43,15 +46,15 @@ jobs:
key: ${{ runner.os }}-poetry-${{ hashFiles('**/poetry.lock') }}
restore-keys: |
${{ runner.os }}-poetry-
- name: Install dependencies
run: |
poetry install --with dev
poetry run pip install pytest-xdist pytest-timeout
poetry run pip install 'pytest-xdist==3.8.0' 'pytest-timeout==2.4.0'
- name: Create test results directory
run: mkdir -p test-results
- name: Run LLM Translation Tests
env:
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
@ -61,13 +64,14 @@ jobs:
AZURE_API_KEY: ${{ secrets.AZURE_API_KEY }}
AZURE_API_BASE: ${{ secrets.AZURE_API_BASE }}
AZURE_API_VERSION: ${{ secrets.AZURE_API_VERSION }}
# Add other API keys as needed
RC_TAG: ${{ github.event.inputs.release_candidate_tag || github.ref_name }}
COMMIT_SHA: ${{ github.sha }}
run: |
python .github/workflows/run_llm_translation_tests.py \
--tag "${{ github.event.inputs.release_candidate_tag || github.ref_name }}" \
--commit "${{ github.sha }}" \
--tag "$RC_TAG" \
--commit "$COMMIT_SHA" \
|| true # Continue even if tests fail
- name: Display test summary
if: always()
run: |
@ -79,9 +83,9 @@ jobs:
else
echo "Warning: Test report was not generated"
fi
- name: Upload test artifacts
uses: actions/upload-artifact@v4
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
if: always()
with:
name: LLM-Translation-Artifact-${{ github.event.inputs.release_candidate_tag || github.ref_name }}

View File

@ -1,59 +0,0 @@
# Runs a Locust load test against the post-release proxy after the Docker
# image release workflow completes (or on manual dispatch), then processes the
# stats and uploads them to the latest release.
name: Test Locust Load Test
on:
  workflow_run:
    workflows: ["Build, Publish LiteLLM Docker Image. New Release"]
    types:
      - completed
  workflow_dispatch:
jobs:
  build:
    runs-on: ubuntu-latest
    steps:
      # checkout@v1 and setup-python@v2 are long deprecated; use the same
      # pinned releases as the other workflows in this repository.
      - name: Checkout
        uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # v4.3.0
      - name: Setup Python
        uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5.6.0
        with:
          python-version: "3.x"
      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install PyGithub
      - name: re-deploy proxy
        run: |
          echo "Current working directory: $PWD"
          ls
          python ".github/workflows/redeploy_proxy.py"
        env:
          LOAD_TEST_REDEPLOY_URL1: ${{ secrets.LOAD_TEST_REDEPLOY_URL1 }}
          LOAD_TEST_REDEPLOY_URL2: ${{ secrets.LOAD_TEST_REDEPLOY_URL2 }}
        working-directory: ${{ github.workspace }}
      - name: Run Load Test
        id: locust_run
        # TODO(review): `@master` is a moving ref; pin to a commit SHA.
        uses: BerriAI/locust-github-action@master
        with:
          LOCUSTFILE: ".github/workflows/locustfile.py"
          URL: "https://post-release-load-test-proxy.onrender.com/"
          USERS: "20"
          RATE: "20"
          RUNTIME: "300s"
      - name: Process Load Test Stats
        run: |
          echo "Current working directory: $PWD"
          ls
          python ".github/workflows/interpret_load_test.py"
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        working-directory: ${{ github.workspace }}
      - name: Upload CSV as Asset to Latest Release
        # TODO(review): third-party action referenced by tag; consider pinning.
        uses: xresloader/upload-to-github-release@v1
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        with:
          file: "load_test_stats.csv;load_test.html"
          update_latest_release: true
          tag_name: "load-test"
          overwrite: true

View File

@ -1,28 +0,0 @@
from locust import HttpUser, task, between
class MyUser(HttpUser):
    """Locust user that repeatedly POSTs a small chat-completion request."""

    # Wait between 1 and 5 (locust `between`) after each task.
    wait_time = between(1, 5)

    @task
    def chat_completion(self):
        """POST one chat-completion request to the `chat/completions` endpoint."""
        # Local import: the file header only imports names from locust.
        import os

        # The key can be overridden through LOAD_TEST_API_KEY; the previous
        # hard-coded value remains the default so existing runs keep working.
        # NOTE(review): a credential committed to the repository should be
        # treated as public - rotate it and drop the default once the
        # environment variable is wired into the load-test job.
        api_key = os.environ.get("LOAD_TEST_API_KEY", "sk-8N1tLOOyH8TIxwOLahhIVg")
        headers = {
            "Content-Type": "application/json",
            "Authorization": f"Bearer {api_key}",
            # Include any additional headers you may need for authentication, etc.
        }

        # Customize the payload with "model" and "messages" keys
        payload = {
            "model": "fake-openai-endpoint",
            "messages": [
                {"role": "system", "content": "You are a chat bot."},
                {"role": "user", "content": "Hello, how are you?"},
            ],
            # Add more data as necessary
        }

        # Make a POST request to the "chat/completions" endpoint; the response
        # is not inspected here.
        self.client.post("chat/completions", json=payload, headers=headers)

View File

@ -1,34 +0,0 @@
name: Publish Dev Release to PyPI
on:
workflow_dispatch:
jobs:
publish-dev-release:
runs-on: ubuntu-latest
steps:
- name: Checkout code
uses: actions/checkout@v2
- name: Set up Python
uses: actions/setup-python@v2
with:
python-version: 3.8 # Adjust the Python version as needed
- name: Install dependencies
run: pip install toml twine
- name: Read version from pyproject.toml
id: read-version
run: |
version=$(python -c 'import toml; print(toml.load("pyproject.toml")["tool"]["commitizen"]["version"])')
printf "LITELLM_VERSION=%s" "$version" >> $GITHUB_ENV
- name: Check if version exists on PyPI
id: check-version
run: |
set -e
if twine check --repository-url https://pypi.org/simple/ "litellm==$LITELLM_VERSION" >/dev/null 2>&1; then
echo "Version $LITELLM_VERSION already exists on PyPI. Skipping publish."

View File

@ -1,207 +0,0 @@
name: Publish Prisma Migrations
permissions:
contents: write
pull-requests: write
on:
push:
paths:
- 'schema.prisma' # Check root schema.prisma
branches:
- main
jobs:
publish-migrations:
if: github.repository == 'BerriAI/litellm'
runs-on: ubuntu-latest
services:
postgres:
image: postgres:14
env:
POSTGRES_DB: temp_db
POSTGRES_USER: postgres
POSTGRES_PASSWORD: postgres
ports:
- 5432:5432
options: >-
--health-cmd pg_isready
--health-interval 10s
--health-timeout 5s
--health-retries 5
# Add shadow database service
postgres_shadow:
image: postgres:14
env:
POSTGRES_DB: shadow_db
POSTGRES_USER: postgres
POSTGRES_PASSWORD: postgres
ports:
- 5433:5432
options: >-
--health-cmd pg_isready
--health-interval 10s
--health-timeout 5s
--health-retries 5
steps:
- uses: actions/checkout@v3
- name: Set up Python
uses: actions/setup-python@v4
with:
python-version: '3.x'
- name: Install Dependencies
run: |
pip install prisma
pip install python-dotenv
- name: Generate Initial Migration if None Exists
env:
DATABASE_URL: "postgresql://postgres:postgres@localhost:5432/temp_db"
DIRECT_URL: "postgresql://postgres:postgres@localhost:5432/temp_db"
SHADOW_DATABASE_URL: "postgresql://postgres:postgres@localhost:5433/shadow_db"
run: |
mkdir -p deploy/migrations
echo 'provider = "postgresql"' > deploy/migrations/migration_lock.toml
if [ -z "$(ls -A deploy/migrations/2* 2>/dev/null)" ]; then
echo "No existing migrations found, creating baseline..."
VERSION=$(date +%Y%m%d%H%M%S)
mkdir -p deploy/migrations/${VERSION}_initial
echo "Generating initial migration..."
# Save raw output for debugging
prisma migrate diff \
--from-empty \
--to-schema-datamodel schema.prisma \
--shadow-database-url "${SHADOW_DATABASE_URL}" \
--script > deploy/migrations/${VERSION}_initial/raw_migration.sql
echo "Raw migration file content:"
cat deploy/migrations/${VERSION}_initial/raw_migration.sql
echo "Cleaning migration file..."
# Clean the file
sed '/^Installing/d' deploy/migrations/${VERSION}_initial/raw_migration.sql > deploy/migrations/${VERSION}_initial/migration.sql
# Verify the migration file
if [ ! -s deploy/migrations/${VERSION}_initial/migration.sql ]; then
echo "ERROR: Migration file is empty after cleaning"
echo "Original content was:"
cat deploy/migrations/${VERSION}_initial/raw_migration.sql
exit 1
fi
echo "Final migration file content:"
cat deploy/migrations/${VERSION}_initial/migration.sql
# Verify it starts with SQL
if ! head -n 1 deploy/migrations/${VERSION}_initial/migration.sql | grep -q "^--\|^CREATE\|^ALTER"; then
echo "ERROR: Migration file does not start with SQL command or comment"
echo "First line is:"
head -n 1 deploy/migrations/${VERSION}_initial/migration.sql
echo "Full content is:"
cat deploy/migrations/${VERSION}_initial/migration.sql
exit 1
fi
echo "Initial migration generated at $(date -u)" > deploy/migrations/${VERSION}_initial/README.md
fi
# Diffs schema.prisma against the migrations already in deploy/migrations and,
# when the diff is non-empty, stores it as a new timestamped migration.
- name: Compare and Generate Migration
  if: success()
  env:
    DATABASE_URL: "postgresql://postgres:postgres@localhost:5432/temp_db"
    DIRECT_URL: "postgresql://postgres:postgres@localhost:5432/temp_db"
    SHADOW_DATABASE_URL: "postgresql://postgres:postgres@localhost:5433/shadow_db"
  run: |
    # Create temporary migration workspace
    mkdir -p temp_migrations
    # Copy existing migrations (will not fail if directory is empty)
    cp -r deploy/migrations/* temp_migrations/ 2>/dev/null || true
    VERSION=$(date +%Y%m%d%H%M%S)
    # Export the timestamp: a shell variable does not outlive this step, and
    # the pull-request step builds its branch name from `env.VERSION`.
    echo "VERSION=${VERSION}" >> "$GITHUB_ENV"
    # Generate diff against existing migrations or empty state
    prisma migrate diff \
      --from-migrations temp_migrations \
      --to-schema-datamodel schema.prisma \
      --shadow-database-url "${SHADOW_DATABASE_URL}" \
      --script > temp_migrations/migration_${VERSION}.sql
    # Check if there are actual changes
    if [ -s temp_migrations/migration_${VERSION}.sql ]; then
      echo "Changes detected, creating new migration"
      mkdir -p deploy/migrations/${VERSION}_schema_update
      mv temp_migrations/migration_${VERSION}.sql deploy/migrations/${VERSION}_schema_update/migration.sql
      echo "Migration generated at $(date -u)" > deploy/migrations/${VERSION}_schema_update/README.md
    else
      echo "No schema changes detected"
      exit 0
    fi
# Replays every migration, in order, into a scratch database to prove that the
# full migration history applies cleanly.
- name: Verify Migration
  if: success()
  env:
    DATABASE_URL: "postgresql://postgres:postgres@localhost:5432/temp_db"
    DIRECT_URL: "postgresql://postgres:postgres@localhost:5432/temp_db"
    SHADOW_DATABASE_URL: "postgresql://postgres:postgres@localhost:5433/shadow_db"
    # Scratch database created below on the shadow server.
    MIGRATION_TEST_DATABASE_URL: "postgresql://postgres:postgres@localhost:5433/migration_test"
  run: |
    # Create test database
    psql "${SHADOW_DATABASE_URL}" -v ON_ERROR_STOP=1 -c 'CREATE DATABASE migration_test;'
    # Apply all migrations in order to verify. They are applied to the
    # migration_test database created above (previously they were sent to
    # shadow_db, leaving migration_test unused). ON_ERROR_STOP makes psql
    # exit non-zero on the first SQL error so a broken migration fails the
    # step instead of being ignored.
    for migration in deploy/migrations/*/migration.sql; do
      echo "Applying migration: $migration"
      psql "${MIGRATION_TEST_DATABASE_URL}" -v ON_ERROR_STOP=1 -f "$migration"
    done
# Add this step before create-pull-request to debug permissions
# Debug aid: queries two GitHub API endpoints with the workflow token and
# prints the responses to the job log.
- name: Check Token Permissions
  env:
    # Passed through the environment instead of being expanded into the
    # script text.
    GH_API_TOKEN: ${{ secrets.GITHUB_TOKEN }}
  run: |
    echo "Checking token permissions..."
    curl -H "Authorization: token ${GH_API_TOKEN}" \
      -H "Accept: application/vnd.github.v3+json" \
      https://api.github.com/repos/BerriAI/litellm/collaborators
    # printf is used because bash's echo prints "\n" literally.
    printf '\nChecking if token can create PRs...\n'
    curl -H "Authorization: token ${GH_API_TOKEN}" \
      -H "Accept: application/vnd.github.v3+json" \
      https://api.github.com/repos/BerriAI/litellm
# Add this debug step before git push
- name: Debug Changed Files
run: |
echo "Files staged for commit:"
git diff --name-status --staged
echo "\nAll changed files:"
git status
- name: Create Pull Request
if: success()
uses: peter-evans/create-pull-request@v5
with:
token: ${{ secrets.GITHUB_TOKEN }}
commit-message: "chore: update prisma migrations"
title: "Update Prisma Migrations"
body: |
Auto-generated migration based on schema.prisma changes.
Generated files:
- deploy/migrations/${VERSION}_schema_update/migration.sql
- deploy/migrations/${VERSION}_schema_update/README.md
branch: feat/prisma-migration-${{ env.VERSION }}
base: main
delete-branch: true
- name: Generate and Save Migrations
run: |
# Only add migration files
git add deploy/migrations/
git status # Debug what's being committed
git commit -m "chore: update prisma migrations"

View File

@ -1,94 +0,0 @@
name: Publish litellm-enterprise to PyPI
on:
workflow_dispatch:
inputs:
bump:
description: "Version bump type"
required: true
default: "patch"
type: choice
options:
- patch
- minor
- major
jobs:
publish:
runs-on: ubuntu-latest
if: github.repository == 'BerriAI/litellm'
permissions:
contents: write
pull-requests: write
defaults:
run:
working-directory: enterprise
steps:
- uses: actions/checkout@v4
- uses: actions/setup-python@v5
with:
python-version: "3.11"
- name: Install Poetry
run: pip install poetry
# Bumps the package version with Poetry and exposes the previous and new
# versions as the step outputs `old` and `new`.
- name: Bump version
  id: bump
  env:
    # Passed through the environment so the input is never expanded into the
    # script text.
    BUMP: ${{ github.event.inputs.bump }}
  run: |
    OLD=$(poetry version -s)
    poetry version "$BUMP"
    NEW=$(poetry version -s)
    echo "old=$OLD" >> "$GITHUB_OUTPUT"
    echo "new=$NEW" >> "$GITHUB_OUTPUT"
- name: Update version refs in root pyproject.toml and requirements.txt
run: |
OLD=${{ steps.bump.outputs.old }}
NEW=${{ steps.bump.outputs.new }}
sed -i "s/litellm-enterprise = {version = \"${OLD}\"/litellm-enterprise = {version = \"${NEW}\"/" ../pyproject.toml
sed -i "s/litellm-enterprise==${OLD}/litellm-enterprise==${NEW}/" ../requirements.txt
- name: Update poetry.lock
working-directory: .
run: poetry lock
- name: Build
run: poetry build
- name: Commit version bump and create PR
id: create-pr
run: |
git config user.name "github-actions[bot]"
git config user.email "github-actions[bot]@users.noreply.github.com"
cd ..
BRANCH="bump/enterprise-${{ steps.bump.outputs.new }}"
git checkout -b "$BRANCH"
git add enterprise/pyproject.toml pyproject.toml requirements.txt poetry.lock
git commit -m "bump: litellm-enterprise ${{ steps.bump.outputs.old }} → ${{ steps.bump.outputs.new }}"
git push origin "$BRANCH" --force
gh pr create \
--title "bump: litellm-enterprise ${{ steps.bump.outputs.old }} → ${{ steps.bump.outputs.new }}" \
--body "Version bump for litellm-enterprise. Merge to update main." \
--head "$BRANCH" \
--base main \
|| true
PR_URL=$(gh pr list --head "$BRANCH" --json url -q '.[0].url')
echo "pr_url=$PR_URL" >> $GITHUB_OUTPUT
env:
GH_TOKEN: ${{ github.token }}
- name: Enable auto-merge
run: |
gh pr merge "${{ steps.create-pr.outputs.pr_url }}" --auto --squash
env:
GH_TOKEN: ${{ github.token }}
- name: Publish to PyPI
env:
TWINE_USERNAME: __token__
TWINE_PASSWORD: ${{ secrets.PYPI_ENTERPRISE }}
run: |
pip install twine
twine upload dist/litellm_enterprise-${{ steps.bump.outputs.new }}*

View File

@ -1,74 +0,0 @@
name: Publish litellm-proxy-extras to PyPI
on:
workflow_dispatch:
inputs:
bump:
description: "Version bump type"
required: true
default: "patch"
type: choice
options:
- patch
- minor
- major
jobs:
publish:
runs-on: ubuntu-latest
if: github.repository == 'BerriAI/litellm'
permissions:
contents: write
defaults:
run:
working-directory: litellm-proxy-extras
steps:
- uses: actions/checkout@v4
- uses: actions/setup-python@v5
with:
python-version: "3.11"
- name: Install Poetry
run: pip install poetry
# Bumps the package version with Poetry and exposes the previous and new
# versions as the step outputs `old` and `new`.
- name: Bump version
  id: bump
  env:
    # Passed through the environment so the input is never expanded into the
    # script text.
    BUMP: ${{ github.event.inputs.bump }}
  run: |
    OLD=$(poetry version -s)
    poetry version "$BUMP"
    NEW=$(poetry version -s)
    echo "old=$OLD" >> "$GITHUB_OUTPUT"
    echo "new=$NEW" >> "$GITHUB_OUTPUT"
- name: Update version refs in root pyproject.toml and requirements.txt
run: |
OLD=${{ steps.bump.outputs.old }}
NEW=${{ steps.bump.outputs.new }}
sed -i "s/litellm-proxy-extras = {version = \"${OLD}\"/litellm-proxy-extras = {version = \"${NEW}\"/" ../pyproject.toml
sed -i "s/litellm-proxy-extras==${OLD}/litellm-proxy-extras==${NEW}/" ../requirements.txt
- name: Update poetry.lock
working-directory: .
run: poetry lock
- name: Build
run: poetry build
- name: Commit version bump
run: |
git config user.name "github-actions[bot]"
git config user.email "github-actions[bot]@users.noreply.github.com"
cd ..
git add litellm-proxy-extras/pyproject.toml pyproject.toml requirements.txt poetry.lock
git commit -m "bump: litellm-proxy-extras ${{ steps.bump.outputs.old }} → ${{ steps.bump.outputs.new }}"
git push
- name: Publish to PyPI
env:
TWINE_USERNAME: __token__
TWINE_PASSWORD: ${{ secrets.PYPI_PUBLISH_PASSWORD }}
run: |
pip install twine
twine upload dist/litellm_proxy_extras-${{ steps.bump.outputs.new }}*

136
.github/workflows/publish_to_pypi.yml vendored Normal file
View File

@ -0,0 +1,136 @@
name: Publish to PyPI
on:
workflow_dispatch:
jobs:
preflight-checks:
name: Preflight Checks
runs-on: ubuntu-latest
timeout-minutes: 10
permissions:
contents: read
# No environment — read-only checks, no approval needed
outputs:
needs_publish: ${{ steps.check-litellm.outputs.needs_publish }}
version: ${{ steps.check-litellm.outputs.version }}
steps:
- name: Checkout repo
uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # v4.3.0
- name: Set up Python
uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5.6.0
with:
python-version: "3.12"
# Reads the package version from pyproject.toml and asks PyPI whether that
# release already exists. Outputs: `version`, `needs_publish` (true/false).
- name: Check litellm version on PyPI
  id: check-litellm
  run: |
    VERSION=$(grep -m1 '^version' pyproject.toml | sed 's/version = "\(.*\)"/\1/')
    # An empty version would query a nonsense URL and look like "not
    # published"; fail loudly instead.
    if [ -z "$VERSION" ]; then
      echo "::error::Could not read version from pyproject.toml"
      exit 1
    fi
    echo "version=$VERSION" >> "$GITHUB_OUTPUT"
    echo "Checking if litellm $VERSION exists on PyPI..."
    HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" "https://pypi.org/pypi/litellm/$VERSION/json")
    # Only a definite 404 means "not published". Any other status (5xx, rate
    # limiting, ...) is inconclusive and must not trigger a publish.
    case "$HTTP_STATUS" in
      200)
        echo "litellm $VERSION already exists on PyPI. Skipping publish."
        echo "needs_publish=false" >> "$GITHUB_OUTPUT"
        ;;
      404)
        echo "litellm $VERSION not found on PyPI. Publish needed."
        echo "needs_publish=true" >> "$GITHUB_OUTPUT"
        ;;
      *)
        echo "::error::Unexpected HTTP status $HTTP_STATUS from PyPI while checking litellm $VERSION"
        exit 1
        ;;
    esac
- name: Sanity check proxy-extras version
run: |
# Read pinned version from requirements.txt
REQ_VERSION=$(grep -oP 'litellm-proxy-extras==\K[0-9.]+' requirements.txt)
if [ -z "$REQ_VERSION" ]; then
echo "::error::Could not find litellm-proxy-extras version in requirements.txt"
exit 1
fi
echo "requirements.txt pins litellm-proxy-extras==$REQ_VERSION"
# Read pinned version from pyproject.toml dependency
PYPROJECT_VERSION=$(python3 -c "
import re
with open('pyproject.toml') as f:
content = f.read()
match = re.search(r'litellm-proxy-extras\s*=\s*\{version\s*=\s*\"([^\"]+)\"', content)
if match:
print(match.group(1).lstrip('^~>='))
else:
import sys
print('::error::Could not find litellm-proxy-extras dependency in pyproject.toml', file=sys.stderr)
sys.exit(1)
")
echo "pyproject.toml pins litellm-proxy-extras version: $PYPROJECT_VERSION"
# Check that both pinned versions match
if [ "$REQ_VERSION" != "$PYPROJECT_VERSION" ]; then
echo "::error::Version mismatch: requirements.txt has $REQ_VERSION but pyproject.toml has $PYPROJECT_VERSION"
exit 1
fi
# Check that the pinned version exists on PyPI
echo "Checking if litellm-proxy-extras $REQ_VERSION exists on PyPI..."
HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" "https://pypi.org/pypi/litellm-proxy-extras/$REQ_VERSION/json")
if [ "$HTTP_STATUS" != "200" ]; then
echo "::error::litellm-proxy-extras $REQ_VERSION is not published on PyPI yet. Publish it before releasing litellm."
exit 1
fi
echo "litellm-proxy-extras $REQ_VERSION exists on PyPI. Sanity check passed."
publish-litellm:
name: Publish litellm to PyPI
needs: preflight-checks
if: needs.preflight-checks.outputs.needs_publish == 'true'
runs-on: ubuntu-latest
timeout-minutes: 10
permissions:
id-token: write
contents: read
environment: pypi-publish
steps:
- name: Checkout repo
uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # v4.3.0
- name: Set up Python
uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5.6.0
with:
python-version: "3.12"
- name: Copy model prices backup
run: cp model_prices_and_context_window.json litellm/model_prices_and_context_window_backup.json
- name: Install build tools
run: python -m pip install --upgrade pip build==1.4.2
- name: Build package
run: |
rm -rf build dist
python -m build
- name: Verify build artifacts
env:
EXPECTED_VERSION: ${{ needs.preflight-checks.outputs.version }}
run: |
echo "Contents of dist/:"
ls -la dist/
# Ensure we have both sdist and wheel
ls dist/*.tar.gz
ls dist/*.whl
# Verify built version matches expected
ls dist/ | grep -q "litellm-${EXPECTED_VERSION}" || {
echo "::error::Built artifacts do not match expected version $EXPECTED_VERSION"
ls dist/
exit 1
}
- name: Validate package metadata
run: |
pip install twine==6.2.0
twine check dist/*
- name: Publish to PyPI
uses: pypa/gh-action-pypi-publish@ed0c53931b1dc9bd32cbe73a98c7f6766f8a527e # v1.13.0

View File

@ -3,7 +3,10 @@ name: Read Version from pyproject.toml
on:
push:
branches:
- main # Change this to the default branch of your repository
- main # Change this to the default branch of your repository
permissions:
contents: read
jobs:
read-version:
@ -11,20 +14,14 @@ jobs:
steps:
- name: Checkout code
uses: actions/checkout@v2
- name: Set up Python
uses: actions/setup-python@v2
uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # v4.3.0
with:
python-version: 3.8 # Adjust the Python version as needed
- name: Install dependencies
run: pip install toml
persist-credentials: false
- name: Read version from pyproject.toml
id: read-version
run: |
version=$(python -c 'import toml; print(toml.load("pyproject.toml")["tool"]["commitizen"]["version"])')
version=$(grep -m1 '^version' pyproject.toml | sed 's/version = "\(.*\)"/\1/')
printf "LITELLM_VERSION=%s" "$version" >> $GITHUB_ENV
- name: Display version

View File

@ -1,20 +0,0 @@
"""
redeploy_proxy.py
"""
import os
import requests
import time
# Send a GET request to each deploy hook, in order, then give the proxy time
# to come back up before the load test starts.
for hook_env_name in ("LOAD_TEST_REDEPLOY_URL1", "LOAD_TEST_REDEPLOY_URL2"):
    deploy_hook = os.getenv(hook_env_name)
    if not deploy_hook:
        # Without this check requests fails on the missing URL with an error
        # that never mentions which variable is unset.
        raise SystemExit(f"{hook_env_name} is not set; cannot re-deploy proxy")
    # The response is not inspected: the hook is fire-and-forget.
    requests.get(deploy_hook, timeout=20)

print("SENT GET REQUESTS to re-deploy proxy")
print("sleeeping.... for 60s")
time.sleep(60)

View File

@ -1,80 +0,0 @@
name: Regenerate poetry.lock
# Runs whenever pyproject.toml is merged into main (the most common cause of
# the "pyproject.toml changed significantly since poetry.lock was last generated"
# CI failure). Can also be triggered manually.
on:
push:
branches:
- main
paths:
- pyproject.toml
workflow_dispatch:
permissions:
contents: write # needed to push the auto/regenerate-poetry-lock-* branch
pull-requests: write # needed to open the PR and enable auto-merge
jobs:
regenerate-lock:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- name: Set up Python
uses: actions/setup-python@v5
with:
python-version: "3.11"
- name: Install Poetry
run: pip install poetry
- name: Regenerate poetry.lock
run: poetry lock
# Expose changed=true/false so the PR steps run only when the lock file differs.
- name: Check whether poetry.lock actually changed
id: diff
run: |
if git diff --quiet poetry.lock; then
echo "changed=false" >> "$GITHUB_OUTPUT"
else
echo "changed=true" >> "$GITHUB_OUTPUT"
fi
# Commit the new lock file to a timestamped branch, push it, and open a PR
# against main; the PR URL is exported as the pr_url step output.
- name: Open PR with the refreshed lock file
if: steps.diff.outputs.changed == 'true'
id: open-pr
run: |
BRANCH="auto/regenerate-poetry-lock-$(date +'%Y%m%d%H%M%S')"
git config user.name "github-actions[bot]"
git config user.email "github-actions[bot]@users.noreply.github.com"
git checkout -b "$BRANCH"
git add poetry.lock
git commit -m "chore: regenerate poetry.lock to match pyproject.toml"
git push -f origin "$BRANCH"
cat > /tmp/pr-body.md << 'BODY'
Automated regeneration of `poetry.lock` after `pyproject.toml` was updated on `main`.
Fixes the recurring CI failure:
```
pyproject.toml changed significantly since poetry.lock was last generated.
Run `poetry lock` to fix the lock file.
```
BODY
PR_URL=$(gh pr create \
--title "chore: regenerate poetry.lock to match pyproject.toml" \
--body-file /tmp/pr-body.md \
--head "$BRANCH" \
--base main)
echo "pr_url=$PR_URL" >> "$GITHUB_OUTPUT"
env:
GH_TOKEN: ${{ github.token }}
# The PR URL is handed to the script through the environment instead of being
# expanded into the shell source, so its content can never be parsed as shell.
- name: Enable auto-merge
if: steps.diff.outputs.changed == 'true'
run: |
gh pr merge "$PR_URL" --auto --squash
env:
GH_TOKEN: ${{ github.token }}
PR_URL: ${{ steps.open-pr.outputs.pr_url }}

View File

@ -1,39 +0,0 @@
# On a release event, points the litellm_stable_release_branch branch at
# $GITHUB_SHA and force-pushes it.
# NOTE(review): the workflow name says "litellm_stable" but every command below
# operates on "litellm_stable_release_branch" — confirm which name is intended.
name: Reset litellm_stable branch
on:
release:
# NOTE(review): listing both types presumably starts two runs for a release
# that is created and published in one action — confirm this is intended.
types: [published, created]
jobs:
update-stable-branch:
# Only act on tags that start with "v" and do not end with "-stable".
if: ${{ startsWith(github.event.release.tag_name, 'v') && !endsWith(github.event.release.tag_name, '-stable') }}
runs-on: ubuntu-latest
steps:
- name: Checkout repository
uses: actions/checkout@v3
- name: Reset litellm_stable_release_branch branch to the release commit
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
run: |
# Configure Git user
git config user.name "github-actions[bot]"
git config user.email "github-actions[bot]@users.noreply.github.com"
# Fetch all branches and tags
git fetch --all
# Check if the litellm_stable_release_branch branch exists
if git show-ref --verify --quiet refs/remotes/origin/litellm_stable_release_branch; then
echo "litellm_stable_release_branch branch exists."
git checkout litellm_stable_release_branch
else
echo "litellm_stable_release_branch branch does not exist. Creating it."
git checkout -b litellm_stable_release_branch
fi
# Reset litellm_stable_release_branch branch to the release commit
git reset --hard $GITHUB_SHA
# Push the updated litellm_stable_release_branch branch
# (--force: the reset above can move the branch to a non-descendant commit)
git push origin litellm_stable_release_branch --force

View File

@ -33,7 +33,9 @@ jobs:
timeout-minutes: 30
steps:
- name: Checkout repository
uses: actions/checkout@v4
uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # v4.3.0
with:
persist-credentials: false
- name: Validate tag input
env:
@ -49,11 +51,12 @@ jobs:
TAG: ${{ inputs.tag }}
AZURE_API_KEY: ${{ secrets.AZURE_API_KEY }}
AZURE_API_BASE: ${{ secrets.AZURE_API_BASE }}
WORKSPACE: ${{ github.workspace }}
run: |
docker run -d \
--name litellm-rc \
-p 4000:4000 \
-v "${{ github.workspace }}/.github/observatory/litellm_config.yaml:/app/config.yaml" \
-v "${WORKSPACE}/.github/observatory/litellm_config.yaml:/app/config.yaml" \
-e LITELLM_MASTER_KEY="${LITELLM_MASTER_KEY}" \
-e AZURE_API_KEY="${AZURE_API_KEY}" \
-e AZURE_API_BASE="${AZURE_API_BASE}" \
@ -77,8 +80,9 @@ jobs:
- name: Start cloudflared tunnel
run: |
# Install cloudflared
# Install cloudflared (pinned version + checksum)
curl -sL https://github.com/cloudflare/cloudflared/releases/download/2025.2.1/cloudflared-linux-amd64 -o /usr/local/bin/cloudflared
echo "afdfadd1ef552e66bffc35246fe30a9bd578356d2d386de95585ccfc432472b8 /usr/local/bin/cloudflared" | sha256sum -c -
chmod +x /usr/local/bin/cloudflared
# Start a quick tunnel (no account needed) and capture the URL
@ -103,11 +107,11 @@ jobs:
- name: Verify tunnel connectivity
run: |
echo "Testing tunnel at ${{ env.TUNNEL_URL }}..."
echo "Testing tunnel at ${TUNNEL_URL}..."
# Quick tunnels need time for DNS propagation; retry to avoid
# transient NXDOMAIN (curl exit code 6) on first attempt.
for i in $(seq 1 10); do
if curl -sf "${{ env.TUNNEL_URL }}/health/liveliness" > /dev/null 2>&1; then
if curl -sf "${TUNNEL_URL}/health/liveliness" > /dev/null 2>&1; then
echo "Tunnel is working (attempt $i)"
exit 0
fi
@ -221,5 +225,5 @@ jobs:
- name: Cleanup
if: always()
run: |
kill "${{ env.CLOUDFLARED_PID }}" 2>/dev/null || true
kill "$CLOUDFLARED_PID" 2>/dev/null || true
docker rm -f litellm-rc 2>/dev/null || true

View File

@ -21,14 +21,15 @@ jobs:
contents: read
steps:
- name: Checkout scripts
uses: actions/checkout@v4
uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # v4.3.0
with:
sparse-checkout: .github/scripts
persist-credentials: false
- name: Set up Python
uses: actions/setup-python@v5
uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5.6.0
with:
python-version: "3.11"
python-version: "3.13"
- name: Scan for duplicate issues
env:

47
.github/workflows/scorecard.yml vendored Normal file
View File

@ -0,0 +1,47 @@
# Runs the OpenSSF Scorecard action and publishes its SARIF report both as a
# build artifact and to GitHub code scanning.
name: Scorecard supply-chain security
on:
# Re-run when a branch protection rule changes.
branch_protection_rule:
schedule:
# Thursdays at 12:27 UTC.
- cron: '27 12 * * 4'
push:
branches: ["main"]
# Workflow-wide default is a read-only token; the job below declares its own scopes.
permissions: read-all
jobs:
analysis:
name: Scorecard analysis
runs-on: ubuntu-latest
# Skip runs whose ref is not the repository's default branch.
if: github.event.repository.default_branch == github.ref_name
permissions:
# Needed by the final step to upload SARIF results to code scanning.
security-events: write
# Needed by scorecard-action when publish_results is true.
id-token: write
# Uncomment for private repos if needed:
# contents: read
# actions: read
steps:
- name: Checkout code
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
with:
# Do not leave the token in the local git config after checkout.
persist-credentials: false
- name: Run analysis
uses: ossf/scorecard-action@f49aabe0b5af0936a0987cfb85d86b75731b0186 # v2.4.1
with:
results_file: results.sarif
results_format: sarif
publish_results: true
# Keep the raw report downloadable from the run for five days.
- name: Upload artifact
uses: actions/upload-artifact@4cec3d8aa04e39d1a68397de0c4cd6fb9dce8ec1 # v4.6.1
with:
name: SARIF file
path: results.sarif
retention-days: 5
- name: Upload to code scanning
uses: github/codeql-action/upload-sarif@c10b806170c8ee63ea24152429041b5624f0baf5 # v4.35.1
with:
sarif_file: results.sarif

View File

@ -1,67 +0,0 @@
# Manually triggered release: writes the requested version into pyproject.toml,
# builds the package, and uploads dist/* with twine.
name: Simple PyPI Publish
on:
workflow_dispatch:
inputs:
version:
description: 'Version to publish (e.g., 1.74.10)'
required: true
type: string
env:
TWINE_USERNAME: __token__
jobs:
publish:
runs-on: ubuntu-latest
# Never publish from forks.
if: github.repository == 'BerriAI/litellm'
steps:
- name: Checkout code
uses: actions/checkout@v4
- name: Set up Python
uses: actions/setup-python@v4
with:
python-version: '3.8'
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install toml build wheel twine
# The version input reaches Python through the environment. Expanding the
# input expression inside the script text would let a crafted value be
# executed as Python code.
- name: Update version in pyproject.toml
env:
PUBLISH_VERSION: ${{ github.event.inputs.version }}
run: |
python -c "
import os
import pathlib
import toml
version = os.environ['PUBLISH_VERSION']
data = toml.load('pyproject.toml')
data['tool']['poetry']['version'] = version
pathlib.Path('pyproject.toml').write_text(toml.dumps(data))
print(f'Updated version to {version}')
"
- name: Copy model prices file
run: |
cp model_prices_and_context_window.json litellm/model_prices_and_context_window_backup.json
- name: Build package
run: |
rm -rf build dist
python -m build
- name: Publish to PyPI
env:
TWINE_PASSWORD: ${{ secrets.PYPI_PUBLISH_PASSWORD }}
run: |
twine upload dist/*
# Same rule as above: the input is read from the environment, not expanded
# into the shell source.
- name: Output success
env:
PUBLISH_VERSION: ${{ github.event.inputs.version }}
run: |
echo "✅ Successfully published litellm v${PUBLISH_VERSION} to PyPI"
echo "📦 Package: https://pypi.org/project/litellm/${PUBLISH_VERSION}/"

View File

@ -2,19 +2,24 @@ name: "Stale Issue Management"
on:
schedule:
- cron: '0 0 * * *' # Runs daily at midnight UTC
- cron: "0 0 * * *" # Runs daily at midnight UTC
workflow_dispatch:
permissions:
issues: write
pull-requests: write
jobs:
stale:
if: github.repository == 'BerriAI/litellm'
runs-on: ubuntu-latest
steps:
- uses: actions/stale@v8
- uses: actions/stale@1160a2240286f5da8ec72b1c0816ce2481aabf84 # v8
with:
repo-token: "${{ secrets.GITHUB_TOKEN }}"
stale-issue-message: "This issue has been automatically marked as stale because it has not had recent activity. It will be closed if no further activity occurs."
stale-pr-message: "This pull request has been automatically marked as stale because it has not had recent activity. It will be closed if no further activity occurs."
days-before-stale: 90 # Revert to 60 days
days-before-close: 7 # Revert to 7 days
days-before-stale: 90 # Revert to 60 days
days-before-close: 7 # Revert to 7 days
stale-issue-label: "stale"
operations-per-run: 1000
operations-per-run: 1000

73
.github/workflows/sync-schema.yml vendored Normal file
View File

@ -0,0 +1,73 @@
# When a PR touches the root schema.prisma, copies it over the two packaged
# copies and pushes a sync commit to the PR branch if they differed.
name: Sync schema.prisma copies
on:
pull_request:
paths:
- 'schema.prisma'
# Scoped to ONLY the permissions needed:
# - contents:write to push the sync commit to the PR branch
# No other scope is granted: once a permissions block is declared, every
# unlisted scope defaults to none.
permissions:
contents: write
jobs:
sync:
name: Copy root schema to proxy and proxy-extras
runs-on: ubuntu-latest
timeout-minutes: 5
# Only run on PRs from branches in THIS repo (not forks).
# Fork PRs cannot push back to the head branch with GITHUB_TOKEN,
# and pull_request events from forks have read-only tokens anyway.
# Also reject PRs from branches named after protected branches to
# prevent pushing directly to main/master.
if: >-
github.event.pull_request.head.repo.full_name == github.repository
&& github.head_ref != 'main'
&& github.head_ref != 'master'
steps:
- name: Checkout PR branch by SHA
uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # v4.3.0
with:
# Check out the PR head commit by SHA rather than by branch name —
# github.head_ref is an attacker-controlled string (the branch name)
# and could contain unusual characters that cause unexpected git behavior.
ref: ${{ github.event.pull_request.head.sha }}
persist-credentials: true # needed for git push
# Refuse to copy through symlinks: cp would follow them and read or
# overwrite whatever file they point at.
- name: Reject symlinked schema files
run: |
for f in schema.prisma litellm/proxy/schema.prisma litellm-proxy-extras/litellm_proxy_extras/schema.prisma; do
if [ -L "$f" ]; then
echo "::error file=$f::$f is a symlink, which is not allowed"
exit 1
fi
done
- name: Copy root schema to other locations
run: |
cp schema.prisma litellm/proxy/schema.prisma
cp schema.prisma litellm-proxy-extras/litellm_proxy_extras/schema.prisma
# Expose changed=true/false so the commit step runs only when a copy differed.
- name: Check for changes
id: diff
run: |
if git diff --quiet -- litellm/proxy/schema.prisma litellm-proxy-extras/litellm_proxy_extras/schema.prisma; then
echo "changed=false" >> "$GITHUB_OUTPUT"
echo "Schemas already in sync. Nothing to do."
else
echo "changed=true" >> "$GITHUB_OUTPUT"
echo "Schema copies need updating."
fi
- name: Commit synced schemas
if: steps.diff.outputs.changed == 'true'
run: |
# Push to the PR's head branch (need the branch name for git push).
# We checked out by SHA above for safety, so configure the push target explicitly.
# The branch name is read from the GITHUB_HEAD_REF environment variable and
# always quoted, so it is never parsed as shell syntax.
git config user.name "github-actions[bot]"
git config user.email "41898282+github-actions[bot]@users.noreply.github.com"
git checkout -B "$GITHUB_HEAD_REF"
git add -- litellm/proxy/schema.prisma litellm-proxy-extras/litellm_proxy_extras/schema.prisma
git commit -m "chore: sync schema.prisma copies from root"
git push origin "HEAD:$GITHUB_HEAD_REF"

View File

@ -2,7 +2,10 @@ name: LiteLLM Linting
on:
pull_request:
branches: [ main ]
branches: [main]
permissions:
contents: read
jobs:
lint:
@ -10,69 +13,73 @@ jobs:
timeout-minutes: 5
steps:
- uses: actions/checkout@v4
with:
fetch-depth: 0
clean: true
- uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # v4.3.0
with:
fetch-depth: 0
clean: true
persist-credentials: false
- name: Set up Python
uses: actions/setup-python@v4
with:
python-version: '3.12'
- name: Set up Python
uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5.6.0
with:
python-version: "3.12"
- name: Install Poetry
uses: snok/install-poetry@v1
- name: Install Poetry
run: pip install 'poetry==2.3.2'
- name: Clean Python cache
run: |
find . -type d -name "__pycache__" -exec rm -rf {} + || true
find . -name "*.pyc" -delete || true
- name: Clean Python cache
run: |
find . -type d -name "__pycache__" -exec rm -rf {} + || true
find . -name "*.pyc" -delete || true
- name: Install dependencies
run: |
poetry lock
poetry install --with dev
- name: Check poetry.lock is up to date
run: |
poetry check --lock || (echo "❌ poetry.lock is out of sync with pyproject.toml. Run 'poetry lock' locally and commit the result." && exit 1)
- name: Check Black formatting
run: |
cd litellm
poetry run black --check --exclude '/enterprise/' .
cd ..
- name: Install dependencies
run: |
poetry install --with dev
- name: Debug - Check file state
run: |
echo "Current branch:"
git branch --show-current
echo "Last 3 commits:"
git log --oneline -3
echo "File content around line 43:"
head -50 litellm/litellm_core_utils/custom_logger_registry.py | tail -10
- name: Run Ruff linting
run: |
cd litellm
poetry run ruff check .
cd ..
- name: Check Black formatting
run: |
cd litellm
poetry run black --check --exclude '/enterprise/' .
cd ..
- name: Print OpenAI version
run: |
poetry run python -c "import openai; print(f'OpenAI version: {openai.__version__}')"
- name: Debug - Check file state
run: |
echo "Current branch:"
git branch --show-current
echo "Last 3 commits:"
git log --oneline -3
echo "File content around line 43:"
head -50 litellm/litellm_core_utils/custom_logger_registry.py | tail -10
- name: Run MyPy type checking
run: |
cd litellm
poetry run mypy .
cd ..
- name: Run Ruff linting
run: |
cd litellm
poetry run ruff check .
cd ..
- name: Check for circular imports
run: |
cd litellm
poetry run python ../tests/documentation_tests/test_circular_imports.py
cd ..
- name: Print OpenAI version
run: |
poetry run python -c "import openai; print(f'OpenAI version: {openai.__version__}')"
- name: Check import safety
run: |
poetry run python -c "from litellm import *" || (echo '🚨 import failed, this means you introduced unprotected imports! 🚨'; exit 1)
- name: Run MyPy type checking
run: |
cd litellm
poetry run mypy .
cd ..
- name: Check for circular imports
run: |
cd litellm
poetry run python ../tests/documentation_tests/test_circular_imports.py
cd ..
- name: Check import safety
run: |
poetry run python -c "from litellm import *" || (echo '🚨 import failed, this means you introduced unprotected imports! 🚨'; exit 1)
secret-scan:
runs-on: ubuntu-latest
@ -81,27 +88,28 @@ jobs:
contents: read
steps:
- uses: actions/checkout@v4
with:
fetch-depth: 0
- uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # v4.3.0
with:
fetch-depth: 0
persist-credentials: false
- name: Set up Python
uses: actions/setup-python@v4
with:
python-version: '3.12'
- name: Set up Python
uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5.6.0
with:
python-version: "3.12"
- name: Run secret scan test
run: |
pip install pytest
pytest tests/litellm/test_no_hardcoded_secrets.py -v
- name: Run secret scan test
run: |
pip install 'pytest==9.0.2'
pytest tests/litellm/test_no_hardcoded_secrets.py -v
- name: Run ggshield secret scan
env:
GITGUARDIAN_API_KEY: ${{ secrets.GITGUARDIAN_API_KEY }}
run: |
if [ -n "$GITGUARDIAN_API_KEY" ]; then
pip install ggshield
ggshield secret scan repo .
else
echo "GITGUARDIAN_API_KEY not set, skipping ggshield scan"
fi
- name: Run ggshield secret scan
env:
GITGUARDIAN_API_KEY: ${{ secrets.GITGUARDIAN_API_KEY }}
run: |
if [ -n "$GITGUARDIAN_API_KEY" ]; then
pip install 'ggshield==1.48.0'
ggshield secret scan repo .
else
echo "GITGUARDIAN_API_KEY not set, skipping ggshield scan"
fi

View File

@ -4,6 +4,9 @@ on:
pull_request:
branches: [main]
permissions:
contents: read
# Cancel in-progress runs for the same PR
concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
@ -12,7 +15,7 @@ concurrency:
jobs:
test:
runs-on: ubuntu-latest
timeout-minutes: 20 # Increased from 15 to 20
timeout-minutes: 20 # Increased from 15 to 20
strategy:
fail-fast: false
matrix:
@ -43,7 +46,7 @@ jobs:
- name: "integrations"
path: "tests/test_litellm/integrations"
workers: 2
reruns: 3 # Integration tests tend to be flakier
reruns: 3 # Integration tests tend to be flakier
- name: "core-utils"
path: "tests/test_litellm/litellm_core_utils"
workers: 2
@ -117,18 +120,20 @@ jobs:
name: test (${{ matrix.test-group.name }})
steps:
- uses: actions/checkout@v4
- uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # v4.3.0
with:
persist-credentials: false
- name: Set up Python
uses: actions/setup-python@v5
uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5.6.0
with:
python-version: "3.12"
- name: Install Poetry
uses: snok/install-poetry@v1
run: pip install 'poetry==2.3.2'
- name: Cache Poetry dependencies
uses: actions/cache@v4
uses: actions/cache@0057852bfaa89a56745cba8c7296529d2fc39830 # v4.3.0
with:
path: |
~/.cache/pypoetry
@ -144,14 +149,17 @@ jobs:
poetry install --with dev,proxy-dev --extras "proxy semantic-router"
# pytest-rerunfailures and pytest-xdist are in pyproject.toml dev dependencies
poetry run pip install google-genai==1.22.0 \
google-cloud-aiplatform>=1.38 fastapi-offline==1.7.3 python-multipart==0.0.22 openapi-core
google-cloud-aiplatform==1.115.0 fastapi-offline==1.7.3 python-multipart==0.0.22 openapi-core==0.23.0
- name: Setup litellm-enterprise
run: |
poetry run pip install --force-reinstall --no-deps -e enterprise/
- name: Generate Prisma client
env:
PRISMA_BINARY_CACHE_DIR: ${{ runner.temp }}/prisma-cache
run: |
poetry run pip install nodejs-wheel-binaries==24.13.1
poetry run prisma generate --schema litellm/proxy/schema.prisma
- name: Run tests - ${{ matrix.test-group.name }}

View File

@ -16,10 +16,12 @@ jobs:
steps:
- name: Checkout repository
uses: actions/checkout@v4
uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # v4.3.0
with:
persist-credentials: false
- name: Setup Node.js
uses: actions/setup-node@v4
uses: actions/setup-node@a0853c24544627f65ddf259abe73b1d18a591444 # v5.0
with:
node-version: "20"
cache: "npm"

View File

@ -4,45 +4,50 @@ name: LiteLLM Mock Tests (folder - tests/test_litellm)
# the same tests in parallel across 10 jobs for faster CI times.
# Kept for manual debugging only.
on:
workflow_dispatch: # Manual trigger only
workflow_dispatch: # Manual trigger only
# pull_request:
# branches: [ main ]
permissions:
contents: read
jobs:
test:
runs-on: ubuntu-latest
timeout-minutes: 25
steps:
- uses: actions/checkout@v4
- uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # v4.3.0
with:
persist-credentials: false
- name: Thank You Message
run: |
echo "### 🙏 Thank you for contributing to LiteLLM!" >> $GITHUB_STEP_SUMMARY
echo "Your PR is being tested now. We appreciate your help in making LiteLLM better!" >> $GITHUB_STEP_SUMMARY
- name: Thank You Message
run: |
echo "### 🙏 Thank you for contributing to LiteLLM!" >> $GITHUB_STEP_SUMMARY
echo "Your PR is being tested now. We appreciate your help in making LiteLLM better!" >> $GITHUB_STEP_SUMMARY
- name: Set up Python
uses: actions/setup-python@v4
with:
python-version: '3.12'
- name: Set up Python
uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5.6.0
with:
python-version: "3.12"
- name: Install Poetry
uses: snok/install-poetry@v1
- name: Install Poetry
run: pip install 'poetry==2.3.2'
- name: Install dependencies
run: |
poetry lock
poetry install --with dev,proxy-dev --extras "proxy semantic-router"
poetry run pip install "pytest-retry==1.6.3"
poetry run pip install pytest-xdist
poetry run pip install "google-genai==1.22.0"
poetry run pip install "google-cloud-aiplatform>=1.38"
poetry run pip install "fastapi-offline==1.7.3"
poetry run pip install "python-multipart>=0.0.20"
poetry run pip install "openapi-core"
- name: Setup litellm-enterprise as local package
run: |
poetry run pip install --force-reinstall --no-deps -e enterprise/
- name: Run tests
run: |
poetry run pytest tests/test_litellm --tb=short -vv --maxfail=10 -n 4 --durations=50
- name: Install dependencies
run: |
poetry lock
poetry install --with dev,proxy-dev --extras "proxy semantic-router"
poetry run pip install "pytest-retry==1.6.3"
poetry run pip install 'pytest-xdist==3.8.0'
poetry run pip install "google-genai==1.22.0"
poetry run pip install "google-cloud-aiplatform==1.115.0"
poetry run pip install "fastapi-offline==1.7.3"
poetry run pip install "python-multipart==0.0.22"
poetry run pip install "openapi-core==0.23.0"
- name: Setup litellm-enterprise as local package
run: |
poetry run pip install --force-reinstall --no-deps -e enterprise/
- name: Run tests
run: |
poetry run pytest tests/test_litellm --tb=short -vv --maxfail=10 -n 4 --durations=50

View File

@ -2,7 +2,10 @@ name: LiteLLM MCP Tests (folder - tests/mcp_tests)
on:
pull_request:
branches: [ main ]
branches: [main]
permissions:
contents: read
jobs:
test:
@ -10,38 +13,40 @@ jobs:
timeout-minutes: 25
steps:
- uses: actions/checkout@v4
- uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # v4.3.0
with:
persist-credentials: false
- name: Thank You Message
run: |
echo "### 🙏 Thank you for contributing to LiteLLM!" >> $GITHUB_STEP_SUMMARY
echo "Your PR is being tested now. We appreciate your help in making LiteLLM better!" >> $GITHUB_STEP_SUMMARY
- name: Thank You Message
run: |
echo "### 🙏 Thank you for contributing to LiteLLM!" >> $GITHUB_STEP_SUMMARY
echo "Your PR is being tested now. We appreciate your help in making LiteLLM better!" >> $GITHUB_STEP_SUMMARY
- name: Set up Python
uses: actions/setup-python@v4
with:
python-version: '3.12'
- name: Set up Python
uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5.6.0
with:
python-version: "3.12"
- name: Install Poetry
uses: snok/install-poetry@v1
- name: Install Poetry
run: pip install 'poetry==2.3.2'
- name: Install dependencies
run: |
poetry lock
poetry install --with dev,proxy-dev --extras "proxy semantic-router"
poetry run pip install "pytest==7.3.1"
poetry run pip install "pytest-retry==1.6.3"
poetry run pip install "pytest-cov==5.0.0"
poetry run pip install "pytest-asyncio==0.21.1"
poetry run pip install "respx==0.22.0"
poetry run pip install "pydantic==2.11.0"
poetry run pip install "mcp==1.25.0"
poetry run pip install pytest-xdist
- name: Install dependencies
run: |
poetry lock
poetry install --with dev,proxy-dev --extras "proxy semantic-router"
poetry run pip install "pytest==7.3.1"
poetry run pip install "pytest-retry==1.6.3"
poetry run pip install "pytest-cov==5.0.0"
poetry run pip install "pytest-asyncio==0.21.1"
poetry run pip install "respx==0.22.0"
poetry run pip install "pydantic==2.11.0"
poetry run pip install "mcp==1.25.0"
poetry run pip install 'pytest-xdist==3.8.0'
- name: Setup litellm-enterprise as local package
run: |
poetry run pip install --force-reinstall --no-deps -e enterprise/
- name: Setup litellm-enterprise as local package
run: |
poetry run pip install --force-reinstall --no-deps -e enterprise/
- name: Run MCP tests
run: |
poetry run pytest tests/mcp_tests -x -vv -n 4 --cov=litellm --cov-report=xml --durations=5
- name: Run MCP tests
run: |
poetry run pytest tests/mcp_tests -x -vv -n 4 --cov=litellm --cov-report=xml --durations=5

View File

@ -2,13 +2,18 @@ name: Validate model_prices_and_context_window.json
on:
pull_request:
branches: [ main ]
branches: [main]
permissions:
contents: read
jobs:
validate-model-prices-json:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # v4.3.0
with:
persist-credentials: false
- name: Validate model_prices_and_context_window.json
run: |

View File

@ -9,6 +9,9 @@ concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
cancel-in-progress: true
permissions:
contents: read
jobs:
proxy_e2e_azure_batches_tests:
runs-on: ubuntu-latest
@ -30,18 +33,20 @@ jobs:
--health-retries 5
steps:
- uses: actions/checkout@v4
- uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # v4.3.0
with:
persist-credentials: false
- name: Set up Python
uses: actions/setup-python@v5
uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5.6.0
with:
python-version: "3.12"
- name: Install Poetry
uses: snok/install-poetry@v1
run: pip install 'poetry==2.3.2'
- name: Cache Poetry dependencies
uses: actions/cache@v4
uses: actions/cache@0057852bfaa89a56745cba8c7296529d2fc39830 # v4.3.0
with:
path: |
~/.cache/pypoetry
@ -56,14 +61,17 @@ jobs:
run: |
poetry config virtualenvs.in-project true
poetry install --with dev,proxy-dev --extras "proxy"
poetry run pip install psycopg2-binary uvicorn fastapi httpx tenacity
poetry run pip install psycopg2-binary==2.9.11 uvicorn==0.42.0 fastapi==0.135.2 httpx==0.28.1 tenacity==9.1.4
- name: Setup litellm-enterprise
run: |
poetry run pip install --force-reinstall --no-deps -e enterprise/
- name: Generate Prisma client
env:
PRISMA_BINARY_CACHE_DIR: ${{ runner.temp }}/prisma-cache
run: |
poetry run pip install nodejs-wheel-binaries==24.13.1
poetry run prisma generate --schema litellm/proxy/schema.prisma
- name: Run Prisma migrations
@ -87,4 +95,3 @@ jobs:
--tb=short \
--maxfail=3 \
--durations=10

View File

@ -0,0 +1,38 @@
# Runs the Redis-backed caching tests through the shared services-based
# unit-test workflow.
name: "Unit Tests: Caching (Redis)"
# Uses cloud Redis credentials — only runs on trusted branches, not PRs.
# This prevents external PRs from accessing Redis credentials.
on:
push:
branches: [main, "litellm_*"]
permissions:
contents: read
# One active run per ref; a newer push cancels the run still in progress.
concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: true
jobs:
caching-redis:
uses: ./.github/workflows/_test-unit-services-base.yml
with:
# Redis-only tests that do NOT require provider API keys.
# Tests needing API keys (test_caching.py, test_caching_ssl.py, test_prometheus_service.py,
# test_router_caching.py) are in Phase 3 integration workflows.
test-path: >-
tests/local_testing/test_dual_cache.py
tests/local_testing/test_redis_batch_optimizations.py
tests/local_testing/test_router_utils.py
workers: 2
reruns: 2
timeout-minutes: 20
enable-redis: true
enable-postgres: false
secrets:
REDIS_HOST: ${{ secrets.REDIS_HOST }}
REDIS_PORT: ${{ secrets.REDIS_PORT }}
REDIS_PASSWORD: ${{ secrets.REDIS_PASSWORD }}
# NOTE(review): database secrets are forwarded although enable-postgres is
# false — presumably the reusable workflow declares them as required; confirm.
DATABASE_URL: ${{ secrets.DATABASE_URL }}
POSTGRES_USER: ${{ secrets.POSTGRES_USER }}
POSTGRES_PASSWORD: ${{ secrets.POSTGRES_PASSWORD }}

View File

@ -0,0 +1,20 @@
# Runs tests/test_litellm/litellm_core_utils on PRs targeting main through the
# shared unit-test workflow.
name: "Unit Tests: Core Utilities"
on:
pull_request:
branches: [main]
permissions:
contents: read
# One active run per PR (falling back to the ref); a newer run cancels the older one.
concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
cancel-in-progress: true
jobs:
core-utils:
uses: ./.github/workflows/_test-unit-base.yml
with:
test-path: "tests/test_litellm/litellm_core_utils"
# presumably parallel worker count and rerun count for failed tests;
# both are defined by the reusable workflow — confirm there.
workers: 2
reruns: 1

View File

@ -0,0 +1,67 @@
# Installs the project with Poetry and runs the documentation validation
# scripts under tests/documentation_tests on PRs targeting main.
name: "Unit Tests: Documentation Validation"
on:
pull_request:
branches: [main]
permissions:
contents: read
# One active run per PR (falling back to the ref); a newer run cancels the older one.
concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
cancel-in-progress: true
jobs:
documentation:
runs-on: ubuntu-latest
timeout-minutes: 10
steps:
- uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # v4.3.0
with:
persist-credentials: false
- name: Set up Python
uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5.6.0
with:
python-version: "3.12"
- name: Install Poetry
run: pip install 'poetry==2.3.2'
# Cache key follows poetry.lock; restore-keys allows reusing an older cache
# for the same OS when the lock file changed.
- name: Cache Poetry dependencies
uses: actions/cache@0057852bfaa89a56745cba8c7296529d2fc39830 # v4.3.0
with:
path: |
~/.cache/pypoetry
~/.cache/pip
.venv
key: ${{ runner.os }}-poetry-${{ hashFiles('poetry.lock') }}
restore-keys: |
${{ runner.os }}-poetry-
- name: Install dependencies
run: |
# Keep the virtualenv in ./.venv so the cache step above can store it.
poetry config virtualenvs.in-project true
poetry install --with dev,proxy-dev --extras "proxy semantic-router"
poetry run pip install google-genai==1.22.0 \
google-cloud-aiplatform==1.115.0 fastapi-offline==1.7.3 python-multipart==0.0.22 openapi-core==0.23.0
# Reinstall the local enterprise package in editable mode without touching
# the dependencies installed above.
- name: Setup litellm-enterprise
run: |
poetry run pip install --force-reinstall --no-deps -e enterprise/
- name: Generate Prisma client
env:
PRISMA_BINARY_CACHE_DIR: ${{ runner.temp }}/prisma-cache
run: |
poetry run pip install nodejs-wheel-binaries==24.13.1
poetry run prisma generate --schema litellm/proxy/schema.prisma
# Run the same documentation tests that CircleCI ran (as direct Python scripts)
- name: Run documentation validation tests
run: |
poetry run python ./tests/documentation_tests/test_env_keys.py
poetry run python ./tests/documentation_tests/test_router_settings.py
poetry run python ./tests/documentation_tests/test_api_docs.py
poetry run python ./tests/documentation_tests/test_circular_imports.py

View File

@ -0,0 +1,24 @@
# Runs the enterprise, google_genai, router_utils and router_strategy test
# folders on PRs targeting main through the shared unit-test workflow.
name: "Unit Tests: Enterprise, Google GenAI & Routing"
on:
pull_request:
branches: [main]
permissions:
contents: read
# One active run per PR (falling back to the ref); a newer run cancels the older one.
concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
cancel-in-progress: true
jobs:
enterprise-routing:
uses: ./.github/workflows/_test-unit-base.yml
with:
# Folded scalar: the paths below are joined into one space-separated string.
test-path: >-
tests/test_litellm/enterprise
tests/test_litellm/google_genai
tests/test_litellm/router_utils
tests/test_litellm/router_strategy
workers: 2
reruns: 2

View File

@ -0,0 +1,20 @@
# Runs tests/test_litellm/integrations on PRs targeting main through the
# shared unit-test workflow.
name: "Unit Tests: Integrations (Callbacks & Logging)"
on:
pull_request:
branches: [main]
permissions:
contents: read
# One active run per PR (falling back to the ref); a newer run cancels the older one.
concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
cancel-in-progress: true
jobs:
integrations:
uses: ./.github/workflows/_test-unit-base.yml
with:
test-path: "tests/test_litellm/integrations"
workers: 2
# Highest rerun count among the sibling unit-test workflows in this change.
reruns: 3

View File

@ -0,0 +1,29 @@
# Runs the tests under tests/test_litellm/llms on PRs targeting main, split
# into a Vertex AI job and a job for every other provider.
name: "Unit Tests: LLM Provider Transformations"
on:
pull_request:
branches: [main]
permissions:
contents: read
# One active run per PR (falling back to the ref); a newer run cancels the older one.
concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
cancel-in-progress: true
jobs:
vertex-ai:
name: Vertex AI
uses: ./.github/workflows/_test-unit-base.yml
with:
test-path: "tests/test_litellm/llms/vertex_ai"
workers: 1
reruns: 2
other-providers:
name: All Other Providers
uses: ./.github/workflows/_test-unit-base.yml
with:
# The --ignore flag excludes the folder covered by the vertex-ai job above.
# presumably the reusable workflow passes this string to pytest unquoted so
# the flag is parsed as an option — confirm in _test-unit-base.yml.
test-path: "tests/test_litellm/llms --ignore=tests/test_litellm/llms/vertex_ai"
workers: 2
reruns: 2

31
.github/workflows/test-unit-misc.yml vendored Normal file
View File

@ -0,0 +1,31 @@
# Runs the remaining tests/test_litellm folders and the top-level test_*.py
# files on PRs targeting main through the shared unit-test workflow.
name: "Unit Tests: MCP, Secrets, Containers & Misc"
on:
pull_request:
branches: [main]
permissions:
contents: read
# One active run per PR (falling back to the ref); a newer run cancels the older one.
concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
cancel-in-progress: true
jobs:
misc:
uses: ./.github/workflows/_test-unit-base.yml
with:
# Folded scalar: the paths below are joined into one space-separated string.
test-path: >-
tests/test_litellm/secret_managers
tests/test_litellm/a2a_protocol
tests/test_litellm/anthropic_interface
tests/test_litellm/completion_extras
tests/test_litellm/containers
tests/test_litellm/experimental_mcp_client
tests/test_litellm/images
tests/test_litellm/interactions
tests/test_litellm/passthrough
tests/test_litellm/vector_stores
tests/test_litellm/test_*.py
# NOTE(review): the glob on the last path only matches files if the reusable
# workflow lets the shell expand it (i.e. does not quote the value) — confirm.
workers: 2
reruns: 2

View File

@ -0,0 +1,20 @@
# Runs the proxy auth, hooks, policy_engine and client test folders on PRs
# targeting main through the shared unit-test workflow.
name: "Unit Tests: Proxy Auth & Key Management"
on:
pull_request:
branches: [main]
permissions:
contents: read
# One active run per PR (falling back to the ref); a newer run cancels the older one.
concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
cancel-in-progress: true
jobs:
proxy-auth:
uses: ./.github/workflows/_test-unit-base.yml
with:
# Four space-separated test folders in a single string.
test-path: "tests/test_litellm/proxy/auth tests/test_litellm/proxy/hooks tests/test_litellm/proxy/policy_engine tests/test_litellm/proxy/client"
workers: 2
reruns: 2

View File

@ -0,0 +1,45 @@
name: "Unit Tests: Proxy DB Operations"
# Uses DATABASE_URL secret — only runs on trusted branches, not PRs.
on:
push:
branches: [main, "litellm_*"]
# The workflow token only gets read access to repository contents.
permissions:
contents: read
# Push-triggered, so the concurrency group is keyed on the ref alone; a newer
# push to the same ref cancels the run still in progress.
concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: true
jobs:
# One reusable-workflow invocation per matrix entry. fail-fast is off so a
# failure in one test group does not cancel the others.
proxy-db:
strategy:
fail-fast: false
matrix:
include:
# Key generation tests must NOT run in parallel (event loop conflicts with logging worker)
# (workers: 0 presumably disables parallel workers in the base workflow — confirm there.)
- test-group: key-generation
test-path: "tests/proxy_unit_tests/test_key_generate_prisma.py"
workers: 0
timeout: 30
- test-group: auth-checks
test-path: "tests/proxy_unit_tests/test_auth_checks.py tests/proxy_unit_tests/test_user_api_key_auth.py"
workers: 8
timeout: 20
# Whole directory minus the three files already covered by the groups above.
- test-group: remaining
test-path: "tests/proxy_unit_tests --ignore=tests/proxy_unit_tests/test_key_generate_prisma.py --ignore=tests/proxy_unit_tests/test_auth_checks.py --ignore=tests/proxy_unit_tests/test_user_api_key_auth.py"
workers: 8
timeout: 20
uses: ./.github/workflows/_test-unit-services-base.yml
with:
test-path: ${{ matrix.test-path }}
workers: ${{ matrix.workers }}
reruns: 2
timeout-minutes: ${{ matrix.timeout }}
# Inputs of _test-unit-services-base.yml — presumably toggle the Redis and
# Postgres services for the job; confirm against that workflow.
enable-redis: false
enable-postgres: true
# Secrets are forwarded to the reusable workflow explicitly, one by one.
secrets:
DATABASE_URL: ${{ secrets.DATABASE_URL }}
POSTGRES_USER: ${{ secrets.POSTGRES_USER }}
POSTGRES_PASSWORD: ${{ secrets.POSTGRES_PASSWORD }}

View File

@ -0,0 +1,35 @@
# Runs the proxy endpoint unit-test directories for pull requests that
# target main.
name: "Unit Tests: Proxy API Endpoints"
on:
pull_request:
branches: [main]
# The workflow token only gets read access to repository contents.
permissions:
contents: read
# One concurrency group per workflow + PR number (falling back to the ref);
# a newer run in the same group cancels the one still in progress.
concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
cancel-in-progress: true
jobs:
# Single job delegating to the shared reusable unit-test workflow.
proxy-endpoints:
uses: ./.github/workflows/_test-unit-base.yml
with:
# Folded scalar (>-): the directories below are joined into one
# space-separated string with no trailing newline.
test-path: >-
tests/test_litellm/proxy/management_endpoints
tests/test_litellm/proxy/guardrails
tests/test_litellm/proxy/management_helpers
tests/test_litellm/proxy/anthropic_endpoints
tests/test_litellm/proxy/google_endpoints
tests/test_litellm/proxy/openai_files_endpoint
tests/test_litellm/proxy/response_api_endpoints
tests/test_litellm/proxy/image_endpoints
tests/test_litellm/proxy/vector_store_endpoints
tests/test_litellm/proxy/agent_endpoints
tests/test_litellm/proxy/discovery_endpoints
tests/test_litellm/proxy/health_endpoints
tests/test_litellm/proxy/public_endpoints
tests/test_litellm/proxy/prompts
tests/test_litellm/proxy/ui_crud_endpoints
workers: 2
reruns: 2

View File

@ -0,0 +1,28 @@
# Runs the proxy infrastructure unit tests (db, middleware, spend tracking,
# pass-through, common utils and the top-level proxy test_*.py files) for pull
# requests that target main.
name: "Unit Tests: Proxy Infrastructure"
on:
pull_request:
branches: [main]
# The workflow token only gets read access to repository contents.
permissions:
contents: read
# One concurrency group per workflow + PR number (falling back to the ref);
# a newer run in the same group cancels the one still in progress.
concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
cancel-in-progress: true
jobs:
# Single job delegating to the shared reusable unit-test workflow.
proxy-infra:
uses: ./.github/workflows/_test-unit-base.yml
with:
# Folded scalar (>-): the paths below are joined into one space-separated
# string with no trailing newline.
# NOTE(review): both proxy/_experimental and proxy/experimental are listed —
# confirm both directories exist and neither entry is a typo.
test-path: >-
tests/test_litellm/proxy/db
tests/test_litellm/proxy/middleware
tests/test_litellm/proxy/spend_tracking
tests/test_litellm/proxy/pass_through_endpoints
tests/test_litellm/proxy/_experimental
tests/test_litellm/proxy/experimental
tests/test_litellm/proxy/common_utils
tests/test_litellm/proxy/test_*.py
workers: 2
reruns: 2

View File

@ -0,0 +1,96 @@
# Runs the legacy tests/proxy_unit_tests suite for pull requests that target
# main, split into matrix groups by file-name glob. Unlike the other unit-test
# workflows this one defines its own steps instead of calling a reusable workflow.
name: "Unit Tests: Proxy Legacy Tests"
on:
pull_request:
branches: [main]
# The workflow token only gets read access to repository contents.
permissions:
contents: read
# One concurrency group per workflow + PR number (falling back to the ref);
# a newer run in the same group cancels the one still in progress.
concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
cancel-in-progress: true
jobs:
test:
runs-on: ubuntu-latest
timeout-minutes: 20
# fail-fast is off so one failing group does not cancel the rest.
strategy:
fail-fast: false
matrix:
# Each entry is a display name plus a space-separated list of paths/globs
# that is handed to pytest in the final step.
# NOTE(review): the [a-j]/[k-o]/[r-t]/[u-z] globs plus the explicit test_p*
# entries do not obviously cover every possible test_p*.py name (e.g. other
# test_proxy_s*/test_proxy_t*/test_proxy_u* files) — confirm nothing is skipped.
test-group:
- name: "auth-and-jwt"
path: "tests/proxy_unit_tests/test_[a-j]*.py"
- name: "key-generation"
path: "tests/proxy_unit_tests/test_[k-o]*.py"
- name: "proxy-config"
path: "tests/proxy_unit_tests/test_prisma*.py tests/proxy_unit_tests/test_project*.py tests/proxy_unit_tests/test_prompt*.py tests/proxy_unit_tests/test_proxy_[c-r]*.py"
- name: "proxy-server"
path: "tests/proxy_unit_tests/test_proxy_server.py"
- name: "proxy-server-extras"
path: "tests/proxy_unit_tests/test_proxy_server_*.py tests/proxy_unit_tests/test_proxy_setting_guardrails.py"
- name: "proxy-utils"
path: "tests/proxy_unit_tests/test_proxy_utils.py"
- name: "proxy-token-counter"
path: "tests/proxy_unit_tests/test_proxy_token_counter.py"
- name: "proxy-response-and-misc"
path: "tests/proxy_unit_tests/test_[r-t]*.py"
- name: "proxy-user-auth-and-spend"
path: "tests/proxy_unit_tests/test_[u-z]*.py"
name: ${{ matrix.test-group.name }}
steps:
# Third-party actions are pinned to full commit SHAs; the trailing comment
# records the corresponding release tag.
- uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # v4.3.0
with:
persist-credentials: false
- name: Set up Python
uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5.6.0
with:
python-version: "3.12"
- name: Install Poetry
run: pip install 'poetry==2.3.2'
# Cache is keyed on the OS and the poetry.lock hash; restore-keys allows
# falling back to the most recent cache for the same OS.
- name: Cache Poetry dependencies
uses: actions/cache@0057852bfaa89a56745cba8c7296529d2fc39830 # v4.3.0
with:
path: |
~/.cache/pypoetry
~/.cache/pip
.venv
key: ${{ runner.os }}-poetry-${{ hashFiles('poetry.lock') }}
restore-keys: |
${{ runner.os }}-poetry-
# In-project virtualenv (.venv) matches the cached path above; the extra
# pip packages are installed at exact pinned versions.
- name: Install dependencies
run: |
poetry config virtualenvs.in-project true
poetry install --with dev,proxy-dev --extras "proxy semantic-router"
poetry run pip install google-genai==1.22.0 \
google-cloud-aiplatform==1.115.0 fastapi-offline==1.7.3 python-multipart==0.0.22 openapi-core==0.23.0
# Reinstalls the local enterprise/ package in editable mode without
# touching its dependencies.
- name: Setup litellm-enterprise
run: |
poetry run pip install --force-reinstall --no-deps -e enterprise/
- name: Generate Prisma client
env:
PRISMA_BINARY_CACHE_DIR: ${{ runner.temp }}/prisma-cache
run: |
poetry run pip install nodejs-wheel-binaries==24.13.1
poetry run prisma generate --schema litellm/proxy/schema.prisma
# The matrix path is passed through an env var rather than interpolated into
# the script, and ${TEST_PATH} is left unquoted so the shell splits it into
# separate arguments and expands the globs.
- name: Run tests - ${{ matrix.test-group.name }}
env:
TEST_PATH: ${{ matrix.test-group.path }}
run: |
poetry run pytest ${TEST_PATH} \
--tb=short -vv \
--maxfail=10 \
-n 2 \
--reruns 1 \
--reruns-delay 1 \
--dist=loadscope \
--durations=20

View File

@ -0,0 +1,20 @@
# Runs the responses, caching and types unit tests for pull requests that
# target main.
name: "Unit Tests: Responses, Caching & Types"
on:
pull_request:
branches: [main]
# The workflow token only gets read access to repository contents.
permissions:
contents: read
# One concurrency group per workflow + PR number (falling back to the ref);
# a newer run in the same group cancels the one still in progress.
concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
cancel-in-progress: true
jobs:
# Single job delegating to the shared reusable unit-test workflow; the three
# test directories are passed as one space-separated string.
responses-caching-types:
uses: ./.github/workflows/_test-unit-base.yml
with:
test-path: "tests/test_litellm/responses tests/test_litellm/caching tests/test_litellm/types"
workers: 2
reruns: 2

View File

@ -0,0 +1,28 @@
name: "Unit Tests: Security"
# Uses DATABASE_URL secret — only runs on trusted branches, not PRs.
on:
push:
branches: [main, "litellm_*"]
# The workflow token only gets read access to repository contents.
permissions:
contents: read
# Push-triggered, so the concurrency group is keyed on the ref alone; a newer
# push to the same ref cancels the run still in progress.
concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: true
jobs:
# Runs tests/proxy_security_tests/ through the services-enabled reusable workflow.
security:
uses: ./.github/workflows/_test-unit-services-base.yml
with:
test-path: "tests/proxy_security_tests/"
workers: 1
reruns: 2
timeout-minutes: 20
# Inputs of _test-unit-services-base.yml — presumably toggle the Redis and
# Postgres services for the job; confirm against that workflow.
enable-redis: false
enable-postgres: true
# Secrets are forwarded to the reusable workflow explicitly, one by one.
secrets:
DATABASE_URL: ${{ secrets.DATABASE_URL }}
POSTGRES_USER: ${{ secrets.POSTGRES_USER }}
POSTGRES_PASSWORD: ${{ secrets.POSTGRES_PASSWORD }}

View File

@ -17,13 +17,15 @@ jobs:
steps:
- name: Checkout repository
uses: actions/checkout@v4
uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # v4.3.0
with:
persist-credentials: false
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # v3.12
- name: Build Docker image
uses: docker/build-push-action@v5
uses: docker/build-push-action@0adf9959216b96bec444f325f1e493d4aa344497 #v6.14
with:
context: .
file: ./docker/Dockerfile.non_root

31
.github/workflows/zizmor.yml vendored Normal file
View File

@ -0,0 +1,31 @@
# Runs the zizmor action against this repository on pushes to main and on
# pull requests that target main.
name: GitHub Actions Security Analysis
on:
push:
branches: [main]
pull_request:
branches: [main]
# One concurrency group per workflow + PR number (falling back to the ref for
# pushes); a newer run in the same group cancels the one still in progress.
concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
cancel-in-progress: true
# No permissions at workflow level; the job below grants only what it needs.
permissions: {}
jobs:
zizmor:
name: zizmor
runs-on: ubuntu-latest
timeout-minutes: 5
# security-events: write is presumably needed to upload findings to GitHub
# code scanning — confirm against the zizmor-action documentation.
permissions:
security-events: write
contents: read
actions: read
steps:
# Actions are pinned to full commit SHAs; the trailing comment records the tag.
- name: Checkout repository
uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # v4.3.0
with:
persist-credentials: false
- name: Run zizmor
uses: zizmorcore/zizmor-action@71321a20a9ded102f6e9ce5718a2fcec2c4f70d8 # v0.5.2

View File

@ -14,12 +14,12 @@ repos:
types: [python]
files: (litellm/|litellm_proxy_extras/|enterprise/).*\.py
exclude: ^litellm/__init__.py$
# - id: black
# name: black
# entry: poetry run black
# language: system
# types: [python]
# files: (litellm/|litellm_proxy_extras/|enterprise/).*\.py
- id: black
name: black
entry: poetry run black
language: system
types: [python]
files: (litellm/|litellm_proxy_extras/).*\.py
- repo: https://github.com/pycqa/flake8
rev: 7.0.0 # The version of flake8 to use
hooks:

View File

@ -0,0 +1,18 @@
# Semgrep rule that fails when any file under the repository's .claude/
# directory is present in the scanned tree.
rules:
- id: no-claude-directory-committed
message: >
.claude/ directory must not be committed to the repository.
It contains local Claude Code settings (permissions, worktree paths) that are
developer-machine-specific and may expose internal paths or credentials.
Add .claude/ to .gitignore instead.
severity: ERROR
# "generic" mode: the rule is matched against raw file text, not a parsed language.
languages: [generic]
# Only files under the top-level .claude/ directory are scanned by this rule.
paths:
include:
- "/.claude/**"
- "/.claude/*"
# Matches any non-empty content, so every non-empty file in the included
# paths produces a finding.
pattern-regex: '[\s\S]+'
metadata:
category: security
tags: [supply-chain, secrets]
confidence: HIGH

View File

@ -266,6 +266,7 @@ Support for more providers. Missing a provider or LLM Platform, raise a [feature
<table>
<tr>
<td><img height="60" alt="Stripe" src="https://github.com/user-attachments/assets/f7296d4f-9fbd-460d-9d05-e4df31697c4b" /></td>
<td><img height="60" alt="image" src="https://github.com/user-attachments/assets/436fca71-988b-40bb-b5fe-8450c80fdbd0" /></td>
<td><img height="60" alt="Google ADK" src="https://github.com/user-attachments/assets/caf270a2-5aee-45c4-8222-41a2070c4f19" /></td>
<td><img height="60" alt="Greptile" src="https://github.com/user-attachments/assets/0be4bd8a-7cfa-48d3-9090-f415fe948280" /></td>
<td><img height="60" alt="OpenHands" src="https://github.com/user-attachments/assets/a6150c4c-149e-4cae-888b-8b92be6e003f" /></td>
@ -402,7 +403,7 @@ Support for more providers. Missing a provider or LLM Platform, raise a [feature
# Enterprise
For companies that need better security, user management and professional support
[Talk to founders](https://calendly.com/d/cx9p-5yf-2nm/litellm-introductions)
[Talk to founders](https://enterprise.litellm.ai/demo)
This covers:
- ✅ **Features under the [LiteLLM Commercial License](https://docs.litellm.ai/docs/proxy/enterprise):**
@ -452,7 +453,6 @@ All these checks must pass before your PR can be merged.
- [Schedule Demo 👋](https://calendly.com/d/4mp-gd3-k5k/berriai-1-1-onboarding-litellm-hosted-version)
- [Community Discord 💭](https://discord.gg/wuPM9dRgDw)
- [Community Slack 💭](https://www.litellm.ai/support)
- Our numbers 📞 +1 (770) 8783-106 / +1 (412) 618-6238
- Our emails ✉️ ishaan@berri.ai / krrish@berri.ai
# Why did we build this

View File

@ -10,13 +10,13 @@ echo "Starting security scans for LiteLLM..."
# Function to install Trivy and required tools
install_trivy() {
echo "Installing Trivy and required tools..."
TRIVY_VERSION="0.35.0"
sudo apt-get update
sudo apt-get install -y wget apt-transport-https gnupg lsb-release jq curl bsdmainutils
wget -qO - https://aquasecurity.github.io/trivy-repo/deb/public.key | sudo apt-key add -
echo "deb https://aquasecurity.github.io/trivy-repo/deb $(lsb_release -sc) main" | sudo tee -a /etc/apt/sources.list.d/trivy.list
sudo apt-get update
sudo apt-get install trivy
echo "Trivy and required tools installed successfully"
sudo apt-get install -y wget jq curl bsdmainutils
wget -qO trivy.deb "https://github.com/aquasecurity/trivy/releases/download/v${TRIVY_VERSION}/trivy_${TRIVY_VERSION}_Linux-64bit.deb"
sudo dpkg -i trivy.deb
rm trivy.deb
echo "Trivy ${TRIVY_VERSION} installed successfully"
}
# Function to install Grype
@ -163,6 +163,9 @@ run_grype_scans() {
"CVE-2026-25639" # axios - full fix requires 1.x major version bump; pinned to >=0.30.2 to clear other axios CVEs, upgrade to 1.x in follow-up
"CVE-2026-2297" # Python 3.13 SourcelessFileLoader audit hook bypass - no fix available in base image
"GHSA-qffp-2rhf-9h96" # tar hardlink path traversal - from nodejs_wheel bundled npm, not used in application runtime code
"CVE-2026-2673" # OpenSSL 3.6.1 TLS 1.3 key exchange group negotiation issue - no fix available yet
"CVE-2026-3644" # Python 3.13 vulnerability - no fix available in base image
"CVE-2026-4224" # Python 3.13 Expat parser stack overflow in ElementDeclHandler - no fix available in base image
)
# Build JSON array of allowlisted CVE IDs for jq

View File

@ -178,4 +178,4 @@ Benchmark Results for 'When will BerriAI IPO?':
```
## Support
**🤝 Schedule a 1-on-1 Session:** Book a [1-on-1 session](https://calendly.com/d/cx9p-5yf-2nm/litellm-introductions) with Krrish and Ishaan, the founders, to discuss any issues, provide feedback, or explore how we can improve LiteLLM for you.
**🤝 Schedule a 1-on-1 Session:** Book a [1-on-1 session](https://enterprise.litellm.ai/demo) with Krrish and Ishaan, the founders, to discuss any issues, provide feedback, or explore how we can improve LiteLLM for you.

View File

@ -143,7 +143,6 @@ All responses from the server are returned in the following format (for all LLM
- [Our calendar 👋](https://calendly.com/d/4mp-gd3-k5k/berriai-1-1-onboarding-litellm-hosted-version)
- [Community Discord 💭](https://discord.gg/wuPM9dRgDw)
- Our numbers 📞 +1 (770) 8783-106 / +1 (412) 618-6238
- Our emails ✉️ ishaan@berri.ai / krrish@berri.ai
## Roadmap

View File

@ -164,7 +164,6 @@ All responses from the server are returned in the following format (for all LLM
- [Our calendar 👋](https://calendly.com/d/4mp-gd3-k5k/berriai-1-1-onboarding-litellm-hosted-version)
- [Community Discord 💭](https://discord.gg/wuPM9dRgDw)
- Our numbers 📞 +1 (770) 8783-106 / +1 (412) 618-6238
- Our emails ✉️ ishaan@berri.ai / krrish@berri.ai
## Roadmap

View File

@ -0,0 +1,55 @@
---
slug: ci-cd-v2-improvements
title: "Announcing CI/CD v2 for LiteLLM"
date: 2026-03-30T21:30:00
authors:
- krrish
description: "CI/CD v2 introduces isolated environments, stronger security gates, and safer release separation for LiteLLM."
tags: [engineering, ci-cd, security]
hide_table_of_contents: false
---
import Image from '@theme/IdealImage';
The CI/CD v2 is now live for LiteLLM.
<Image
img={require('../../img/ci_cd_architecture.png')}
style={{width: '700px', height: 'auto', display: 'block'}}
/>
<br/>
Building on the roadmap from our [security incident](https://docs.litellm.ai/blog/security-townhall-updates#roadmap), CI/CD v2 introduces isolated environments, stronger security gates, and safer release separation for LiteLLM.
## What changed
- Security scans and unit tests run in isolated environments.
- Validation and release are separated into different repositories, making it harder for an attacker to reach release credentials.
- Trusted Publishing for PyPI releases - this means no long-lived credentials are used to publish releases.
- Immutable Docker release tags - this means Docker release tags cannot be tampered with after they are published. [Learn more](https://docs.docker.com/docker-hub/repos/manage/hub-images/immutable-tags/). Note: work for GHCR Docker releases is planned as well.
## What's next
Moving forward, we plan on:
- Adopting OpenSSF (this is a set of security criteria that projects should meet to demonstrate a strong security posture - [Learn more](https://baseline.openssf.org/versions/2026-02-19.html))
- We've added Scorecard and Allstar to our GitHub
- Adding SLSA Build Provenance to our CI/CD pipeline - this means we allow users to independently verify that a release came from us and prevent silent modifications of releases after they are published.
We hope that this will mean you can be confident that the releases you are using are safe and from us.
## The principle
The new CI/CD pipeline reflects the principles, outlined below, and is designed to be more secure and reliable:
- **Limit** what each package can access
- **Reduce** the number of sensitive environment variables
- **Avoid** compromised packages
- **Prevent** release tampering
## How to help:
Help us plan April's stability sprint - https://github.com/BerriAI/litellm/issues/24825

View File

@ -0,0 +1,106 @@
---
slug: gpt_5_4_mini_nano
title: "Day 0 Support: GPT-5.4-mini and GPT-5.4-nano"
date: 2026-03-17T10:00:00
authors:
- name: Sameer Kankute
title: SWE @ LiteLLM (LLM Translation)
url: https://www.linkedin.com/in/sameer-kankute/
image_url: https://pbs.twimg.com/profile_images/2001352686994907136/ONgNuSk5_400x400.jpg
- name: Krrish Dholakia
title: "CEO, LiteLLM"
url: https://www.linkedin.com/in/krish-d/
image_url: https://pbs.twimg.com/profile_images/1298587542745358340/DZv3Oj-h_400x400.jpg
- name: Ishaan Jaff
title: "CTO, LiteLLM"
url: https://www.linkedin.com/in/reffajnaahsi/
image_url: https://pbs.twimg.com/profile_images/1613813310264340481/lz54oEiB_400x400.jpg
description: "GPT-5.4-mini and GPT-5.4-nano model support in LiteLLM"
tags: [openai, gpt-5.4-mini, gpt-5.4-nano, completion]
hide_table_of_contents: false
---
import Tabs from '@theme/Tabs';
import TabItem from '@theme/TabItem';
LiteLLM now supports GPT-5.4-mini and GPT-5.4-nano — cost-effective models for simple completions and high-throughput workloads.
:::note
If you're on **v1.82.3-stable** or above, you don't need any update to use these models.
:::
## Usage
<Tabs>
<TabItem value="proxy" label="LiteLLM Proxy">
**1. Setup config.yaml**
```yaml
model_list:
- model_name: gpt-5.4-mini
litellm_params:
model: openai/gpt-5.4-mini
api_key: os.environ/OPENAI_API_KEY
- model_name: gpt-5.4-nano
litellm_params:
model: openai/gpt-5.4-nano
api_key: os.environ/OPENAI_API_KEY
```
**2. Start the proxy**
```bash
litellm --config /path/to/config.yaml
```
**3. Test it**
```bash
# GPT-5.4-mini
curl -X POST "http://localhost:4000/v1/chat/completions" \
-H "Content-Type: application/json" \
-H "Authorization: Bearer $LITELLM_KEY" \
-d '{
"model": "gpt-5.4-mini",
"messages": [{"role": "user", "content": "What is the capital of France?"}]
}'
# GPT-5.4-nano
curl -X POST "http://localhost:4000/v1/chat/completions" \
-H "Content-Type: application/json" \
-H "Authorization: Bearer $LITELLM_KEY" \
-d '{
"model": "gpt-5.4-nano",
"messages": [{"role": "user", "content": "What is 2 + 2?"}]
}'
```
</TabItem>
<TabItem value="sdk" label="LiteLLM SDK">
```python
from litellm import completion
# GPT-5.4-mini
response = completion(
model="openai/gpt-5.4-mini",
messages=[{"role": "user", "content": "What is the capital of France?"}],
)
print(response.choices[0].message.content)
# GPT-5.4-nano
response = completion(
model="openai/gpt-5.4-nano",
messages=[{"role": "user", "content": "What is 2 + 2?"}],
)
print(response.choices[0].message.content)
```
</TabItem>
</Tabs>
## Notes
- Both models support function calling, vision, and tool-use — see the [OpenAI provider docs](../../docs/providers/openai) for advanced usage.
- GPT-5.4-nano is the most cost-effective option for simple tasks; GPT-5.4-mini offers a balance of speed and capability.

View File

@ -0,0 +1,78 @@
---
slug: guardrail-logging-secret-exposure-incident
title: "Incident Report: Guardrail logging exposed secret headers in spend logs and traces"
date: 2026-03-18T10:00:00
authors:
- litellm
tags: [incident-report, security, guardrails]
hide_table_of_contents: false
---
**Date:** March 18, 2026
**Duration:** Unknown
**Severity:** High
**Status:** Resolved
## Summary
When a custom guardrail returned the full LiteLLM request/data dictionary, the guardrail response logged by LiteLLM could include `secret_fields.raw_headers`, including plaintext `Authorization` headers containing API keys or other credentials.
This information could then propagate to logging and observability surfaces that consume guardrail metadata, including:
- **Spend logs in the LiteLLM UI:** visible to admins with access to spend-log data
- **OpenTelemetry traces:** visible to anyone with access to the relevant telemetry backend
LLM calls, proxy routing, and provider execution were not blocked by this bug. The impact was exposure of sensitive request headers in observability and logging paths.
{/* truncate */}
---
## Background
LiteLLM keeps internal request data (including request headers) for use during the call. That data is not meant to be written to logs or telemetry.
When custom guardrails run, their outcomes are logged so they can appear in spend logs, OpenTelemetry traces, and other observability backends. If a guardrail returned the full request payload instead of a minimal result, that internal request data could be included in what was logged. Before the fix, the guardrail logging path did not strip that data before sending it to those systems.
```mermaid
flowchart TD
inboundRequest["1. Incoming proxy request"] --> storeSecrets["2. Store internal request data"]
storeSecrets --> guardrailRuns["3. Custom guardrail runs"]
guardrailRuns --> fullDataReturn["4. Guardrail returns full request payload"]
fullDataReturn --> loggingBuild["5. Build guardrail log payload"]
loggingBuild --> spendLogs["6a. Persist to spend logs / UI"]
loggingBuild --> otelTraces["6b. Attach to OTEL guardrail spans"]
```
---
## Root Cause
The root cause was incomplete sanitization in the guardrail logging path. When building the payload that gets sent to spend logs and traces, LiteLLM prepared guardrail responses for logging but did not strip internal request data (such as headers) from them. If a guardrail returned a response that included that data, it was passed through to the logging and observability systems unchanged.
---
## Impact
This issue required all of the following:
1. A custom guardrail returned the full LiteLLM request/data dictionary, or another response object containing `secret_fields`.
2. LiteLLM logged that guardrail response through the standard guardrail logging path.
3. An operator, admin, or telemetry consumer had access to the resulting logs or traces.
When those conditions were met, sensitive values could become visible through:
- **Spend logs / UI responses:** guardrail metadata could be included in spend-log payloads rendered in the admin UI.
- **OpenTelemetry traces:** `guardrail_response` could be written as a span attribute on guardrail spans.
- **Other downstream observability backends:** any integration consuming the same guardrail metadata could receive the leaked values.
This was a logging and telemetry exposure bug. It did not let callers bypass auth, access other tenants directly, or change model behavior, but it could expose plaintext credentials to people with access to those observability systems.
---
## Guidance For Users
- Upgrade to LiteLLM 1.82.3+.
- If you operated custom guardrails that return the full request/data dict, review whether spend logs or telemetry traces were retained during the affected period.
- Rotate any credentials that may have appeared in `Authorization` or other forwarded request headers in those systems.
- Apply least-privilege access controls to spend-log views and telemetry backends that may contain request-derived metadata.

View File

@ -0,0 +1,190 @@
---
slug: security-townhall-updates
title: "Security Townhall Updates"
date: 2026-03-27T12:00:00
authors:
- krrish
- ishaan-alt
description: "What happened, what we've done, and what comes next for LiteLLM's release and security processes."
tags: [security, incident-report]
hide_table_of_contents: false
---
import Image from '@theme/IdealImage';
Thank you to everyone who joined our town hall.
We wanted to use that time to walk through what we know, what we've done so far, and how we're improving LiteLLM's release and security processes going forward. This post is a written version of that update. [Slides available here](https://drive.google.com/file/d/17hsSG7nk-OYL7VRCTbTa7McrWREtS9OO/view?usp=sharing)
{/* truncate */}
## What happened
On March 24, 2026 at 10:39 UTC, LiteLLM v1.82.7 was pushed to PyPI. Version v1.82.8 was published soon after. Those packages were live for about 40 minutes before being quarantined by PyPI. By 16:00 UTC, the LiteLLM team had worked with PyPI to delete the affected packages.
At this point, our understanding is that this was a supply-chain incident affecting those two published versions.
## How did this happen?
Our understanding is that the issue came from the [compromised Trivy security scanner](https://www.aquasec.com/blog/trivy-supply-chain-attack-what-you-need-to-know/) dependency in our CI/CD pipeline.
<Image
img={require('../../img/shared_ci_cd_environment.png')}
style={{width: '500px', height: '400px', display: 'block'}}
/>
There were three major contributing factors:
### 1. Shared CI/CD environment
At the time, everything was running on CircleCI, and all steps shared a common environment. That increased blast radius: if one component was compromised, it could potentially access credentials or context intended for other parts of the pipeline.
### 2. Static credentials in environment variables
Release credentials, including credentials for PyPI, GHCR, and Docker publishing, were available as static secrets in the environment. That meant a compromised step could access long-lived release credentials.
### 3. Unpinned Trivy dependency
In our security scanning component, we had an unpinned Trivy dependency. Our present understanding is that a compromised Trivy package ran during the scan, had access to environment variables, and enabled attackers to obtain those credentials.
**In summary:** a compromised package in CI had access to secrets it should not have had, and those secrets were then used in the release path.
## What we've already done
In the last 3 days, we've taken the following steps:
### 1. Minimize Scope of Impact
#### Prevented further key abuse
We deleted or rotated all impacted or adjacent secret keys, including PyPI, GitHub, Docker, and related credentials. Out of an abundance of caution, we've also rotated LiteLLM maintainer accounts.
#### Prevent branch attacks
We removed roughly 6,000 open branches and added an auto-deletion policy for branches merged into `main`. This reduces the surface area for branch-based abuse.
#### Pinned CI/CD dependencies
We've pinned all GitHub Actions and are working on pinning all CircleCI dependencies as well.
#### Paused releases
We've paused new releases until we've confirmed codebase security and put stronger release controls in place.
### 2. Secured LiteLLM
#### Forensic analysis
We are working with Google's Mandiant cybersecurity team to confirm the source of the attack and verify the security of the codebase. We also confirmed that no malicious code was pushed to `main`.
#### Confirm Application Security
In parallel, we are working with whitehat hackers at [Veria Labs](https://verialabs.com/) to verify application security and review improvements to our CI/CD process.
We have also confirmed that the last 20 LiteLLM releases contain no indicators of compromise, and that no unauthenticated attacks can be made against LiteLLM Proxy based on our current investigation. [Check Security Blog for release verification.](https://docs.litellm.ai/blog/security-update-march-2026#verified-safe-versions)
#### Created a security working group
We created a new security working group inside LiteLLM focused on:
- Building threat models
- Auditing the build process and dependencies
If you're interested in joining the security working group, please file an issue [here](https://github.com/BerriAI/litellm-security-wg).
### 3. Improved CI/CD
We've already begun making structural changes to how releases are built and published. These align with our goals (covered in the next section) around isolated environments, ephemeral credentials, and release auditing.
## Roadmap
We plan on following 4 guiding principles for our new CI/CD pipeline:
1. **Limit** what each package can access
2. **Reduce** the number of sensitive environment variables
3. **Avoid** compromised packages
4. **Prevent** release tampering
### Isolated environments
<Image
img={require('../../img/isolated_ci_cd_environments.png')}
style={{width: '400px', height: 'auto'}}
/>
We are breaking our CI/CD into 4 semantic concepts:
1. Unit tests
2. Integration tests
3. Security scans
4. Release publishing
And will be running each of these in isolated environments.
This will limit the damage that any single compromised component can cause.
### Ephemeral credentials
We plan to move to ephemeral credentials for PyPI (Trusted Publisher) and GHCR (Token-based authentication) releases. This will reduce the risk of credentials being leaked or compromised.
We have already begun doing this:
- PyPI Trusted Publisher on GitHub Actions [PR](https://github.com/BerriAI/litellm/pull/24654)
- GHCR Token-based authentication on GitHub Actions [PR](https://github.com/BerriAI/litellm/pull/24683)
### Release auditing
Our goal is to allow users to independently verify that a release came from us and prevent silent modifications of releases after they are published.
This will ensure your releases are safe, even when:
- Stolen PyPI/GHCR credentials are used to publish malicious releases
- Tampered registry artifacts are published
- Tag mutations are made after the release is published
We believe that [Cosign](https://github.com/sigstore/cosign) is a good fit for this, and have already begun working on it [PR](https://github.com/BerriAI/litellm/pull/24683).
### Avoid Compromised Packages
- Move to pinned, verified SHAs for packages and actions used in CI/CD, avoiding `latest` wherever possible.
- Add a cooldown period before upgrading to a new version of a package - allows more time to investigate and verify the new version.
We've added zizmor to help us catch issues such as unpinned dependencies and credential leakage. [commit](https://github.com/BerriAI/litellm/commit/a671275f5c5b0e1fb1adacdf3b6ef779aaa5d56c).
## Frequently Asked Questions
**Q: Did you observe any lateral movement into your corporate environment during this incident?**
A: No. Our investigation to date, conducted in coordination with external security experts, has found no evidence of lateral movement into our internal corporate systems. The incident was isolated to the CI/CD pipeline and the release path for specific versions (v1.82.7 and v1.82.8). As a proactive measure, we have rotated all potentially impacted or adjacent secrets—including PyPI, GitHub, and Docker credentials—and updated maintainer account security to ensure continued isolation.
**Q: Do you expect delays in future product releases due to these new security measures?**
A: We are committed to balancing security with speed. While we have temporarily paused releases to implement stronger controls, we are moving quickly to automate our new security protocols. We are currently implementing isolated CI/CD environments, ephemeral credentials (via Trusted Publishers), and release auditing with Cosign. These improvements are designed to be integrated into our automated pipeline, allowing us to maintain a fast release cadence while ensuring every package is verified and secure.
**Q: Were older packages impacted?**
A: Our current findings show no indicators of compromise in the last 20 versions of LiteLLM. This was manually verified by our team and independently reviewed by Veria Labs.
We have also published the verified versions for users to use. [Check Security Blog for release verification.](https://docs.litellm.ai/blog/security-update-march-2026#verified-safe-versions)
## Questions & Support
If you believe your systems may be affected, contact us immediately:
- **Security:** security@berri.ai
- **Support:** support@berri.ai
- **Slack:** Reach out to the LiteLLM team directly [here](https://join.slack.com/t/litellmossslack/shared_invite/zt-3o7nkuyfr-p_kbNJj8taRfXGgQI1~YyA)
## Hiring
We are currently hiring for:
- DevOps Engineer - to keep CI/CD secure and running smoothly
- Security Engineer - to keep the application secure
If you're interested in joining, please apply [here](https://jobs.ashbyhq.com/litellm).

Binary file not shown.

After

Width:  |  Height:  |  Size: 45 KiB

View File

@ -0,0 +1,786 @@
---
slug: security-update-march-2026
title: "Security Update: Suspected Supply Chain Incident"
date: 2026-03-24T14:00:00
authors:
- krrish
- ishaan-alt
description: "As of 2:00 PM ET on March 24, 2026"
tags: [security, incident-report]
hide_table_of_contents: false
---
import Tabs from '@theme/Tabs';
import TabItem from '@theme/TabItem';
import VersionVerificationTable from '@site/src/components/VersionVerificationTable';
> **Status:** Active investigation
> **Last updated:** March 30, 2026
> **Update (March 30):** A new **clean** version of LiteLLM is now available (v1.83.0). This was released by our new [CI/CD v2](https://docs.litellm.ai/blog/ci-cd-v2-improvements) pipeline which added isolated environments, stronger security gates, and safer release separation for LiteLLM.
> **Update (March 27):** Review Townhall updates, including explanation of the incident, what we've done, and what comes next. [Learn more](https://docs.litellm.ai/blog/security-townhall-updates)
> **Update (March 27):** Added [Verified safe versions](#verified-safe-versions) section with SHA-256 checksums for all audited PyPI and Docker releases.
> **Update (March 26):** Added `checkmarx[.]zone` to [Indicators of compromise](#indicators-of-compromise-iocs)
> **Update (March 25):** Added community-contributed scripts for scanning GitHub Actions and GitLab CI pipelines for the compromised versions. See [How to check if you are affected](#how-to-check-if-you-are-affected). s/o [@Zach Fury](https://www.linkedin.com/in/fryware/) for these scripts.
## TLDR;
- The compromised PyPI packages were **litellm==1.82.7** and **litellm==1.82.8**. Those packages were live on March 24, 2026 from 10:39 UTC for about 40 minutes before being quarantined by PyPI.
- We believe that the compromise originated from the [Trivy dependency](https://www.aquasec.com/blog/trivy-supply-chain-attack-what-you-need-to-know/) used in our CI/CD security scanning workflow.
- Customers running the official LiteLLM Proxy Docker image were not impacted. That deployment path pins dependencies in requirements.txt and does not rely on the compromised PyPI packages.
- ~~We have paused all new LiteLLM releases until we complete a broader supply-chain review and confirm the release path is safe.~~ **Updated:** We have now released a new **safe** version of LiteLLM (v1.83.0) by our new [CI/CD v2](https://docs.litellm.ai/blog/ci-cd-v2-improvements) pipeline which added isolated environments, stronger security gates, and safer release separation for LiteLLM. We have also verified the codebase is safe and no malicious code was pushed to `main`.
## Overview
LiteLLM AI Gateway is investigating a suspected supply chain attack involving unauthorized PyPI package publishes. Current evidence suggests a maintainer's PyPI account may have been compromised and used to distribute malicious code.
At this time, we believe this incident may be linked to the broader [Trivy security compromise](https://www.aquasec.com/blog/trivy-supply-chain-attack-what-you-need-to-know/), in which stolen credentials were reportedly used to gain unauthorized access to the LiteLLM publishing pipeline.
This investigation is ongoing. Details below may change as we confirm additional findings.
## Confirmed affected versions
The following LiteLLM versions published to PyPI were impacted:
- **v1.82.7**: contained a malicious payload in the LiteLLM AI Gateway `proxy_server.py`
- **v1.82.8**: contained `litellm_init.pth` and a malicious payload in the LiteLLM AI Gateway `proxy_server.py`
If you installed or ran either of these versions, review the recommendations below immediately.
Note: These versions have already been removed from PyPI.
## What happened
Initial evidence suggests the attacker bypassed official CI/CD workflows and uploaded malicious packages directly to PyPI.
These compromised versions appear to have included a credential stealer designed to:
- Harvest secrets by scanning for:
- environment variables
- SSH keys
- cloud provider credentials (AWS, GCP, Azure)
- Kubernetes tokens
- database passwords
- Encrypt and exfiltrate data via a `POST` request to `models.litellm.cloud`, which is **not** an official BerriAI / LiteLLM domain
## Who is affected
You may be affected if **any** of the following are true:
- You installed or upgraded LiteLLM via `pip` on **March 24, 2026**, between **10:39 UTC and 16:00 UTC**
- You ran `pip install litellm` without pinning a version and received **v1.82.7** or **v1.82.8**
- You built a Docker image during this window that included `pip install litellm` without a pinned version
- A dependency in your project pulled in LiteLLM as a transitive, unpinned dependency
(for example through AI agent frameworks, MCP servers, or LLM orchestration tools)
You are **not** affected if any of the following are true:
**LiteLLM AI Gateway/Proxy users:** Customers running the official LiteLLM Proxy Docker image were not impacted. That deployment path pins dependencies in requirements.txt and does not rely on the compromised PyPI packages.
- You are using **LiteLLM Cloud**
- You are using the official LiteLLM AI Gateway Docker image: `ghcr.io/berriai/litellm`
- You are on **v1.82.6 or earlier** and did not upgrade during the affected window
- You installed LiteLLM from source via the GitHub repository, which was **not** compromised
### How to check if you are affected
<Tabs>
<TabItem value="sdk" label="SDK">
```bash
pip show litellm
```
</TabItem>
<TabItem value="proxy" label="PROXY">
Go to the proxy base url, and check the version of the installed LiteLLM.
![Proxy version check](../../img/security_update_march_2026/proxy_version.png)
</TabItem>
<TabItem value="github" label="GitHub Actions">
Scans all repositories in a GitHub organization for workflow jobs that installed the compromised versions.
**Requirements:** Python 3 and `requests` (`pip install requests`).
**Setup:**
```bash
export GITHUB_TOKEN="your-github-pat"
```
**Run:**
```bash
python find_litellm_github.py
```
Set the `ORG` variable in the script to your GitHub organization name.
Both scripts default to scanning jobs from **today**. Adjust the `WINDOW_START` and `WINDOW_END` constants to cover **March 24, 2026** (the incident date) if running on a different day.
<details>
<summary>View full script (find_litellm_github.py)</summary>
```python
#!/usr/bin/env python3
"""
Scan all GitHub Actions jobs in a GitHub org that ran between
0800-1244 UTC today and identify any that installed litellm 1.82.7 or 1.82.8.
Adjust WINDOW_START / WINDOW_END to cover March 24, 2026 if running later.
"""
import io
import os
import re
import sys
import zipfile
from concurrent.futures import ThreadPoolExecutor, as_completed
from datetime import datetime, timezone
import requests
# Root of the GitHub REST API; request URLs in this script are built from it.
GITHUB_URL = "https://api.github.com"
ORG = "your-org" # <-- set to your GitHub organization
# Access token read from the environment (empty string when unset).
TOKEN = os.environ.get("GITHUB_TOKEN", "")

# Scan window: 08:00-12:44 UTC on the day the script is run.
# Edit WINDOW_START / WINDOW_END to scan a different date or time range.
TODAY = datetime.now(timezone.utc).date()
WINDOW_START = datetime(TODAY.year, TODAY.month, TODAY.day, 8, 0, 0, tzinfo=timezone.utc)
WINDOW_END = datetime(TODAY.year, TODAY.month, TODAY.day, 12, 44, 0, tzinfo=timezone.utc)

# Versions the scan reports.
TARGET_VERSIONS = {"1.82.7", "1.82.8"}

# Captures the X.Y.Z version that follows "litellm" in a log line.
# Accepted separators:
#   litellm-1.82.7     (wheel / "installed" style)
#   litellm=1.82.7
#   litellm==1.82.7    (requirement-specifier style; the previous single-character
#                       class could not match the doubled "=")
#   litellm (1.82.7)   (parenthesised style)
# Only spaces/tabs are allowed before "(" so a match never spans two log lines.
VERSION_PATTERN = re.compile(
    r"litellm(?:[=\-]+|[ \t]*\()(\d+\.\d+\.\d+)",
    re.IGNORECASE,
)
# One shared HTTP session so every request carries the same auth and
# API-version headers.
SESSION = requests.Session()
SESSION.headers["Authorization"] = f"Bearer {TOKEN}"
SESSION.headers["Accept"] = "application/vnd.github+json"
SESSION.headers["X-GitHub-Api-Version"] = "2022-11-28"
def get_paginated(url, params=None):
    """Yield every item of a page-numbered listing, one page request at a time.

    Args:
        url: Endpoint to GET.
        params: Optional query parameters; copied, never mutated. ``per_page``
            defaults to 100 when the caller does not supply it.

    Yields:
        Each element of the response list. When the response is a JSON object,
        the first list-valued field of that object is used as the item list.

    Raises:
        requests.HTTPError: for any error status other than 404 (a 404 simply
            ends the iteration).
    """
    query = dict(params or {})
    query.setdefault("per_page", 100)
    query["page"] = 1
    while True:
        resp = SESSION.get(url, params=query, timeout=30)
        if resp.status_code == 404:
            return
        resp.raise_for_status()
        payload = resp.json()
        if isinstance(payload, dict):
            # Envelope response: pick the first field that holds a list.
            items = []
            for value in payload.values():
                if isinstance(value, list):
                    items = value
                    break
        else:
            items = payload
        if not items:
            return
        for item in items:
            yield item
        # A short page means there is nothing after it.
        if len(items) < query["per_page"]:
            return
        query["page"] += 1
def parse_ts(ts_str):
    """Convert an ISO-8601 timestamp string to a ``datetime``.

    A literal ``Z`` is rewritten to ``+00:00`` before parsing. Returns ``None``
    for an empty or missing value.
    """
    return datetime.fromisoformat(ts_str.replace("Z", "+00:00")) if ts_str else None
def get_repos():
    """Return ``id`` / ``name`` / ``full_name`` for every repository listed for ORG."""
    listing = get_paginated(f"{GITHUB_URL}/orgs/{ORG}/repos", {"type": "all"})
    return [
        {"id": entry["id"], "name": entry["name"], "full_name": entry["full_name"]}
        for entry in listing
    ]
def get_runs_in_window(repo_full_name):
    """Return the workflow runs of one repository that started inside the scan window.

    The listing is requested with a ``created`` range filter, then each run is
    re-checked locally against its ``run_started_at`` (falling back to
    ``created_at``) timestamp.
    """
    stamp = "%Y-%m-%dT%H:%M:%SZ"
    created_filter = WINDOW_START.strftime(stamp) + ".." + WINDOW_END.strftime(stamp)
    listing = get_paginated(
        f"{GITHUB_URL}/repos/{repo_full_name}/actions/runs",
        {"created": created_filter, "per_page": 100},
    )
    selected = []
    for run in listing:
        when = parse_ts(run.get("run_started_at") or run.get("created_at"))
        if not when:
            continue
        if when < WINDOW_START or when > WINDOW_END:
            continue
        selected.append(run)
    return selected
def get_jobs_for_run(repo_full_name, run_id):
    """Return the jobs of one workflow run whose ``started_at`` lies inside the scan window.

    Jobs without a ``started_at`` timestamp are dropped.
    """
    listing = get_paginated(
        f"{GITHUB_URL}/repos/{repo_full_name}/actions/runs/{run_id}/jobs",
        {"filter": "all"},
    )
    # Pair each job with its parsed start time so it is parsed only once.
    timed = ((job, parse_ts(job.get("started_at"))) for job in listing)
    return [job for job, when in timed if when and WINDOW_START <= when <= WINDOW_END]
def fetch_job_log(repo_full_name, job_id):
    """Download the log of one job and return it as text.

    Returns an empty string when the log endpoint answers 403, 404 or 410.
    If the body looks like a zip archive (Content-Type mentions "zip" or the
    body starts with the ``PK`` signature), every member is decoded as UTF-8
    (undecodable bytes replaced) and the members are joined, in sorted name
    order, with newlines. A body that claims to be a zip but cannot be opened
    is returned as plain response text.

    Raises:
        requests.HTTPError: for any other error status.
    """
    resp = SESSION.get(
        f"{GITHUB_URL}/repos/{repo_full_name}/actions/jobs/{job_id}/logs",
        timeout=60,
        allow_redirects=True,
    )
    if resp.status_code in (403, 404, 410):
        return ""
    resp.raise_for_status()

    looks_zipped = "zip" in resp.headers.get("Content-Type", "") or resp.content[:2] == b"PK"
    if not looks_zipped:
        return resp.text
    try:
        with zipfile.ZipFile(io.BytesIO(resp.content)) as archive:
            return "\n".join(
                archive.read(member).decode("utf-8", errors="replace")
                for member in sorted(archive.namelist())
            )
    except zipfile.BadZipFile:
        # Not a real archive after all; fall back to the raw text.
        return resp.text
def check_job(repo_full_name, job):
    """Scan one job's log for the targeted litellm versions.

    Args:
        repo_full_name: ``owner/repo`` string used to build API and web URLs.
        job: Job object; must contain ``id``, ``name`` and ``run_id``.

    Returns:
        ``None`` when the log is empty or no targeted version is found.
        Otherwise a dict describing the hit: repo, run/job identifiers, start
        time, the sorted list of versions seen, up to ten matching log lines,
        and a URL for the job.
    """
    job_id, job_name, run_id = job["id"], job["name"], job["run_id"]
    started = job.get("started_at", "")

    log_text = fetch_job_log(repo_full_name, job_id)
    if not log_text:
        return None

    # (version, stripped line) for each log line whose first version match
    # is one of the targeted versions.
    flagged = []
    for raw_line in log_text.splitlines():
        found = VERSION_PATTERN.search(raw_line)
        if found is not None and found.group(1) in TARGET_VERSIONS:
            flagged.append((found.group(1), raw_line.strip()))
    if not flagged:
        return None

    fallback_url = f"https://github.com/{repo_full_name}/actions/runs/{run_id}"
    return {
        "repo": repo_full_name,
        "run_id": run_id,
        "job_id": job_id,
        "job_name": job_name,
        "started_at": started,
        "versions": sorted({version for version, _ in flagged}),
        "context": [text for _, text in flagged][:10],
        "job_url": job.get("html_url", fallback_url),
    }
def main():
    """Run the scan: list repos, collect in-window jobs, check their logs, print a report.

    Exits with status 1 when no token is configured. HTTP errors while listing
    runs or jobs are reported on stderr and the affected repo/run is skipped;
    any exception while checking a single job is reported and that job skipped.
    """
    if not TOKEN:
        print("ERROR: Set GITHUB_TOKEN environment variable.", file=sys.stderr)
        sys.exit(1)

    print(f"Time window : {WINDOW_START.isoformat()} -> {WINDOW_END.isoformat()}")
    print(f"Hunting for : litellm {', '.join(sorted(TARGET_VERSIONS))}")
    print()
    print(f"Fetching repositories for org '{ORG}'...")
    repos = get_repos()
    print(f" Found {len(repos)} repositories")
    print()

    # Phase 1: collect (repo, job) pairs that ran inside the window.
    pending = []
    print("Scanning workflow runs for time window...")
    for repo in repos:
        repo_name = repo["full_name"]
        try:
            runs = get_runs_in_window(repo_name)
        except requests.HTTPError as err:
            print(f" WARN: {repo_name} - {err}", file=sys.stderr)
            continue
        if not runs:
            continue
        print(f" {repo_name}: {len(runs)} run(s) in window")
        for run in runs:
            try:
                run_jobs = get_jobs_for_run(repo_name, run["id"])
            except requests.HTTPError as err:
                print(f" WARN: run {run['id']} - {err}", file=sys.stderr)
                continue
            pending.extend((repo_name, job) for job in run_jobs)

    total = len(pending)
    print(f"\nFetching logs for {total} job(s)...")
    print()

    # Phase 2: download and scan the logs concurrently.
    hits = []
    with ThreadPoolExecutor(max_workers=8) as pool:
        in_flight = {}
        for repo_name, job in pending:
            task = pool.submit(check_job, repo_name, job)
            in_flight[task] = (repo_name, job["id"])
        for done, task in enumerate(as_completed(in_flight), start=1):
            repo_name, jid = in_flight[task]
            try:
                outcome = task.result()
            except Exception as err:
                print(f" ERROR {repo_name} job {jid}: {err}", file=sys.stderr)
                continue
            if outcome:
                hits.append(outcome)
            marker = f" *** HIT: litellm {outcome['versions']} ***" if outcome else ""
            print(f" [{done}/{total}] {repo_name} job {jid}" + marker, flush=True)

    # Phase 3: report.
    rule = "=" * 72
    print()
    print(rule)
    print(f"RESULTS: {len(hits)} job(s) installed litellm {' or '.join(sorted(TARGET_VERSIONS))}")
    print(rule)
    if not hits:
        print("No matches found.")
        return

    for hit in sorted(hits, key=lambda entry: entry["started_at"]):
        report = (
            "",
            f" Repo : {hit['repo']}",
            f" Job : {hit['job_name']} (#{hit['job_id']})",
            f" Run ID : {hit['run_id']}",
            f" Started : {hit['started_at']}",
            f" Versions : litellm {', '.join(hit['versions'])}",
            f" URL : {hit['job_url']}",
            " Log lines :",
        )
        for text in report:
            print(text)
        for log_line in hit["context"]:
            print(f" {log_line}")


if __name__ == "__main__":
    main()
```
</details>
</TabItem>
<TabItem value="gitlab" label="GitLab CI">
Scans all projects in a GitLab group (including subgroups) for CI/CD jobs that installed the compromised versions.
**Requirements:** Python 3 and `requests` (`pip install requests`).
**Setup:**
```bash
export GITLAB_TOKEN="your-gitlab-pat"
```
**Run:**
```bash
python find_litellm_jobs.py
```
Set the `GROUP_NAME` variable in the script to your GitLab group name.
Both scripts default to scanning jobs from **today**. Adjust the `WINDOW_START` and `WINDOW_END` constants to cover **March 24, 2026** (the incident date) if running on a different day.
<details>
<summary>View full script (find_litellm_jobs.py)</summary>
```python
#!/usr/bin/env python3
"""
Scan all GitLab CI/CD jobs in a GitLab group that ran between
0800-1244 UTC today and identify any that installed litellm 1.82.7 or 1.82.8.
Adjust WINDOW_START / WINDOW_END to cover March 24, 2026 if running later.
"""
import os
import re
import sys
from concurrent.futures import ThreadPoolExecutor, as_completed
from datetime import datetime, timezone
import requests
# Base URL of the GitLab instance; API URLs and job links are built from it.
GITLAB_URL = "https://gitlab.com"
GROUP_NAME = "YourGroup" # <-- set to your GitLab group name
# Access token read from the environment (empty string when unset).
TOKEN = os.environ.get("GITLAB_TOKEN", "")

# Scan window: 08:00-12:44 UTC on the day the script is run.
# Edit WINDOW_START / WINDOW_END to scan a different date or time range.
TODAY = datetime.now(timezone.utc).date()
WINDOW_START = datetime(TODAY.year, TODAY.month, TODAY.day, 8, 0, 0, tzinfo=timezone.utc)
WINDOW_END = datetime(TODAY.year, TODAY.month, TODAY.day, 12, 44, 0, tzinfo=timezone.utc)

# Versions the scan reports.
TARGET_VERSIONS = {"1.82.7", "1.82.8"}

# Captures the X.Y.Z version that follows "litellm" in a job trace.
# Accepted separators:
#   litellm-1.82.7     (wheel / "installed" style)
#   litellm=1.82.7
#   litellm==1.82.7    (requirement-specifier style; the previous single-character
#                       class could not match the doubled "=")
#   litellm (1.82.7)   (parenthesised style)
# Only spaces/tabs are allowed before "(" so a match never spans two trace lines.
VERSION_PATTERN = re.compile(
    r"litellm(?:[=\-]+|[ \t]*\()(\d+\.\d+\.\d+)",
    re.IGNORECASE,
)
# One shared HTTP session; the token header is attached to every request.
SESSION = requests.Session()
HEADERS = {"PRIVATE-TOKEN": TOKEN}
SESSION.headers.update(HEADERS)
def get_paginated(url, params=None):
    """Yield every item of a page-numbered listing, one page request at a time.

    Args:
        url: Endpoint to GET.
        params: Optional query parameters; copied, never mutated. ``per_page``
            defaults to 100 when the caller does not supply it.

    Yields:
        Each element of each JSON page, until a page is empty or shorter than
        ``per_page``.

    Raises:
        requests.HTTPError: for any error status.
    """
    query = dict(params or {})
    per_page = query.setdefault("per_page", 100)
    page = 0
    while True:
        page += 1
        query["page"] = page
        resp = SESSION.get(url, params=query, timeout=30)
        resp.raise_for_status()
        batch = resp.json()
        if not batch:
            return
        yield from batch
        # A short page means there is nothing after it.
        if len(batch) < per_page:
            return
def get_group_id(group_name):
    """Resolve a group name or full path to its numeric id.

    The value is percent-encoded before being placed in the URL so that a
    nested group path such as ``parent/child`` is sent as one path segment
    (``parent%2Fchild``) instead of being read as extra URL segments.
    A value that is already percent-encoded is decoded first, so it is not
    double-encoded.

    Args:
        group_name: Group name, full path, or numeric id.

    Returns:
        The ``id`` field of the group returned by the API.

    Raises:
        requests.HTTPError: if the lookup returns an error status.
    """
    # Local import keeps this function self-contained.
    from urllib.parse import quote, unquote

    encoded = quote(unquote(str(group_name)), safe="")
    resp = SESSION.get(f"{GITLAB_URL}/api/v4/groups/{encoded}", timeout=30)
    resp.raise_for_status()
    return resp.json()["id"]
def get_all_projects(group_id):
    """Return ``id`` and namespaced path for every non-archived project of a group.

    Subgroup projects are included (``include_subgroups=true``). The ``name``
    field of each returned dict holds the project's ``path_with_namespace``.
    """
    listing = get_paginated(
        f"{GITLAB_URL}/api/v4/groups/{group_id}/projects",
        {"include_subgroups": "true", "archived": "false"},
    )
    return [{"id": entry["id"], "name": entry["path_with_namespace"]} for entry in listing]
def parse_ts(ts_str):
    """Convert an ISO-8601 timestamp string to a ``datetime``.

    A literal ``Z`` is rewritten to ``+00:00`` before parsing. Returns ``None``
    for an empty or missing value.
    """
    if ts_str:
        normalized = ts_str.replace("Z", "+00:00")
        return datetime.fromisoformat(normalized)
    return None
def jobs_in_window(project_id):
    """Return the jobs of one project whose start time lies inside the scan window.

    Jobs are listed page by page (100 per page, scopes success / failed /
    canceled / running). A job's time is ``started_at``, or ``created_at``
    when that is missing. Paging stops after the first page that contains a
    job older than the window, after a short page, or after an empty page.
    A 403 response ends the scan and returns whatever was collected so far.

    Raises:
        requests.HTTPError: for any other error status.
    """
    endpoint = f"{GITLAB_URL}/api/v4/projects/{project_id}/jobs"
    query = {"per_page": 100, "scope[]": ["success", "failed", "canceled", "running"]}
    in_window = []
    page = 0
    while True:
        page += 1
        query["page"] = page
        resp = SESSION.get(endpoint, params=query, timeout=30)
        if resp.status_code == 403:
            return in_window
        resp.raise_for_status()
        batch = resp.json()
        if not batch:
            return in_window

        saw_older = False
        for job in batch:
            when = parse_ts(job.get("started_at") or job.get("created_at"))
            if when is None or when > WINDOW_END:
                continue
            if when >= WINDOW_START:
                in_window.append(job)
            else:
                # Older than the window: finish this page, then stop paging.
                saw_older = True

        if saw_older or len(batch) < 100:
            return in_window
def fetch_trace(project_id, job_id):
    """Return the trace (log text) of one job, or ``""`` on a 403 or 404 response.

    Raises:
        requests.HTTPError: for any other error status.
    """
    resp = SESSION.get(
        f"{GITLAB_URL}/api/v4/projects/{project_id}/jobs/{job_id}/trace",
        timeout=60,
    )
    if resp.status_code not in (403, 404):
        resp.raise_for_status()
        return resp.text
    return ""
def check_job(project_name, project_id, job):
    """Scan one job's trace for the targeted litellm versions.

    Args:
        project_name: Namespaced project path, used in the result and job URL.
        project_id: Numeric project id used to fetch the trace.
        job: Job object; must contain ``id`` and ``name``.

    Returns:
        ``None`` when the trace is empty or no targeted version is found.
        Otherwise a dict describing the hit: project, job identifiers, ref,
        start time, the sorted list of versions seen, up to ten matching trace
        lines, and a URL for the job.
    """
    job_id = job["id"]
    job_name = job["name"]
    ref = job.get("ref", "")
    # Fall back with `or`, not a .get() default: a key that is present but
    # null would otherwise yield None here, and main() sorts hits by this
    # value, which fails when None is mixed with strings.
    started = job.get("started_at") or job.get("created_at") or ""

    trace = fetch_trace(project_id, job_id)
    if not trace:
        return None

    # Single pass over the trace: collect every targeted version and the
    # lines that mention one, checking all matches on a line, not just the first.
    found_versions = set()
    context_lines = []
    for line in trace.splitlines():
        on_this_line = {m.group(1) for m in VERSION_PATTERN.finditer(line)} & TARGET_VERSIONS
        if on_this_line:
            found_versions |= on_this_line
            context_lines.append(line.strip())

    if not found_versions:
        return None

    return {
        "project": project_name,
        "project_id": project_id,
        "job_id": job_id,
        "job_name": job_name,
        "ref": ref,
        "started_at": started,
        "versions": sorted(found_versions),
        "context": context_lines[:10],
        "job_url": f"{GITLAB_URL}/{project_name}/-/jobs/{job_id}",
    }
def main():
    """Run the scan: resolve the group, list projects, collect in-window jobs, check traces, print a report.

    Exits with status 1 when no token is configured. HTTP errors while listing
    a project's jobs are reported on stderr and that project is skipped; any
    exception while checking a single job is reported and that job skipped.
    """
    if not TOKEN:
        print("ERROR: Set GITLAB_TOKEN environment variable.", file=sys.stderr)
        sys.exit(1)

    print(f"Time window : {WINDOW_START.isoformat()} -> {WINDOW_END.isoformat()}")
    print(f"Hunting for : litellm {', '.join(sorted(TARGET_VERSIONS))}")
    print()
    print(f"Resolving group '{GROUP_NAME}'...")
    group_id = get_group_id(GROUP_NAME)
    print("Fetching projects...")
    projects = get_all_projects(group_id)
    print(f" Found {len(projects)} projects")
    print()

    # Phase 1: collect (project name, project id, job) triples inside the window.
    pending = []
    print("Scanning job listings for time window...")
    for project in projects:
        try:
            window_jobs = jobs_in_window(project["id"])
        except requests.HTTPError as err:
            print(f" WARN: {project['name']} - {err}", file=sys.stderr)
            continue
        if window_jobs:
            print(f" {project['name']}: {len(window_jobs)} job(s) in window")
        pending.extend((project["name"], project["id"], job) for job in window_jobs)

    total = len(pending)
    print(f"\nFetching traces for {total} job(s)...")
    print()

    # Phase 2: download and scan the traces concurrently.
    hits = []
    with ThreadPoolExecutor(max_workers=10) as pool:
        in_flight = {}
        for pname, pid, job in pending:
            task = pool.submit(check_job, pname, pid, job)
            in_flight[task] = (pname, job["id"])
        for done, task in enumerate(as_completed(in_flight), start=1):
            pname, jid = in_flight[task]
            try:
                outcome = task.result()
            except Exception as err:
                print(f" ERROR checking {pname} job {jid}: {err}", file=sys.stderr)
                continue
            if outcome:
                hits.append(outcome)
            marker = f" *** HIT: litellm {outcome['versions']} ***" if outcome else ""
            print(f" [{done}/{total}] checked {pname} job {jid}" + marker, flush=True)

    # Phase 3: report.
    rule = "=" * 72
    print()
    print(rule)
    print(f"RESULTS: {len(hits)} job(s) installed litellm {' or '.join(sorted(TARGET_VERSIONS))}")
    print(rule)
    if not hits:
        print("No matches found.")
        return

    for hit in sorted(hits, key=lambda entry: entry["started_at"]):
        report = (
            "",
            f" Project : {hit['project']}",
            f" Job : {hit['job_name']} (#{hit['job_id']})",
            f" Branch/tag: {hit['ref']}",
            f" Started : {hit['started_at']}",
            f" Versions : litellm {', '.join(hit['versions'])}",
            f" URL : {hit['job_url']}",
            " Log lines :",
        )
        for text in report:
            print(text)
        for log_line in hit["context"]:
            print(f" {log_line}")


if __name__ == "__main__":
    main()
```
</details>
</TabItem>
</Tabs>
*CI/CD scripts contributed by the community ([original gist](https://gist.github.com/fryz/93ec8d4898ffe5b5ac5706a208823ef3)). Review before running.*
## Indicators of compromise (IoCs)
Review affected systems for the following indicators:
- `litellm_init.pth` present in your `site-packages`
- Outbound traffic or requests to `models.litellm[.]cloud`
This domain is **not** affiliated with LiteLLM
- Outbound traffic or requests to `checkmarx[.]zone`
This domain is **not** affiliated with LiteLLM
## Immediate actions for affected users
If you installed or ran **v1.82.7** or **v1.82.8**, take the following actions immediately.
### 1. Rotate all secrets
Treat any credentials present on the affected systems as compromised, including:
- API keys
- Cloud access keys
- Database passwords
- SSH keys
- Kubernetes tokens
- Any secrets stored in environment variables or configuration files
### 2. Inspect your filesystem
Check your `site-packages` directory for a file named `litellm_init.pth`:
```bash
# Search the whole filesystem rather than one interpreter's directory, so
# other Python versions, virtual environments, and per-user installs are
# covered too. Permission errors are discarded.
find / -type f -name "litellm_init.pth" 2>/dev/null
```
If present:
- remove it immediately
- investigate the host for further compromise
- preserve relevant artifacts if your security team is performing forensics
### 3. Audit version history
Review your:
- Local environments
- CI/CD pipelines
- Docker builds
- Deployment logs
Confirm whether **v1.82.7** or **v1.82.8** was installed anywhere.
Pin LiteLLM to a known safe version such as **v1.82.6 or earlier**, or to a later verified release once announced.
## Response and remediation
The LiteLLM AI Gateway team has already taken the following steps:
- Removed compromised packages from PyPI
- Rotated maintainer credentials and established new authorized maintainers
- Engaged Google's Mandiant security team to assist with forensic analysis of the build and publishing chain
## Verified safe versions
We have audited every LiteLLM release published between v1.78.0 and v1.82.6 across both PyPI and Docker. Each artifact was verified by:
1. Downloading the published artifact and computing its SHA-256 digest
2. Scanning for the known [indicators of compromise](#indicators-of-compromise-iocs) (IOCs)
3. Comparing the artifact contents against the corresponding Git commit in the BerriAI/litellm repository
**All versions listed below are confirmed clean.**
<Tabs>
<TabItem value="pypi" label="PyPI Releases">
<VersionVerificationTable entries={[
{ version: "1.82.6", sha256: "164a3ef3e19f309e3cabc199bef3d2045212712fefdfa25fc7f75884a5b5b205", gitCommit: "38d477507dad" },
{ version: "1.82.5", sha256: "e1012ab816352215c4e00776dd48b0c68058b537888a8ff82cca62af19e6fb11", gitCommit: "1998c4f3703f" },
{ version: "1.82.4", sha256: "d37c34a847e7952a146ed0e2888a24d3edec7787955c6826337395e755ad5c4b", gitCommit: "cfeafbe38811" },
{ version: "1.82.3", sha256: "609901f6c5a5cf8c24386e4e3f50738bb8a9db719709fd76b208c8ee6d00f7a7", gitCommit: "61409275c8d8" },
{ version: "1.82.2", sha256: "641ed024774fa3d5b4dd9347f0efb1e31fa422fba2a6500aabedee085d1194cb", gitCommit: "f351bbdb3683" },
{ version: "1.82.1", sha256: "a9ec3fe42eccb1611883caaf8b1bf33c9f4e12163f94c7d1004095b14c379eb2", gitCommit: "94b002066e3a" },
{ version: "1.82.0", sha256: "5496b5d4532cccdc7a095c21cbac4042f7662021c57bc1d17be4e39838929e80", gitCommit: "6c6585af568e" },
{ version: "1.81.16", sha256: "d6bcc13acbd26719e07bfa6b9923740e88409cbf1f9d626d85fc9ae0e0eec88c", gitCommit: "678200ee4887" },
{ version: "1.81.15", sha256: "2fa253658702509ce09fe0e172e5a47baaadf697fb0f784c7fd4ff665ae76ae1", gitCommit: "2e819656cee9" },
{ version: "1.81.14", sha256: "6394e61bbdef7121e5e3800349f6b01e9369e7cf611e034f1832750c481abfed", gitCommit: "96bcee0b0af7" },
{ version: "1.81.13", sha256: "ae4aea2a55e85993f5f6dd36d036519422d24812a1a3e8540d9e987f2d7a4304", gitCommit: "cc957a19a560" },
{ version: "1.81.12", sha256: "219cf9729e5ea30c6d3f75aa43fef3c56a717369939a6d717cbad0fd78e3c146", gitCommit: "ba0d541b1982" },
{ version: "1.81.11", sha256: "06a66c24742e082ddd2813c87f40f5c12fe7baa73ce1f9457eaf453dc44a0f65", gitCommit: "231aedeeff7e" },
{ version: "1.81.10", sha256: "9efa1cbe61ac051f6500c267b173d988ff2d511c2eecf1c8f2ee546c0870747c", gitCommit: "7488abece8e7" },
{ version: "1.81.9", sha256: "24ee273bc8a62299fbb754035f83fb7d8d44329c383701a2bd034f4fd1c19084", gitCommit: "a09d3e9162eb" },
{ version: "1.81.8", sha256: "78cca92f36bc6c267c191d1fe1e2630c812bff6daec32c58cade75748c2692f6", gitCommit: "4fea649f519b" },
{ version: "1.81.7", sha256: "58466c88c3289c6a3830d88768cf8f307581d9e6c87861de874d1128bb2de90d", gitCommit: "3f6a281d0f7a" },
{ version: "1.81.6", sha256: "573206ba194d49a1691370ba33f781671609ac77c35347f8a0411d852cf6341a", gitCommit: "8da3a93e6e63" },
{ version: "1.81.5", sha256: "206505c5a0c6503e465154b9c979772be3ede3f5bf746d15b37dca5ae54d239f", gitCommit: "2cc3778761d4" },
{ version: "1.81.3", sha256: "3f60fd8b727587952ad3dd18b68f5fed538d6f43d15bb0356f4c3a11bccb2b92", gitCommit: "f30742fe6e8e" },
]} />
</TabItem>
<TabItem value="docker" label="Docker Images">
<VersionVerificationTable entries={[
{ version: "1.82.3", sha256: "0a571da849db5f9c3cf3fead2ffbf1df982eebff7e7b38b46dbec3f640dafdbb", gitCommit: "61409275c8d8" },
{ version: "1.82.3-stable", sha256: "0c2b2a0ad3e50af1702fc493ecd07f22a5180b6d1cfb169440b429b40e340e29", gitCommit: "61409275c8d8" },
{ version: "1.82.0-stable", sha256: "71bf7283767ca436edcfa9f1f26c1743487b5fa29736c61c3eb6977776007c42", gitCommit: "97947c254252" },
{ version: "1.81.15", sha256: "303c31af87e7915e7b34d6c4d55a6ac753ef947a5deaa899e9ccfd3d1d58f7c2", gitCommit: "20bf3aa8070a" },
{ version: "1.81.14-stable", sha256: "a34f9758048231817d799b703fb998e40e2a5cbabb89ab95039fc30798f01b3c", gitCommit: "0435375b1271" },
{ version: "1.81.13", sha256: "a876f3f22f9b6fd481c9091c44a8a893d81c172d66dc2749298dcd3dc4a3d6f0", gitCommit: "cc957a19a560" },
{ version: "1.81.12-stable", sha256: "e24022878ccc87f57d808ac9304f18b87b8359e6556746d81cc20a5dc85f423a", gitCommit: "ba0d541b1982" },
{ version: "1.81.9-stable", sha256: "262e53d7702ed82579717faff0b08f7c0b7e9973a6406cfcc0e4af7826327627", gitCommit: "a09d3e9162eb" },
{ version: "1.81.3-stable", sha256: "dff82ccc32fb648927c090607887401c7e8ec814fe7c951beb95fe51073ca02b", gitCommit: "61ed8f9e0355" },
{ version: "1.81.0-stable", sha256: "f4913297d1bb3dc373eb8911a5ac816b597be9b5e08a91636b6c2786dd572aa8", gitCommit: "790a5ce0b323" },
{ version: "1.80.15-stable", sha256: "0b4ec3861e978b4aa254f4070f292cd345496a5fb59c72e1ee21cd6db94b670b", gitCommit: "17c8d8d109b5" },
{ version: "1.80.11-stable", sha256: "4068108d9101cd2affba3924310fd7f34f23d14e36dd4853733898b9e04d81ca", gitCommit: "57e07bddd341" },
{ version: "1.80.8-stable", sha256: "0304c2eb1f3cf54262d1b4e0629487232bab459e95b99a21e5810231d2b27021", gitCommit: "3381d63152f8" },
{ version: "1.80.5-stable", sha256: "a89e173135fff96af4b5b91ea31845164eadcf6497c82adeb64c36a23c8a3d11", gitCommit: "6c49b95a4ab7" },
{ version: "1.80.0-stable", sha256: "a3416f4cd0c896c94a1f526d872ff6c19bee22ff4afcdcc6f9ff690707900176", gitCommit: "98365205acd0" },
{ version: "1.79.3-stable", sha256: "27aae83d6ab6cb0b63bf8179e375ce0e11f5cfef51f2675b0c1e60c6f546dbc1", gitCommit: "c0548542d4a9" },
{ version: "1.79.1-stable", sha256: "7780d29a9543c4ce762430db7dfb0640105f7357fc38e35bf3fb7bbb1e6ba63f", gitCommit: "c217bddb59ba" },
{ version: "1.79.0-stable", sha256: "32bf6ac059a56641e11e4712f63b8467c295f988b6c160dc7229660417ee44bd", gitCommit: "8d495f56a9cc" },
{ version: "1.78.5-stable", sha256: "d5e607648eafa15edc63b0b1a5ed01f8b31a1fa0c80f7d25b252ae18a593ee29", gitCommit: "c471bf1f16c2" },
{ version: "1.78.0-stable", sha256: "7a56b32dc7153763d31c0a056123dc878a598959935d8c7daacb1fca5272c205", gitCommit: "5fde83d9f154" },
]} />
</TabItem>
</Tabs>
## Questions and support
If you believe your systems may be affected, contact us immediately:
- **Security:** `security@berri.ai`
- **Support:** `support@berri.ai`
- **Slack:** Reach out to the LiteLLM team directly
For real-time updates, follow [LiteLLM (YC W23) on X](https://x.com/LiteLLM).

View File

@ -0,0 +1,18 @@
---
slug: vanta-compliance-recertification
title: "LiteLLM + Vanta: SOC 2 Type 2 and ISO 27001 Recertification"
date: 2026-03-30T10:00:00
authors:
- krrish
description: "LiteLLM is partnering with Vanta on SOC 2 Type 2 and ISO 27001 recertification and engaging independent auditors for verification."
tags: [security, compliance]
hide_table_of_contents: true
---
![LiteLLM x Vanta SOC-2 Recertification](/img/blog/vanta_soc2_recertification.png)
We are partnering with [Vanta](https://www.vanta.com/) to recertify LiteLLM's compliance for SOC 2 Type 2 and ISO 27001.
As part of this process, we are also identifying independent auditors to validate and verify our compliance posture.
This is part of our commitment to being the most secure and transparent AI Gateway possible.

View File

@ -506,12 +506,15 @@ Request body will be in the Anthropic messages API format. **litellm follows the
A system prompt providing context or specific instructions to the model.
- **temperature** (number):
Controls randomness in the model's responses. Valid range: `0 <= temperature <= 1`.
- **thinking** (object):
- **thinking** (object):
Configuration for enabling extended thinking. If enabled, it includes:
- **budget_tokens** (integer):
- **budget_tokens** (integer):
Minimum of 1024 tokens (and less than `max_tokens`).
- **type** (enum):
- **type** (enum):
E.g., `"enabled"`.
- **summary** (string, optional):
Enables the summary style for thinking blocks. Possible values: `"auto"`, `"concise"`, `"detailed"`, `"disabled"`.
When routing to non-Anthropic providers (e.g., `openai/gpt-5.1`), the `summary` value is preserved and forwarded to the downstream API.
- **tool_choice** (object):
Instructs how the model should utilize any provided tools.
- **tools** (array of objects):

View File

@ -6,6 +6,8 @@ import TabItem from '@theme/TabItem';
Supported Providers:
- OpenAI (`openai/`)
- Anthropic API (`anthropic/`)
- Google AI Studio (`gemini/`)
- Vertex AI (`vertex_ai/`, `vertex_ai_beta/`)
- Bedrock (`bedrock/`, `bedrock/invoke/`, `bedrock/converse`) ([All models bedrock supports prompt caching on](https://docs.aws.amazon.com/bedrock/latest/userguide/prompt-caching.html))
- Deepseek API (`deepseek/`)
@ -257,7 +259,7 @@ Anthropic charges for cache writes.
Specify the content to cache with `"cache_control": {"type": "ephemeral"}`.
If you pass that in for any other llm provider, it will be ignored.
This same format also works for [Gemini / Vertex AI](#google-ai-studio--vertex-ai-gemini-example). For other providers, it will be ignored.
<Tabs>
<TabItem value="sdk" label="SDK">
@ -356,6 +358,208 @@ print(response.usage)
</TabItem>
</Tabs>
### Google AI Studio / Vertex AI (Gemini) Example
Use the same Anthropic-style `cache_control` format — LiteLLM automatically translates it to Google's [context caching API](https://ai.google.dev/api/caching).
**How it works under the hood:**
1. Messages with `cache_control` are separated and sent to Google's `cachedContents` API
2. The cached content ID is then passed as `cachedContent` in the Gemini request body
3. Works across all three providers: `gemini/` (Google AI Studio), `vertex_ai/`, and `vertex_ai_beta/`
4. Requires a minimum of **1024 tokens** in the cached content — below that, caching is silently skipped
<Tabs>
<TabItem value="sdk" label="SDK">
```python
import os

from litellm import completion

os.environ["GEMINI_API_KEY"] = ""

# System prompt: a short instruction followed by one large text block that is
# marked for caching. The block is a sentence repeated 400 times.
system_message = {
    "role": "system",
    "content": [
        {
            "type": "text",
            "text": "You are an AI assistant tasked with analyzing legal documents.",
        },
        {
            "type": "text",
            "text": "Here is the full text of a complex legal agreement" * 400,
            "cache_control": {"type": "ephemeral"},
        },
    ],
}
user_message = {
    "role": "user",
    "content": "what are the key terms and conditions in this agreement?",
}

response = completion(
    model="gemini/gemini-2.5-flash",
    messages=[system_message, user_message],
)

print(response.usage)
```
</TabItem>
<TabItem value="proxy" label="PROXY">
1. Setup config.yaml
```yaml
model_list:
  - model_name: gemini-2.5-flash
    litellm_params:
      model: gemini/gemini-2.5-flash
      api_key: os.environ/GEMINI_API_KEY
```
2. Start proxy
```bash
litellm --config /path/to/config.yaml
```
3. Test it!
```python
from openai import OpenAI

client = OpenAI(
    api_key="LITELLM_PROXY_KEY", # sk-1234
    base_url="LITELLM_PROXY_BASE", # http://0.0.0.0:4000
)

# System prompt: a short instruction followed by one large text block that is
# marked for caching. The block is a sentence repeated 400 times.
system_message = {
    "role": "system",
    "content": [
        {
            "type": "text",
            "text": "You are an AI assistant tasked with analyzing legal documents.",
        },
        {
            "type": "text",
            "text": "Here is the full text of a complex legal agreement" * 400,
            "cache_control": {"type": "ephemeral"},
        },
    ],
}
user_message = {
    "role": "user",
    "content": "what are the key terms and conditions in this agreement?",
}

response = client.chat.completions.create(
    model="gemini-2.5-flash",
    messages=[system_message, user_message],
)

print(response.usage)
```
</TabItem>
</Tabs>
#### Vertex AI
For Vertex AI, use `vertex_ai/` prefix:
<Tabs>
<TabItem value="sdk" label="SDK">
```python
from litellm import completion
response = completion(
model="vertex_ai/gemini-2.5-flash",
vertex_project="my-gcp-project",
vertex_location="us-central1",
messages=[
{
"role": "system",
"content": [
{
"type": "text",
"text": "You are an AI assistant tasked with analyzing legal documents.",
},
{
"type": "text",
"text": "Here is the full text of a complex legal agreement" * 400,
"cache_control": {"type": "ephemeral"},
},
],
},
{
"role": "user",
"content": "what are the key terms and conditions in this agreement?",
},
],
)
print(response.usage)
```
</TabItem>
<TabItem value="proxy" label="PROXY">
1. Setup config.yaml
```yaml
model_list:
- model_name: gemini-2.5-flash
litellm_params:
model: vertex_ai/gemini-2.5-flash
vertex_project: my-gcp-project
vertex_location: us-central1
```
2. Start proxy
```bash
litellm --config /path/to/config.yaml
```
3. Test it!
```python
from openai import OpenAI
client = OpenAI(
api_key="LITELLM_PROXY_KEY", # sk-1234
base_url="LITELLM_PROXY_BASE", # http://0.0.0.0:4000
)
response = client.chat.completions.create(
model="gemini-2.5-flash",
messages=[
{
"role": "system",
"content": [
{
"type": "text",
"text": "You are an AI assistant tasked with analyzing legal documents.",
},
{
"type": "text",
"text": "Here is the full text of a complex legal agreement" * 400,
"cache_control": {"type": "ephemeral"},
},
],
},
{
"role": "user",
"content": "what are the key terms and conditions in this agreement?",
},
],
)
print(response.usage)
```
</TabItem>
</Tabs>
### DeepSeek Example
Works the same as OpenAI.

View File

@ -128,8 +128,6 @@ We'll review all reports promptly. Note that we don't currently offer a bug boun
Legal Entity Name: Berrie AI Incorporated
Company Phone Number: 7708783106
Point of contact email address for security incidents: krrish@berri.ai
Point of contact email address for general security-related questions: krrish@berri.ai

View File

@ -67,6 +67,6 @@ response = completion("command-nightly", messages, logger_fn=my_custom_logging_f
## Still Seeing Issues?
Text us @ +17708783106 or Join the [Discord](https://discord.com/invite/wuPM9dRgDw).
Join the [Discord](https://discord.com/invite/wuPM9dRgDw).
We promise to help you in `lite`ning speed ❤️

View File

@ -4,7 +4,7 @@ import Image from '@theme/IdealImage';
:::info
- ✨ SSO is free for up to 5 users. After that, an enterprise license is required. [Get Started with Enterprise here](https://www.litellm.ai/enterprise)
- Who is Enterprise for? Companies giving access to 100+ users **OR** 10+ AI use-cases. If you're not sure, [get in touch with us](https://calendly.com/d/cx9p-5yf-2nm/litellm-introductions) to discuss your needs.
- Who is Enterprise for? Companies giving access to 100+ users **OR** 10+ AI use-cases. If you're not sure, [get in touch with us](https://enterprise.litellm.ai/demo) to discuss your needs.
:::
For companies that need SSO, user management and professional support for LiteLLM Proxy
@ -36,7 +36,7 @@ Manage Yourself - you can deploy our Docker Image or build a custom image from o
### What's the cost of the Self-Managed Enterprise edition?
Self-Managed Enterprise deployments require our team to understand your exact needs. [Get in touch with us to learn more](https://calendly.com/d/cx9p-5yf-2nm/litellm-introductions)
Self-Managed Enterprise deployments require our team to understand your exact needs. [Get in touch with us to learn more](https://enterprise.litellm.ai/demo)
### How does deployment with Enterprise License work?
@ -106,7 +106,7 @@ Professional Support can assist with LLM/Provider integrations, deployment, upgr
Pricing is based on usage. We can figure out a price that works for your team, on the call.
[**Contact Us to learn more**](https://calendly.com/d/cx9p-5yf-2nm/litellm-introductions)
[**Contact Us to learn more**](https://enterprise.litellm.ai/demo)

View File

@ -6,7 +6,7 @@ import TabItem from '@theme/TabItem';
:::info
This is an Enterprise only endpoint [Get Started with Enterprise here](https://calendly.com/d/cx9p-5yf-2nm/litellm-introductions)
This is an Enterprise only endpoint [Get Started with Enterprise here](https://enterprise.litellm.ai/demo)
:::

View File

@ -11,7 +11,7 @@ Use this path if you are integrating LiteLLM directly into application code.
## 1. Install LiteLLM
```bash
pip install litellm
pip install litellm==1.82.6
```
## 2. Set Provider Credentials

View File

@ -31,5 +31,4 @@ When we have breaking changes (i.e. going from 1.x.x to 2.x.x), we will document
**How can we communicate changes better?**
Tell us
- [Discord](https://discord.com/invite/wuPM9dRgDw)
- Email (krrish@berri.ai/ishaan@berri.ai)
- Text us (+17708783106)
- Email (support@berri.ai)

View File

@ -194,5 +194,4 @@ print(response)
- [Schedule Demo 👋](https://calendly.com/d/4mp-gd3-k5k/berriai-1-1-onboarding-litellm-hosted-version)
- [Community Discord 💭](https://discord.gg/wuPM9dRgDw)
- Our numbers 📞 +1 (770) 8783-106 / +1 (412) 618-6238
- Our emails ✉️ ishaan@berri.ai / krrish@berri.ai

View File

@ -6,7 +6,7 @@ Log LLM Logs to [Google Cloud Storage Buckets](https://cloud.google.com/storage?
:::info
✨ This is an Enterprise only feature [Get Started with Enterprise here](https://calendly.com/d/cx9p-5yf-2nm/litellm-introductions)
✨ This is an Enterprise only feature [Get Started with Enterprise here](https://enterprise.litellm.ai/demo)
:::
@ -79,5 +79,4 @@ curl --location 'http://0.0.0.0:4000/chat/completions' \
- [Schedule Demo 👋](https://calendly.com/d/4mp-gd3-k5k/berriai-1-1-onboarding-litellm-hosted-version)
- [Community Discord 💭](https://discord.gg/wuPM9dRgDw)
- Our numbers 📞 +1 (770) 8783-106 / +1 (412) 618-6238
- Our emails ✉️ ishaan@berri.ai / krrish@berri.ai

View File

@ -342,5 +342,4 @@ Be aware that if you are continuing an existing trace, and you set `update_trace
- [Schedule Demo 👋](https://calendly.com/d/4mp-gd3-k5k/berriai-1-1-onboarding-litellm-hosted-version)
- [Community Discord 💭](https://discord.gg/wuPM9dRgDw)
- Our numbers 📞 +1 (770) 8783-106 / +1 (412) 618-6238
- Our emails ✉️ ishaan@berri.ai / krrish@berri.ai

View File

@ -225,5 +225,4 @@ environment_variables:
- [Schedule Demo 👋](https://calendly.com/d/4mp-gd3-k5k/berriai-1-1-onboarding-litellm-hosted-version)
- [Community Discord 💭](https://discord.gg/wuPM9dRgDw)
- Our numbers 📞 +1 (770) 8783-106 / +1 (412) 618-6238
- Our emails ✉️ ishaan@berri.ai / krrish@berri.ai

View File

@ -63,5 +63,4 @@ response = litellm.completion(
- [Schedule Demo 👋](https://calendly.com/d/4mp-gd3-k5k/berriai-1-1-onboarding-litellm-hosted-version)
- [Community Discord 💭](https://discord.gg/wuPM9dRgDw)
- Our numbers 📞 +1 (770) 8783-106 / +1 (412) 618-6238
- Our emails ✉️ ishaan@berri.ai / krrish@berri.ai

View File

@ -176,5 +176,4 @@ You can find more details about the different ways of making requests to the Lit
- [Schedule Demo 👋](https://calendly.com/d/4mp-gd3-k5k/berriai-1-1-onboarding-litellm-hosted-version)
- [Community Discord 💭](https://discord.gg/wuPM9dRgDw)
- Our numbers 📞 +1 (770) 8783-106 / +1 (412) 618-6238
- Our emails ✉️ ishaan@berri.ai / krrish@berri.ai

View File

@ -261,5 +261,4 @@ All requests made with this key will automatically be tracked in the "TestProjec
- [Schedule Demo 👋](https://calendly.com/d/4mp-gd3-k5k/berriai-1-1-onboarding-litellm-hosted-version)
- [Community Discord 💭](https://discord.gg/wuPM9dRgDw)
- Our numbers 📞 +1 (770) 8783-106 / +1 (412) 618-6238
- Our emails ✉️ ishaan@berri.ai / krrish@berri.ai

View File

@ -127,5 +127,4 @@ Depending on which Phoenix Cloud version or deployment you are using, you should
- [Schedule Demo 👋](https://calendly.com/d/4mp-gd3-k5k/berriai-1-1-onboarding-litellm-hosted-version)
- [Community Discord 💭](https://discord.gg/wuPM9dRgDw)
- Our numbers 📞 +1 (770) 8783-106 / +1 (412) 618-6238
- Our emails ✉️ ishaan@berri.ai / krrish@berri.ai

View File

@ -84,5 +84,4 @@ Credits to [Nick Bradford](https://github.com/nsbradford), from [Vim-GPT](https:
- [Schedule Demo 👋](https://calendly.com/d/4mp-gd3-k5k/berriai-1-1-onboarding-litellm-hosted-version)
- [Community Discord 💭](https://discord.gg/wuPM9dRgDw)
- Our numbers 📞 +1 (770) 8783-106 / +1 (412) 618-6238
- Our emails ✉️ ishaan@berri.ai / krrish@berri.ai

View File

@ -101,5 +101,4 @@ response = litellm.completion(
- [Schedule Demo 👋](https://calendly.com/d/4mp-gd3-k5k/berriai-1-1-onboarding-litellm-hosted-version)
- [Community Discord 💭](https://discord.gg/wuPM9dRgDw)
- Our numbers 📞 +1 (770) 8783-106 / +1 (412) 618-6238
- Our emails ✉️ ishaan@berri.ai / krrish@berri.ai

View File

@ -328,5 +328,4 @@ If you get authentication errors, regenerate the HTTP Source URL in Sumo Logic:
- [Schedule Demo 👋](https://calendly.com/d/4mp-gd3-k5k/berriai-1-1-onboarding-litellm-hosted-version)
- [Community Discord 💭](https://discord.gg/wuPM9dRgDw)
- Our numbers 📞 +1 (770) 8783-106 / +1 (412) 618-6238
- Our emails ✉️ ishaan@berri.ai / krrish@berri.ai

View File

@ -105,5 +105,4 @@ litellm.modify_integration("supabase",{"table_name": "litellm_logs"})
- [Schedule Demo 👋](https://calendly.com/d/4mp-gd3-k5k/berriai-1-1-onboarding-litellm-hosted-version)
- [Community Discord 💭](https://discord.gg/wuPM9dRgDw)
- Our numbers 📞 +1 (770) 8783-106 / +1 (412) 618-6238
- Our emails ✉️ ishaan@berri.ai / krrish@berri.ai

View File

@ -57,5 +57,4 @@ response = litellm.completion(
- [Schedule Demo 👋](https://calendly.com/d/4mp-gd3-k5k/berriai-1-1-onboarding-litellm-hosted-version)
- [Community Discord 💭](https://discord.gg/wuPM9dRgDw)
- Our numbers 📞 +1 (770) 8783-106 / +1 (412) 618-6238
- Our emails ✉️ ishaan@berri.ai / krrish@berri.ai

View File

@ -0,0 +1,48 @@
---
title: Prompt Management with Responses API
---
# Prompt Management with Responses API
Use LiteLLM Prompt Management with `/v1/responses` by passing `prompt_id` and optional `prompt_variables`.
## Basic Usage
```bash
curl -X POST "http://localhost:4000/v1/responses" \
-H "Authorization: Bearer sk-1234" \
-H "Content-Type: application/json" \
-d '{
"model": "gpt-4o",
"prompt_id": "my-responses-prompt",
"prompt_variables": {"topic": "large language models"},
"input": []
}'
```
## Multi-turn Follow-up in `input`
To send follow-up turns in one request, pass message history in `input`.
```bash
curl -X POST "http://localhost:4000/v1/responses" \
-H "Authorization: Bearer sk-1234" \
-H "Content-Type: application/json" \
-d '{
"model": "gpt-4o",
"prompt_id": "my-responses-prompt",
"prompt_variables": {"topic": "large language models"},
"input": [
{"role": "user", "content": "Topic is LLMs. Start short."},
{"role": "assistant", "content": "Sure, go ahead."},
{"role": "user", "content": "Now give me 3 bullets and include pricing caveat."}
]
}'
```
## Notes
- Prompt template messages are merged with your `input` messages.
- Prompt variable substitution applies to prompt message content.
- Tool call payload fields are not substituted by prompt variables.
- For follow-ups with `previous_response_id`, include `prompt_id` again if you want prompt management applied on that turn.

View File

@ -11,6 +11,7 @@ import TabItem from '@theme/TabItem';
| Provider Doc | [Google AI Studio ↗](https://aistudio.google.com/) |
| API Endpoint for Provider | https://generativelanguage.googleapis.com |
| Supported OpenAI Endpoints | `/chat/completions`, [`/embeddings`](../embedding/supported_embedding#gemini-ai-embedding-models), `/completions`, [`/videos`](./gemini/videos.md), [`/images/edits`](../image_edits.md) |
| Lyria (music) | [Cost map & notes](./gemini/music.md) |
| Pass-through Endpoint | [Supported](../pass_through/google_ai_studio.md) |
<br />
@ -54,6 +55,7 @@ response = completion(
- stream
- tools
- tool_choice
- include_server_side_tool_invocations
- functions
- response_format
- n
@ -856,7 +858,112 @@ curl -X POST 'http://0.0.0.0:4000/chat/completions' \
</TabItem>
</Tabs>
### URL Context
### Context Circulation (Server-Side Tool Combination)
Context circulation allows Gemini 3+ models to combine **built-in tools** (like Google Search) with **your custom functions** in the same request. Without it, Gemini returns an error if you try to use both.
When enabled, Gemini can execute Google Search server-side, use those results to decide whether to call your custom functions, and return the full chain of reasoning.
**How it works:**
1. You pass `include_server_side_tool_invocations=True` along with both Google Search and your function tools
2. Gemini executes server-side tools internally and returns `toolCall`/`toolResponse` parts alongside any `functionCall` parts
3. LiteLLM extracts the server-side invocations into `provider_specific_fields["server_side_tool_invocations"]`
4. On subsequent turns, include the full assistant message in your conversation history — LiteLLM re-injects the server-side parts automatically
<Tabs>
<TabItem value="sdk" label="SDK">
```python
from litellm import completion
response = completion(
model="gemini/gemini-3-flash-preview",
messages=[{"role": "user", "content": "What's the weather in Buenos Aires? If it's raining, schedule a meeting."}],
tools=[
{"type": "web_search_preview"}, # Google Search (server-side)
{
"type": "function",
"function": {
"name": "schedule_meeting",
"description": "Schedule a meeting",
"parameters": {
"type": "object",
"properties": {"reason": {"type": "string"}},
"required": ["reason"],
},
},
},
],
include_server_side_tool_invocations=True,
)
msg = response.choices[0].message
# Server-side tool results are in provider_specific_fields
psf = msg.provider_specific_fields or {}
for invocation in psf.get("server_side_tool_invocations", []):
print(invocation["tool_type"]) # e.g. "GOOGLE_SEARCH_WEB"
print(invocation["id"])
print(invocation["args"]) # e.g. {"queries": ["weather Buenos Aires"]}
print(invocation["response"]) # Search results from Google
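# For multi-turn: append the full assistant message to your conversation history.
# NOTE: this assumes the `messages` and `tools` lists passed to the first call are stored in variables so they can be reused below.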
messages.append(msg)
messages.append({"role": "user", "content": "Thanks!"})
# LiteLLM automatically re-injects the server-side parts + thought signatures
response2 = completion(
model="gemini/gemini-3-flash-preview",
messages=messages,
tools=tools,
include_server_side_tool_invocations=True,
)
```
</TabItem>
<TabItem value="proxy" label="PROXY">
1. Setup config.yaml
```yaml
model_list:
- model_name: gemini-3-flash
litellm_params:
model: gemini/gemini-3-flash-preview
api_key: os.environ/GEMINI_API_KEY
```
2. Start Proxy
```bash
$ litellm --config /path/to/config.yaml
```
3. Make Request
```bash
curl -X POST 'http://0.0.0.0:4000/chat/completions' \
-H 'Content-Type: application/json' \
-H 'Authorization: Bearer sk-1234' \
-d '{
"model": "gemini-3-flash",
"messages": [{"role": "user", "content": "What is the weather in Buenos Aires?"}],
"tools": [
{"type": "web_search_preview"},
{"type": "function", "function": {"name": "schedule_meeting", "description": "Schedule a meeting", "parameters": {"type": "object", "properties": {"reason": {"type": "string"}}}}}
],
"include_server_side_tool_invocations": true
}'
```
</TabItem>
</Tabs>
:::info
- Context circulation requires **Gemini 3+** models
- Server-side tool invocations (`toolCall`/`toolResponse`) are **not** included in `tool_calls` — they are in `provider_specific_fields["server_side_tool_invocations"]` because they were already executed by Google, not by your code
- `thought_signatures` are automatically preserved alongside server-side invocations for multi-turn coherence
:::
### URL Context
<Tabs>
<TabItem value="sdk" label="SDK">

View File

@ -0,0 +1,28 @@
# Gemini — Lyria (music generation)
Google Lyria 3 preview models are listed in LiteLLM's [model cost map](https://github.com/BerriAI/litellm/blob/main/model_prices_and_context_window.json) under the `gemini/` provider for metadata and spend tracking.
| Property | Details |
|----------|---------|
| Provider route | `gemini/` |
| Models | `gemini/lyria-3-clip-preview`, `gemini/lyria-3-pro-preview` |
| Provider docs | [Gemini API pricing / models ↗](https://ai.google.dev/gemini-api/docs/pricing) |
## Models
| Model | Notes |
|-------|--------|
| `gemini/lyria-3-clip-preview` | ~30s clip; paid tier listed as per generated song in Google's pricing |
| `gemini/lyria-3-pro-preview` | Full song; paid tier listed as per generated song in Google's pricing |
Input context limit in the cost map: **131,072** tokens. For modalities, limits, and features, see [Google's Gemini API docs ↗](https://ai.google.dev/gemini-api/docs/models).
## LiteLLM behavior
- **Cost map**: Per-song paid pricing is stored as `output_cost_per_image` on those entries (flat per generation unit). Token-based completion cost may not reflect music billing until a dedicated path exists.
- **API calls**: Use the Gemini API as documented by Google. LiteLLM does not ship a separate `music_generation` helper like Veo's `video_generation`.
## Auth
Same as other Gemini API models: `GEMINI_API_KEY` or `GOOGLE_API_KEY`.

View File

@ -581,6 +581,90 @@ curl -X POST 'http://0.0.0.0:4000/chat/completions' \
See [OpenAI Reasoning documentation](https://platform.openai.com/docs/guides/reasoning) for more details on organization verification requirements.
### Multi-turn Conversations with `reasoning_items`
For multi-turn conversations you need `reasoning_items`: structured blocks that include the `encrypted_content` token OpenAI uses to restore reasoning state on the next request. Pass `include=["reasoning.encrypted_content"]` on every call where you want that token returned.
<Tabs>
<TabItem value="non-streaming" label="Non-Streaming">
```python showLineNumbers title="Non-streaming: round-trip reasoning_items"
import litellm
messages = [{"role": "user", "content": "Solve this step by step: 2 + 2"}]
# Turn 1 — get reasoning_items (encrypted_content);
response = litellm.completion(
model="openai/responses/gpt-5-mini",
messages=messages,
reasoning_effort="low",
include=["reasoning.encrypted_content"],
)
assistant_msg = response.choices[0].message
# Turn 2 — pass reasoning_items back; LiteLLM converts to the correct Responses API format
messages.append({
"role": "assistant",
"content": assistant_msg.content,
"reasoning_items": assistant_msg.reasoning_items,
})
messages.append({"role": "user", "content": "Now summarize your reasoning."})
response2 = litellm.completion(
model="openai/responses/gpt-5-mini",
messages=messages,
reasoning_effort="low",
include=["reasoning.encrypted_content"],
)
```
</TabItem>
<TabItem value="streaming" label="Streaming">
`reasoning_items` (with `encrypted_content`) arrive on the final chunk when the full response completes:
```python showLineNumbers title="Streaming: collect and round-trip reasoning_items"
import litellm
messages = [{"role": "user", "content": "Solve this step by step: 2 + 2"}]
collected_content = []
collected_reasoning_items = []
stream = litellm.completion(
model="openai/responses/gpt-5-mini",
messages=messages,
stream=True,
reasoning_effort="low",
include=["reasoning.encrypted_content"],
)
for chunk in stream:
delta = chunk.choices[0].delta
if delta.content:
collected_content.append(delta.content)
if getattr(delta, "reasoning_items", None):
collected_reasoning_items.extend(delta.reasoning_items)
messages.append({
"role": "assistant",
"content": "".join(collected_content),
"reasoning_items": collected_reasoning_items or None,
})
messages.append({"role": "user", "content": "Continue the conversation."})
response2 = litellm.completion(
model="openai/responses/gpt-5-mini",
messages=messages,
reasoning_effort="low",
include=["reasoning.encrypted_content"],
)
```
</TabItem>
</Tabs>
### Verbosity Control for GPT-5 Models
The `verbosity` parameter controls the length and detail of responses from GPT-5 family models. It accepts three values: `"low"`, `"medium"`, or `"high"`.

Some files were not shown because too many files have changed in this diff Show More