Merge branch 'main' into fix-schema-drift

2026-03-31 13:13:10 -07:00 · 2026-03-31 13:13:10 -07:00 · a8e002dbf6
commit a8e002dbf6
parent 93e3c81772 7046a58885
1196 changed files with 45533 additions and 17821 deletions
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
--- a/.claude/settings.json
+++ b/.claude/settings.json
@ -1,36 +0,0 @@
-{
-  "permissions": {
-    "allow": [
-      "Bash(git show:*)",
-      "Bash(git worktree add:*)",
-      "Read(//Users/krrishdholakia/Documents/litellm/**)",
-      "Read(//Users/krrishdholakia/Documents/litellm-claude-code-guardrails/litellm/types/**)",
-      "Read(//Users/krrishdholakia/Documents/litellm-claude-code-guardrails/**)",
-      "Read(//Users/krrishdholakia/Documents/litellm-claude-code-guardrails/litellm/**)",
-      "Bash(python:*)",
-      "Bash(python -c \"\nimport sys; sys.path.insert\\(0, ''.''\\)\nfrom litellm.proxy.guardrails.guardrail_hooks.claude_code.guardrail import ClaudeCodeGuardrail, HOSTED_TOOL_PREFIXES\nprint\\(''HOSTED_TOOL_PREFIXES:'', HOSTED_TOOL_PREFIXES\\)\nprint\\(''ClaudeCodeGuardrail imported OK''\\)\n\")",
-      "Read(//Users/krrishdholakia/Documents/litellm-mcp-jwt-groups/litellm/proxy/**)",
-      "Read(//Users/krrishdholakia/Documents/litellm-mcp-jwt-groups/**)",
-      "Bash(poetry run pytest:*)",
-      "Bash(git add:*)",
-      "Bash(git commit:*)",
-      "Bash(poetry run python:*)",
-      "Bash(poetry run pip:*)",
-      "Bash(git reset:*)",
-      "Bash(git cherry-pick:*)",
-      "Bash(git checkout:*)",
-      "Read(//Users/krrishdholakia/Documents/litellm/litellm/proxy/guardrails/guardrail_hooks/**)",
-      "Read(//Users/krrishdholakia/Documents/**)",
-      "Bash(git -C /Users/krrishdholakia/Documents/litellm-mcp-user-permissions worktree list)",
-      "Bash(ls:*)"
-    ],
-    "additionalDirectories": [
-      "/Users/krrishdholakia/Documents/litellm-mcp-group-plan/plan",
-      "/Users/krrishdholakia/Documents/litellm-claude-code-guardrails/litellm/proxy/guardrails/guardrail_hooks/claude_code",
-      "/Users/krrishdholakia/Documents/litellm-claude-code-guardrails/litellm/types",
-      "/Users/krrishdholakia/Documents/litellm-claude-code-guardrails",
-      "/Users/krrishdholakia/Documents/litellm-mcp-jwt-groups/litellm/proxy",
-      "/Users/krrishdholakia/Documents/litellm-mcp-jwt-groups/tests/test_litellm/proxy/auth"
-    ]
-  }
-}
--- a/.github/ISSUE_TEMPLATE/config.yml
+++ b/.github/ISSUE_TEMPLATE/config.yml
@ -1,7 +1,7 @@
 blank_issues_enabled: true
 contact_links:
  - name: Schedule Demo
-    url: https://calendly.com/d/cx9p-5yf-2nm/litellm-introductions
+    url: https://enterprise.litellm.ai/demo
    about: Speak directly with Krrish and Ishaan, the founders, to discuss issues, share feedback, or explore improvements for LiteLLM
  - name: Discord
    url: https://discord.com/invite/wuPM9dRgDw
--- a/.github/actions/helm-oci-chart-releaser/action.yml
+++ b/.github/actions/helm-oci-chart-releaser/action.yml
@ -41,32 +41,54 @@ runs:
  using: composite
  steps:
    - name: Helm | Setup
-      uses: azure/setup-helm@v4
+      uses: azure/setup-helm@1a275c3b69536ee54be43f2070a358922e12c8d4 # v4.3.1
      with:
        version: v3.20.0

    - name: Helm | Login
      shell: bash
-      run: echo ${{ inputs.registry_password }} | helm registry login -u ${{ inputs.registry_username }} --password-stdin ${{ inputs.registry }}
+      env:
+        REGISTRY_PASSWORD: ${{ inputs.registry_password }}
+        REGISTRY_USERNAME: ${{ inputs.registry_username }}
+        REGISTRY: ${{ inputs.registry }}
+      run: printf '%s\n' "$REGISTRY_PASSWORD" | helm registry login -u "$REGISTRY_USERNAME" --password-stdin "$REGISTRY"

    - name: Helm | Dependency
      if: inputs.update_dependencies == 'true'
      shell: bash
-      run: helm dependency update ${{ inputs.path == null && format('{0}/{1}', 'charts', inputs.name) || inputs.path }}
+      env:
+        CHART_PATH: ${{ inputs.path == null && format('{0}/{1}', 'charts', inputs.name) || inputs.path }}
+      run: helm dependency update "$CHART_PATH"

    - name: Helm | Package
      shell: bash
-      run: helm package ${{ inputs.path == null && format('{0}/{1}', 'charts', inputs.name) || inputs.path }} --version ${{ inputs.tag }} --app-version ${{ inputs.app_version }}
+      env:
+        CHART_PATH: ${{ inputs.path == null && format('{0}/{1}', 'charts', inputs.name) || inputs.path }}
+        TAG: ${{ inputs.tag }}
+        APP_VERSION: ${{ inputs.app_version }}
+      run: helm package "$CHART_PATH" --version "$TAG" --app-version "$APP_VERSION"

    - name: Helm | Push
      shell: bash
-      run: helm push ${{ inputs.name }}-${{ inputs.tag }}.tgz oci://${{ inputs.registry }}/${{ inputs.repository }}
+      env:
+        NAME: ${{ inputs.name }}
+        TAG: ${{ inputs.tag }}
+        REGISTRY: ${{ inputs.registry }}
+        REPOSITORY: ${{ inputs.repository }}
+      run: helm push "${NAME}-${TAG}.tgz" "oci://${REGISTRY}/${REPOSITORY}"

    - name: Helm | Logout
      shell: bash
-      run: helm registry logout ${{ inputs.registry }}
+      env:
+        REGISTRY: ${{ inputs.registry }}
+      run: helm registry logout "$REGISTRY"

    - name: Helm | Output
      id: output
      shell: bash
-      run: echo "image=${{ inputs.registry }}/${{ inputs.repository }}/${{ inputs.name }}:${{ inputs.tag }}" >> $GITHUB_OUTPUT
+      env:
+        REGISTRY: ${{ inputs.registry }}
+        REPOSITORY: ${{ inputs.repository }}
+        NAME: ${{ inputs.name }}
+        TAG: ${{ inputs.tag }}
+      run: echo "image=${REGISTRY}/${REPOSITORY}/${NAME}:${TAG}" >> "$GITHUB_OUTPUT"
--- a/.github/codeql/codeql-config.yml
+++ b/.github/codeql/codeql-config.yml
@ -1,22 +1,21 @@
 name: "LiteLLM CodeQL config"

-# Use security-extended suite instead of security-and-quality to avoid
-# result sets > 2 GiB on this codebase that cause fatal OOM failures.
 queries:
-  - uses: security-extended
+  - uses: security-and-quality

-# These two queries are security queries included in security-extended that
-# individually produce result sets > 2 GiB on this codebase, causing fatal
-# OOM failures. Exclude them as a safety net until CI confirms they no longer
-# OOM; drop these exclusions in a follow-up once verified.
+# Known OOM queries on large Python codebases:
+# CodeQL builds a full data flow graph in memory. These two queries trace
+# sensitive data through every log call / regex pattern, causing combinatorial
+# path explosion on codebases with extensive logging like LiteLLM (>2 GiB
+# result sets). This is a known CodeQL scaling limitation, not a code issue.
+# Re-test periodically as CodeQL improves or the codebase refactors logging.
 query-filters:
  - exclude:
-      id: py/clear-text-logging-sensitive-data  # CWE-312 — > 2 GiB result set
+      id: py/clear-text-logging-sensitive-data  # CWE-312
  - exclude:
-      id: py/polynomial-redos                   # CWE-730 — > 2 GiB result set
+      id: py/polynomial-redos                   # CWE-730

 paths-ignore:
  - tests
  - docs
  - "**/*.md"
-  - litellm/proxy/_experimental/out
--- a/.github/dependabot.yaml
+++ b/.github/dependabot.yaml
@ -4,6 +4,9 @@ updates:
    directory: "/"
    schedule:
      interval: "daily"
+    cooldown:
+      default-days: 7
+      semver-major-days: 14
    groups:
      github-actions:
        patterns:
--- a/.github/workflows/_test-unit-base.yml
+++ b/.github/workflows/_test-unit-base.yml
@ -0,0 +1,96 @@
+name: _Unit Test Base (Reusable)
+
+on:
+  workflow_call:
+    inputs:
+      test-path:
+        description: "Pytest path(s) to run"
+        required: true
+        type: string
+      workers:
+        description: "Number of pytest-xdist workers"
+        required: false
+        type: number
+        default: 2
+      reruns:
+        description: "Number of reruns for flaky tests"
+        required: false
+        type: number
+        default: 2
+      timeout-minutes:
+        description: "Job timeout in minutes"
+        required: false
+        type: number
+        default: 20
+      max-failures:
+        description: "Stop after this many failures"
+        required: false
+        type: number
+        default: 10
+
+permissions:
+  contents: read
+
+jobs:
+  run:
+    name: Run tests
+    runs-on: ubuntu-latest
+    timeout-minutes: ${{ inputs.timeout-minutes }}
+
+    steps:
+      - uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # v4.3.0
+        with:
+          persist-credentials: false
+
+      - name: Set up Python
+        uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5.6.0
+        with:
+          python-version: "3.12"
+
+      - name: Install Poetry
+        run: pip install 'poetry==2.3.2'
+
+      - name: Cache Poetry dependencies
+        uses: actions/cache@0057852bfaa89a56745cba8c7296529d2fc39830 # v4.3.0
+        with:
+          path: |
+            ~/.cache/pypoetry
+            ~/.cache/pip
+            .venv
+          key: ${{ runner.os }}-poetry-${{ hashFiles('poetry.lock') }}
+          restore-keys: |
+            ${{ runner.os }}-poetry-
+
+      - name: Install dependencies
+        run: |
+          poetry config virtualenvs.in-project true
+          poetry install --with dev,proxy-dev --extras "proxy semantic-router"
+          poetry run pip install google-genai==1.22.0 \
+            google-cloud-aiplatform==1.115.0 fastapi-offline==1.7.3 python-multipart==0.0.22 openapi-core==0.23.0
+
+      - name: Setup litellm-enterprise
+        run: |
+          poetry run pip install --force-reinstall --no-deps -e enterprise/
+
+      - name: Generate Prisma client
+        env:
+          PRISMA_BINARY_CACHE_DIR: ${{ runner.temp }}/prisma-cache
+        run: |
+          poetry run pip install nodejs-wheel-binaries==24.13.1
+          poetry run prisma generate --schema litellm/proxy/schema.prisma
+
+      - name: Run tests
+        env:
+          TEST_PATH: ${{ inputs.test-path }}
+          MAX_FAILURES: ${{ inputs.max-failures }}
+          WORKERS: ${{ inputs.workers }}
+          RERUNS: ${{ inputs.reruns }}
+        run: |
+          poetry run pytest ${TEST_PATH:?} \
+            --tb=short -vv \
+            --maxfail="${MAX_FAILURES}" \
+            -n "${WORKERS}" \
+            --reruns "${RERUNS}" \
+            --reruns-delay 1 \
+            --dist=loadscope \
+            --durations=20
--- a/.github/workflows/_test-unit-services-base.yml
+++ b/.github/workflows/_test-unit-services-base.yml
@ -0,0 +1,164 @@
+name: _Unit Test Services Base (Reusable)
+
+on:
+  workflow_call:
+    inputs:
+      test-path:
+        description: "Pytest path(s) to run"
+        required: true
+        type: string
+      workers:
+        description: "Number of pytest-xdist workers (0 = no parallelism)"
+        required: false
+        type: number
+        default: 2
+      reruns:
+        description: "Number of reruns for flaky tests"
+        required: false
+        type: number
+        default: 2
+      timeout-minutes:
+        description: "Job timeout in minutes"
+        required: false
+        type: number
+        default: 20
+      max-failures:
+        description: "Stop after this many failures"
+        required: false
+        type: number
+        default: 10
+      enable-redis:
+        description: "Pass Redis Cloud credentials to tests via REDIS_HOST/PORT/PASSWORD env vars"
+        required: false
+        type: boolean
+        default: false
+      enable-postgres:
+        description: "Start a local Postgres service container and run Prisma migrations"
+        required: false
+        type: boolean
+        default: false
+    secrets:
+      REDIS_HOST:
+        required: false
+      REDIS_PORT:
+        required: false
+      REDIS_PASSWORD:
+        required: false
+      DATABASE_URL:
+        required: false
+      POSTGRES_USER:
+        required: false
+      POSTGRES_PASSWORD:
+        required: false
+
+permissions:
+  contents: read
+
+jobs:
+  run:
+    name: Run tests
+    runs-on: ubuntu-latest
+    timeout-minutes: ${{ inputs.timeout-minutes }}
+    # Environment is derived from the enable-* flags, not caller-controllable.
+    # This prevents callers from passing arbitrary environment names to bypass secret scoping.
+    # Note: Postgres service container always starts (GHA limitation), so any Redis job
+    # also needs Postgres secrets → uses integration-redis-postgres, not integration-redis.
+    environment: >-
+      ${{
+        inputs.enable-redis && 'integration-redis-postgres' ||
+        inputs.enable-postgres && 'integration-postgres' ||
+        ''
+      }}
+
+    services:
+      postgres:
+        image: postgres@sha256:705a5d5b5836f3fcba0d02c4d281e6a7dd9ed2dd4078640f08a1e1e9896e097d # postgres:14
+        env:
+          POSTGRES_USER: ${{ secrets.POSTGRES_USER }}
+          POSTGRES_PASSWORD: ${{ secrets.POSTGRES_PASSWORD }}
+          POSTGRES_DB: litellm_test
+        ports:
+          - 5432:5432
+        options: >-
+          --health-cmd "pg_isready"
+          --health-interval 10s
+          --health-timeout 5s
+          --health-retries 5
+
+    steps:
+      - uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # v4.3.0
+        with:
+          persist-credentials: false
+
+      - name: Set up Python
+        uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5.6.0
+        with:
+          python-version: "3.12"
+
+      - name: Install Poetry
+        run: pip install 'poetry==2.3.2'
+
+      - name: Cache Poetry dependencies
+        uses: actions/cache@0057852bfaa89a56745cba8c7296529d2fc39830 # v4.3.0
+        with:
+          path: |
+            ~/.cache/pypoetry
+            ~/.cache/pip
+            .venv
+          key: ${{ runner.os }}-poetry-services-${{ hashFiles('poetry.lock') }}
+          restore-keys: |
+            ${{ runner.os }}-poetry-services-
+
+      - name: Install dependencies
+        run: |
+          poetry config virtualenvs.in-project true
+          poetry install --with dev,proxy-dev --extras "proxy semantic-router"
+          poetry run pip install google-genai==1.22.0 \
+            google-cloud-aiplatform==1.115.0 fastapi-offline==1.7.3 python-multipart==0.0.22 openapi-core==0.23.0
+
+      - name: Setup litellm-enterprise
+        run: |
+          poetry run pip install --force-reinstall --no-deps -e enterprise/
+
+      - name: Generate Prisma client
+        env:
+          PRISMA_BINARY_CACHE_DIR: ${{ runner.temp }}/prisma-cache
+        run: |
+          poetry run pip install nodejs-wheel-binaries==24.13.1
+          poetry run prisma generate --schema litellm/proxy/schema.prisma
+
+      - name: Run Prisma migrations
+        if: ${{ inputs.enable-postgres }}
+        env:
+          DATABASE_URL: ${{ secrets.DATABASE_URL }}
+        run: |
+          poetry run prisma db push --schema litellm/proxy/schema.prisma --accept-data-loss
+
+      - name: Run tests
+        env:
+          TEST_PATH: ${{ inputs.test-path }}
+          MAX_FAILURES: ${{ inputs.max-failures }}
+          WORKERS: ${{ inputs.workers }}
+          RERUNS: ${{ inputs.reruns }}
+          DATABASE_URL: ${{ inputs.enable-postgres && secrets.DATABASE_URL || '' }}
+          REDIS_HOST: ${{ inputs.enable-redis && secrets.REDIS_HOST || '' }}
+          REDIS_PORT: ${{ inputs.enable-redis && secrets.REDIS_PORT || '' }}
+          REDIS_PASSWORD: ${{ inputs.enable-redis && secrets.REDIS_PASSWORD || '' }}
+        run: |
+          if [ "${WORKERS}" = "0" ]; then
+            poetry run pytest ${TEST_PATH:?} \
+              --tb=short -vv \
+              --maxfail="${MAX_FAILURES}" \
+              --reruns "${RERUNS}" \
+              --reruns-delay 1 \
+              --durations=20
+          else
+            poetry run pytest ${TEST_PATH:?} \
+              --tb=short -vv \
+              --maxfail="${MAX_FAILURES}" \
+              -n "${WORKERS}" \
+              --reruns "${RERUNS}" \
+              --reruns-delay 1 \
+              --dist=loadscope \
+              --durations=20
+          fi
--- a/.github/workflows/auto_update_price_and_context_window.yml
+++ b/.github/workflows/auto_update_price_and_context_window.yml
@ -2,18 +2,24 @@ name: Updates model_prices_and_context_window.json and Create Pull Request

 on:
  schedule:
-    - cron: "0 0 * * 0"  # Run every Sundays at midnight
+    - cron: "0 0 * * 0" # Run every Sunday at midnight UTC
    #- cron: "0 0 * * *" # Run daily at midnight

+permissions:
+  contents: write
+  pull-requests: write
+
 jobs:
  auto_update_price_and_context_window:
    if: github.repository == 'BerriAI/litellm'
    runs-on: ubuntu-latest
    steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # v4.3.0
+        with:
+          persist-credentials: false
      - name: Install Dependencies
        run: |
-          pip install aiohttp
+          pip install 'aiohttp==3.13.3'
      - name: Update JSON Data
        run: |
          python ".github/workflows/auto_update_price_and_context_window_file.py"
@ -26,4 +32,4 @@ jobs:
            --head auto-update-price-and-context-window-$(date +'%Y-%m-%d') \
            --base main
        env:
-          GH_TOKEN: ${{ secrets.GH_TOKEN }}
+          GH_TOKEN: ${{ secrets.GH_TOKEN }}
--- a/.github/workflows/check-schema-sync.yml
+++ b/.github/workflows/check-schema-sync.yml
@ -0,0 +1,58 @@
+name: Check Schema Sync
+
+on:
+  pull_request:
+    paths:
+      - 'schema.prisma'
+      - 'litellm/proxy/schema.prisma'
+      - 'litellm-proxy-extras/litellm_proxy_extras/schema.prisma'
+
+permissions:
+  contents: read
+
+jobs:
+  check-sync:
+    name: Verify schema.prisma copies match root
+    runs-on: ubuntu-latest
+    timeout-minutes: 5
+    steps:
+      - name: Checkout PR
+        uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # v4.3.0
+        with:
+          persist-credentials: false
+
+      - name: Reject symlinked schema files
+        run: |
+          for f in schema.prisma litellm/proxy/schema.prisma litellm-proxy-extras/litellm_proxy_extras/schema.prisma; do
+            if [ -L "$f" ]; then
+              echo "::error file=$f::$f is a symlink, which is not allowed"
+              exit 1
+            fi
+          done
+
+      - name: Check all schemas match root
+        run: |
+          EXIT=0
+
+          diff schema.prisma litellm/proxy/schema.prisma || {
+            echo "::error file=litellm/proxy/schema.prisma::litellm/proxy/schema.prisma differs from root schema.prisma"
+            EXIT=1
+          }
+
+          diff schema.prisma litellm-proxy-extras/litellm_proxy_extras/schema.prisma || {
+            echo "::error file=litellm-proxy-extras/litellm_proxy_extras/schema.prisma::litellm-proxy-extras/litellm_proxy_extras/schema.prisma differs from root schema.prisma"
+            EXIT=1
+          }
+
+          if [ "$EXIT" -ne 0 ]; then
+            echo ""
+            echo "Schema files are out of sync."
+            echo "The root schema.prisma is the source of truth."
+            echo ""
+            echo "To fix, run from the repo root:"
+            echo "  cp schema.prisma litellm/proxy/schema.prisma"
+            echo "  cp schema.prisma litellm-proxy-extras/litellm_proxy_extras/schema.prisma"
+            exit 1
+          fi
+
+          echo "All schema copies are in sync with root."
--- a/.github/workflows/check_duplicate_issues.yml
+++ b/.github/workflows/check_duplicate_issues.yml
@ -12,7 +12,7 @@ jobs:
      contents: read
    steps:
      - name: Check for potential duplicates
-        uses: wow-actions/potential-duplicates@v1
+        uses: wow-actions/potential-duplicates@4d4ea0352e0383859279938e255179dd1dbb67b5 # v1.1.0
        with:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          label: potential-duplicate
@ -30,13 +30,14 @@ jobs:

      - name: Checkout close script
        if: github.event.action == 'opened'
-        uses: actions/checkout@v4
+        uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # v4.3.0
        with:
          sparse-checkout: .github/scripts
+          persist-credentials: false

      - name: Set up Python
        if: github.event.action == 'opened'
-        uses: actions/setup-python@v5
+        uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5.6.0
        with:
          python-version: "3.11"

--- a/.github/workflows/codeql.yml
+++ b/.github/workflows/codeql.yml
@ -6,8 +6,8 @@ on:
  pull_request:
    branches: [main]
  schedule:
-    # Run weekly on Sundays at 04:00 UTC
-    - cron: "0 4 * * 0"
+    # Run daily at 04:00 UTC
+    - cron: "0 4 * * *"

 concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
@ -15,6 +15,7 @@ concurrency:

 jobs:
  analyze:
+    if: github.event_name != 'schedule' || github.repository == 'BerriAI/litellm'
    name: Analyze (${{ matrix.language }})
    runs-on: ubuntu-latest
    timeout-minutes: 30
@ -37,16 +38,18 @@ jobs:

    steps:
      - name: Checkout repository
-        uses: actions/checkout@v4
+        uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # v4.3.0
+        with:
+          persist-credentials: false

      - name: Initialize CodeQL
-        uses: github/codeql-action/init@v3
+        uses: github/codeql-action/init@ebcb5b36ded6beda4ceefea6a8bc4cc885255bb3 # v3
        with:
          languages: ${{ matrix.language }}
          build-mode: ${{ matrix.build-mode }}
          config-file: ./.github/codeql/codeql-config.yml

      - name: Perform CodeQL Analysis
-        uses: github/codeql-action/analyze@v3
+        uses: github/codeql-action/analyze@ebcb5b36ded6beda4ceefea6a8bc4cc885255bb3 # v3
        with:
          category: "/language:${{ matrix.language }}"
--- a/.github/workflows/codspeed.yml
+++ b/.github/workflows/codspeed.yml
@ -25,10 +25,12 @@ jobs:
    timeout-minutes: 15

    steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # v4.3.0
+        with:
+          persist-credentials: false

      - name: Set up Python
-        uses: actions/setup-python@v5
+        uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5.6.0
        with:
          python-version: "3.12"

@ -38,7 +40,7 @@ jobs:
          pip install pytest pytest-codspeed==4.3.0

      - name: Run benchmarks
-        uses: CodSpeedHQ/action@v4
+        uses: CodSpeedHQ/action@1c8ae4843586d3ba879736b7f6b7b0c990757fab # v4.12.1
        with:
          mode: simulation
          run: pytest tests/benchmarks/ --codspeed
--- a/.github/workflows/create_daily_staging_branch.yml
+++ b/.github/workflows/create_daily_staging_branch.yml
@ -2,18 +2,22 @@ name: Create Daily Staging Branch

 on:
  schedule:
-    - cron: '0 0,12 * * *'  # Runs every 12 hours at midnight and noon UTC
-  workflow_dispatch:  # Allow manual trigger
+    - cron: "0 0,12 * * *" # Runs every 12 hours at midnight and noon UTC
+  workflow_dispatch: # Allow manual trigger

 jobs:
  create-staging-branch:
+    if: github.repository == 'BerriAI/litellm'
    runs-on: ubuntu-latest
+    permissions:
+      contents: write

    steps:
      - name: Checkout repository
-        uses: actions/checkout@v3
+        uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # v4.3.0
        with:
          fetch-depth: 0
+          persist-credentials: false

      - name: Create daily staging branch
        env:
@ -43,13 +47,17 @@ jobs:
          fi

  create-internal-dev-branch:
+    if: github.repository == 'BerriAI/litellm'
    runs-on: ubuntu-latest
+    permissions:
+      contents: write

    steps:
      - name: Checkout repository
-        uses: actions/checkout@v3
+        uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # v4.3.0
        with:
          fetch-depth: 0
+          persist-credentials: false

      - name: Create internal dev branch
        env:
--- a/.github/workflows/ghcr_deploy.yml
+++ b/.github/workflows/ghcr_deploy.yml
@ -1,444 +0,0 @@
-# this workflow is triggered by an API call when there is a new PyPI release of LiteLLM
-name: Build, Publish LiteLLM Docker Image. New Release
-on:
-  workflow_dispatch:
-    inputs:
-      tag:
-        description: "The tag version you want to build"
-        required: true
-      release_type:
-        description: "The release type you want to build. Can be 'latest', 'stable', 'dev', 'rc'"
-        type: string
-        default: "latest"
-      commit_hash:
-        description: "Commit hash"
-        required: true
-
-# Defines two custom environment variables for the workflow. Used for the Container registry domain, and a name for the Docker image that this workflow builds.
-env:
-  REGISTRY: ghcr.io
-  IMAGE_NAME: ${{ github.repository }}
-  CHART_NAME: litellm-helm
-
-# There is a single job in this workflow. It's configured to run on the latest available version of Ubuntu.
-jobs:
-  # print commit hash, tag, and release type
-  print:
-    runs-on: ubuntu-latest
-    steps:
-      - run: |
-          echo "Commit hash: ${{ github.event.inputs.commit_hash }}"
-          echo "Tag: ${{ github.event.inputs.tag }}"
-          echo "Release type: ${{ github.event.inputs.release_type }}"
-  docker-hub-deploy:
-    if: github.repository == 'BerriAI/litellm'
-    runs-on: ubuntu-latest
-    steps:
-      -
-        name: Checkout
-        uses: actions/checkout@v4
-        with:
-          ref: ${{ github.event.inputs.commit_hash }}
-      -
-        name: Set up QEMU
-        uses: docker/setup-qemu-action@v3
-      -
-        name: Set up Docker Buildx
-        uses: docker/setup-buildx-action@v3
-      -
-        name: Login to Docker Hub
-        uses: docker/login-action@v3
-        with:
-          username: ${{ secrets.DOCKERHUB_USERNAME }}
-          password: ${{ secrets.DOCKERHUB_TOKEN }}
-      -
-        name: Build and push
-        uses: docker/build-push-action@v5
-        with:
-          context: .
-          push: true
-          tags: litellm/litellm:${{ github.event.inputs.tag || 'latest' }} 
-      -
-        name: Build and push litellm-database image
-        uses: docker/build-push-action@v5
-        with:
-          context: .
-          push: true
-          file: ./docker/Dockerfile.database
-          tags: litellm/litellm-database:${{ github.event.inputs.tag || 'latest' }}
-      -
-        name: Build and push litellm-spend-logs image
-        uses: docker/build-push-action@v5
-        with:
-          context: .
-          push: true
-          file: ./litellm-js/spend-logs/Dockerfile
-          tags: litellm/litellm-spend_logs:${{ github.event.inputs.tag || 'latest' }}
-      -
-        name: Build and push litellm-non_root image
-        uses: docker/build-push-action@v5
-        with:
-          context: .
-          push: true
-          file: ./docker/Dockerfile.non_root
-          tags: litellm/litellm-non_root:${{ github.event.inputs.tag || 'latest' }}
-  build-and-push-image:
-    runs-on: ubuntu-latest
-    # Sets the permissions granted to the `GITHUB_TOKEN` for the actions in this job.
-    permissions:
-      contents: read
-      packages: write
-    steps:
-      - name: Checkout repository
-        uses: actions/checkout@v4
-        with:
-          ref: ${{ github.event.inputs.commit_hash }}
-      # Uses the `docker/login-action` action to log in to the Container registry registry using the account and password that will publish the packages. Once published, the packages are scoped to the account defined here.
-      - name: Log in to the Container registry
-        uses: docker/login-action@65b78e6e13532edd9afa3aa52ac7964289d1a9c1
-        with:
-          registry: ${{ env.REGISTRY }}
-          username: ${{ github.actor }}
-          password: ${{ secrets.GITHUB_TOKEN }}
-      # This step uses [docker/metadata-action](https://github.com/docker/metadata-action#about) to extract tags and labels that will be applied to the specified image. The `id` "meta" allows the output of this step to be referenced in a subsequent step. The `images` value provides the base name for the tags and labels.
-      - name: Extract metadata (tags, labels) for Docker
-        id: meta
-        uses: docker/metadata-action@9ec57ed1fcdbf14dcef7dfbe97b2010124a938b7
-        with:
-          images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
-      # Configure multi platform Docker builds
-      - name: Set up QEMU
-        uses: docker/setup-qemu-action@e0e4588fad221d38ee467c0bffd91115366dc0c5
-      - name: Set up Docker Buildx
-        uses: docker/setup-buildx-action@edfb0fe6204400c56fbfd3feba3fe9ad1adfa345
-      # This step uses the `docker/build-push-action` action to build the image, based on your repository's `Dockerfile`. If the build succeeds, it pushes the image to GitHub Packages.
-      # It uses the `context` parameter to define the build's context as the set of files located in the specified path. For more information, see "[Usage](https://github.com/docker/build-push-action#usage)" in the README of the `docker/build-push-action` repository.
-      # It uses the `tags` and `labels` parameters to tag and label the image with the output from the "meta" step.
-      - name: Build and push Docker image
-        uses: docker/build-push-action@4976231911ebf5f32aad765192d35f942aa48cb8
-        with:
-          context: .
-          push: true
-          tags: |
-            ${{ steps.meta.outputs.tags }}-${{ github.event.inputs.tag || 'latest' }},
-            ${{ steps.meta.outputs.tags }}-${{ github.event.inputs.release_type }}
-            ${{ (github.event.inputs.release_type == 'stable'  || github.event.inputs.release_type == 'rc') && format('{0}/berriai/litellm:main-{1}', env.REGISTRY, github.event.inputs.tag) || '' }},
-            ${{ github.event.inputs.release_type == 'stable' && format('{0}/berriai/litellm:main-stable', env.REGISTRY) || '' }},
-            ${{ (github.event.inputs.release_type == 'stable'  || github.event.inputs.release_type == 'rc') && format('{0}/berriai/litellm:{1}', env.REGISTRY, github.event.inputs.tag) || '' }},
-          labels: ${{ steps.meta.outputs.labels }}
-          platforms: local,linux/amd64,linux/arm64,linux/arm64/v8
-  
-  build-and-push-image-ee:
-    runs-on: ubuntu-latest
-    permissions:
-      contents: read
-      packages: write
-    steps:
-      - name: Checkout repository
-        uses: actions/checkout@v4
-        with:
-          ref: ${{ github.event.inputs.commit_hash }}
-
-      - name: Log in to the Container registry
-        uses: docker/login-action@65b78e6e13532edd9afa3aa52ac7964289d1a9c1
-        with:
-          registry: ${{ env.REGISTRY }}
-          username: ${{ github.actor }}
-          password: ${{ secrets.GITHUB_TOKEN }}
-
-      - name: Extract metadata (tags, labels) for EE Dockerfile
-        id: meta-ee
-        uses: docker/metadata-action@9ec57ed1fcdbf14dcef7dfbe97b2010124a938b7
-        with:
-          images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}-ee
-      # Configure multi platform Docker builds
-      - name: Set up QEMU
-        uses: docker/setup-qemu-action@e0e4588fad221d38ee467c0bffd91115366dc0c5
-      - name: Set up Docker Buildx
-        uses: docker/setup-buildx-action@edfb0fe6204400c56fbfd3feba3fe9ad1adfa345
-
-      - name: Build and push EE Docker image
-        uses: docker/build-push-action@f2a1d5e99d037542a71f64918e516c093c6f3fc4
-        with:
-          context: .
-          file: Dockerfile
-          push: true
-          tags: |
-            ${{ steps.meta-ee.outputs.tags }}-${{ github.event.inputs.tag || 'latest' }},
-            ${{ steps.meta-ee.outputs.tags }}-${{ github.event.inputs.release_type }}
-            ${{ (github.event.inputs.release_type == 'stable'  || github.event.inputs.release_type == 'rc') && format('{0}/berriai/litellm-ee:main-{1}', env.REGISTRY, github.event.inputs.tag) || '' }},
-            ${{ github.event.inputs.release_type == 'stable' && format('{0}/berriai/litellm-ee:main-stable', env.REGISTRY) || '' }}
-          labels: ${{ steps.meta-ee.outputs.labels }}
-          platforms: local,linux/amd64,linux/arm64,linux/arm64/v8
-   
-  build-and-push-image-database:
-    runs-on: ubuntu-latest
-    permissions:
-      contents: read
-      packages: write
-    steps:
-      - name: Checkout repository
-        uses: actions/checkout@v4
-        with:
-          ref: ${{ github.event.inputs.commit_hash }}
-
-      - name: Log in to the Container registry
-        uses: docker/login-action@65b78e6e13532edd9afa3aa52ac7964289d1a9c1
-        with:
-          registry: ${{ env.REGISTRY }}
-          username: ${{ github.actor }}
-          password: ${{ secrets.GITHUB_TOKEN }}
-
-      - name: Extract metadata (tags, labels) for database Dockerfile
-        id: meta-database
-        uses: docker/metadata-action@9ec57ed1fcdbf14dcef7dfbe97b2010124a938b7
-        with:
-          images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}-database
-      # Configure multi platform Docker builds
-      - name: Set up QEMU
-        uses: docker/setup-qemu-action@e0e4588fad221d38ee467c0bffd91115366dc0c5
-      - name: Set up Docker Buildx
-        uses: docker/setup-buildx-action@edfb0fe6204400c56fbfd3feba3fe9ad1adfa345
-
-      - name: Build and push Database Docker image
-        uses: docker/build-push-action@f2a1d5e99d037542a71f64918e516c093c6f3fc4
-        with:
-          context: .
-          file: ./docker/Dockerfile.database
-          push: true
-          tags: |
-            ${{ steps.meta-database.outputs.tags }}-${{ github.event.inputs.tag || 'latest' }},
-            ${{ steps.meta-database.outputs.tags }}-${{ github.event.inputs.release_type }}
-            ${{ (github.event.inputs.release_type == 'stable'  || github.event.inputs.release_type == 'rc') && format('{0}/berriai/litellm-database:main-{1}', env.REGISTRY, github.event.inputs.tag) || '' }},
-            ${{ github.event.inputs.release_type == 'stable' && format('{0}/berriai/litellm-database:main-stable', env.REGISTRY) || '' }}
-          labels: ${{ steps.meta-database.outputs.labels }}
-          platforms: local,linux/amd64,linux/arm64,linux/arm64/v8
-            
-  build-and-push-image-non_root:
-    runs-on: ubuntu-latest
-    permissions:
-      contents: read
-      packages: write
-    steps:
-      - name: Checkout repository
-        uses: actions/checkout@v4
-        with:
-          ref: ${{ github.event.inputs.commit_hash }}
-
-      - name: Log in to the Container registry
-        uses: docker/login-action@65b78e6e13532edd9afa3aa52ac7964289d1a9c1
-        with:
-          registry: ${{ env.REGISTRY }}
-          username: ${{ github.actor }}
-          password: ${{ secrets.GITHUB_TOKEN }}
-
-      - name: Extract metadata (tags, labels) for non_root Dockerfile
-        id: meta-non_root
-        uses: docker/metadata-action@9ec57ed1fcdbf14dcef7dfbe97b2010124a938b7
-        with:
-          images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}-non_root
-      # Configure multi platform Docker builds
-      - name: Set up QEMU
-        uses: docker/setup-qemu-action@e0e4588fad221d38ee467c0bffd91115366dc0c5
-      - name: Set up Docker Buildx
-        uses: docker/setup-buildx-action@edfb0fe6204400c56fbfd3feba3fe9ad1adfa345
-
-      - name: Build and push non_root Docker image
-        uses: docker/build-push-action@f2a1d5e99d037542a71f64918e516c093c6f3fc4
-        with:
-          context: .
-          file: ./docker/Dockerfile.non_root
-          push: true
-          tags: |
-            ${{ steps.meta-non_root.outputs.tags }}-${{ github.event.inputs.tag || 'latest' }},
-            ${{ steps.meta-non_root.outputs.tags }}-${{ github.event.inputs.release_type }}
-            ${{ (github.event.inputs.release_type == 'stable'  || github.event.inputs.release_type == 'rc') && format('{0}/berriai/litellm-non_root:main-{1}', env.REGISTRY, github.event.inputs.tag) || '' }},
-            ${{ github.event.inputs.release_type == 'stable' && format('{0}/berriai/litellm-non_root:main-stable', env.REGISTRY) || '' }}
-          labels: ${{ steps.meta-non_root.outputs.labels }} 
-          platforms: local,linux/amd64,linux/arm64,linux/arm64/v8
-  
-  build-and-push-image-spend-logs:
-    runs-on: ubuntu-latest
-    permissions:
-      contents: read
-      packages: write
-    steps:
-      - name: Checkout repository
-        uses: actions/checkout@v4
-        with:
-          ref: ${{ github.event.inputs.commit_hash }}
-
-      - name: Log in to the Container registry
-        uses: docker/login-action@65b78e6e13532edd9afa3aa52ac7964289d1a9c1
-        with:
-          registry: ${{ env.REGISTRY }}
-          username: ${{ github.actor }}
-          password: ${{ secrets.GITHUB_TOKEN }}
-
-      - name: Extract metadata (tags, labels) for spend-logs Dockerfile
-        id: meta-spend-logs
-        uses: docker/metadata-action@9ec57ed1fcdbf14dcef7dfbe97b2010124a938b7
-        with:
-          images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}-spend_logs
-      # Configure multi platform Docker builds
-      - name: Set up QEMU
-        uses: docker/setup-qemu-action@e0e4588fad221d38ee467c0bffd91115366dc0c5
-      - name: Set up Docker Buildx
-        uses: docker/setup-buildx-action@edfb0fe6204400c56fbfd3feba3fe9ad1adfa345
-
-      - name: Build and push Database Docker image
-        uses: docker/build-push-action@f2a1d5e99d037542a71f64918e516c093c6f3fc4
-        with:
-          context: .
-          file: ./litellm-js/spend-logs/Dockerfile
-          push: true
-          tags: |
-            ${{ steps.meta-spend-logs.outputs.tags }}-${{ github.event.inputs.tag || 'latest' }},
-            ${{ steps.meta-spend-logs.outputs.tags }}-${{ github.event.inputs.release_type }}
-            ${{ (github.event.inputs.release_type == 'stable'  || github.event.inputs.release_type == 'rc') && format('{0}/berriai/litellm-spend_logs:main-{1}', env.REGISTRY, github.event.inputs.tag) || '' }},
-            ${{ github.event.inputs.release_type == 'stable' && format('{0}/berriai/litellm-spend_logs:main-stable', env.REGISTRY) || '' }}
-          platforms: local,linux/amd64,linux/arm64,linux/arm64/v8
-
-  run-observatory-tests:
-    if: github.event.inputs.release_type == 'rc' || github.event.inputs.release_type == 'stable'
-    needs: [docker-hub-deploy]
-    uses: ./.github/workflows/run_observatory_tests.yml
-    with:
-      tag: ${{ github.event.inputs.tag }}
-      commit_hash: ${{ github.event.inputs.commit_hash }}
-    secrets: inherit
-
-  build-and-push-helm-chart:
-    if: github.event.inputs.release_type  != 'dev'
-    needs: [docker-hub-deploy, build-and-push-image, build-and-push-image-database]
-    runs-on: ubuntu-latest
-    steps:
-      - name: Checkout repository
-        uses: actions/checkout@v4
-        with:
-          fetch-depth: 0
-
-      - name: Log in to the Container registry
-        uses: docker/login-action@65b78e6e13532edd9afa3aa52ac7964289d1a9c1
-        with:
-          registry: ${{ env.REGISTRY }}
-          username: ${{ github.actor }}
-          password: ${{ secrets.GITHUB_TOKEN }}
-
-      - name: lowercase github.repository_owner
-        run: |
-          echo "REPO_OWNER=`echo ${{github.repository_owner}} | tr '[:upper:]' '[:lower:]'`" >>${GITHUB_ENV}
-    
-      # Sync Helm chart version with LiteLLM release version (1-1 versioning)
-      # This allows users to easily map Helm chart versions to LiteLLM versions
-      # See: https://codefresh.io/docs/docs/ci-cd-guides/helm-best-practices/
-      - name: Calculate chart and app versions
-        id: chart_version
-        shell: bash
-        run: |
-          INPUT_TAG="${{ github.event.inputs.tag }}"
-          RELEASE_TYPE="${{ github.event.inputs.release_type }}"
-
-          # Chart version = LiteLLM version without 'v' prefix (Helm semver convention)
-          # v1.81.0 -> 1.81.0, v1.81.0.rc.1 -> 1.81.0.rc.1
-          CHART_VERSION="${INPUT_TAG#v}"
-
-          # Add suffix for 'latest' releases (rc already has suffix in tag)
-          if [ "$RELEASE_TYPE" = "latest" ]; then
-            CHART_VERSION="${CHART_VERSION}-latest"
-          fi
-
-          # App version = Docker tag (keeps 'v' prefix to match Docker image tags)
-          APP_VERSION="${INPUT_TAG}"
-
-          echo "version=${CHART_VERSION}" | tee -a $GITHUB_OUTPUT
-          echo "app_version=${APP_VERSION}" | tee -a $GITHUB_OUTPUT
-
-      - uses: ./.github/actions/helm-oci-chart-releaser
-        with:
-          name: ${{ env.CHART_NAME }}
-          repository: ${{ env.REPO_OWNER }}
-          tag: ${{ steps.chart_version.outputs.version }}
-          app_version: ${{ steps.chart_version.outputs.app_version }}
-          path: deploy/charts/${{ env.CHART_NAME }}
-          registry: ${{ env.REGISTRY }}
-          registry_username: ${{ github.actor }}
-          registry_password: ${{ secrets.GITHUB_TOKEN }}
-          update_dependencies: true
-
-  release:
-    name: "New LiteLLM Release"
-    needs: [docker-hub-deploy, build-and-push-image, build-and-push-image-database]
-    permissions:
-      contents: write
-    runs-on: "ubuntu-latest"
-   
-    steps:
-      - name: Display version
-        run: echo "Current version is ${{ github.event.inputs.tag }}"
-      - name: "Set Release Tag"
-        run: echo "RELEASE_TAG=${{ github.event.inputs.tag }}" >> $GITHUB_ENV
-      - name: Display release tag
-        run: echo "RELEASE_TAG is $RELEASE_TAG"
-      - name: "Create release"
-        uses: "actions/github-script@v6"
-        with:
-          github-token: "${{ secrets.GITHUB_TOKEN }}"
-          script: |
-            const commitHash = "${{ github.event.inputs.commit_hash}}";
-            console.log("Commit Hash:", commitHash); // Add this line for debugging
-            try {
-              const response = await github.rest.repos.createRelease({
-                draft: false,
-                generate_release_notes: true,
-                target_commitish: commitHash,
-                name: process.env.RELEASE_TAG,
-                owner: context.repo.owner,
-                prerelease: false,
-                repo: context.repo.repo,
-                tag_name: process.env.RELEASE_TAG,
-              });
-      
-              core.exportVariable('RELEASE_ID', response.data.id);
-              core.exportVariable('RELEASE_UPLOAD_URL', response.data.upload_url);
-            } catch (error) {
-              core.setFailed(error.message);
-            }
-      - name: Fetch Release Notes
-        id: release-notes
-        uses: actions/github-script@v6
-        with:
-          github-token: "${{ secrets.GITHUB_TOKEN }}"
-          script: |
-            try {
-              const response = await github.rest.repos.getRelease({
-                owner: context.repo.owner,
-                repo: context.repo.repo,
-                release_id: process.env.RELEASE_ID,
-              });
-              const formattedBody = JSON.stringify(response.data.body).slice(1, -1);
-              return formattedBody;
-            } catch (error) {
-              core.setFailed(error.message);
-            }
-        env:
-          RELEASE_ID: ${{ env.RELEASE_ID }}
-      - name: Github Releases To Discord
-        env:
-          WEBHOOK_URL: ${{ secrets.WEBHOOK_URL }}
-          REALEASE_TAG: ${{ env.RELEASE_TAG }}
-          RELEASE_NOTES: ${{ steps.release-notes.outputs.result }}
-        run: |
-          curl -H "Content-Type: application/json" -X POST -d '{
-            "content": "New LiteLLM release '"${RELEASE_TAG}"'",
-            "username": "Release Changelog",
-            "avatar_url": "https://cdn.discordapp.com/avatars/487431320314576937/bd64361e4ba6313d561d54e78c9e7171.png",
-            "embeds": [
-              {
-                "title": "Changelog for LiteLLM '"${RELEASE_TAG}"'",
-                "description": "'"${RELEASE_NOTES}"'",
-                "color": 2105893
-              }
-            ]
-          }' $WEBHOOK_URL
-
--- a/.github/workflows/ghcr_helm_deploy.yml
+++ b/.github/workflows/ghcr_helm_deploy.yml
@ -1,67 +0,0 @@
-# Standalone workflow to publish LiteLLM Helm Chart
-# Note: The main ghcr_deploy.yml workflow also publishes the Helm chart as part of a full release
-name: Build, Publish LiteLLM Helm Chart. New Release
-on:
-  workflow_dispatch:
-    inputs:
-      tag:
-        description: "LiteLLM version tag (e.g., v1.81.0)"
-        required: true
-
-# Defines two custom environment variables for the workflow. Used for the Container registry domain, and a name for the Docker image that this workflow builds.
-env:
-  REGISTRY: ghcr.io
-  IMAGE_NAME: ${{ github.repository }}
-  REPO_OWNER: ${{github.repository_owner}}
-
-# There is a single job in this workflow. It's configured to run on the latest available version of Ubuntu.
-jobs:        
-  build-and-push-helm-chart:
-    runs-on: ubuntu-latest
-    steps:
-      - name: Checkout repository
-        uses: actions/checkout@v4
-
-      - name: Log in to the Container registry
-        uses: docker/login-action@65b78e6e13532edd9afa3aa52ac7964289d1a9c1
-        with:
-          registry: ${{ env.REGISTRY }}
-          username: ${{ github.actor }}
-          password: ${{ secrets.GITHUB_TOKEN }}
-      
-      - name: lowercase github.repository_owner
-        run: |
-          echo "REPO_OWNER=`echo ${{github.repository_owner}} | tr '[:upper:]' '[:lower:]'`" >>${GITHUB_ENV}
-
-      # Sync Helm chart version with LiteLLM release version (1-1 versioning)
-      - name: Calculate chart and app versions
-        id: chart_version
-        shell: bash
-        run: |
-          INPUT_TAG="${{ github.event.inputs.tag }}"
-
-          # Chart version = LiteLLM version without 'v' prefix
-          # v1.81.0 -> 1.81.0
-          CHART_VERSION="${INPUT_TAG#v}"
-
-          # App version = Docker tag (keeps 'v' prefix)
-          APP_VERSION="${INPUT_TAG}"
-
-          echo "version=${CHART_VERSION}" | tee -a $GITHUB_OUTPUT
-          echo "app_version=${APP_VERSION}" | tee -a $GITHUB_OUTPUT
-
-      - name: Lint helm chart
-        run: helm lint deploy/charts/litellm-helm
-
-      - uses: ./.github/actions/helm-oci-chart-releaser
-        with:
-          name: litellm-helm
-          repository: ${{ env.REPO_OWNER }}
-          tag: ${{ steps.chart_version.outputs.version }}
-          app_version: ${{ steps.chart_version.outputs.app_version }}
-          path: deploy/charts/litellm-helm
-          registry: ${{ env.REGISTRY }}
-          registry_username: ${{ github.actor }}
-          registry_password: ${{ secrets.GITHUB_TOKEN }}
-          update_dependencies: true
-  
--- a/.github/workflows/helm_unit_test.yml
+++ b/.github/workflows/helm_unit_test.yml
@ -6,22 +6,36 @@ on:
    branches:
      - main

+permissions:
+  contents: read
+
 jobs:
  unit-test:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
-        uses: actions/checkout@v2
+        uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # v4.3.0
+        with:
+          persist-credentials: false

      - name: Set up Helm 3.11.1
-        uses: azure/setup-helm@v1
+        uses: azure/setup-helm@1a275c3b69536ee54be43f2070a358922e12c8d4 # v4.3.1
        with:
-          version: '3.11.1'
+          version: "3.11.1"

      - name: Install Helm Unit Test Plugin
        run: |
          helm plugin install https://github.com/helm-unittest/helm-unittest --version v0.4.4
+      - name: Verify Helm Unit Test Plugin integrity
+        run: |
+          EXPECTED_SHA="e251ba198448629678ff2168e1a469249d998155"
+          PLUGIN_DIR="$(helm env HELM_PLUGINS)/helm-unittest"
+          ACTUAL_SHA="$(git -C "$PLUGIN_DIR" rev-parse HEAD)"
+          if [ "$ACTUAL_SHA" != "$EXPECTED_SHA" ]; then
+            echo "::error::Helm unittest plugin checksum mismatch! Expected $EXPECTED_SHA but got $ACTUAL_SHA"
+            exit 1
+          fi
+          echo "Helm unittest plugin integrity verified: $ACTUAL_SHA"

      - name: Run unit tests
-        run:
-          helm unittest -f 'tests/*.yaml' deploy/charts/litellm-helm
+        run: helm unittest -f 'tests/*.yaml' deploy/charts/litellm-helm
--- a/.github/workflows/interpret_load_test.py
+++ b/.github/workflows/interpret_load_test.py
@ -1,139 +0,0 @@
-import csv
-import os
-from github import Github
-
-
-def interpret_results(csv_file):
-    with open(csv_file, newline="") as csvfile:
-        csvreader = csv.DictReader(csvfile)
-        rows = list(csvreader)
-        """
-        in this csv reader
-        - Create 1 new column "Status"
-        - if a row has a median response time < 300 and an average response time < 300, Status = "Passed ✅"
-        - if a row has a median response time >= 300 or an average response time >= 300, Status = "Failed ❌"
-        - Order the table in this order Name, Status, Median Response Time, Average Response Time, Requests/s,Failures/s, Min Response Time, Max Response Time, all other columns
-        """
-
-        # Add a new column "Status"
-        for row in rows:
-            median_response_time = float(
-                row["Median Response Time"].strip().rstrip("ms")
-            )
-            average_response_time = float(
-                row["Average Response Time"].strip().rstrip("s")
-            )
-
-            request_count = int(row["Request Count"])
-            failure_count = int(row["Failure Count"])
-
-            failure_percent = round((failure_count / request_count) * 100, 2)
-
-            # Determine status based on conditions
-            if (
-                median_response_time < 300
-                and average_response_time < 300
-                and failure_percent < 5
-            ):
-                row["Status"] = "Passed ✅"
-            else:
-                row["Status"] = "Failed ❌"
-
-        # Construct Markdown table header
-        markdown_table = "| Name | Status | Median Response Time (ms) | Average Response Time (ms) | Requests/s | Failures/s | Request Count | Failure Count | Min Response Time (ms) | Max Response Time (ms) |"
-        markdown_table += (
-            "\n| --- | --- | --- | --- | --- | --- | --- | --- | --- | --- |"
-        )
-
-        # Construct Markdown table rows
-        for row in rows:
-            markdown_table += f"\n| {row['Name']} | {row['Status']} | {row['Median Response Time']} | {row['Average Response Time']} | {row['Requests/s']} | {row['Failures/s']} | {row['Request Count']} | {row['Failure Count']} | {row['Min Response Time']} | {row['Max Response Time']} |"
-    print("markdown table: ", markdown_table)
-    return markdown_table
-
-
-def _get_docker_run_command_stable_release(release_version):
-    return f"""
-\n\n
-## Docker Run LiteLLM Proxy
-
-```
-docker run \\
-e STORE_MODEL_IN_DB=True \\
-p 4000:4000 \\
-ghcr.io/berriai/litellm:litellm_stable_release_branch-{release_version}
-```
-    """
-
-
-def _get_docker_run_command(release_version):
-    return f"""
-\n\n
-## Docker Run LiteLLM Proxy
-
-```
-docker run \\
-e STORE_MODEL_IN_DB=True \\
-p 4000:4000 \\
-ghcr.io/berriai/litellm:main-{release_version}
-```
-    """
-
-
-def get_docker_run_command(release_version):
-    if "stable" in release_version:
-        return _get_docker_run_command_stable_release(release_version)
-    else:
-        return _get_docker_run_command(release_version)
-
-
-if __name__ == "__main__":
-    return
-    csv_file = "load_test_stats.csv"  # Change this to the path of your CSV file
-    markdown_table = interpret_results(csv_file)
-
-    # Update release body with interpreted results
-    github_token = os.getenv("GITHUB_TOKEN")
-    g = Github(github_token)
-    repo = g.get_repo(
-        "BerriAI/litellm"
-    )  # Replace with your repository's username and name
-    latest_release = repo.get_latest_release()
-    print("got latest release: ", latest_release)
-    print(latest_release.title)
-    print(latest_release.tag_name)
-
-    release_version = latest_release.title
-
-    print("latest release body: ", latest_release.body)
-    print("markdown table: ", markdown_table)
-
-    # check if "Load Test LiteLLM Proxy Results" exists
-    existing_release_body = latest_release.body
-    if "Load Test LiteLLM Proxy Results" in latest_release.body:
-        # find the "Load Test LiteLLM Proxy Results" section and delete it
-        start_index = latest_release.body.find("Load Test LiteLLM Proxy Results")
-        existing_release_body = latest_release.body[:start_index]
-
-    docker_run_command = get_docker_run_command(release_version)
-    print("docker run command: ", docker_run_command)
-
-    new_release_body = (
-        existing_release_body
-        + docker_run_command
-        + "\n\n"
-        + "### Don't want to maintain your internal proxy? get in touch 🎉"
-        + "\nHosted Proxy Alpha: https://calendly.com/d/cx9p-5yf-2nm/litellm-introductions"
-        + "\n\n"
-        + "## Load Test LiteLLM Proxy Results"
-        + "\n\n"
-        + markdown_table
-    )
-    print("new release body: ", new_release_body)
-    try:
-        latest_release.update_release(
-            name=latest_release.tag_name,
-            message=new_release_body,
-        )
-    except Exception as e:
-        print(e)
--- a/.github/workflows/issue-keyword-labeler.yml
+++ b/.github/workflows/issue-keyword-labeler.yml
@ -2,8 +2,8 @@ name: Issue Keyword Labeler

 on:
  issues:
-    types: 
-        - opened
+    types:
+      - opened

 jobs:
  scan-and-label:
@ -13,7 +13,9 @@ jobs:
      contents: read
    steps:
      - name: Checkout code
-        uses: actions/checkout@v4
+        uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # v4.3.0
+        with:
+          persist-credentials: false

      - name: Scan for provider keywords
        id: scan
@ -24,7 +26,7 @@ jobs:

      - name: Ensure label exists
        if: steps.scan.outputs.found == 'true'
-        uses: actions/github-script@v7
+        uses: actions/github-script@f28e40c7f34bde8b3046d885e986cb6290c5673b # v7.1.0
        with:
          github-token: ${{ secrets.GITHUB_TOKEN }}
          script: |
@ -51,7 +53,7 @@ jobs:

      - name: Add label to the issue
        if: steps.scan.outputs.found == 'true'
-        uses: actions/github-script@v7
+        uses: actions/github-script@f28e40c7f34bde8b3046d885e986cb6290c5673b # v7.1.0
        with:
          github-token: ${{ secrets.GITHUB_TOKEN }}
          script: |
@ -61,4 +63,3 @@ jobs:
              issue_number: context.issue.number,
              labels: ['llm translation']
            });
-
--- a/.github/workflows/label-component.yml
+++ b/.github/workflows/label-component.yml
@ -12,7 +12,7 @@ jobs:
      issues: write
    steps:
      - name: Add component labels
-        uses: actions/github-script@v7
+        uses: actions/github-script@f28e40c7f34bde8b3046d885e986cb6290c5673b # v7.1.0
        with:
          github-token: ${{ secrets.GITHUB_TOKEN }}
          script: |
--- a/.github/workflows/llm-translation-testing.yml
+++ b/.github/workflows/llm-translation-testing.yml
@ -4,38 +4,41 @@ on:
  workflow_dispatch:
    inputs:
      release_candidate_tag:
-        description: 'Release candidate tag/version'
+        description: "Release candidate tag/version"
        required: true
        type: string
  push:
    tags:
-      - 'v*-rc*'  # Triggers on release candidate tags like v1.0.0-rc1
-  
+      - "v*-rc*" # Triggers on release candidate tags like v1.0.0-rc1
+
+permissions:
+  contents: read
+
 jobs:
  run-llm-translation-tests:
    runs-on: ubuntu-latest
    timeout-minutes: 90
-    
+
    steps:
      - name: Checkout code
-        uses: actions/checkout@v4
+        uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # v4.3.0
        with:
+          persist-credentials: false
          ref: ${{ github.event.inputs.release_candidate_tag || github.ref }}
-      
+
      - name: Set up Python
-        uses: actions/setup-python@v5
+        uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5.6.0
        with:
-          python-version: '3.11'
-      
+          python-version: "3.11"
+
      - name: Install Poetry
-        uses: snok/install-poetry@v1
-        with:
-          version: latest
-          virtualenvs-create: true
-          virtualenvs-in-project: true
-      
-      - name: Cache Poetry dependencies
-        uses: actions/cache@v3
+        run: |
+          pip install 'poetry==2.3.2'
+          poetry config virtualenvs.create true
+          poetry config virtualenvs.in-project true
+
+      - name: Restore Poetry dependencies cache
+        uses: actions/cache/restore@0057852bfaa89a56745cba8c7296529d2fc39830 # v4.3.0
        with:
          path: |
            ~/.cache/pypoetry
@ -43,15 +46,15 @@ jobs:
          key: ${{ runner.os }}-poetry-${{ hashFiles('**/poetry.lock') }}
          restore-keys: |
            ${{ runner.os }}-poetry-
-      
+
      - name: Install dependencies
        run: |
          poetry install --with dev
-          poetry run pip install pytest-xdist pytest-timeout
-      
+          poetry run pip install 'pytest-xdist==3.8.0' 'pytest-timeout==2.4.0'
+
      - name: Create test results directory
        run: mkdir -p test-results
-      
+
      - name: Run LLM Translation Tests
        env:
          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
@ -61,13 +64,14 @@ jobs:
          AZURE_API_KEY: ${{ secrets.AZURE_API_KEY }}
          AZURE_API_BASE: ${{ secrets.AZURE_API_BASE }}
          AZURE_API_VERSION: ${{ secrets.AZURE_API_VERSION }}
-          # Add other API keys as needed
+          RC_TAG: ${{ github.event.inputs.release_candidate_tag || github.ref_name }}
+          COMMIT_SHA: ${{ github.sha }}
        run: |
          python .github/workflows/run_llm_translation_tests.py \
-            --tag "${{ github.event.inputs.release_candidate_tag || github.ref_name }}" \
-            --commit "${{ github.sha }}" \
+            --tag "$RC_TAG" \
+            --commit "$COMMIT_SHA" \
            || true  # Continue even if tests fail
-      
+
      - name: Display test summary
        if: always()
        run: |
@ -79,9 +83,9 @@ jobs:
          else
            echo "Warning: Test report was not generated"
          fi
-      
+
      - name: Upload test artifacts
-        uses: actions/upload-artifact@v4
+        uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
        if: always()
        with:
          name: LLM-Translation-Artifact-${{ github.event.inputs.release_candidate_tag || github.ref_name }}
--- a/.github/workflows/load_test.yml
+++ b/.github/workflows/load_test.yml
@ -1,59 +0,0 @@
-name: Test Locust Load Test
-
-on:
-  workflow_run:
-    workflows: ["Build, Publish LiteLLM Docker Image. New Release"]
-    types:
-      - completed
-  workflow_dispatch:
-
-jobs:
-  build:
-    runs-on: ubuntu-latest
-    steps:
-      - name: Checkout
-        uses: actions/checkout@v1
-      - name: Setup Python
-        uses: actions/setup-python@v2
-        with:
-          python-version: '3.x'
-
-      - name: Install dependencies
-        run: |
-          python -m pip install --upgrade pip
-          pip install PyGithub
-      - name: re-deploy proxy
-        run: |
-          echo "Current working directory: $PWD"
-          ls
-          python ".github/workflows/redeploy_proxy.py"
-        env:
-          LOAD_TEST_REDEPLOY_URL1: ${{ secrets.LOAD_TEST_REDEPLOY_URL1 }}
-          LOAD_TEST_REDEPLOY_URL2: ${{ secrets.LOAD_TEST_REDEPLOY_URL2 }}
-        working-directory: ${{ github.workspace }}
-      - name: Run Load Test
-        id: locust_run
-        uses: BerriAI/locust-github-action@master
-        with:
-          LOCUSTFILE: ".github/workflows/locustfile.py"
-          URL:  "https://post-release-load-test-proxy.onrender.com/"
-          USERS: "20"
-          RATE: "20"
-          RUNTIME: "300s"
-      - name: Process Load Test Stats
-        run: |
-          echo "Current working directory: $PWD"
-          ls
-          python ".github/workflows/interpret_load_test.py"
-        env:
-          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-        working-directory: ${{ github.workspace }}
-      - name: Upload CSV as Asset to Latest Release
-        uses: xresloader/upload-to-github-release@v1
-        env:
-          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-        with:
-          file: "load_test_stats.csv;load_test.html"
-          update_latest_release: true
-          tag_name: "load-test"
-          overwrite: true
--- a/.github/workflows/locustfile.py
+++ b/.github/workflows/locustfile.py
@ -1,28 +0,0 @@
-from locust import HttpUser, task, between
-
-
-class MyUser(HttpUser):
-    wait_time = between(1, 5)
-
-    @task
-    def chat_completion(self):
-        headers = {
-            "Content-Type": "application/json",
-            "Authorization": "Bearer sk-8N1tLOOyH8TIxwOLahhIVg",
-            # Include any additional headers you may need for authentication, etc.
-        }
-
-        # Customize the payload with "model" and "messages" keys
-        payload = {
-            "model": "fake-openai-endpoint",
-            "messages": [
-                {"role": "system", "content": "You are a chat bot."},
-                {"role": "user", "content": "Hello, how are you?"},
-            ],
-            # Add more data as necessary
-        }
-
-        # Make a POST request to the "chat/completions" endpoint
-        response = self.client.post("chat/completions", json=payload, headers=headers)
-
-        # Print or log the response if needed
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@ -1,34 +0,0 @@
-name: Publish Dev Release to PyPI
-
-on:
-  workflow_dispatch:
-  
-jobs:
-  publish-dev-release:
-    runs-on: ubuntu-latest
-
-    steps:
-      - name: Checkout code
-        uses: actions/checkout@v2
-
-      - name: Set up Python
-        uses: actions/setup-python@v2
-        with:
-          python-version: 3.8  # Adjust the Python version as needed
-
-      - name: Install dependencies
-        run: pip install toml twine
-
-      - name: Read version from pyproject.toml
-        id: read-version
-        run: |
-          version=$(python -c 'import toml; print(toml.load("pyproject.toml")["tool"]["commitizen"]["version"])')
-          printf "LITELLM_VERSION=%s" "$version" >> $GITHUB_ENV
-
-      - name: Check if version exists on PyPI
-        id: check-version
-        run: |
-          set -e
-          if twine check --repository-url https://pypi.org/simple/ "litellm==$LITELLM_VERSION" >/dev/null 2>&1; then
-            echo "Version $LITELLM_VERSION already exists on PyPI. Skipping publish."
-     
--- a/.github/workflows/publish-migrations.yml
+++ b/.github/workflows/publish-migrations.yml
@ -1,207 +0,0 @@
-name: Publish Prisma Migrations
-
-permissions:
-  contents: write
-  pull-requests: write
-
-on:
-  push:
-    paths:
-      - 'schema.prisma'  # Check root schema.prisma
-    branches:
-      - main
-
-jobs:
-  publish-migrations:
-    if: github.repository == 'BerriAI/litellm'
-    runs-on: ubuntu-latest
-    services:
-      postgres:
-        image: postgres:14
-        env:
-          POSTGRES_DB: temp_db
-          POSTGRES_USER: postgres
-          POSTGRES_PASSWORD: postgres
-        ports:
-          - 5432:5432
-        options: >-
-          --health-cmd pg_isready
-          --health-interval 10s
-          --health-timeout 5s
-          --health-retries 5
-      
-      # Add shadow database service
-      postgres_shadow:
-        image: postgres:14
-        env:
-          POSTGRES_DB: shadow_db
-          POSTGRES_USER: postgres
-          POSTGRES_PASSWORD: postgres
-        ports:
-          - 5433:5432
-        options: >-
-          --health-cmd pg_isready
-          --health-interval 10s
-          --health-timeout 5s
-          --health-retries 5
-
-    steps:
-      - uses: actions/checkout@v3
-
-      - name: Set up Python
-        uses: actions/setup-python@v4
-        with:
-          python-version: '3.x'
-
-      - name: Install Dependencies
-        run: |
-          pip install prisma
-          pip install python-dotenv
-
-      - name: Generate Initial Migration if None Exists
-        env:
-          DATABASE_URL: "postgresql://postgres:postgres@localhost:5432/temp_db"
-          DIRECT_URL: "postgresql://postgres:postgres@localhost:5432/temp_db"
-          SHADOW_DATABASE_URL: "postgresql://postgres:postgres@localhost:5433/shadow_db"
-        run: |
-          mkdir -p deploy/migrations
-          echo 'provider = "postgresql"' > deploy/migrations/migration_lock.toml
-          
-          if [ -z "$(ls -A deploy/migrations/2* 2>/dev/null)" ]; then
-            echo "No existing migrations found, creating baseline..."
-            VERSION=$(date +%Y%m%d%H%M%S)
-            mkdir -p deploy/migrations/${VERSION}_initial
-            
-            echo "Generating initial migration..."
-            # Save raw output for debugging
-            prisma migrate diff \
-              --from-empty \
-              --to-schema-datamodel schema.prisma \
-              --shadow-database-url "${SHADOW_DATABASE_URL}" \
-              --script > deploy/migrations/${VERSION}_initial/raw_migration.sql
-            
-            echo "Raw migration file content:"
-            cat deploy/migrations/${VERSION}_initial/raw_migration.sql
-            
-            echo "Cleaning migration file..."
-            # Clean the file
-            sed '/^Installing/d' deploy/migrations/${VERSION}_initial/raw_migration.sql > deploy/migrations/${VERSION}_initial/migration.sql
-            
-            # Verify the migration file
-            if [ ! -s deploy/migrations/${VERSION}_initial/migration.sql ]; then
-              echo "ERROR: Migration file is empty after cleaning"
-              echo "Original content was:"
-              cat deploy/migrations/${VERSION}_initial/raw_migration.sql
-              exit 1
-            fi
-            
-            echo "Final migration file content:"
-            cat deploy/migrations/${VERSION}_initial/migration.sql
-            
-            # Verify it starts with SQL
-            if ! head -n 1 deploy/migrations/${VERSION}_initial/migration.sql | grep -q "^--\|^CREATE\|^ALTER"; then
-              echo "ERROR: Migration file does not start with SQL command or comment"
-              echo "First line is:"
-              head -n 1 deploy/migrations/${VERSION}_initial/migration.sql
-              echo "Full content is:"
-              cat deploy/migrations/${VERSION}_initial/migration.sql
-              exit 1
-            fi
-            
-            echo "Initial migration generated at $(date -u)" > deploy/migrations/${VERSION}_initial/README.md
-          fi
-
-      - name: Compare and Generate Migration
-        if: success()
-        env:
-          DATABASE_URL: "postgresql://postgres:postgres@localhost:5432/temp_db"
-          DIRECT_URL: "postgresql://postgres:postgres@localhost:5432/temp_db"
-          SHADOW_DATABASE_URL: "postgresql://postgres:postgres@localhost:5433/shadow_db"
-        run: |
-          # Create temporary migration workspace
-          mkdir -p temp_migrations
-          
-          # Copy existing migrations (will not fail if directory is empty)
-          cp -r deploy/migrations/* temp_migrations/ 2>/dev/null || true
-          
-          VERSION=$(date +%Y%m%d%H%M%S)
-          
-          # Generate diff against existing migrations or empty state
-          prisma migrate diff \
-            --from-migrations temp_migrations \
-            --to-schema-datamodel schema.prisma \
-            --shadow-database-url "${SHADOW_DATABASE_URL}" \
-            --script > temp_migrations/migration_${VERSION}.sql
-          
-          # Check if there are actual changes
-          if [ -s temp_migrations/migration_${VERSION}.sql ]; then
-            echo "Changes detected, creating new migration"
-            mkdir -p deploy/migrations/${VERSION}_schema_update
-            mv temp_migrations/migration_${VERSION}.sql deploy/migrations/${VERSION}_schema_update/migration.sql
-            echo "Migration generated at $(date -u)" > deploy/migrations/${VERSION}_schema_update/README.md
-          else
-            echo "No schema changes detected"
-            exit 0
-          fi
-
-      - name: Verify Migration
-        if: success()
-        env:
-          DATABASE_URL: "postgresql://postgres:postgres@localhost:5432/temp_db"
-          DIRECT_URL: "postgresql://postgres:postgres@localhost:5432/temp_db"
-          SHADOW_DATABASE_URL: "postgresql://postgres:postgres@localhost:5433/shadow_db"
-        run: |
-          # Create test database
-          psql "${SHADOW_DATABASE_URL}" -c 'CREATE DATABASE migration_test;'
-          
-          # Apply all migrations in order to verify
-          for migration in deploy/migrations/*/migration.sql; do
-            echo "Applying migration: $migration"
-            psql "${SHADOW_DATABASE_URL}" -f $migration
-          done
-
-      # Add this step before create-pull-request to debug permissions
-      - name: Check Token Permissions
-        run: |
-          echo "Checking token permissions..."
-          curl -H "Authorization: token ${{ secrets.GITHUB_TOKEN }}" \
-               -H "Accept: application/vnd.github.v3+json" \
-               https://api.github.com/repos/BerriAI/litellm/collaborators
-          
-          echo "\nChecking if token can create PRs..."
-          curl -H "Authorization: token ${{ secrets.GITHUB_TOKEN }}" \
-               -H "Accept: application/vnd.github.v3+json" \
-               https://api.github.com/repos/BerriAI/litellm
-
-      # Add this debug step before git push
-      - name: Debug Changed Files
-        run: |
-          echo "Files staged for commit:"
-          git diff --name-status --staged
-          
-          echo "\nAll changed files:"
-          git status
-
-      - name: Create Pull Request
-        if: success()
-        uses: peter-evans/create-pull-request@v5
-        with:
-          token: ${{ secrets.GITHUB_TOKEN }}
-          commit-message: "chore: update prisma migrations"
-          title: "Update Prisma Migrations"
-          body: |
-            Auto-generated migration based on schema.prisma changes.
-            
-            Generated files:
-            - deploy/migrations/${VERSION}_schema_update/migration.sql
-            - deploy/migrations/${VERSION}_schema_update/README.md
-          branch: feat/prisma-migration-${{ env.VERSION }}
-          base: main
-          delete-branch: true
-
-      - name: Generate and Save Migrations
-        run: |
-          # Only add migration files
-          git add deploy/migrations/
-          git status  # Debug what's being committed
-          git commit -m "chore: update prisma migrations" 
--- a/.github/workflows/publish_enterprise.yml
+++ b/.github/workflows/publish_enterprise.yml
@ -1,94 +0,0 @@
-name: Publish litellm-enterprise to PyPI
-
-on:
-  workflow_dispatch:
-    inputs:
-      bump:
-        description: "Version bump type"
-        required: true
-        default: "patch"
-        type: choice
-        options:
-          - patch
-          - minor
-          - major
-
-jobs:
-  publish:
-    runs-on: ubuntu-latest
-    if: github.repository == 'BerriAI/litellm'
-    permissions:
-      contents: write
-      pull-requests: write
-    defaults:
-      run:
-        working-directory: enterprise
-
-    steps:
-      - uses: actions/checkout@v4
-
-      - uses: actions/setup-python@v5
-        with:
-          python-version: "3.11"
-
-      - name: Install Poetry
-        run: pip install poetry
-
-      - name: Bump version
-        id: bump
-        run: |
-          OLD=$(poetry version -s)
-          poetry version ${{ github.event.inputs.bump }}
-          NEW=$(poetry version -s)
-          echo "old=$OLD" >> $GITHUB_OUTPUT
-          echo "new=$NEW" >> $GITHUB_OUTPUT
-
-      - name: Update version refs in root pyproject.toml and requirements.txt
-        run: |
-          OLD=${{ steps.bump.outputs.old }}
-          NEW=${{ steps.bump.outputs.new }}
-          sed -i "s/litellm-enterprise = {version = \"${OLD}\"/litellm-enterprise = {version = \"${NEW}\"/" ../pyproject.toml
-          sed -i "s/litellm-enterprise==${OLD}/litellm-enterprise==${NEW}/" ../requirements.txt
-
-      - name: Update poetry.lock
-        working-directory: .
-        run: poetry lock
-
-      - name: Build
-        run: poetry build
-
-      - name: Commit version bump and create PR
-        id: create-pr
-        run: |
-          git config user.name "github-actions[bot]"
-          git config user.email "github-actions[bot]@users.noreply.github.com"
-          cd ..
-          BRANCH="bump/enterprise-${{ steps.bump.outputs.new }}"
-          git checkout -b "$BRANCH"
-          git add enterprise/pyproject.toml pyproject.toml requirements.txt poetry.lock
-          git commit -m "bump: litellm-enterprise ${{ steps.bump.outputs.old }} → ${{ steps.bump.outputs.new }}"
-          git push origin "$BRANCH" --force
-          gh pr create \
-            --title "bump: litellm-enterprise ${{ steps.bump.outputs.old }} → ${{ steps.bump.outputs.new }}" \
-            --body "Version bump for litellm-enterprise. Merge to update main." \
-            --head "$BRANCH" \
-            --base main \
-          || true
-          PR_URL=$(gh pr list --head "$BRANCH" --json url -q '.[0].url')
-          echo "pr_url=$PR_URL" >> $GITHUB_OUTPUT
-        env:
-          GH_TOKEN: ${{ github.token }}
-
-      - name: Enable auto-merge
-        run: |
-          gh pr merge "${{ steps.create-pr.outputs.pr_url }}" --auto --squash
-        env:
-          GH_TOKEN: ${{ github.token }}
-
-      - name: Publish to PyPI
-        env:
-          TWINE_USERNAME: __token__
-          TWINE_PASSWORD: ${{ secrets.PYPI_ENTERPRISE }}
-        run: |
-          pip install twine
-          twine upload dist/litellm_enterprise-${{ steps.bump.outputs.new }}*
--- a/.github/workflows/publish_proxy_extras.yml
+++ b/.github/workflows/publish_proxy_extras.yml
@ -1,74 +0,0 @@
-name: Publish litellm-proxy-extras to PyPI
-
-on:
-  workflow_dispatch:
-    inputs:
-      bump:
-        description: "Version bump type"
-        required: true
-        default: "patch"
-        type: choice
-        options:
-          - patch
-          - minor
-          - major
-
-jobs:
-  publish:
-    runs-on: ubuntu-latest
-    if: github.repository == 'BerriAI/litellm'
-    permissions:
-      contents: write
-    defaults:
-      run:
-        working-directory: litellm-proxy-extras
-
-    steps:
-      - uses: actions/checkout@v4
-
-      - uses: actions/setup-python@v5
-        with:
-          python-version: "3.11"
-
-      - name: Install Poetry
-        run: pip install poetry
-
-      - name: Bump version
-        id: bump
-        run: |
-          OLD=$(poetry version -s)
-          poetry version ${{ github.event.inputs.bump }}
-          NEW=$(poetry version -s)
-          echo "old=$OLD" >> $GITHUB_OUTPUT
-          echo "new=$NEW" >> $GITHUB_OUTPUT
-
-      - name: Update version refs in root pyproject.toml and requirements.txt
-        run: |
-          OLD=${{ steps.bump.outputs.old }}
-          NEW=${{ steps.bump.outputs.new }}
-          sed -i "s/litellm-proxy-extras = {version = \"${OLD}\"/litellm-proxy-extras = {version = \"${NEW}\"/" ../pyproject.toml
-          sed -i "s/litellm-proxy-extras==${OLD}/litellm-proxy-extras==${NEW}/" ../requirements.txt
-
-      - name: Update poetry.lock
-        working-directory: .
-        run: poetry lock
-
-      - name: Build
-        run: poetry build
-
-      - name: Commit version bump
-        run: |
-          git config user.name "github-actions[bot]"
-          git config user.email "github-actions[bot]@users.noreply.github.com"
-          cd ..
-          git add litellm-proxy-extras/pyproject.toml pyproject.toml requirements.txt poetry.lock
-          git commit -m "bump: litellm-proxy-extras ${{ steps.bump.outputs.old }} → ${{ steps.bump.outputs.new }}"
-          git push
-
-      - name: Publish to PyPI
-        env:
-          TWINE_USERNAME: __token__
-          TWINE_PASSWORD: ${{ secrets.PYPI_PUBLISH_PASSWORD }}
-        run: |
-          pip install twine
-          twine upload dist/litellm_proxy_extras-${{ steps.bump.outputs.new }}*
--- a/.github/workflows/publish_to_pypi.yml
+++ b/.github/workflows/publish_to_pypi.yml
@ -0,0 +1,164 @@
+name: Publish to PyPI
+
+# Manually triggered release of the `litellm` package in two stages:
+#   1. preflight-checks: read-only version checks, no approval gate.
+#   2. publish-litellm: build, verify and upload; runs in the `pypi-publish`
+#      environment and only when preflight reports `needs_publish == 'true'`.
+on:
+  workflow_dispatch:
+
+jobs:
+  preflight-checks:
+    name: Preflight Checks
+    runs-on: ubuntu-latest
+    timeout-minutes: 10
+    permissions:
+      contents: read
+    # No environment — read-only checks, no approval needed
+    outputs:
+      needs_publish: ${{ steps.check-litellm.outputs.needs_publish }}
+      version: ${{ steps.check-litellm.outputs.version }}
+
+    steps:
+      - name: Checkout repo
+        uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # v4.3.0
+        with:
+          # This job never pushes, so keep the token out of .git/config.
+          persist-credentials: false
+
+      - name: Set up Python
+        uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5.6.0
+        with:
+          python-version: "3.12"
+
+      - name: Check litellm version on PyPI
+        id: check-litellm
+        run: |
+          # Take the first `version = "..."` line of pyproject.toml.
+          VERSION=$(grep -m1 '^version' pyproject.toml | sed 's/version = "\(.*\)"/\1/')
+          if [ -z "$VERSION" ]; then
+            echo "::error::Could not read version from pyproject.toml"
+            exit 1
+          fi
+          echo "version=$VERSION" >> "$GITHUB_OUTPUT"
+          echo "Checking if litellm $VERSION exists on PyPI..."
+
+          # `|| true` turns a transport failure (curl prints 000) into a
+          # status handled by the catch-all branch below.
+          HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" "https://pypi.org/pypi/litellm/$VERSION/json" || true)
+          case "$HTTP_STATUS" in
+            200)
+              echo "litellm $VERSION already exists on PyPI. Skipping publish."
+              echo "needs_publish=false" >> "$GITHUB_OUTPUT"
+              ;;
+            404)
+              echo "litellm $VERSION not found on PyPI. Publish needed."
+              echo "needs_publish=true" >> "$GITHUB_OUTPUT"
+              ;;
+            *)
+              # Any other status (5xx, rate limit, network error) says nothing
+              # about whether the release exists, so refuse to guess.
+              echo "::error::Unexpected HTTP status '$HTTP_STATUS' from PyPI while checking litellm $VERSION"
+              exit 1
+              ;;
+          esac
+
+      - name: Sanity check proxy-extras version
+        run: |
+          # Read pinned version from requirements.txt. `|| true` lets a missing
+          # pin reach the explicit error below instead of aborting under `bash -e`.
+          REQ_VERSION=$(grep -oP 'litellm-proxy-extras==\K[0-9.]+' requirements.txt || true)
+          if [ -z "$REQ_VERSION" ]; then
+            echo "::error::Could not find litellm-proxy-extras version in requirements.txt"
+            exit 1
+          fi
+          echo "requirements.txt pins litellm-proxy-extras==$REQ_VERSION"
+
+          # Read pinned version from pyproject.toml dependency
+          PYPROJECT_VERSION=$(python3 -c "
+          import re
+          with open('pyproject.toml') as f:
+              content = f.read()
+          match = re.search(r'litellm-proxy-extras\s*=\s*\{version\s*=\s*\"([^\"]+)\"', content)
+          if match:
+              print(match.group(1).lstrip('^~>='))
+          else:
+              import sys
+              print('::error::Could not find litellm-proxy-extras dependency in pyproject.toml', file=sys.stderr)
+              sys.exit(1)
+          ")
+          echo "pyproject.toml pins litellm-proxy-extras version: $PYPROJECT_VERSION"
+
+          # Check that both pinned versions match
+          if [ "$REQ_VERSION" != "$PYPROJECT_VERSION" ]; then
+            echo "::error::Version mismatch: requirements.txt has $REQ_VERSION but pyproject.toml has $PYPROJECT_VERSION"
+            exit 1
+          fi
+
+          # Check that the pinned version exists on PyPI
+          echo "Checking if litellm-proxy-extras $REQ_VERSION exists on PyPI..."
+          HTTP_STATUS=$(curl -s -o /dev/null -w "%{http_code}" "https://pypi.org/pypi/litellm-proxy-extras/$REQ_VERSION/json")
+          if [ "$HTTP_STATUS" != "200" ]; then
+            echo "::error::litellm-proxy-extras $REQ_VERSION is not published on PyPI yet. Publish it before releasing litellm."
+            exit 1
+          fi
+          echo "litellm-proxy-extras $REQ_VERSION exists on PyPI. Sanity check passed."
+
+  publish-litellm:
+    name: Publish litellm to PyPI
+    needs: preflight-checks
+    if: needs.preflight-checks.outputs.needs_publish == 'true'
+    runs-on: ubuntu-latest
+    timeout-minutes: 10
+    permissions:
+      # NOTE(review): presumably for PyPI Trusted Publishing (OIDC); confirm.
+      id-token: write
+      contents: read
+    environment: pypi-publish
+
+    steps:
+      - name: Checkout repo
+        uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # v4.3.0
+        with:
+          persist-credentials: false
+
+      - name: Set up Python
+        uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5.6.0
+        with:
+          python-version: "3.12"
+
+      - name: Copy model prices backup
+        run: cp model_prices_and_context_window.json litellm/model_prices_and_context_window_backup.json
+
+      - name: Install build tools
+        run: python -m pip install --upgrade pip build==1.4.2
+
+      - name: Build package
+        run: |
+          rm -rf build dist
+          python -m build
+
+      - name: Verify build artifacts
+        env:
+          EXPECTED_VERSION: ${{ needs.preflight-checks.outputs.version }}
+        run: |
+          echo "Contents of dist/:"
+          ls -la dist/
+          # Ensure we have both sdist and wheel
+          ls dist/*.tar.gz
+          ls dist/*.whl
+          # Verify built version matches expected. -F: match the version's dots
+          # literally rather than as regex wildcards.
+          ls dist/ | grep -qF -- "litellm-${EXPECTED_VERSION}" || {
+            echo "::error::Built artifacts do not match expected version $EXPECTED_VERSION"
+            ls dist/
+            exit 1
+          }
+
+      - name: Validate package metadata
+        run: |
+          pip install twine==6.2.0
+          twine check dist/*
+
+      - name: Publish to PyPI
+        uses: pypa/gh-action-pypi-publish@ed0c53931b1dc9bd32cbe73a98c7f6766f8a527e # v1.13.0
--- a/.github/workflows/read_pyproject_version.yml
+++ b/.github/workflows/read_pyproject_version.yml
@ -3,7 +3,10 @@ name: Read Version from pyproject.toml
 on:
  push:
    branches:
-      - main  # Change this to the default branch of your repository
+      - main # Change this to the default branch of your repository
+
+permissions:
+  contents: read

 jobs:
  read-version:
@ -11,20 +14,16 @@ jobs:

    steps:
      - name: Checkout code
-        uses: actions/checkout@v2
-
-      - name: Set up Python
-        uses: actions/setup-python@v2
+        uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # v4.3.0
        with:
-          python-version: 3.8  # Adjust the Python version as needed
-
-      - name: Install dependencies
-        run: pip install toml
+          persist-credentials: false

      - name: Read version from pyproject.toml
        id: read-version
        run: |
-          version=$(python -c 'import toml; print(toml.load("pyproject.toml")["tool"]["commitizen"]["version"])')
-          printf "LITELLM_VERSION=%s" "$version" >> $GITHUB_ENV
+          version=$(grep -m1 '^version' pyproject.toml | sed 's/version = "\(.*\)"/\1/')
+          # Newline-terminate the entry and quote the env-file path so any later
+          # append to $GITHUB_ENV starts on its own line.
+          printf 'LITELLM_VERSION=%s\n' "$version" >> "$GITHUB_ENV"

      - name: Display version
--- a/.github/workflows/redeploy_proxy.py
+++ b/.github/workflows/redeploy_proxy.py
@ -1,20 +0,0 @@
-"""
-
-redeploy_proxy.py
-"""
-
-import os
-import requests
-import time
-
-# send a get request to this endpoint
-deploy_hook1 = os.getenv("LOAD_TEST_REDEPLOY_URL1")
-response = requests.get(deploy_hook1, timeout=20)
-
-
-deploy_hook2 = os.getenv("LOAD_TEST_REDEPLOY_URL2")
-response = requests.get(deploy_hook2, timeout=20)
-
-print("SENT GET REQUESTS to re-deploy proxy")
-print("sleeeping.... for 60s")
-time.sleep(60)
--- a/.github/workflows/regenerate-poetry-lock.yml
+++ b/.github/workflows/regenerate-poetry-lock.yml
@ -1,80 +0,0 @@
-name: Regenerate poetry.lock
-
-# Runs whenever pyproject.toml is merged into main (the most common cause of
-# the "pyproject.toml changed significantly since poetry.lock was last generated"
-# CI failure).  Can also be triggered manually.
-on:
-  push:
-    branches:
-      - main
-    paths:
-      - pyproject.toml
-  workflow_dispatch:
-
-permissions:
-  contents: write       # needed to push the auto/regenerate-poetry-lock-* branch
-  pull-requests: write  # needed to open the PR and enable auto-merge
-
-jobs:
-  regenerate-lock:
-    runs-on: ubuntu-latest
-    steps:
-      - uses: actions/checkout@v4
-
-      - name: Set up Python
-        uses: actions/setup-python@v5
-        with:
-          python-version: "3.11"
-
-      - name: Install Poetry
-        run: pip install poetry
-
-      - name: Regenerate poetry.lock
-        run: poetry lock
-
-      - name: Check whether poetry.lock actually changed
-        id: diff
-        run: |
-          if git diff --quiet poetry.lock; then
-            echo "changed=false" >> "$GITHUB_OUTPUT"
-          else
-            echo "changed=true" >> "$GITHUB_OUTPUT"
-          fi
-
-      - name: Open PR with the refreshed lock file
-        if: steps.diff.outputs.changed == 'true'
-        id: open-pr
-        run: |
-          BRANCH="auto/regenerate-poetry-lock-$(date +'%Y%m%d%H%M%S')"
-          git config user.name "github-actions[bot]"
-          git config user.email "github-actions[bot]@users.noreply.github.com"
-          git checkout -b "$BRANCH"
-          git add poetry.lock
-          git commit -m "chore: regenerate poetry.lock to match pyproject.toml"
-          git push -f origin "$BRANCH"
-
-          cat > /tmp/pr-body.md << 'BODY'
-          Automated regeneration of `poetry.lock` after `pyproject.toml` was updated on `main`.
-
-          Fixes the recurring CI failure:
-          ```
-          pyproject.toml changed significantly since poetry.lock was last generated.
-          Run `poetry lock` to fix the lock file.
-          ```
-          BODY
-
-          PR_URL=$(gh pr create \
-            --title "chore: regenerate poetry.lock to match pyproject.toml" \
-            --body-file /tmp/pr-body.md \
-            --head "$BRANCH" \
-            --base main)
-          echo "pr_url=$PR_URL" >> "$GITHUB_OUTPUT"
-        env:
-          GH_TOKEN: ${{ github.token }}
-
-      - name: Enable auto-merge
-        if: steps.diff.outputs.changed == 'true'
-        run: |
-          gh pr merge "${{ steps.open-pr.outputs.pr_url }}" --auto --squash
-        env:
-          GH_TOKEN: ${{ github.token }}
--- a/.github/workflows/reset_stable.yml
+++ b/.github/workflows/reset_stable.yml
@ -1,39 +0,0 @@
-name: Reset litellm_stable branch
-
-on:
-  release:
-    types: [published, created]
-jobs:
-  update-stable-branch:
-    if: ${{ startsWith(github.event.release.tag_name, 'v') && !endsWith(github.event.release.tag_name, '-stable') }}
-    runs-on: ubuntu-latest
-
-    steps:
-      - name: Checkout repository
-        uses: actions/checkout@v3
-
-      - name: Reset litellm_stable_release_branch branch to the release commit
-        env:
-          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-        run: |
-          # Configure Git user
-          git config user.name "github-actions[bot]"
-          git config user.email "github-actions[bot]@users.noreply.github.com"
-          
-          # Fetch all branches and tags
-          git fetch --all
-
-          # Check if the litellm_stable_release_branch branch exists
-          if git show-ref --verify --quiet refs/remotes/origin/litellm_stable_release_branch; then
-            echo "litellm_stable_release_branch branch exists."
-            git checkout litellm_stable_release_branch
-          else
-            echo "litellm_stable_release_branch branch does not exist. Creating it."
-            git checkout -b litellm_stable_release_branch
-          fi
-
-          # Reset litellm_stable_release_branch branch to the release commit
-          git reset --hard $GITHUB_SHA
-
-          # Push the updated litellm_stable_release_branch branch
-          git push origin litellm_stable_release_branch --force
--- a/.github/workflows/run_observatory_tests.yml
+++ b/.github/workflows/run_observatory_tests.yml
@ -33,7 +33,9 @@ jobs:
    timeout-minutes: 30
    steps:
      - name: Checkout repository
-        uses: actions/checkout@v4
+        uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # v4.3.0
+        with:
+          persist-credentials: false

      - name: Validate tag input
        env:
@ -49,11 +51,12 @@ jobs:
          TAG: ${{ inputs.tag }}
          AZURE_API_KEY: ${{ secrets.AZURE_API_KEY }}
          AZURE_API_BASE: ${{ secrets.AZURE_API_BASE }}
+          WORKSPACE: ${{ github.workspace }}
        run: |
          docker run -d \
            --name litellm-rc \
            -p 4000:4000 \
-            -v "${{ github.workspace }}/.github/observatory/litellm_config.yaml:/app/config.yaml" \
+            -v "${WORKSPACE}/.github/observatory/litellm_config.yaml:/app/config.yaml" \
            -e LITELLM_MASTER_KEY="${LITELLM_MASTER_KEY}" \
            -e AZURE_API_KEY="${AZURE_API_KEY}" \
            -e AZURE_API_BASE="${AZURE_API_BASE}" \
@ -77,8 +80,9 @@ jobs:

      - name: Start cloudflared tunnel
        run: |
-          # Install cloudflared
+          # Install cloudflared (pinned version + checksum)
          curl -sL https://github.com/cloudflare/cloudflared/releases/download/2025.2.1/cloudflared-linux-amd64 -o /usr/local/bin/cloudflared
+          echo "afdfadd1ef552e66bffc35246fe30a9bd578356d2d386de95585ccfc432472b8  /usr/local/bin/cloudflared" | sha256sum -c -
          chmod +x /usr/local/bin/cloudflared

          # Start a quick tunnel (no account needed) and capture the URL
@ -103,11 +107,11 @@ jobs:

      - name: Verify tunnel connectivity
        run: |
-          echo "Testing tunnel at ${{ env.TUNNEL_URL }}..."
+          echo "Testing tunnel at ${TUNNEL_URL}..."
          # Quick tunnels need time for DNS propagation; retry to avoid
          # transient NXDOMAIN (curl exit code 6) on first attempt.
          for i in $(seq 1 10); do
-            if curl -sf "${{ env.TUNNEL_URL }}/health/liveliness" > /dev/null 2>&1; then
+            if curl -sf "${TUNNEL_URL}/health/liveliness" > /dev/null 2>&1; then
              echo "Tunnel is working (attempt $i)"
              exit 0
            fi
@ -221,5 +225,5 @@ jobs:
      - name: Cleanup
        if: always()
        run: |
-          kill "${{ env.CLOUDFLARED_PID }}" 2>/dev/null || true
+          kill "$CLOUDFLARED_PID" 2>/dev/null || true
          docker rm -f litellm-rc 2>/dev/null || true
--- a/.github/workflows/scan_duplicate_issues.yml
+++ b/.github/workflows/scan_duplicate_issues.yml
@ -21,14 +21,15 @@ jobs:
      contents: read
    steps:
      - name: Checkout scripts
-        uses: actions/checkout@v4
+        uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # v4.3.0
        with:
          sparse-checkout: .github/scripts
+          persist-credentials: false

      - name: Set up Python
-        uses: actions/setup-python@v5
+        uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5.6.0
        with:
-          python-version: "3.11"
+          python-version: "3.13"

      - name: Scan for duplicate issues
        env:
--- a/.github/workflows/scorecard.yml
+++ b/.github/workflows/scorecard.yml
@ -0,0 +1,47 @@
+name: Scorecard supply-chain security
+
+on:
+  branch_protection_rule:
+  schedule:
+    - cron: '27 12 * * 4'
+  push:
+    branches: ["main"]
+
+permissions: read-all
+
+jobs:
+  analysis:
+    name: Scorecard analysis
+    runs-on: ubuntu-latest
+    if: github.event.repository.default_branch == github.ref_name
+    permissions:
+      security-events: write
+      id-token: write
+      # Uncomment for private repos if needed:
+      # contents: read
+      # actions: read
+
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+        with:
+          persist-credentials: false
+
+      - name: Run analysis
+        uses: ossf/scorecard-action@f49aabe0b5af0936a0987cfb85d86b75731b0186 # v2.4.1
+        with:
+          results_file: results.sarif
+          results_format: sarif
+          publish_results: true
+
+      - name: Upload artifact
+        uses: actions/upload-artifact@4cec3d8aa04e39d1a68397de0c4cd6fb9dce8ec1 # v4.6.1
+        with:
+          name: SARIF file
+          path: results.sarif
+          retention-days: 5
+
+      - name: Upload to code scanning
+        uses: github/codeql-action/upload-sarif@c10b806170c8ee63ea24152429041b5624f0baf5 # v4.35.1
+        with:
+          sarif_file: results.sarif
--- a/.github/workflows/simple_pypi_publish.yml
+++ b/.github/workflows/simple_pypi_publish.yml
@ -1,67 +0,0 @@
-name: Simple PyPI Publish
-
-on:
-  workflow_dispatch:
-    inputs:
-      version:
-        description: 'Version to publish (e.g., 1.74.10)'
-        required: true
-        type: string
-
-env:
-  TWINE_USERNAME: __token__
-
-jobs:
-  publish:
-    runs-on: ubuntu-latest
-    if: github.repository == 'BerriAI/litellm'
-    
-    steps:
-      - name: Checkout code
-        uses: actions/checkout@v4
-
-      - name: Set up Python
-        uses: actions/setup-python@v4
-        with:
-          python-version: '3.8'
-
-      - name: Install dependencies
-        run: |
-          python -m pip install --upgrade pip
-          pip install toml build wheel twine
-
-      - name: Update version in pyproject.toml
-        run: |
-          python -c "
-          import toml
-          
-          with open('pyproject.toml', 'r') as f:
-              data = toml.load(f)
-          
-          data['tool']['poetry']['version'] = '${{ github.event.inputs.version }}'
-          
-          with open('pyproject.toml', 'w') as f:
-              toml.dump(data, f)
-          
-          print(f'Updated version to ${{ github.event.inputs.version }}')
-          "
-
-      - name: Copy model prices file
-        run: |
-          cp model_prices_and_context_window.json litellm/model_prices_and_context_window_backup.json
-
-      - name: Build package
-        run: |
-          rm -rf build dist
-          python -m build
-
-      - name: Publish to PyPI
-        env:
-          TWINE_PASSWORD: ${{ secrets.PYPI_PUBLISH_PASSWORD }}
-        run: |
-          twine upload dist/*
-
-      - name: Output success
-        run: |
-          echo "✅ Successfully published litellm v${{ github.event.inputs.version }} to PyPI"
-          echo "📦 Package: https://pypi.org/project/litellm/${{ github.event.inputs.version }}/" 
--- a/.github/workflows/stale.yml
+++ b/.github/workflows/stale.yml
@ -2,19 +2,24 @@ name: "Stale Issue Management"

 on:
  schedule:
-    - cron: '0 0 * * *' # Runs daily at midnight UTC
+    - cron: "0 0 * * *" # Runs daily at midnight UTC
  workflow_dispatch:

+permissions:
+  issues: write
+  pull-requests: write
+
 jobs:
  stale:
+    if: github.repository == 'BerriAI/litellm'
    runs-on: ubuntu-latest
    steps:
-      - uses: actions/stale@v8
+      - uses: actions/stale@1160a2240286f5da8ec72b1c0816ce2481aabf84 # v8
        with:
          repo-token: "${{ secrets.GITHUB_TOKEN }}"
          stale-issue-message: "This issue has been automatically marked as stale because it has not had recent activity. It will be closed if no further activity occurs."
          stale-pr-message: "This pull request has been automatically marked as stale because it has not had recent activity. It will be closed if no further activity occurs."
-          days-before-stale: 90        # Revert to 60 days
-          days-before-close: 7         # Revert to 7 days
+          days-before-stale: 90 # Revert to 60 days
+          days-before-close: 7 # Revert to 7 days
          stale-issue-label: "stale"
-          operations-per-run: 1000
+          operations-per-run: 1000
--- a/.github/workflows/sync-schema.yml
+++ b/.github/workflows/sync-schema.yml
@ -0,0 +1,73 @@
+name: Sync schema.prisma copies
+
+on:
+  pull_request:
+    paths:
+      - 'schema.prisma'
+
+# Scoped to ONLY the permissions needed:
+# - contents:write to push the sync commit to the PR branch
+# - no pull-requests scope is needed: unlisted scopes default to none, and checkout only uses contents
+permissions:
+  contents: write
+
+jobs:
+  sync:
+    name: Copy root schema to proxy and proxy-extras
+    runs-on: ubuntu-latest
+    timeout-minutes: 5
+    # Only run on PRs from branches in THIS repo (not forks).
+    # Fork PRs cannot push back to the head branch with GITHUB_TOKEN,
+    # and pull_request events from forks have read-only tokens anyway.
+    # Also reject PRs from branches named after protected branches to
+    # prevent pushing directly to main/master.
+    if: >-
+      github.event.pull_request.head.repo.full_name == github.repository
+      && github.head_ref != 'main'
+      && github.head_ref != 'master'
+    steps:
+      - name: Checkout PR branch by SHA
+        uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # v4.3.0
+        with:
+          # Use the PR head commit SHA for safety — github.head_ref is an
+          # attacker-controlled string (the branch name) and could contain
+          # unusual characters that cause unexpected git behavior.
+          ref: ${{ github.event.pull_request.head.sha }}
+          persist-credentials: true  # needed for git push
+
+      - name: Reject symlinked schema files
+        run: |
+          for f in schema.prisma litellm/proxy/schema.prisma litellm-proxy-extras/litellm_proxy_extras/schema.prisma; do
+            if [ -L "$f" ]; then
+              echo "::error file=$f::$f is a symlink, which is not allowed"
+              exit 1
+            fi
+          done
+
+      - name: Copy root schema to other locations
+        run: |
+          cp schema.prisma litellm/proxy/schema.prisma
+          cp schema.prisma litellm-proxy-extras/litellm_proxy_extras/schema.prisma
+
+      - name: Check for changes
+        id: diff
+        run: |
+          if git diff --quiet -- litellm/proxy/schema.prisma litellm-proxy-extras/litellm_proxy_extras/schema.prisma; then
+            echo "changed=false" >> "$GITHUB_OUTPUT"
+            echo "Schemas already in sync. Nothing to do."
+          else
+            echo "changed=true" >> "$GITHUB_OUTPUT"
+            echo "Schema copies need updating."
+          fi
+
+      - name: Commit synced schemas
+        if: steps.diff.outputs.changed == 'true'
+        run: |
+          # Push to the PR's head branch (need the branch name for git push).
+          # We checked out by SHA above for safety, so configure the push target explicitly.
+          git config user.name "github-actions[bot]"
+          git config user.email "41898282+github-actions[bot]@users.noreply.github.com"
+          git checkout -B "$GITHUB_HEAD_REF"
+          git add -- litellm/proxy/schema.prisma litellm-proxy-extras/litellm_proxy_extras/schema.prisma
+          git commit -m "chore: sync schema.prisma copies from root"
+          git push origin "HEAD:$GITHUB_HEAD_REF"
--- a/.github/workflows/test-linting.yml
+++ b/.github/workflows/test-linting.yml
@ -2,7 +2,10 @@ name: LiteLLM Linting

 on:
  pull_request:
-    branches: [ main ]
+    branches: [main]
+
+permissions:
+  contents: read

 jobs:
  lint:
@ -10,69 +13,73 @@ jobs:
    timeout-minutes: 5

    steps:
-    - uses: actions/checkout@v4
-      with:
-        fetch-depth: 0
-        clean: true
+      - uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # v4.3.0
+        with:
+          fetch-depth: 0
+          clean: true
+          persist-credentials: false

-    - name: Set up Python
-      uses: actions/setup-python@v4
-      with:
-        python-version: '3.12'
+      - name: Set up Python
+        uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5.6.0
+        with:
+          python-version: "3.12"

-    - name: Install Poetry
-      uses: snok/install-poetry@v1
+      - name: Install Poetry
+        run: pip install 'poetry==2.3.2'

-    - name: Clean Python cache
-      run: |
-        find . -type d -name "__pycache__" -exec rm -rf {} + || true
-        find . -name "*.pyc" -delete || true
+      - name: Clean Python cache
+        run: |
+          find . -type d -name "__pycache__" -exec rm -rf {} + || true
+          find . -name "*.pyc" -delete || true

-    - name: Install dependencies
-      run: |
-        poetry lock
-        poetry install --with dev
+      - name: Check poetry.lock is up to date
+        run: |
+          poetry check --lock || (echo "❌ poetry.lock is out of sync with pyproject.toml. Run 'poetry lock' locally and commit the result." && exit 1)

-    - name: Check Black formatting
-      run: |
-        cd litellm
-        poetry run black --check --exclude '/enterprise/' .
-        cd ..
+      - name: Install dependencies
+        run: |
+          poetry install --with dev

-    - name: Debug - Check file state
-      run: |
-        echo "Current branch:"
-        git branch --show-current
-        echo "Last 3 commits:"
-        git log --oneline -3
-        echo "File content around line 43:"
-        head -50 litellm/litellm_core_utils/custom_logger_registry.py | tail -10
-        
-    - name: Run Ruff linting
-      run: |
-        cd litellm
-        poetry run ruff check .
-        cd ..
+      - name: Check Black formatting
+        run: |
+          cd litellm
+          poetry run black --check --exclude '/enterprise/' .
+          cd ..

-    - name: Print OpenAI version
-      run: |
-        poetry run python -c "import openai; print(f'OpenAI version: {openai.__version__}')"
+      - name: Debug - Check file state
+        run: |
+          echo "Current branch:"
+          git branch --show-current
+          echo "Last 3 commits:"
+          git log --oneline -3
+          echo "File content around line 43:"
+          head -50 litellm/litellm_core_utils/custom_logger_registry.py | tail -10

-    - name: Run MyPy type checking
-      run: |
-        cd litellm
-        poetry run mypy . 
-        cd ..
+      - name: Run Ruff linting
+        run: |
+          cd litellm
+          poetry run ruff check .
+          cd ..

-    - name: Check for circular imports
-      run: |
-        cd litellm
-        poetry run python ../tests/documentation_tests/test_circular_imports.py
-        cd ..
+      - name: Print OpenAI version
+        run: |
+          poetry run python -c "import openai; print(f'OpenAI version: {openai.__version__}')"

-    - name: Check import safety
-      run: |
-        poetry run python -c "from litellm import *" || (echo '🚨 import failed, this means you introduced unprotected imports! 🚨'; exit 1)
+      - name: Run MyPy type checking
+        run: |
+          cd litellm
+          poetry run mypy . 
+          cd ..
+
+      - name: Check for circular imports
+        run: |
+          cd litellm
+          poetry run python ../tests/documentation_tests/test_circular_imports.py
+          cd ..
+
+      - name: Check import safety
+        run: |
+          poetry run python -c "from litellm import *" || (echo '🚨 import failed, this means you introduced unprotected imports! 🚨'; exit 1)

  secret-scan:
    runs-on: ubuntu-latest
@ -81,27 +88,28 @@ jobs:
      contents: read

    steps:
-    - uses: actions/checkout@v4
-      with:
-        fetch-depth: 0
+      - uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # v4.3.0
+        with:
+          fetch-depth: 0
+          persist-credentials: false

-    - name: Set up Python
-      uses: actions/setup-python@v4
-      with:
-        python-version: '3.12'
+      - name: Set up Python
+        uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5.6.0
+        with:
+          python-version: "3.12"

-    - name: Run secret scan test
-      run: |
-        pip install pytest
-        pytest tests/litellm/test_no_hardcoded_secrets.py -v
+      - name: Run secret scan test
+        run: |
+          pip install 'pytest==9.0.2'
+          pytest tests/litellm/test_no_hardcoded_secrets.py -v

-    - name: Run ggshield secret scan
-      env:
-        GITGUARDIAN_API_KEY: ${{ secrets.GITGUARDIAN_API_KEY }}
-      run: |
-        if [ -n "$GITGUARDIAN_API_KEY" ]; then
-          pip install ggshield
-          ggshield secret scan repo .
-        else
-          echo "GITGUARDIAN_API_KEY not set, skipping ggshield scan"
-        fi
+      - name: Run ggshield secret scan
+        env:
+          GITGUARDIAN_API_KEY: ${{ secrets.GITGUARDIAN_API_KEY }}
+        run: |
+          if [ -n "$GITGUARDIAN_API_KEY" ]; then
+            pip install 'ggshield==1.48.0'
+            ggshield secret scan repo .
+          else
+            echo "GITGUARDIAN_API_KEY not set, skipping ggshield scan"
+          fi
--- a/.github/workflows/test-litellm-matrix.yml
+++ b/.github/workflows/test-litellm-matrix.yml
@ -4,6 +4,9 @@ on:
  pull_request:
    branches: [main]

+permissions:
+  contents: read
+
 # Cancel in-progress runs for the same PR
 concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
@ -12,7 +15,7 @@ concurrency:
 jobs:
  test:
    runs-on: ubuntu-latest
-    timeout-minutes: 20  # Increased from 15 to 20
+    timeout-minutes: 20 # Increased from 15 to 20
    strategy:
      fail-fast: false
      matrix:
@ -43,7 +46,7 @@ jobs:
          - name: "integrations"
            path: "tests/test_litellm/integrations"
            workers: 2
-            reruns: 3  # Integration tests tend to be flakier
+            reruns: 3 # Integration tests tend to be flakier
          - name: "core-utils"
            path: "tests/test_litellm/litellm_core_utils"
            workers: 2
@ -117,18 +120,20 @@ jobs:
    name: test (${{ matrix.test-group.name }})

    steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # v4.3.0
+        with:
+          persist-credentials: false

      - name: Set up Python
-        uses: actions/setup-python@v5
+        uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5.6.0
        with:
          python-version: "3.12"

      - name: Install Poetry
-        uses: snok/install-poetry@v1
+        run: pip install 'poetry==2.3.2'

      - name: Cache Poetry dependencies
-        uses: actions/cache@v4
+        uses: actions/cache@0057852bfaa89a56745cba8c7296529d2fc39830 # v4.3.0
        with:
          path: |
            ~/.cache/pypoetry
@ -144,14 +149,17 @@ jobs:
          poetry install --with dev,proxy-dev --extras "proxy semantic-router"
          # pytest-rerunfailures and pytest-xdist are in pyproject.toml dev dependencies
          poetry run pip install google-genai==1.22.0 \
-            google-cloud-aiplatform>=1.38 fastapi-offline==1.7.3 python-multipart==0.0.22 openapi-core
+            google-cloud-aiplatform==1.115.0 fastapi-offline==1.7.3 python-multipart==0.0.22 openapi-core==0.23.0

      - name: Setup litellm-enterprise
        run: |
          poetry run pip install --force-reinstall --no-deps -e enterprise/

      - name: Generate Prisma client
+        env:
+          PRISMA_BINARY_CACHE_DIR: ${{ runner.temp }}/prisma-cache
        run: |
+          poetry run pip install nodejs-wheel-binaries==24.13.1
          poetry run prisma generate --schema litellm/proxy/schema.prisma

      - name: Run tests - ${{ matrix.test-group.name }}
--- a/.github/workflows/test-litellm-ui-build.yml
+++ b/.github/workflows/test-litellm-ui-build.yml
@ -16,10 +16,12 @@ jobs:

    steps:
      - name: Checkout repository
-        uses: actions/checkout@v4
+        uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # v4.3.0
+        with:
+          persist-credentials: false

      - name: Setup Node.js
-        uses: actions/setup-node@v4
+        uses: actions/setup-node@a0853c24544627f65ddf259abe73b1d18a591444 # v5.0
        with:
          node-version: "20"
          cache: "npm"
--- a/.github/workflows/test-litellm.yml
+++ b/.github/workflows/test-litellm.yml
@ -4,45 +4,50 @@ name: LiteLLM Mock Tests (folder - tests/test_litellm)
 # the same tests in parallel across 10 jobs for faster CI times.
 # Kept for manual debugging only.
 on:
-  workflow_dispatch:  # Manual trigger only
+  workflow_dispatch: # Manual trigger only
  # pull_request:
  #   branches: [ main ]

+permissions:
+  contents: read
+
 jobs:
  test:
    runs-on: ubuntu-latest
    timeout-minutes: 25

    steps:
-    - uses: actions/checkout@v4
+      - uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # v4.3.0
+        with:
+          persist-credentials: false

-    - name: Thank You Message
-      run: |
-        echo "### 🙏 Thank you for contributing to LiteLLM!" >> $GITHUB_STEP_SUMMARY
-        echo "Your PR is being tested now. We appreciate your help in making LiteLLM better!" >> $GITHUB_STEP_SUMMARY
+      - name: Thank You Message
+        run: |
+          echo "### 🙏 Thank you for contributing to LiteLLM!" >> $GITHUB_STEP_SUMMARY
+          echo "Your PR is being tested now. We appreciate your help in making LiteLLM better!" >> $GITHUB_STEP_SUMMARY

-    - name: Set up Python
-      uses: actions/setup-python@v4
-      with:
-        python-version: '3.12'
+      - name: Set up Python
+        uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5.6.0
+        with:
+          python-version: "3.12"

-    - name: Install Poetry
-      uses: snok/install-poetry@v1
+      - name: Install Poetry
+        run: pip install 'poetry==2.3.2'

-    - name: Install dependencies
-      run: |
-        poetry lock
-        poetry install --with dev,proxy-dev --extras "proxy semantic-router"
-        poetry run pip install "pytest-retry==1.6.3"
-        poetry run pip install pytest-xdist
-        poetry run pip install "google-genai==1.22.0"
-        poetry run pip install "google-cloud-aiplatform>=1.38"
-        poetry run pip install "fastapi-offline==1.7.3"
-        poetry run pip install "python-multipart>=0.0.20"
-        poetry run pip install "openapi-core"
-    - name: Setup litellm-enterprise as local package
-      run: |
-        poetry run pip install --force-reinstall --no-deps -e enterprise/
-    - name: Run tests
-      run: |
-        poetry run pytest tests/test_litellm --tb=short -vv --maxfail=10 -n 4 --durations=50
+      - name: Install dependencies
+        run: |
+          poetry lock
+          poetry install --with dev,proxy-dev --extras "proxy semantic-router"
+          poetry run pip install "pytest-retry==1.6.3"
+          poetry run pip install 'pytest-xdist==3.8.0'
+          poetry run pip install "google-genai==1.22.0"
+          poetry run pip install "google-cloud-aiplatform==1.115.0"
+          poetry run pip install "fastapi-offline==1.7.3"
+          poetry run pip install "python-multipart==0.0.22"
+          poetry run pip install "openapi-core==0.23.0"
+      - name: Setup litellm-enterprise as local package
+        run: |
+          poetry run pip install --force-reinstall --no-deps -e enterprise/
+      - name: Run tests
+        run: |
+          poetry run pytest tests/test_litellm --tb=short -vv --maxfail=10 -n 4 --durations=50
--- a/.github/workflows/test-mcp.yml
+++ b/.github/workflows/test-mcp.yml
@ -2,7 +2,10 @@ name: LiteLLM MCP Tests (folder - tests/mcp_tests)

 on:
  pull_request:
-    branches: [ main ]
+    branches: [main]
+
+permissions:
+  contents: read

 jobs:
  test:
@ -10,38 +13,40 @@ jobs:
    timeout-minutes: 25

    steps:
-    - uses: actions/checkout@v4
+      - uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # v4.3.0
+        with:
+          persist-credentials: false

-    - name: Thank You Message
-      run: |
-        echo "### 🙏 Thank you for contributing to LiteLLM!" >> $GITHUB_STEP_SUMMARY
-        echo "Your PR is being tested now. We appreciate your help in making LiteLLM better!" >> $GITHUB_STEP_SUMMARY
+      - name: Thank You Message
+        run: |
+          echo "### 🙏 Thank you for contributing to LiteLLM!" >> $GITHUB_STEP_SUMMARY
+          echo "Your PR is being tested now. We appreciate your help in making LiteLLM better!" >> $GITHUB_STEP_SUMMARY

-    - name: Set up Python
-      uses: actions/setup-python@v4
-      with:
-        python-version: '3.12'
+      - name: Set up Python
+        uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5.6.0
+        with:
+          python-version: "3.12"

-    - name: Install Poetry
-      uses: snok/install-poetry@v1
+      - name: Install Poetry
+        run: pip install 'poetry==2.3.2'

-    - name: Install dependencies
-      run: |
-        poetry lock
-        poetry install --with dev,proxy-dev --extras "proxy semantic-router"
-        poetry run pip install "pytest==7.3.1"
-        poetry run pip install "pytest-retry==1.6.3"
-        poetry run pip install "pytest-cov==5.0.0"
-        poetry run pip install "pytest-asyncio==0.21.1"
-        poetry run pip install "respx==0.22.0"
-        poetry run pip install "pydantic==2.11.0"
-        poetry run pip install "mcp==1.25.0"
-        poetry run pip install pytest-xdist
+      - name: Install dependencies
+        run: |
+          poetry lock
+          poetry install --with dev,proxy-dev --extras "proxy semantic-router"
+          poetry run pip install "pytest==7.3.1"
+          poetry run pip install "pytest-retry==1.6.3"
+          poetry run pip install "pytest-cov==5.0.0"
+          poetry run pip install "pytest-asyncio==0.21.1"
+          poetry run pip install "respx==0.22.0"
+          poetry run pip install "pydantic==2.11.0"
+          poetry run pip install "mcp==1.25.0"
+          poetry run pip install 'pytest-xdist==3.8.0'

-    - name: Setup litellm-enterprise as local package
-      run: |
-        poetry run pip install --force-reinstall --no-deps -e enterprise/
+      - name: Setup litellm-enterprise as local package
+        run: |
+          poetry run pip install --force-reinstall --no-deps -e enterprise/

-    - name: Run MCP tests
-      run: |
-        poetry run pytest tests/mcp_tests -x -vv -n 4 --cov=litellm --cov-report=xml --durations=5
+      - name: Run MCP tests
+        run: |
+          poetry run pytest tests/mcp_tests -x -vv -n 4 --cov=litellm --cov-report=xml --durations=5
--- a/.github/workflows/test-model-map.yaml
+++ b/.github/workflows/test-model-map.yaml
@ -2,13 +2,18 @@ name: Validate model_prices_and_context_window.json

 on:
  pull_request:
-    branches: [ main ]
+    branches: [main]
+
+permissions:
+  contents: read

 jobs:
  validate-model-prices-json:
    runs-on: ubuntu-latest
    steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # v4.3.0
+        with:
+          persist-credentials: false

      - name: Validate model_prices_and_context_window.json
        run: |
--- a/.github/workflows/test-proxy-e2e-azure-batches.yml
+++ b/.github/workflows/test-proxy-e2e-azure-batches.yml
@ -9,6 +9,9 @@ concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
  cancel-in-progress: true

+permissions:
+  contents: read
+
 jobs:
  proxy_e2e_azure_batches_tests:
    runs-on: ubuntu-latest
@ -30,18 +33,20 @@ jobs:
          --health-retries 5

    steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # v4.3.0
+        with:
+          persist-credentials: false

      - name: Set up Python
-        uses: actions/setup-python@v5
+        uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5.6.0
        with:
          python-version: "3.12"

      - name: Install Poetry
-        uses: snok/install-poetry@v1
+        run: pip install 'poetry==2.3.2'

      - name: Cache Poetry dependencies
-        uses: actions/cache@v4
+        uses: actions/cache@0057852bfaa89a56745cba8c7296529d2fc39830 # v4.3.0
        with:
          path: |
            ~/.cache/pypoetry
@ -56,14 +61,17 @@ jobs:
        run: |
          poetry config virtualenvs.in-project true
          poetry install --with dev,proxy-dev --extras "proxy"
-          poetry run pip install psycopg2-binary uvicorn fastapi httpx tenacity
+          poetry run pip install psycopg2-binary==2.9.11 uvicorn==0.42.0 fastapi==0.135.2 httpx==0.28.1 tenacity==9.1.4

      - name: Setup litellm-enterprise
        run: |
          poetry run pip install --force-reinstall --no-deps -e enterprise/

      - name: Generate Prisma client
+        env:
+          PRISMA_BINARY_CACHE_DIR: ${{ runner.temp }}/prisma-cache
        run: |
+          poetry run pip install nodejs-wheel-binaries==24.13.1
          poetry run prisma generate --schema litellm/proxy/schema.prisma

      - name: Run Prisma migrations
@ -87,4 +95,3 @@ jobs:
            --tb=short \
            --maxfail=3 \
            --durations=10
-
--- a/.github/workflows/test-unit-caching-redis.yml
+++ b/.github/workflows/test-unit-caching-redis.yml
@ -0,0 +1,38 @@
+name: "Unit Tests: Caching (Redis)"
+
+# Uses cloud Redis credentials — only runs on trusted branches, not PRs.
+# This prevents external PRs from accessing Redis credentials.
+on:
+  push:
+    branches: [main, "litellm_*"]
+
+permissions:
+  contents: read
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: true
+
+jobs:
+  caching-redis:
+    uses: ./.github/workflows/_test-unit-services-base.yml
+    with:
+      # Redis-only tests that do NOT require provider API keys.
+      # Tests needing API keys (test_caching.py, test_caching_ssl.py, test_prometheus_service.py,
+      # test_router_caching.py) are in Phase 3 integration workflows.
+      test-path: >-
+        tests/local_testing/test_dual_cache.py
+        tests/local_testing/test_redis_batch_optimizations.py
+        tests/local_testing/test_router_utils.py
+      workers: 2
+      reruns: 2
+      timeout-minutes: 20
+      enable-redis: true
+      enable-postgres: false
+    secrets:
+      REDIS_HOST: ${{ secrets.REDIS_HOST }}
+      REDIS_PORT: ${{ secrets.REDIS_PORT }}
+      REDIS_PASSWORD: ${{ secrets.REDIS_PASSWORD }}
+      DATABASE_URL: ${{ secrets.DATABASE_URL }}
+      POSTGRES_USER: ${{ secrets.POSTGRES_USER }}
+      POSTGRES_PASSWORD: ${{ secrets.POSTGRES_PASSWORD }}
--- a/.github/workflows/test-unit-core-utils.yml
+++ b/.github/workflows/test-unit-core-utils.yml
@ -0,0 +1,20 @@
+name: "Unit Tests: Core Utilities"
+
+on:
+  pull_request:
+    branches: [main]
+
+permissions:
+  contents: read
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
+  cancel-in-progress: true
+
+jobs:
+  core-utils:
+    uses: ./.github/workflows/_test-unit-base.yml
+    with:
+      test-path: "tests/test_litellm/litellm_core_utils"
+      workers: 2
+      reruns: 1
--- a/.github/workflows/test-unit-documentation.yml
+++ b/.github/workflows/test-unit-documentation.yml
@ -0,0 +1,67 @@
+name: "Unit Tests: Documentation Validation"
+
+on:
+  pull_request:
+    branches: [main]
+
+permissions:
+  contents: read
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
+  cancel-in-progress: true
+
+jobs:
+  documentation:
+    runs-on: ubuntu-latest
+    timeout-minutes: 10
+
+    steps:
+      - uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # v4.3.0
+        with:
+          persist-credentials: false
+
+      - name: Set up Python
+        uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5.6.0
+        with:
+          python-version: "3.12"
+
+      - name: Install Poetry
+        run: pip install 'poetry==2.3.2'
+
+      - name: Cache Poetry dependencies
+        uses: actions/cache@0057852bfaa89a56745cba8c7296529d2fc39830 # v4.3.0
+        with:
+          path: |
+            ~/.cache/pypoetry
+            ~/.cache/pip
+            .venv
+          key: ${{ runner.os }}-poetry-${{ hashFiles('poetry.lock') }}
+          restore-keys: |
+            ${{ runner.os }}-poetry-
+
+      - name: Install dependencies
+        run: |
+          poetry config virtualenvs.in-project true
+          poetry install --with dev,proxy-dev --extras "proxy semantic-router"
+          poetry run pip install google-genai==1.22.0 \
+            google-cloud-aiplatform==1.115.0 fastapi-offline==1.7.3 python-multipart==0.0.22 openapi-core==0.23.0
+
+      - name: Setup litellm-enterprise
+        run: |
+          poetry run pip install --force-reinstall --no-deps -e enterprise/
+
+      - name: Generate Prisma client
+        env:
+          PRISMA_BINARY_CACHE_DIR: ${{ runner.temp }}/prisma-cache
+        run: |
+          poetry run pip install nodejs-wheel-binaries==24.13.1
+          poetry run prisma generate --schema litellm/proxy/schema.prisma
+
+      # Run the same documentation tests that CircleCI ran (as direct Python scripts)
+      - name: Run documentation validation tests
+        run: |
+          poetry run python ./tests/documentation_tests/test_env_keys.py
+          poetry run python ./tests/documentation_tests/test_router_settings.py
+          poetry run python ./tests/documentation_tests/test_api_docs.py
+          poetry run python ./tests/documentation_tests/test_circular_imports.py
--- a/.github/workflows/test-unit-enterprise-routing.yml
+++ b/.github/workflows/test-unit-enterprise-routing.yml
@ -0,0 +1,24 @@
+name: "Unit Tests: Enterprise, Google GenAI & Routing"
+
+on:
+  pull_request:
+    branches: [main]
+
+permissions:
+  contents: read
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
+  cancel-in-progress: true
+
+jobs:
+  enterprise-routing:
+    uses: ./.github/workflows/_test-unit-base.yml
+    with:
+      test-path: >-
+        tests/test_litellm/enterprise
+        tests/test_litellm/google_genai
+        tests/test_litellm/router_utils
+        tests/test_litellm/router_strategy
+      workers: 2
+      reruns: 2
--- a/.github/workflows/test-unit-integrations.yml
+++ b/.github/workflows/test-unit-integrations.yml
@ -0,0 +1,20 @@
+name: "Unit Tests: Integrations (Callbacks & Logging)"
+
+on:
+  pull_request:
+    branches: [main]
+
+permissions:
+  contents: read
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
+  cancel-in-progress: true
+
+jobs:
+  integrations:
+    uses: ./.github/workflows/_test-unit-base.yml
+    with:
+      test-path: "tests/test_litellm/integrations"
+      workers: 2
+      reruns: 3
--- a/.github/workflows/test-unit-llm-providers.yml
+++ b/.github/workflows/test-unit-llm-providers.yml
@ -0,0 +1,29 @@
+name: "Unit Tests: LLM Provider Transformations"
+
+on:
+  pull_request:
+    branches: [main]
+
+permissions:
+  contents: read
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
+  cancel-in-progress: true
+
+jobs:
+  vertex-ai:
+    name: Vertex AI
+    uses: ./.github/workflows/_test-unit-base.yml
+    with:
+      test-path: "tests/test_litellm/llms/vertex_ai"
+      workers: 1
+      reruns: 2
+
+  other-providers:
+    name: All Other Providers
+    uses: ./.github/workflows/_test-unit-base.yml
+    with:
+      test-path: "tests/test_litellm/llms --ignore=tests/test_litellm/llms/vertex_ai"
+      workers: 2
+      reruns: 2
--- a/.github/workflows/test-unit-misc.yml
+++ b/.github/workflows/test-unit-misc.yml
@ -0,0 +1,31 @@
+name: "Unit Tests: MCP, Secrets, Containers & Misc"
+
+on:
+  pull_request:
+    branches: [main]
+
+permissions:
+  contents: read
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
+  cancel-in-progress: true
+
+jobs:
+  misc:
+    uses: ./.github/workflows/_test-unit-base.yml
+    with:
+      test-path: >-
+        tests/test_litellm/secret_managers
+        tests/test_litellm/a2a_protocol
+        tests/test_litellm/anthropic_interface
+        tests/test_litellm/completion_extras
+        tests/test_litellm/containers
+        tests/test_litellm/experimental_mcp_client
+        tests/test_litellm/images
+        tests/test_litellm/interactions
+        tests/test_litellm/passthrough
+        tests/test_litellm/vector_stores
+        tests/test_litellm/test_*.py
+      workers: 2
+      reruns: 2
--- a/.github/workflows/test-unit-proxy-auth.yml
+++ b/.github/workflows/test-unit-proxy-auth.yml
@ -0,0 +1,20 @@
+name: "Unit Tests: Proxy Auth & Key Management"
+
+on:
+  pull_request:
+    branches: [main]
+
+permissions:
+  contents: read
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
+  cancel-in-progress: true
+
+jobs:
+  proxy-auth:
+    uses: ./.github/workflows/_test-unit-base.yml
+    with:
+      test-path: "tests/test_litellm/proxy/auth tests/test_litellm/proxy/hooks tests/test_litellm/proxy/policy_engine tests/test_litellm/proxy/client"
+      workers: 2
+      reruns: 2
--- a/.github/workflows/test-unit-proxy-db.yml
+++ b/.github/workflows/test-unit-proxy-db.yml
@ -0,0 +1,45 @@
+name: "Unit Tests: Proxy DB Operations"
+
+# Uses DATABASE_URL secret — only runs on trusted branches, not PRs.
+on:
+  push:
+    branches: [main, "litellm_*"]
+
+permissions:
+  contents: read
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: true
+
+jobs:
+  proxy-db:
+    strategy:
+      fail-fast: false  # let every test group run to completion even if another group fails
+      matrix:
+        include:
+          # Key generation tests must NOT run in parallel (event loop conflicts with logging worker)
+          - test-group: key-generation
+            test-path: "tests/proxy_unit_tests/test_key_generate_prisma.py"
+            workers: 0  # presumably makes the reusable workflow run pytest serially - TODO confirm
+            timeout: 30  # forwarded to the reusable workflow as timeout-minutes
+          - test-group: auth-checks
+            test-path: "tests/proxy_unit_tests/test_auth_checks.py tests/proxy_unit_tests/test_user_api_key_auth.py"
+            workers: 8
+            timeout: 20
+          - test-group: remaining  # whole directory minus the files claimed by the two groups above
+            test-path: "tests/proxy_unit_tests --ignore=tests/proxy_unit_tests/test_key_generate_prisma.py --ignore=tests/proxy_unit_tests/test_auth_checks.py --ignore=tests/proxy_unit_tests/test_user_api_key_auth.py"
+            workers: 8
+            timeout: 20
+    uses: ./.github/workflows/_test-unit-services-base.yml
+    with:
+      test-path: ${{ matrix.test-path }}
+      workers: ${{ matrix.workers }}
+      reruns: 2
+      timeout-minutes: ${{ matrix.timeout }}
+      enable-redis: false
+      enable-postgres: true
+    secrets:
+      DATABASE_URL: ${{ secrets.DATABASE_URL }}
+      POSTGRES_USER: ${{ secrets.POSTGRES_USER }}
+      POSTGRES_PASSWORD: ${{ secrets.POSTGRES_PASSWORD }}
--- a/.github/workflows/test-unit-proxy-endpoints.yml
+++ b/.github/workflows/test-unit-proxy-endpoints.yml
@ -0,0 +1,35 @@
+name: "Unit Tests: Proxy API Endpoints"
+
+on:
+  pull_request:
+    branches: [main]
+
+permissions:
+  contents: read
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
+  cancel-in-progress: true
+
+jobs:
+  proxy-endpoints:
+    uses: ./.github/workflows/_test-unit-base.yml
+    with:
+      test-path: >-
+        tests/test_litellm/proxy/management_endpoints
+        tests/test_litellm/proxy/guardrails
+        tests/test_litellm/proxy/management_helpers
+        tests/test_litellm/proxy/anthropic_endpoints
+        tests/test_litellm/proxy/google_endpoints
+        tests/test_litellm/proxy/openai_files_endpoint
+        tests/test_litellm/proxy/response_api_endpoints
+        tests/test_litellm/proxy/image_endpoints
+        tests/test_litellm/proxy/vector_store_endpoints
+        tests/test_litellm/proxy/agent_endpoints
+        tests/test_litellm/proxy/discovery_endpoints
+        tests/test_litellm/proxy/health_endpoints
+        tests/test_litellm/proxy/public_endpoints
+        tests/test_litellm/proxy/prompts
+        tests/test_litellm/proxy/ui_crud_endpoints
+      workers: 2
+      reruns: 2
--- a/.github/workflows/test-unit-proxy-infra.yml
+++ b/.github/workflows/test-unit-proxy-infra.yml
@ -0,0 +1,28 @@
+name: "Unit Tests: Proxy Infrastructure"
+
+on:
+  pull_request:
+    branches: [main]
+
+permissions:
+  contents: read
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
+  cancel-in-progress: true
+
+jobs:
+  proxy-infra:
+    uses: ./.github/workflows/_test-unit-base.yml
+    with:
+      test-path: >-
+        tests/test_litellm/proxy/db
+        tests/test_litellm/proxy/middleware
+        tests/test_litellm/proxy/spend_tracking
+        tests/test_litellm/proxy/pass_through_endpoints
+        tests/test_litellm/proxy/_experimental
+        tests/test_litellm/proxy/experimental
+        tests/test_litellm/proxy/common_utils
+        tests/test_litellm/proxy/test_*.py
+      workers: 2
+      reruns: 2
--- a/.github/workflows/test-unit-proxy-legacy.yml
+++ b/.github/workflows/test-unit-proxy-legacy.yml
@ -0,0 +1,96 @@
+name: "Unit Tests: Proxy Legacy Tests"
+
+on:
+  pull_request:
+    branches: [main]
+
+permissions:
+  contents: read
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
+  cancel-in-progress: true
+
+jobs:
+  test:
+    runs-on: ubuntu-latest
+    timeout-minutes: 20
+    strategy:
+      fail-fast: false  # let every test group run to completion even if another group fails
+      matrix:
+        test-group:  # each entry: a display name plus space-separated pytest paths/globs
+          - name: "auth-and-jwt"
+            path: "tests/proxy_unit_tests/test_[a-j]*.py"
+          - name: "key-generation"  # NOTE(review): [k-o] matches test_key_generate_prisma.py, which test-unit-proxy-db.yml runs serially; this job uses -n 2 - confirm that is safe
+            path: "tests/proxy_unit_tests/test_[k-o]*.py"
+          - name: "proxy-config"
+            path: "tests/proxy_unit_tests/test_prisma*.py tests/proxy_unit_tests/test_project*.py tests/proxy_unit_tests/test_prompt*.py tests/proxy_unit_tests/test_proxy_[c-r]*.py"
+          - name: "proxy-server"
+            path: "tests/proxy_unit_tests/test_proxy_server.py"
+          - name: "proxy-server-extras"
+            path: "tests/proxy_unit_tests/test_proxy_server_*.py tests/proxy_unit_tests/test_proxy_setting_guardrails.py"
+          - name: "proxy-utils"
+            path: "tests/proxy_unit_tests/test_proxy_utils.py"
+          - name: "proxy-token-counter"
+            path: "tests/proxy_unit_tests/test_proxy_token_counter.py"
+          - name: "proxy-response-and-misc"
+            path: "tests/proxy_unit_tests/test_[r-t]*.py"
+          - name: "proxy-user-auth-and-spend"
+            path: "tests/proxy_unit_tests/test_[u-z]*.py"
+
+    name: ${{ matrix.test-group.name }}
+
+    steps:
+      - uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # v4.3.0
+        with:
+          persist-credentials: false
+
+      - name: Set up Python
+        uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5.6.0
+        with:
+          python-version: "3.12"
+
+      - name: Install Poetry
+        run: pip install 'poetry==2.3.2'
+
+      - name: Cache Poetry dependencies
+        uses: actions/cache@0057852bfaa89a56745cba8c7296529d2fc39830 # v4.3.0
+        with:
+          path: |
+            ~/.cache/pypoetry
+            ~/.cache/pip
+            .venv
+          key: ${{ runner.os }}-poetry-${{ hashFiles('poetry.lock') }}
+          restore-keys: |
+            ${{ runner.os }}-poetry-
+
+      - name: Install dependencies
+        run: |
+          poetry config virtualenvs.in-project true
+          poetry install --with dev,proxy-dev --extras "proxy semantic-router"
+          poetry run pip install google-genai==1.22.0 \
+            google-cloud-aiplatform==1.115.0 fastapi-offline==1.7.3 python-multipart==0.0.22 openapi-core==0.23.0
+
+      - name: Setup litellm-enterprise
+        run: |
+          poetry run pip install --force-reinstall --no-deps -e enterprise/
+
+      - name: Generate Prisma client
+        env:
+          PRISMA_BINARY_CACHE_DIR: ${{ runner.temp }}/prisma-cache
+        run: |
+          poetry run pip install nodejs-wheel-binaries==24.13.1
+          poetry run prisma generate --schema litellm/proxy/schema.prisma
+
+      - name: Run tests - ${{ matrix.test-group.name }}
+        env:  # hand the matrix value to the script via the environment instead of interpolating it into the shell text
+          TEST_PATH: ${{ matrix.test-group.path }}  # expanded unquoted below so the shell splits it on spaces and expands the globs
+        run: |
+          poetry run pytest ${TEST_PATH} \
+            --tb=short -vv \
+            --maxfail=10 \
+            -n 2 \
+            --reruns 1 \
+            --reruns-delay 1 \
+            --dist=loadscope \
+            --durations=20
--- a/.github/workflows/test-unit-responses-caching-types.yml
+++ b/.github/workflows/test-unit-responses-caching-types.yml
@ -0,0 +1,20 @@
+name: "Unit Tests: Responses, Caching & Types"
+
+on:
+  pull_request:
+    branches: [main]
+
+permissions:
+  contents: read
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
+  cancel-in-progress: true
+
+jobs:
+  responses-caching-types:
+    uses: ./.github/workflows/_test-unit-base.yml
+    with:
+      test-path: "tests/test_litellm/responses tests/test_litellm/caching tests/test_litellm/types"
+      workers: 2
+      reruns: 2
--- a/.github/workflows/test-unit-security.yml
+++ b/.github/workflows/test-unit-security.yml
@ -0,0 +1,28 @@
+name: "Unit Tests: Security"
+
+# Uses DATABASE_URL secret — only runs on trusted branches, not PRs.
+on:
+  push:
+    branches: [main, "litellm_*"]
+
+permissions:
+  contents: read
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: true
+
+jobs:
+  security:
+    uses: ./.github/workflows/_test-unit-services-base.yml
+    with:
+      test-path: "tests/proxy_security_tests/"
+      workers: 1
+      reruns: 2
+      timeout-minutes: 20
+      enable-redis: false
+      enable-postgres: true
+    secrets:
+      DATABASE_URL: ${{ secrets.DATABASE_URL }}
+      POSTGRES_USER: ${{ secrets.POSTGRES_USER }}
+      POSTGRES_PASSWORD: ${{ secrets.POSTGRES_PASSWORD }}
--- a/.github/workflows/test_server_root_path.yml
+++ b/.github/workflows/test_server_root_path.yml
@ -17,13 +17,15 @@ jobs:

    steps:
      - name: Checkout repository
-        uses: actions/checkout@v4
+        uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # v4.3.0
+        with:
+          persist-credentials: false

      - name: Set up Docker Buildx
-        uses: docker/setup-buildx-action@v3
+        uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # v3.12

      - name: Build Docker image
-        uses: docker/build-push-action@v5
+        uses: docker/build-push-action@0adf9959216b96bec444f325f1e493d4aa344497 # v6.14
        with:
          context: .
          file: ./docker/Dockerfile.non_root
--- a/.github/workflows/zizmor.yml
+++ b/.github/workflows/zizmor.yml
@ -0,0 +1,31 @@
+name: GitHub Actions Security Analysis
+
+on:
+  push:
+    branches: [main]
+  pull_request:
+    branches: [main]
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
+  cancel-in-progress: true
+
+permissions: {}
+
+jobs:
+  zizmor:
+    name: zizmor
+    runs-on: ubuntu-latest
+    timeout-minutes: 5
+    permissions:
+      security-events: write
+      contents: read
+      actions: read
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # v4.3.0
+        with:
+          persist-credentials: false
+
+      - name: Run zizmor
+        uses: zizmorcore/zizmor-action@71321a20a9ded102f6e9ce5718a2fcec2c4f70d8 # v0.5.2
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@ -14,12 +14,12 @@ repos:
        types: [python]
        files: (litellm/|litellm_proxy_extras/|enterprise/).*\.py
        exclude: ^litellm/__init__.py$
-    # -   id: black
-    #     name: black
-    #     entry: poetry run black
-    #     language: system
-    #     types: [python]
-    #     files: (litellm/|litellm_proxy_extras/|enterprise/).*\.py
+    -   id: black
+        name: black
+        entry: poetry run black
+        language: system
+        types: [python]
+        files: (litellm/|litellm_proxy_extras/).*\.py
 -   repo: https://github.com/pycqa/flake8
    rev: 7.0.0  # The version of flake8 to use
    hooks:
--- a/.semgrep/rules/security/no-claude-directory.yml
+++ b/.semgrep/rules/security/no-claude-directory.yml
@ -0,0 +1,18 @@
+rules:
+  - id: no-claude-directory-committed
+    message: >
+      .claude/ directory must not be committed to the repository.
+      It contains local Claude Code settings (permissions, worktree paths) that are
+      developer-machine-specific and may expose internal paths or credentials.
+      Add .claude/ to .gitignore instead.
+    severity: ERROR
+    languages: [generic]
+    paths:
+      include:
+        - "/.claude/**"
+        - "/.claude/*"
+    pattern-regex: '[\s\S]+'
+    metadata:
+      category: security
+      tags: [supply-chain, secrets]
+      confidence: HIGH
--- a/README.md
+++ b/README.md
@ -266,6 +266,7 @@ Support for more providers. Missing a provider or LLM Platform, raise a [feature
 <table>
  <tr>
    <td><img height="60" alt="Stripe" src="https://github.com/user-attachments/assets/f7296d4f-9fbd-460d-9d05-e4df31697c4b" /></td>
+    <td><img height="60" alt="image" src="https://github.com/user-attachments/assets/436fca71-988b-40bb-b5fe-8450c80fdbd0" /></td>
    <td><img height="60" alt="Google ADK" src="https://github.com/user-attachments/assets/caf270a2-5aee-45c4-8222-41a2070c4f19" /></td>
    <td><img height="60" alt="Greptile" src="https://github.com/user-attachments/assets/0be4bd8a-7cfa-48d3-9090-f415fe948280" /></td>
    <td><img height="60" alt="OpenHands" src="https://github.com/user-attachments/assets/a6150c4c-149e-4cae-888b-8b92be6e003f" /></td>
@ -402,7 +403,7 @@ Support for more providers. Missing a provider or LLM Platform, raise a [feature
 # Enterprise
 For companies that need better security, user management and professional support

-[Talk to founders](https://calendly.com/d/cx9p-5yf-2nm/litellm-introductions)
+[Talk to founders](https://enterprise.litellm.ai/demo)

 This covers:
 - ✅ **Features under the [LiteLLM Commercial License](https://docs.litellm.ai/docs/proxy/enterprise):**
@ -452,7 +453,6 @@ All these checks must pass before your PR can be merged.
 - [Schedule Demo 👋](https://calendly.com/d/4mp-gd3-k5k/berriai-1-1-onboarding-litellm-hosted-version)
 - [Community Discord 💭](https://discord.gg/wuPM9dRgDw)
 - [Community Slack 💭](https://www.litellm.ai/support)
- Our numbers 📞 +1 (770) 8783-106 / ‭+1 (412) 618-6238‬
 - Our emails ✉️ ishaan@berri.ai / krrish@berri.ai

 # Why did we build this
--- a/ci_cd/security_scans.sh
+++ b/ci_cd/security_scans.sh
@ -10,13 +10,13 @@ echo "Starting security scans for LiteLLM..."
 # Function to install Trivy and required tools
 install_trivy() {
    echo "Installing Trivy and required tools..."
+    TRIVY_VERSION="0.35.0"  # pinned Trivy release; installed from the GitHub release .deb below
    sudo apt-get update
-    sudo apt-get install -y wget apt-transport-https gnupg lsb-release jq curl bsdmainutils
-    wget -qO - https://aquasecurity.github.io/trivy-repo/deb/public.key | sudo apt-key add -
-    echo "deb https://aquasecurity.github.io/trivy-repo/deb $(lsb_release -sc) main" | sudo tee -a /etc/apt/sources.list.d/trivy.list
-    sudo apt-get update
-    sudo apt-get install trivy
-    echo "Trivy and required tools installed successfully"
+    sudo apt-get install -y wget jq curl bsdmainutils
+    wget -qO trivy.deb "https://github.com/aquasecurity/trivy/releases/download/v${TRIVY_VERSION}/trivy_${TRIVY_VERSION}_Linux-64bit.deb"  # NOTE(review): download is not checksum-verified - consider checking it against the release's published checksums
+    sudo dpkg -i trivy.deb
+    rm trivy.deb
+    echo "Trivy ${TRIVY_VERSION} installed successfully"
 }

 # Function to install Grype
@ -163,6 +163,9 @@ run_grype_scans() {
        "CVE-2026-25639" # axios - full fix requires 1.x major version bump; pinned to >=0.30.2 to clear other axios CVEs, upgrade to 1.x in follow-up
        "CVE-2026-2297" # Python 3.13 SourcelessFileLoader audit hook bypass - no fix available in base image
        "GHSA-qffp-2rhf-9h96" # tar hardlink path traversal - from nodejs_wheel bundled npm, not used in application runtime code
+        "CVE-2026-2673" # OpenSSL 3.6.1 TLS 1.3 key exchange group negotiation issue - no fix available yet
+        "CVE-2026-3644" # Python 3.13 vulnerability - no fix available in base image
+        "CVE-2026-4224" # Python 3.13 Expat parser stack overflow in ElementDeclHandler - no fix available in base image
    )

    # Build JSON array of allowlisted CVE IDs for jq
--- a/cookbook/benchmark/readme.md
+++ b/cookbook/benchmark/readme.md
@ -178,4 +178,4 @@ Benchmark Results for 'When will BerriAI IPO?':
 ```

 ## Support 
-**🤝 Schedule a 1-on-1 Session:** Book a [1-on-1 session](https://calendly.com/d/cx9p-5yf-2nm/litellm-introductions) with Krrish and Ishaan, the founders, to discuss any issues, provide feedback, or explore how we can improve LiteLLM for you.
+**🤝 Schedule a 1-on-1 Session:** Book a [1-on-1 session](https://enterprise.litellm.ai/demo) with Krrish and Ishaan, the founders, to discuss any issues, provide feedback, or explore how we can improve LiteLLM for you.
--- a/cookbook/codellama-server/README.MD
+++ b/cookbook/codellama-server/README.MD
@ -143,7 +143,6 @@ All responses from the server are returned in the following format (for all LLM

 - [Our calendar 👋](https://calendly.com/d/4mp-gd3-k5k/berriai-1-1-onboarding-litellm-hosted-version)
 - [Community Discord 💭](https://discord.gg/wuPM9dRgDw)
- Our numbers 📞 +1 (770) 8783-106 / +1 (412) 618-6238
 - Our emails ✉️ ishaan@berri.ai / krrish@berri.ai

 ## Roadmap
--- a/cookbook/litellm_proxy_server/readme.md
+++ b/cookbook/litellm_proxy_server/readme.md
@ -164,7 +164,6 @@ All responses from the server are returned in the following format (for all LLM

 - [Our calendar 👋](https://calendly.com/d/4mp-gd3-k5k/berriai-1-1-onboarding-litellm-hosted-version)
 - [Community Discord 💭](https://discord.gg/wuPM9dRgDw)
- Our numbers 📞 +1 (770) 8783-106 / +1 (412) 618-6238
 - Our emails ✉️ ishaan@berri.ai / krrish@berri.ai

 ## Roadmap
--- a/docs/my-website/blog/ci_cd_v2_improvements/index.md
+++ b/docs/my-website/blog/ci_cd_v2_improvements/index.md
@ -0,0 +1,55 @@
+---
+slug: ci-cd-v2-improvements
+title: "Announcing CI/CD v2 for LiteLLM"
+date: 2026-03-30T21:30:00
+authors:
+  - krrish
+description: "CI/CD v2 introduces isolated environments, stronger security gates, and safer release separation for LiteLLM."
+tags: [engineering, ci-cd, security]
+hide_table_of_contents: false
+---
+
+import Image from '@theme/IdealImage';
+
+CI/CD v2 is now live for LiteLLM.
+
+<Image
+  img={require('../../img/ci_cd_architecture.png')}
+  style={{width: '700px', height: 'auto', display: 'block'}}
+/>
+
+<br/>
+Building on the roadmap from our [security incident](https://docs.litellm.ai/blog/security-townhall-updates#roadmap), CI/CD v2 introduces isolated environments, stronger security gates, and safer release separation for LiteLLM.
+
+## What changed
+
+- Security scans and unit tests run in isolated environments.
+- Validation and release are separated into different repositories, making it harder for an attacker to reach release credentials.
+- Trusted Publishing for PyPI releases - this means no long-lived credentials are used to publish releases.
+- Immutable Docker release tags - this means Docker release tags cannot be tampered with after they are published ([Learn more](https://docs.docker.com/docker-hub/repos/manage/hub-images/immutable-tags/)). Note: the same work is planned for GHCR Docker releases.
+
+## What's next
+
+Moving forward, we plan on:
+- Adopting the OpenSSF Baseline (a set of security criteria that projects should meet to demonstrate a strong security posture - [Learn more](https://baseline.openssf.org/versions/2026-02-19.html))
+  - We've added Scorecard and Allstar to our GitHub
+
+- Adding SLSA Build Provenance to our CI/CD pipeline - this lets users independently verify that a release came from us and prevents silent modification of releases after they are published.
+
+
+We hope this gives you confidence that the releases you use are safe and come from us.
+
+
+## The principle
+
+The new CI/CD pipeline is designed to be more secure and reliable, and reflects the following principles:
+
+- **Limit** what each package can access
+- **Reduce** the number of sensitive environment variables
+- **Avoid** compromised packages
+- **Prevent** release tampering
+
+
+## How to help
+
+Help us plan April's stability sprint - https://github.com/BerriAI/litellm/issues/24825
--- a/docs/my-website/blog/gpt_5_4_mini_nano/index.md
+++ b/docs/my-website/blog/gpt_5_4_mini_nano/index.md
@ -0,0 +1,106 @@
+---
+slug: gpt_5_4_mini_nano
+title: "Day 0 Support: GPT-5.4-mini and GPT-5.4-nano"
+date: 2026-03-17T10:00:00
+authors:
+  - name: Sameer Kankute
+    title: SWE @ LiteLLM (LLM Translation)
+    url: https://www.linkedin.com/in/sameer-kankute/
+    image_url: https://pbs.twimg.com/profile_images/2001352686994907136/ONgNuSk5_400x400.jpg
+  - name: Krrish Dholakia
+    title: "CEO, LiteLLM"
+    url: https://www.linkedin.com/in/krish-d/
+    image_url: https://pbs.twimg.com/profile_images/1298587542745358340/DZv3Oj-h_400x400.jpg
+  - name: Ishaan Jaff
+    title: "CTO, LiteLLM"
+    url: https://www.linkedin.com/in/reffajnaahsi/
+    image_url: https://pbs.twimg.com/profile_images/1613813310264340481/lz54oEiB_400x400.jpg
+description: "GPT-5.4-mini and GPT-5.4-nano model support in LiteLLM"
+tags: [openai, gpt-5.4-mini, gpt-5.4-nano, completion]
+hide_table_of_contents: false
+---
+
+import Tabs from '@theme/Tabs';
+import TabItem from '@theme/TabItem';
+
+LiteLLM now supports GPT-5.4-mini and GPT-5.4-nano — cost-effective models for simple completions and high-throughput workloads.
+
+:::note
+If you're on **v1.82.3-stable** or above, you don't need any update to use these models.
+:::
+
+## Usage
+
+<Tabs>
+<TabItem value="proxy" label="LiteLLM Proxy">
+
+**1. Setup config.yaml**
+
+```yaml
+model_list:
+  - model_name: gpt-5.4-mini
+    litellm_params:
+      model: openai/gpt-5.4-mini
+      api_key: os.environ/OPENAI_API_KEY
+  - model_name: gpt-5.4-nano
+    litellm_params:
+      model: openai/gpt-5.4-nano
+      api_key: os.environ/OPENAI_API_KEY
+```
+
+**2. Start the proxy**
+
+```bash
+litellm --config /path/to/config.yaml
+```
+
+**3. Test it**
+
+```bash
+# GPT-5.4-mini
+curl -X POST "http://localhost:4000/v1/chat/completions" \
+  -H "Content-Type: application/json" \
+  -H "Authorization: Bearer $LITELLM_KEY" \
+  -d '{
+    "model": "gpt-5.4-mini",
+    "messages": [{"role": "user", "content": "What is the capital of France?"}]
+  }'
+
+# GPT-5.4-nano
+curl -X POST "http://localhost:4000/v1/chat/completions" \
+  -H "Content-Type: application/json" \
+  -H "Authorization: Bearer $LITELLM_KEY" \
+  -d '{
+    "model": "gpt-5.4-nano",
+    "messages": [{"role": "user", "content": "What is 2 + 2?"}]
+  }'
+```
+
+</TabItem>
+<TabItem value="sdk" label="LiteLLM SDK">
+
+```python
+from litellm import completion
+
+# GPT-5.4-mini
+response = completion(
+    model="openai/gpt-5.4-mini",
+    messages=[{"role": "user", "content": "What is the capital of France?"}],
+)
+print(response.choices[0].message.content)
+
+# GPT-5.4-nano
+response = completion(
+    model="openai/gpt-5.4-nano",
+    messages=[{"role": "user", "content": "What is 2 + 2?"}],
+)
+print(response.choices[0].message.content)
+```
+
+</TabItem>
+</Tabs>
+
+## Notes
+
+- Both models support function calling, vision, and tool-use — see the [OpenAI provider docs](../../docs/providers/openai) for advanced usage.
+- GPT-5.4-nano is the most cost-effective option for simple tasks; GPT-5.4-mini offers a balance of speed and capability.
--- a/docs/my-website/blog/guardrail_logging_secret_exposure_incident/index.md
+++ b/docs/my-website/blog/guardrail_logging_secret_exposure_incident/index.md
@ -0,0 +1,78 @@
+---
+slug: guardrail-logging-secret-exposure-incident
+title: "Incident Report: Guardrail logging exposed secret headers in spend logs and traces"
+date: 2026-03-18T10:00:00
+authors:
+  - litellm
+tags: [incident-report, security, guardrails]
+hide_table_of_contents: false
+---
+
+**Date:** March 18, 2026<br/>
+**Duration:** Unknown<br/>
+**Severity:** High<br/>
+**Status:** Resolved
+
+## Summary
+
+When a custom guardrail returned the full LiteLLM request/data dictionary, the guardrail response logged by LiteLLM could include `secret_fields.raw_headers`, including plaintext `Authorization` headers containing API keys or other credentials.
+
+This information could then propagate to logging and observability surfaces that consume guardrail metadata, including:
+
+- **Spend logs in the LiteLLM UI:** visible to admins with access to spend-log data
+- **OpenTelemetry traces:** visible to anyone with access to the relevant telemetry backend
+
+LLM calls, proxy routing, and provider execution were not blocked by this bug. The impact was exposure of sensitive request headers in observability and logging paths.
+
+{/* truncate */}
+
+---
+
+## Background
+
+LiteLLM keeps internal request data (including request headers) for use during the call. That data is not meant to be written to logs or telemetry.
+
+When custom guardrails run, their outcomes are logged so they can appear in spend logs, OpenTelemetry traces, and other observability backends. If a guardrail returned the full request payload instead of a minimal result, that internal request data could be included in what was logged. Before the fix, the guardrail logging path did not strip that data before sending it to those systems.
+
+```mermaid
+flowchart TD
+    inboundRequest["1. Incoming proxy request"] --> storeSecrets["2. Store internal request data"]
+    storeSecrets --> guardrailRuns["3. Custom guardrail runs"]
+    guardrailRuns --> fullDataReturn["4. Guardrail returns full request payload"]
+    fullDataReturn --> loggingBuild["5. Build guardrail log payload"]
+    loggingBuild --> spendLogs["6a. Persist to spend logs / UI"]
+    loggingBuild --> otelTraces["6b. Attach to OTEL guardrail spans"]
+```
+
+---
+
+## Root Cause
+
+The root cause was incomplete sanitization in the guardrail logging path. When building the payload that gets sent to spend logs and traces, LiteLLM prepared guardrail responses for logging but did not strip internal request data (such as headers) from them. If a guardrail returned a response that included that data, it was passed through to the logging and observability systems unchanged.
+
+---
+
+## Impact
+
+This issue required all of the following:
+
+1. A custom guardrail returned the full LiteLLM request/data dictionary, or another response object containing `secret_fields`.
+2. LiteLLM logged that guardrail response through the standard guardrail logging path.
+3. An operator, admin, or telemetry consumer had access to the resulting logs or traces.
+
+When those conditions were met, sensitive values could become visible through:
+
+- **Spend logs / UI responses:** guardrail metadata could be included in spend-log payloads rendered in the admin UI.
+- **OpenTelemetry traces:** `guardrail_response` could be written as a span attribute on guardrail spans.
+- **Other downstream observability backends:** any integration consuming the same guardrail metadata could receive the leaked values.
+
+This was a logging and telemetry exposure bug. It did not let callers bypass auth, access other tenants directly, or change model behavior, but it could expose plaintext credentials to people with access to those observability systems.
+
+---
+
+## Guidance For Users
+
+- Upgrade to LiteLLM 1.82.3+.
+- If you operated custom guardrails that return the full request/data dict, review whether spend logs or telemetry traces were retained during the affected period.
+- Rotate any credentials that may have appeared in `Authorization` or other forwarded request headers in those systems.
+- Apply least-privilege access controls to spend-log views and telemetry backends that may contain request-derived metadata.
--- a/docs/my-website/blog/security_townhall_updates/index.md
+++ b/docs/my-website/blog/security_townhall_updates/index.md
@ -0,0 +1,190 @@
+---
+slug: security-townhall-updates
+title: "Security Townhall Updates"
+date: 2026-03-27T12:00:00
+authors:
+  - krrish
+  - ishaan-alt
+description: "What happened, what we've done, and what comes next for LiteLLM's release and security processes."
+tags: [security, incident-report]
+hide_table_of_contents: false
+---
+
+import Image from '@theme/IdealImage';
+
+Thank you to everyone who joined our town hall.
+
+We wanted to use that time to walk through what we know, what we've done so far, and how we're improving LiteLLM's release and security processes going forward. This post is a written version of that update. [Slides available here](https://drive.google.com/file/d/17hsSG7nk-OYL7VRCTbTa7McrWREtS9OO/view?usp=sharing)
+
+{/* truncate */}
+
+## What happened
+
+On March 24, 2026 at 10:39 UTC, LiteLLM v1.82.7 was pushed to PyPI. Version v1.82.8 was published soon after. Those packages were live for about 40 minutes before being quarantined by PyPI. By 16:00 UTC, the LiteLLM team had worked with PyPI to delete the affected packages.
+
+At this point, our understanding is that this was a supply-chain incident affecting those two published versions.
+
+## How did this happen?
+
+Our understanding is that the issue came from the [compromised Trivy security scanner](https://www.aquasec.com/blog/trivy-supply-chain-attack-what-you-need-to-know/) dependency in our CI/CD pipeline.
+
+<Image 
+  img={require('../../img/shared_ci_cd_environment.png')}
+  style={{width: '500px', height: '400px', display: 'block'}}
+/>
+
+There were three major contributing factors:
+
+### 1. Shared CI/CD environment
+
+At the time, everything was running on CircleCI, and all steps shared a common environment. That increased blast radius: if one component was compromised, it could potentially access credentials or context intended for other parts of the pipeline.
+
+### 2. Static credentials in environment variables
+
+Release credentials, including credentials for PyPI, GHCR, and Docker publishing, were available as static secrets in the environment. That meant a compromised step could access long-lived release credentials.
+
+### 3. Unpinned Trivy dependency
+
+In our security scanning component, we had an unpinned Trivy dependency. Our present understanding is that a compromised Trivy package ran during the scan, had access to environment variables, and enabled attackers to obtain those credentials.
+
+**In summary:** a compromised package in CI had access to secrets it should not have had, and those secrets were then used in the release path.
+
+## What we've already done
+
+
+In the last 3 days, we've taken the following steps:
+
+### 1. Minimize Scope of Impact
+
+#### Prevented further key abuse
+
+We deleted or rotated all impacted or adjacent secret keys, including PyPI, GitHub, Docker, and related credentials. Out of an abundance of caution, we've also rotated LiteLLM maintainer accounts. 
+
+#### Prevent branch attacks
+
+We removed roughly 6,000 open branches and added an auto-deletion policy for branches merged into `main`. This reduces the surface area for branch-based abuse.
+
+#### Pinned CI/CD dependencies
+
+We've pinned all GitHub Actions, and are working on pinning all CircleCI dependencies as well.
+
+#### Paused releases
+
+We've paused new releases until we've confirmed codebase security and put stronger release controls in place.
+
+### 2. Secured LiteLLM
+
+#### Forensic analysis
+
+We are working with Google's Mandiant cybersecurity team to confirm the source of the attack and verify the security of the codebase. We also confirmed that no malicious code was pushed to `main`.
+
+#### Confirm Application Security
+
+In parallel, we are working with whitehat hackers at [Veria Labs](https://verialabs.com/) to verify application security and review improvements to our CI/CD process.
+
+We have also confirmed that the last 20 LiteLLM releases contain no indicators of compromise, and that no unauthenticated attacks can be made against LiteLLM Proxy based on our current investigation. [Check Security Blog for release verification.](https://docs.litellm.ai/blog/security-update-march-2026#verified-safe-versions)
+
+#### Created a security working group
+
+We created a new security working group inside LiteLLM focused on:
+
+- Building threat models
+- Auditing the build process and dependencies
+
+If you're interested in joining the security working group, please file an issue [here](https://github.com/BerriAI/litellm-security-wg).
+
+### 3. Improved CI/CD
+
+We've already begun making structural changes to how releases are built and published. These align with our goals (covered in the next section) around isolated environments, ephemeral credentials, and release auditing.
+
+## Roadmap
+
+We plan on following 4 guiding principles for our new CI/CD pipeline:
+
+1. **Limit** what each package can access
+2. **Reduce** the number of sensitive environment variables
+3. **Avoid** compromised packages
+4. **Prevent** release tampering
+
+
+### Isolated environments
+
+<Image 
+  img={require('../../img/isolated_ci_cd_environments.png')}
+  style={{width: '400px', height: 'auto'}}
+/>
+
+We are breaking our CI/CD into 4 semantic concepts:
+
+1. Unit tests
+2. Integration tests
+3. Security scans
+4. Release publishing
+
+Each of these will run in its own isolated environment.
+
+This will limit the damage that any single compromised component can cause.
+
+### Ephemeral credentials
+
+We plan to move to ephemeral credentials for PyPI (Trusted Publisher) and GHCR (Token-based authentication) releases. This will reduce the risk of credentials being leaked or compromised.
+
+We have already begun doing this: 
+
+- PyPI Trusted Publisher on GitHub Actions [PR](https://github.com/BerriAI/litellm/pull/24654)
+- GHCR Token-based authentication on GitHub Actions [PR](https://github.com/BerriAI/litellm/pull/24683)
+
+### Release auditing
+
+Our goal is to allow users to independently verify that a release came from us and prevent silent modifications of releases after they are published.
+
+This will ensure your releases are safe, even when:
+- Stolen PyPI/GHCR credentials are used to publish malicious releases
+- Tampered registry artifacts are published
+- Tag mutations are made after the release is published
+
+We believe that [Cosign](https://github.com/sigstore/cosign) is a good fit for this, and have already begun working on it [PR](https://github.com/BerriAI/litellm/pull/24683).
+
+
+### Avoid Compromised Packages
+
+- Move to pinned, verified SHAs for packages and actions used in CI/CD, avoiding `latest` wherever possible. 
+- Add a cooldown period before upgrading to a new version of a package - allows more time to investigate and verify the new version. 
+
+We've added zizmor to help us catch issues such as unpinned dependencies and credential leakage. [commit](https://github.com/BerriAI/litellm/commit/a671275f5c5b0e1fb1adacdf3b6ef779aaa5d56c).
+
+
+## Frequently Asked Questions
+
+**Q: Did you observe any lateral movement into your corporate environment during this incident?**
+
+A: No. Our investigation to date, conducted in coordination with external security experts, has found no evidence of lateral movement into our internal corporate systems. The incident was isolated to the CI/CD pipeline and the release path for specific versions (v1.82.7 and v1.82.8). As a proactive measure, we have rotated all potentially impacted or adjacent secrets—including PyPI, GitHub, and Docker credentials—and updated maintainer account security to ensure continued isolation.
+
+**Q: Do you expect delays in future product releases due to these new security measures?**
+
+A: We are committed to balancing security with speed. While we have temporarily paused releases to implement stronger controls, we are moving quickly to automate our new security protocols. We are currently implementing isolated CI/CD environments, ephemeral credentials (via Trusted Publishers), and release auditing with Cosign. These improvements are designed to be integrated into our automated pipeline, allowing us to maintain a fast release cadence while ensuring every package is verified and secure.
+
+**Q: Were older packages impacted?**
+
+A: Our current findings show no indicators of compromise in the last 20 versions of LiteLLM. This was manually verified by our team and independently reviewed by Veria Labs.
+
+We have also published the verified versions for users to use. [Check Security Blog for release verification.](https://docs.litellm.ai/blog/security-update-march-2026#verified-safe-versions)
+
+
+
+## Questions & Support 
+
+If you believe your systems may be affected, contact us immediately:
+
+- **Security:** security@berri.ai
+- **Support:** support@berri.ai
+- **Slack:** Reach out to the LiteLLM team directly [here](https://join.slack.com/t/litellmossslack/shared_invite/zt-3o7nkuyfr-p_kbNJj8taRfXGgQI1~YyA)
+
+## Hiring 
+
+We are currently hiring for: 
+
+- DevOps Engineer - to keep CI/CD secure and running smoothly
+- Security Engineer - to keep the application secure
+
+If you're interested in joining, please apply [here](https://jobs.ashbyhq.com/litellm).
--- a/docs/my-website/blog/security_townhall_updates/shared_ci_cd_environment.png
+++ b/docs/my-website/blog/security_townhall_updates/shared_ci_cd_environment.png
--- a/docs/my-website/blog/security_update_march_2026/index.md
+++ b/docs/my-website/blog/security_update_march_2026/index.md
@ -0,0 +1,786 @@
+---
+slug: security-update-march-2026
+title: "Security Update: Suspected Supply Chain Incident"
+date: 2026-03-24T14:00:00
+authors:
+  - krrish
+  - ishaan-alt
+description: "As of 2:00 PM ET on March 24, 2026"
+tags: [security, incident-report]
+hide_table_of_contents: false
+---
+
+import Tabs from '@theme/Tabs';
+import TabItem from '@theme/TabItem';
+import VersionVerificationTable from '@site/src/components/VersionVerificationTable';
+
+> **Status:** Active investigation
+> **Last updated:** March 30, 2026
+
+> **Update (March 30):** A new **clean** version of LiteLLM is now available (v1.83.0). This was released by our new [CI/CD v2](https://docs.litellm.ai/blog/ci-cd-v2-improvements) pipeline which added isolated environments, stronger security gates, and safer release separation for LiteLLM.
+
+> **Update (March 27):** Review Townhall updates, including explanation of the incident, what we've done, and what comes next. [Learn more](https://docs.litellm.ai/blog/security-townhall-updates)
+
+> **Update (March 27):** Added [Verified safe versions](#verified-safe-versions) section with SHA-256 checksums for all audited PyPI and Docker releases.
+
+> **Update (March 26):** Added `checkmarx[.]zone` to [Indicators of compromise](#indicators-of-compromise-iocs)
+
+> **Update (March 25):** Added community-contributed scripts for scanning GitHub Actions and GitLab CI pipelines for the compromised versions. See [How to check if you are affected](#how-to-check-if-you-are-affected). Shout-out to [@Zach Fury](https://www.linkedin.com/in/fryware/) for these scripts.
+
+
+## TLDR; 
+- The compromised PyPI packages were **litellm==1.82.7** and **litellm==1.82.8**. Those packages were live on March 24, 2026 from 10:39 UTC for about 40 minutes before being quarantined by PyPI.
+- We believe that the compromise originated from the [Trivy dependency](https://www.aquasec.com/blog/trivy-supply-chain-attack-what-you-need-to-know/) used in our CI/CD security scanning workflow.
+- Customers running the official LiteLLM Proxy Docker image were not impacted. That deployment path pins dependencies in requirements.txt and does not rely on the compromised PyPI packages.
+- ~~We have paused all new LiteLLM releases until we complete a broader supply-chain review and confirm the release path is safe.~~ **Updated:** We have now released a new **safe** version of LiteLLM (v1.83.0) by our new [CI/CD v2](https://docs.litellm.ai/blog/ci-cd-v2-improvements) pipeline which added isolated environments, stronger security gates, and safer release separation for LiteLLM. We have also verified the codebase is safe and no malicious code was pushed to `main`.
+
+
+## Overview
+
+LiteLLM AI Gateway is investigating a suspected supply chain attack involving unauthorized PyPI package publishes. Current evidence suggests a maintainer's PyPI account may have been compromised and used to distribute malicious code.
+
+At this time, we believe this incident may be linked to the broader [Trivy security compromise](https://www.aquasec.com/blog/trivy-supply-chain-attack-what-you-need-to-know/), in which stolen credentials were reportedly used to gain unauthorized access to the LiteLLM publishing pipeline.
+
+This investigation is ongoing. Details below may change as we confirm additional findings.
+
+## Confirmed affected versions
+
+The following LiteLLM versions published to PyPI were impacted:
+
+- **v1.82.7**: contained a malicious payload in the LiteLLM AI Gateway `proxy_server.py`
+- **v1.82.8**: contained `litellm_init.pth` and a malicious payload in the LiteLLM AI Gateway `proxy_server.py`
+
+If you installed or ran either of these versions, review the recommendations below immediately.
+
+Note: These versions have already been removed from PyPI.
+
+## What happened
+
+Initial evidence suggests the attacker bypassed official CI/CD workflows and uploaded malicious packages directly to PyPI.
+
+These compromised versions appear to have included a credential stealer designed to:
+
+- Harvest secrets by scanning for:
+  - environment variables
+  - SSH keys
+  - cloud provider credentials (AWS, GCP, Azure)
+  - Kubernetes tokens
+  - database passwords
+- Encrypt and exfiltrate data via a `POST` request to `models.litellm.cloud`, which is **not** an official BerriAI / LiteLLM domain
+
+## Who is affected
+
+You may be affected if **any** of the following are true:
+
+- You installed or upgraded LiteLLM via `pip` on **March 24, 2026**, between **10:39 UTC and 16:00 UTC**
+- You ran `pip install litellm` without pinning a version and received **v1.82.7** or **v1.82.8**
+- You built a Docker image during this window that included `pip install litellm` without a pinned version
+- A dependency in your project pulled in LiteLLM as a transitive, unpinned dependency
+  (for example through AI agent frameworks, MCP servers, or LLM orchestration tools)
+
+You are **not** affected if any of the following are true:
+
+**LiteLLM AI Gateway/Proxy users:** Customers running the official LiteLLM Proxy Docker image were not impacted. That deployment path pins dependencies in requirements.txt and does not rely on the compromised PyPI packages.
+
+- You are using **LiteLLM Cloud**
+- You are using the official LiteLLM AI Gateway Docker image: `ghcr.io/berriai/litellm`
+- You are on **v1.82.6 or earlier** and did not upgrade during the affected window
+- You installed LiteLLM from source via the GitHub repository, which was **not** compromised
+
+
+### How to check if you are affected
+
+<Tabs>
+<TabItem value="sdk" label="SDK">
+
+```bash
+pip show litellm
+```
+</TabItem>
+<TabItem value="proxy" label="PROXY">
+
+Go to the proxy base url, and check the version of the installed LiteLLM.
+
+![Proxy version check](../../img/security_update_march_2026/proxy_version.png)
+</TabItem>
+<TabItem value="github" label="GitHub Actions">
+
+Scans all repositories in a GitHub organization for workflow jobs that installed the compromised versions.
+
+**Requirements:** Python 3 and `requests` (`pip install requests`).
+
+**Setup:**
+
+```bash
+export GITHUB_TOKEN="your-github-pat"
+```
+
+**Run:**
+
+```bash
+python find_litellm_github.py
+```
+
+Set the `ORG` variable in the script to your GitHub organization name.
+
+Both scripts default to scanning jobs from **today**. Adjust the `WINDOW_START` and `WINDOW_END` constants to cover **March 24, 2026** (the incident date) if running on a different day.
+
+<details>
+<summary>View full script (find_litellm_github.py)</summary>
+
+```python
+#!/usr/bin/env python3
+"""
+Scan all GitHub Actions jobs in a GitHub org that ran between
+0800-1244 UTC today and identify any that installed litellm 1.82.7 or 1.82.8.
+
+Adjust WINDOW_START / WINDOW_END to cover March 24, 2026 if running later.
+"""
+
+import io
+import os
+import re
+import sys
+import zipfile
+from concurrent.futures import ThreadPoolExecutor, as_completed
+from datetime import datetime, timezone
+
+import requests
+
+GITHUB_URL   = "https://api.github.com"
+ORG          = "your-org"  # <-- set to your GitHub organization
+TOKEN        = os.environ.get("GITHUB_TOKEN", "")
+
+TODAY        = datetime.now(timezone.utc).date()
+WINDOW_START = datetime(TODAY.year, TODAY.month, TODAY.day,  8,  0, 0, tzinfo=timezone.utc)
+WINDOW_END   = datetime(TODAY.year, TODAY.month, TODAY.day, 12, 44, 0, tzinfo=timezone.utc)
+
+TARGET_VERSIONS = {"1.82.7", "1.82.8"}
+VERSION_PATTERN = re.compile(r"litellm[=\-](\d+\.\d+\.\d+)", re.IGNORECASE)
+
+# Shared HTTP session: every request made through it carries the bearer token,
+# the GitHub JSON media type, and a pinned REST API version header.
+SESSION = requests.Session()
+SESSION.headers.update({
+    "Authorization": f"Bearer {TOKEN}",
+    "Accept": "application/vnd.github+json",
+    "X-GitHub-Api-Version": "2022-11-28",
+})
+
+
+def get_paginated(url, params=None):
+    params = dict(params or {})
+    params.setdefault("per_page", 100)
+    page = 1
+    while True:
+        params["page"] = page
+        resp = SESSION.get(url, params=params, timeout=30)
+        if resp.status_code == 404:
+            return
+        resp.raise_for_status()
+        data = resp.json()
+        if isinstance(data, dict):
+            items = next((v for v in data.values() if isinstance(v, list)), [])
+        else:
+            items = data
+        if not items:
+            break
+        yield from items
+        if len(items) < params["per_page"]:
+            break
+        page += 1
+
+
+def parse_ts(ts_str):
+    if not ts_str:
+        return None
+    return datetime.fromisoformat(ts_str.replace("Z", "+00:00"))
+
+
+def get_repos():
+    repos = []
+    for r in get_paginated(f"{GITHUB_URL}/orgs/{ORG}/repos", {"type": "all"}):
+        repos.append({"id": r["id"], "name": r["name"], "full_name": r["full_name"]})
+    return repos
+
+
+def get_runs_in_window(repo_full_name):
+    created_filter = (
+        f"{WINDOW_START.strftime('%Y-%m-%dT%H:%M:%SZ')}"
+        f"..{WINDOW_END.strftime('%Y-%m-%dT%H:%M:%SZ')}"
+    )
+    url = f"{GITHUB_URL}/repos/{repo_full_name}/actions/runs"
+    runs = []
+    for run in get_paginated(url, {"created": created_filter, "per_page": 100}):
+        ts = parse_ts(run.get("run_started_at") or run.get("created_at"))
+        if ts and WINDOW_START <= ts <= WINDOW_END:
+            runs.append(run)
+    return runs
+
+
+def get_jobs_for_run(repo_full_name, run_id):
+    url = f"{GITHUB_URL}/repos/{repo_full_name}/actions/runs/{run_id}/jobs"
+    jobs = []
+    for job in get_paginated(url, {"filter": "all"}):
+        ts = parse_ts(job.get("started_at"))
+        if ts and WINDOW_START <= ts <= WINDOW_END:
+            jobs.append(job)
+    return jobs
+
+
+def fetch_job_log(repo_full_name, job_id):
+    url = f"{GITHUB_URL}/repos/{repo_full_name}/actions/jobs/{job_id}/logs"
+    resp = SESSION.get(url, timeout=60, allow_redirects=True)
+    if resp.status_code in (403, 404, 410):
+        return ""
+    resp.raise_for_status()
+
+    content_type = resp.headers.get("Content-Type", "")
+    if "zip" in content_type or resp.content[:2] == b"PK":
+        try:
+            with zipfile.ZipFile(io.BytesIO(resp.content)) as zf:
+                parts = []
+                for name in sorted(zf.namelist()):
+                    with zf.open(name) as f:
+                        parts.append(f.read().decode("utf-8", errors="replace"))
+                return "\n".join(parts)
+        except zipfile.BadZipFile:
+            pass
+    return resp.text
+
+
+def check_job(repo_full_name, job):
+    job_id   = job["id"]
+    job_name = job["name"]
+    run_id   = job["run_id"]
+    started  = job.get("started_at", "")
+
+    log_text = fetch_job_log(repo_full_name, job_id)
+    if not log_text:
+        return None
+
+    found_versions = set()
+    context_lines  = []
+    for line in log_text.splitlines():
+        m = VERSION_PATTERN.search(line)
+        if m:
+            ver = m.group(1)
+            if ver in TARGET_VERSIONS:
+                found_versions.add(ver)
+                context_lines.append(line.strip())
+
+    if not found_versions:
+        return None
+
+    return {
+        "repo":       repo_full_name,
+        "run_id":     run_id,
+        "job_id":     job_id,
+        "job_name":   job_name,
+        "started_at": started,
+        "versions":   sorted(found_versions),
+        "context":    context_lines[:10],
+        "job_url":    job.get("html_url", f"https://github.com/{repo_full_name}/actions/runs/{run_id}"),
+    }
+
+
+def main():
+    """Scan every repository of ORG and print the jobs whose logs mention a target version.
+
+    Exits with status 1 when TOKEN is empty. Progress and results go to
+    stdout; warnings and per-job errors go to stderr.
+    """
+    if not TOKEN:
+        print("ERROR: Set GITHUB_TOKEN environment variable.", file=sys.stderr)
+        sys.exit(1)
+
+    print(f"Time window : {WINDOW_START.isoformat()} -> {WINDOW_END.isoformat()}")
+    print(f"Hunting for : litellm {', '.join(sorted(TARGET_VERSIONS))}")
+    print()
+
+    print(f"Fetching repositories for org '{ORG}'...")
+    repos = get_repos()
+    print(f"  Found {len(repos)} repositories")
+    print()
+
+    # (repo full name, job dict) pairs whose logs still need to be fetched.
+    jobs_to_check = []
+
+    print("Scanning workflow runs for time window...")
+    for repo in repos:
+        full_name = repo["full_name"]
+        try:
+            runs = get_runs_in_window(full_name)
+        except requests.HTTPError as e:
+            # A failing repository is reported and skipped; the scan continues.
+            print(f"  WARN: {full_name} - {e}", file=sys.stderr)
+            continue
+        if not runs:
+            continue
+        print(f"  {full_name}: {len(runs)} run(s) in window")
+        for run in runs:
+            try:
+                jobs = get_jobs_for_run(full_name, run["id"])
+            except requests.HTTPError as e:
+                print(f"    WARN: run {run['id']} - {e}", file=sys.stderr)
+                continue
+            for job in jobs:
+                jobs_to_check.append((full_name, job))
+
+    total = len(jobs_to_check)
+    print(f"\nFetching logs for {total} job(s)...")
+    print()
+
+    hits = []
+    # Log downloads run on 8 worker threads; results are handled as they complete.
+    with ThreadPoolExecutor(max_workers=8) as pool:
+        futures = {
+            pool.submit(check_job, full_name, job): (full_name, job["id"])
+            for full_name, job in jobs_to_check
+        }
+        done = 0
+        for future in as_completed(futures):
+            done += 1
+            full_name, jid = futures[future]
+            try:
+                result = future.result()
+            except Exception as e:
+                # One failing job must not abort the whole scan.
+                print(f"  ERROR {full_name} job {jid}: {e}", file=sys.stderr)
+                continue
+            if result:
+                hits.append(result)
+            print(
+                f"  [{done}/{total}] {full_name} job {jid}" +
+                (f"  *** HIT: litellm {result['versions']} ***" if result else ""),
+                flush=True,
+            )
+
+    print()
+    print("=" * 72)
+    print(f"RESULTS: {len(hits)} job(s) installed litellm {' or '.join(sorted(TARGET_VERSIONS))}")
+    print("=" * 72)
+
+    if not hits:
+        print("No matches found.")
+        return
+
+    # Report hits ordered by their start timestamp string.
+    for h in sorted(hits, key=lambda x: x["started_at"]):
+        print()
+        print(f"  Repo      : {h['repo']}")
+        print(f"  Job       : {h['job_name']} (#{h['job_id']})")
+        print(f"  Run ID    : {h['run_id']}")
+        print(f"  Started   : {h['started_at']}")
+        print(f"  Versions  : litellm {', '.join(h['versions'])}")
+        print(f"  URL       : {h['job_url']}")
+        print(f"  Log lines :")
+        for line in h["context"]:
+            print(f"    {line}")
+
+
+if __name__ == "__main__":
+    main()
+```
+
+</details>
+
+</TabItem>
+<TabItem value="gitlab" label="GitLab CI">
+
+Scans all projects in a GitLab group (including subgroups) for CI/CD jobs that installed the compromised versions.
+
+**Requirements:** Python 3 and `requests` (`pip install requests`).
+
+**Setup:**
+
+```bash
+export GITLAB_TOKEN="your-gitlab-pat"
+```
+
+**Run:**
+
+```bash
+python find_litellm_jobs.py
+```
+
+Set the `GROUP_NAME` variable in the script to your GitLab group name.
+
+Both scripts default to scanning jobs from **today**. Adjust the `WINDOW_START` and `WINDOW_END` constants to cover **March 24, 2026** (the incident date) if running on a different day.
+
+<details>
+<summary>View full script (find_litellm_jobs.py)</summary>
+
+```python
+#!/usr/bin/env python3
+"""
+Scan all GitLab CI/CD jobs in a GitLab group that ran between
+0800-1244 UTC today and identify any that installed litellm 1.82.7 or 1.82.8.
+
+Adjust WINDOW_START / WINDOW_END to cover March 24, 2026 if running later.
+"""
+
+import os
+import re
+import sys
+from concurrent.futures import ThreadPoolExecutor, as_completed
+from datetime import datetime, timezone
+
+import requests
+
+GITLAB_URL = "https://gitlab.com"
+GROUP_NAME = "YourGroup"  # <-- set to your GitLab group name
+TOKEN = os.environ.get("GITLAB_TOKEN", "")
+
+TODAY = datetime.now(timezone.utc).date()
+WINDOW_START = datetime(TODAY.year, TODAY.month, TODAY.day, 8, 0, 0, tzinfo=timezone.utc)
+WINDOW_END   = datetime(TODAY.year, TODAY.month, TODAY.day, 12, 44, 0, tzinfo=timezone.utc)
+
+TARGET_VERSIONS = {"1.82.7", "1.82.8"}
+VERSION_PATTERN = re.compile(r"litellm[=\-](\d+\.\d+\.\d+)", re.IGNORECASE)
+
+# Shared HTTP session: every request made through it carries the PRIVATE-TOKEN header.
+HEADERS = {"PRIVATE-TOKEN": TOKEN}
+SESSION = requests.Session()
+SESSION.headers.update(HEADERS)
+
+
+def get_paginated(url, params=None):
+    params = dict(params or {})
+    params.setdefault("per_page", 100)
+    page = 1
+    while True:
+        params["page"] = page
+        resp = SESSION.get(url, params=params, timeout=30)
+        resp.raise_for_status()
+        data = resp.json()
+        if not data:
+            break
+        yield from data
+        if len(data) < params["per_page"]:
+            break
+        page += 1
+
+
+def get_group_id(group_name):
+    resp = SESSION.get(f"{GITLAB_URL}/api/v4/groups/{group_name}", timeout=30)
+    resp.raise_for_status()
+    return resp.json()["id"]
+
+
+def get_all_projects(group_id):
+    projects = []
+    for p in get_paginated(
+        f"{GITLAB_URL}/api/v4/groups/{group_id}/projects",
+        {"include_subgroups": "true", "archived": "false"},
+    ):
+        projects.append({"id": p["id"], "name": p["path_with_namespace"]})
+    return projects
+
+
+def parse_ts(ts_str):
+    if not ts_str:
+        return None
+    ts_str = ts_str.replace("Z", "+00:00")
+    return datetime.fromisoformat(ts_str)
+
+
+def jobs_in_window(project_id):
+    """Return the project's jobs whose start (or creation) time lies inside the scan window.
+
+    Pages through the project's job listing 100 at a time. A 403 response
+    returns whatever was collected so far; other HTTP errors are raised.
+    """
+    matching = []
+    url = f"{GITLAB_URL}/api/v4/projects/{project_id}/jobs"
+    params = {"per_page": 100, "scope[]": ["success", "failed", "canceled", "running"]}
+
+    page = 1
+    while True:
+        params["page"] = page
+        resp = SESSION.get(url, params=params, timeout=30)
+        if resp.status_code == 403:
+            return matching
+        resp.raise_for_status()
+        jobs = resp.json()
+        if not jobs:
+            break
+
+        stop_early = False
+        for job in jobs:
+            ts = parse_ts(job.get("started_at") or job.get("created_at"))
+            if ts is None:
+                continue
+            if ts > WINDOW_END:
+                # Later than the window: skip it but keep paging.
+                continue
+            if ts < WINDOW_START:
+                # Earlier than the window: finish this page, then stop paging.
+                # NOTE(review): this assumes the listing is returned newest-first - confirm.
+                stop_early = True
+                continue
+            matching.append(job)
+
+        if stop_early or len(jobs) < 100:
+            break
+        page += 1
+
+    return matching
+
+
+def fetch_trace(project_id, job_id):
+    url = f"{GITLAB_URL}/api/v4/projects/{project_id}/jobs/{job_id}/trace"
+    resp = SESSION.get(url, timeout=60)
+    if resp.status_code in (403, 404):
+        return ""
+    resp.raise_for_status()
+    return resp.text
+
+
+def check_job(project_name, project_id, job):
+    job_id   = job["id"]
+    job_name = job["name"]
+    ref      = job.get("ref", "")
+    started  = job.get("started_at", job.get("created_at", ""))
+
+    trace = fetch_trace(project_id, job_id)
+    if not trace:
+        return None
+
+    found_versions = set()
+    for match in VERSION_PATTERN.finditer(trace):
+        ver = match.group(1)
+        if ver in TARGET_VERSIONS:
+            found_versions.add(ver)
+
+    if not found_versions:
+        return None
+
+    context_lines = []
+    for line in trace.splitlines():
+        if VERSION_PATTERN.search(line):
+            ver_match = VERSION_PATTERN.search(line)
+            if ver_match and ver_match.group(1) in TARGET_VERSIONS:
+                context_lines.append(line.strip())
+
+    return {
+        "project":    project_name,
+        "project_id": project_id,
+        "job_id":     job_id,
+        "job_name":   job_name,
+        "ref":        ref,
+        "started_at": started,
+        "versions":   sorted(found_versions),
+        "context":    context_lines[:10],
+        "job_url":    f"{GITLAB_URL}/{project_name}/-/jobs/{job_id}",
+    }
+
+
+def main():
+    """Scan every project of GROUP_NAME and print the jobs whose traces mention a target version.
+
+    Exits with status 1 when TOKEN is empty. Progress and results go to
+    stdout; warnings and per-job errors go to stderr.
+    """
+    if not TOKEN:
+        print("ERROR: Set GITLAB_TOKEN environment variable.", file=sys.stderr)
+        sys.exit(1)
+
+    print(f"Time window : {WINDOW_START.isoformat()} -> {WINDOW_END.isoformat()}")
+    print(f"Hunting for : litellm {', '.join(sorted(TARGET_VERSIONS))}")
+    print()
+
+    print(f"Resolving group '{GROUP_NAME}'...")
+    group_id = get_group_id(GROUP_NAME)
+
+    print("Fetching projects...")
+    projects = get_all_projects(group_id)
+    print(f"  Found {len(projects)} projects")
+    print()
+
+    # (project name, project id, job dict) triples whose traces still need to be fetched.
+    all_jobs_to_check = []
+
+    print("Scanning job listings for time window...")
+    for proj in projects:
+        try:
+            jobs = jobs_in_window(proj["id"])
+        except requests.HTTPError as e:
+            # A failing project is reported and skipped; the scan continues.
+            print(f"  WARN: {proj['name']} - {e}", file=sys.stderr)
+            continue
+        if jobs:
+            print(f"  {proj['name']}: {len(jobs)} job(s) in window")
+        for j in jobs:
+            all_jobs_to_check.append((proj["name"], proj["id"], j))
+
+    total = len(all_jobs_to_check)
+    print(f"\nFetching traces for {total} job(s)...")
+    print()
+
+    hits = []
+    # Trace downloads run on 10 worker threads; results are handled as they complete.
+    with ThreadPoolExecutor(max_workers=10) as pool:
+        futures = {
+            pool.submit(check_job, pname, pid, job): (pname, job["id"])
+            for pname, pid, job in all_jobs_to_check
+        }
+        done = 0
+        for future in as_completed(futures):
+            done += 1
+            pname, jid = futures[future]
+            try:
+                result = future.result()
+            except Exception as e:
+                # One failing job must not abort the whole scan.
+                print(f"  ERROR checking {pname} job {jid}: {e}", file=sys.stderr)
+                continue
+            if result:
+                hits.append(result)
+            print(f"  [{done}/{total}] checked {pname} job {jid}" +
+                  (f"  *** HIT: litellm {result['versions']} ***" if result else ""),
+                  flush=True)
+
+    print()
+    print("=" * 72)
+    print(f"RESULTS: {len(hits)} job(s) installed litellm {' or '.join(sorted(TARGET_VERSIONS))}")
+    print("=" * 72)
+
+    if not hits:
+        print("No matches found.")
+        return
+
+    # Report hits ordered by their start timestamp.
+    for h in sorted(hits, key=lambda x: x["started_at"]):
+        print()
+        print(f"  Project   : {h['project']}")
+        print(f"  Job       : {h['job_name']} (#{h['job_id']})")
+        print(f"  Branch/tag: {h['ref']}")
+        print(f"  Started   : {h['started_at']}")
+        print(f"  Versions  : litellm {', '.join(h['versions'])}")
+        print(f"  URL       : {h['job_url']}")
+        print(f"  Log lines :")
+        for line in h["context"]:
+            print(f"    {line}")
+
+
+if __name__ == "__main__":
+    main()
+```
+
+</details>
+
+</TabItem>
+</Tabs>
+
+*CI/CD scripts contributed by the community ([original gist](https://gist.github.com/fryz/93ec8d4898ffe5b5ac5706a208823ef3)). Review before running.*
+
+
+## Indicators of compromise (IoCs)
+
+Review affected systems for the following indicators:
+
+- `litellm_init.pth` present in your `site-packages`
+- Outbound traffic or requests to `models.litellm[.]cloud`
+  This domain is **not** affiliated with LiteLLM
+- Outbound traffic or requests to `checkmarx[.]zone`
+  This domain is **not** affiliated with LiteLLM
+
+
+## Immediate actions for affected users
+
+If you installed or ran **v1.82.7** or **v1.82.8**, take the following actions immediately.
+
+### 1. Rotate all secrets
+
+Treat any credentials present on the affected systems as compromised, including:
+
+- API keys
+- Cloud access keys
+- Database passwords
+- SSH keys
+- Kubernetes tokens
+- Any secrets stored in environment variables or configuration files
+
+### 2. Inspect your filesystem
+
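+Check every `site-packages` directory on the host (system Python, virtual environments, and container images) for a file named `litellm_init.pth`; adjust the example path below to match your Python version and install location: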
+
+```bash
+find /usr/lib/python3.13/site-packages/ -name "litellm_init.pth"
+```
+
+If present:
+
+- remove it immediately
+- investigate the host for further compromise
+- preserve relevant artifacts if your security team is performing forensics
+
+### 3. Audit version history
+
+Review your:
+
+- Local environments
+- CI/CD pipelines
+- Docker builds
+- Deployment logs
+
+Confirm whether **v1.82.7** or **v1.82.8** was installed anywhere.
+
+Pin LiteLLM to a known safe version such as **v1.82.6 or earlier**, or to a later verified release once announced.
+
+
+## Response and remediation
+
+The LiteLLM AI Gateway team has already taken the following steps:
+
+- Removed compromised packages from PyPI
+- Rotated maintainer credentials and established new authorized maintainers
+- Engaged Google's Mandiant security team to assist with forensic analysis of the build and publishing chain
+
+
+## Verified safe versions
+
+We have audited every LiteLLM release published between v1.78.0 and v1.82.6 across both PyPI and Docker. Each artifact was verified by:
+
+1. Downloading the published artifact and computing its SHA-256 digest
+2. Scanning for the known [indicators of compromise](#indicators-of-compromise-iocs) (IoCs)
+3. Comparing the artifact contents against the corresponding Git commit in the BerriAI/litellm repository
+
+**All versions listed below are confirmed clean.**
+
+<Tabs>
+<TabItem value="pypi" label="PyPI Releases">
+
+<VersionVerificationTable entries={[
+  { version: "1.82.6", sha256: "164a3ef3e19f309e3cabc199bef3d2045212712fefdfa25fc7f75884a5b5b205", gitCommit: "38d477507dad" },
+  { version: "1.82.5", sha256: "e1012ab816352215c4e00776dd48b0c68058b537888a8ff82cca62af19e6fb11", gitCommit: "1998c4f3703f" },
+  { version: "1.82.4", sha256: "d37c34a847e7952a146ed0e2888a24d3edec7787955c6826337395e755ad5c4b", gitCommit: "cfeafbe38811" },
+  { version: "1.82.3", sha256: "609901f6c5a5cf8c24386e4e3f50738bb8a9db719709fd76b208c8ee6d00f7a7", gitCommit: "61409275c8d8" },
+  { version: "1.82.2", sha256: "641ed024774fa3d5b4dd9347f0efb1e31fa422fba2a6500aabedee085d1194cb", gitCommit: "f351bbdb3683" },
+  { version: "1.82.1", sha256: "a9ec3fe42eccb1611883caaf8b1bf33c9f4e12163f94c7d1004095b14c379eb2", gitCommit: "94b002066e3a" },
+  { version: "1.82.0", sha256: "5496b5d4532cccdc7a095c21cbac4042f7662021c57bc1d17be4e39838929e80", gitCommit: "6c6585af568e" },
+  { version: "1.81.16", sha256: "d6bcc13acbd26719e07bfa6b9923740e88409cbf1f9d626d85fc9ae0e0eec88c", gitCommit: "678200ee4887" },
+  { version: "1.81.15", sha256: "2fa253658702509ce09fe0e172e5a47baaadf697fb0f784c7fd4ff665ae76ae1", gitCommit: "2e819656cee9" },
+  { version: "1.81.14", sha256: "6394e61bbdef7121e5e3800349f6b01e9369e7cf611e034f1832750c481abfed", gitCommit: "96bcee0b0af7" },
+  { version: "1.81.13", sha256: "ae4aea2a55e85993f5f6dd36d036519422d24812a1a3e8540d9e987f2d7a4304", gitCommit: "cc957a19a560" },
+  { version: "1.81.12", sha256: "219cf9729e5ea30c6d3f75aa43fef3c56a717369939a6d717cbad0fd78e3c146", gitCommit: "ba0d541b1982" },
+  { version: "1.81.11", sha256: "06a66c24742e082ddd2813c87f40f5c12fe7baa73ce1f9457eaf453dc44a0f65", gitCommit: "231aedeeff7e" },
+  { version: "1.81.10", sha256: "9efa1cbe61ac051f6500c267b173d988ff2d511c2eecf1c8f2ee546c0870747c", gitCommit: "7488abece8e7" },
+  { version: "1.81.9", sha256: "24ee273bc8a62299fbb754035f83fb7d8d44329c383701a2bd034f4fd1c19084", gitCommit: "a09d3e9162eb" },
+  { version: "1.81.8", sha256: "78cca92f36bc6c267c191d1fe1e2630c812bff6daec32c58cade75748c2692f6", gitCommit: "4fea649f519b" },
+  { version: "1.81.7", sha256: "58466c88c3289c6a3830d88768cf8f307581d9e6c87861de874d1128bb2de90d", gitCommit: "3f6a281d0f7a" },
+  { version: "1.81.6", sha256: "573206ba194d49a1691370ba33f781671609ac77c35347f8a0411d852cf6341a", gitCommit: "8da3a93e6e63" },
+  { version: "1.81.5", sha256: "206505c5a0c6503e465154b9c979772be3ede3f5bf746d15b37dca5ae54d239f", gitCommit: "2cc3778761d4" },
+  { version: "1.81.3", sha256: "3f60fd8b727587952ad3dd18b68f5fed538d6f43d15bb0356f4c3a11bccb2b92", gitCommit: "f30742fe6e8e" },
+]} />
+
+</TabItem>
+<TabItem value="docker" label="Docker Images">
+
+<VersionVerificationTable entries={[
+  { version: "1.82.3", sha256: "0a571da849db5f9c3cf3fead2ffbf1df982eebff7e7b38b46dbec3f640dafdbb", gitCommit: "61409275c8d8" },
+  { version: "1.82.3-stable", sha256: "0c2b2a0ad3e50af1702fc493ecd07f22a5180b6d1cfb169440b429b40e340e29", gitCommit: "61409275c8d8" },
+  { version: "1.82.0-stable", sha256: "71bf7283767ca436edcfa9f1f26c1743487b5fa29736c61c3eb6977776007c42", gitCommit: "97947c254252" },
+  { version: "1.81.15", sha256: "303c31af87e7915e7b34d6c4d55a6ac753ef947a5deaa899e9ccfd3d1d58f7c2", gitCommit: "20bf3aa8070a" },
+  { version: "1.81.14-stable", sha256: "a34f9758048231817d799b703fb998e40e2a5cbabb89ab95039fc30798f01b3c", gitCommit: "0435375b1271" },
+  { version: "1.81.13", sha256: "a876f3f22f9b6fd481c9091c44a8a893d81c172d66dc2749298dcd3dc4a3d6f0", gitCommit: "cc957a19a560" },
+  { version: "1.81.12-stable", sha256: "e24022878ccc87f57d808ac9304f18b87b8359e6556746d81cc20a5dc85f423a", gitCommit: "ba0d541b1982" },
+  { version: "1.81.9-stable", sha256: "262e53d7702ed82579717faff0b08f7c0b7e9973a6406cfcc0e4af7826327627", gitCommit: "a09d3e9162eb" },
+  { version: "1.81.3-stable", sha256: "dff82ccc32fb648927c090607887401c7e8ec814fe7c951beb95fe51073ca02b", gitCommit: "61ed8f9e0355" },
+  { version: "1.81.0-stable", sha256: "f4913297d1bb3dc373eb8911a5ac816b597be9b5e08a91636b6c2786dd572aa8", gitCommit: "790a5ce0b323" },
+  { version: "1.80.15-stable", sha256: "0b4ec3861e978b4aa254f4070f292cd345496a5fb59c72e1ee21cd6db94b670b", gitCommit: "17c8d8d109b5" },
+  { version: "1.80.11-stable", sha256: "4068108d9101cd2affba3924310fd7f34f23d14e36dd4853733898b9e04d81ca", gitCommit: "57e07bddd341" },
+  { version: "1.80.8-stable", sha256: "0304c2eb1f3cf54262d1b4e0629487232bab459e95b99a21e5810231d2b27021", gitCommit: "3381d63152f8" },
+  { version: "1.80.5-stable", sha256: "a89e173135fff96af4b5b91ea31845164eadcf6497c82adeb64c36a23c8a3d11", gitCommit: "6c49b95a4ab7" },
+  { version: "1.80.0-stable", sha256: "a3416f4cd0c896c94a1f526d872ff6c19bee22ff4afcdcc6f9ff690707900176", gitCommit: "98365205acd0" },
+  { version: "1.79.3-stable", sha256: "27aae83d6ab6cb0b63bf8179e375ce0e11f5cfef51f2675b0c1e60c6f546dbc1", gitCommit: "c0548542d4a9" },
+  { version: "1.79.1-stable", sha256: "7780d29a9543c4ce762430db7dfb0640105f7357fc38e35bf3fb7bbb1e6ba63f", gitCommit: "c217bddb59ba" },
+  { version: "1.79.0-stable", sha256: "32bf6ac059a56641e11e4712f63b8467c295f988b6c160dc7229660417ee44bd", gitCommit: "8d495f56a9cc" },
+  { version: "1.78.5-stable", sha256: "d5e607648eafa15edc63b0b1a5ed01f8b31a1fa0c80f7d25b252ae18a593ee29", gitCommit: "c471bf1f16c2" },
+  { version: "1.78.0-stable", sha256: "7a56b32dc7153763d31c0a056123dc878a598959935d8c7daacb1fca5272c205", gitCommit: "5fde83d9f154" },
+]} />
+
+</TabItem>
+</Tabs>
+
+
+## Questions and support
+
+If you believe your systems may be affected, contact us immediately:
+
+- **Security:** `security@berri.ai`
+- **Support:** `support@berri.ai`
+- **Slack:** Reach out to the LiteLLM team directly
+
+For real-time updates, follow [LiteLLM (YC W23) on X](https://x.com/LiteLLM).
+
--- a/docs/my-website/blog/vanta_compliance_recertification/index.md
+++ b/docs/my-website/blog/vanta_compliance_recertification/index.md
@ -0,0 +1,18 @@
+---
+slug: vanta-compliance-recertification
+title: "LiteLLM + Vanta: SOC 2 Type 2 and ISO 27001 Recertification"
+date: 2026-03-30T10:00:00
+authors:
+  - krrish
+description: "LiteLLM is partnering with Vanta on SOC 2 Type 2 and ISO 27001 recertification and engaging independent auditors for verification."
+tags: [security, compliance]
+hide_table_of_contents: true
+---
+
+![LiteLLM x Vanta SOC-2 Recertification](/img/blog/vanta_soc2_recertification.png)
+
+We are partnering with [Vanta](https://www.vanta.com/) to recertify LiteLLM's compliance for SOC 2 Type 2 and ISO 27001.
+
+As part of this process, we are also identifying independent auditors to validate and verify our compliance posture.
+
+This is part of our commitment to being the most secure and transparent AI Gateway possible.
--- a/docs/my-website/docs/anthropic_unified/index.md
+++ b/docs/my-website/docs/anthropic_unified/index.md
@ -506,12 +506,15 @@ Request body will be in the Anthropic messages API format. **litellm follows the
  A system prompt providing context or specific instructions to the model.
 - **temperature** (number):  
  Controls randomness in the model's responses. Valid range: `0 < temperature < 1`.
- **thinking** (object):  
+- **thinking** (object):
  Configuration for enabling extended thinking. If enabled, it includes:
-  - **budget_tokens** (integer):  
+  - **budget_tokens** (integer):
    Minimum of 1024 tokens (and less than `max_tokens`).
-  - **type** (enum):  
+  - **type** (enum):
    E.g., `"enabled"`.
+  - **summary** (string, optional):
+    Sets the summary style for thinking blocks. Possible values: `"auto"`, `"concise"`, `"detailed"`, `"disabled"`.
+    When routing to non-Anthropic providers (e.g., `openai/gpt-5.1`), the `summary` value is preserved and forwarded to the downstream API.
 - **tool_choice** (object):  
  Instructs how the model should utilize any provided tools.
 - **tools** (array of objects):  
--- a/docs/my-website/docs/completion/prompt_caching.md
+++ b/docs/my-website/docs/completion/prompt_caching.md
@ -6,6 +6,8 @@ import TabItem from '@theme/TabItem';
 Supported Providers:
 - OpenAI (`openai/`)
 - Anthropic API (`anthropic/`)
+- Google AI Studio (`gemini/`)
+- Vertex AI (`vertex_ai/`, `vertex_ai_beta/`)
 - Bedrock (`bedrock/`, `bedrock/invoke/`, `bedrock/converse`) ([All models bedrock supports prompt caching on](https://docs.aws.amazon.com/bedrock/latest/userguide/prompt-caching.html))
 - Deepseek API (`deepseek/`)

@ -257,7 +259,7 @@ Anthropic charges for cache writes.

 Specify the content to cache with `"cache_control": {"type": "ephemeral"}`.

-If you pass that in for any other llm provider, it will be ignored. 
+This same format also works for [Gemini / Vertex AI](#google-ai-studio--vertex-ai-gemini-example). For other providers, it will be ignored.

 <Tabs>
 <TabItem value="sdk" label="SDK">
@ -356,6 +358,208 @@ print(response.usage)
 </TabItem>
 </Tabs>

+### Google AI Studio / Vertex AI (Gemini) Example
+
+Use the same Anthropic-style `cache_control` format — LiteLLM automatically translates it to Google's [context caching API](https://ai.google.dev/api/caching).
+
+**How it works under the hood:**
+1. Messages with `cache_control` are separated and sent to Google's `cachedContents` API
+2. The cached content ID is then passed as `cachedContent` in the Gemini request body
+3. Works across all three providers: `gemini/` (Google AI Studio), `vertex_ai/`, and `vertex_ai_beta/`
+4. Requires a minimum of **1024 tokens** in the cached content — below that, caching is silently skipped
+
+<Tabs>
+<TabItem value="sdk" label="SDK">
+
+```python
+from litellm import completion
+import os
+
+os.environ["GEMINI_API_KEY"] = ""
+
+response = completion(
+    model="gemini/gemini-2.5-flash",
+    messages=[
+        {
+            "role": "system",
+            "content": [
+                {
+                    "type": "text",
+                    "text": "You are an AI assistant tasked with analyzing legal documents.",
+                },
+                {
+                    "type": "text",
+                    "text": "Here is the full text of a complex legal agreement" * 400,
+                    "cache_control": {"type": "ephemeral"},
+                },
+            ],
+        },
+        {
+            "role": "user",
+            "content": "what are the key terms and conditions in this agreement?",
+        },
+    ],
+)
+
+print(response.usage)
+```
+</TabItem>
+<TabItem value="proxy" label="PROXY">
+
+1. Setup config.yaml
+
+```yaml
+model_list:
+    - model_name: gemini-2.5-flash
+      litellm_params:
+        model: gemini/gemini-2.5-flash
+        api_key: os.environ/GEMINI_API_KEY
+```
+
+2. Start proxy
+
+```bash
+litellm --config /path/to/config.yaml
+```
+
+3. Test it!
+
+```python
+from openai import OpenAI
+
+client = OpenAI(
+    api_key="LITELLM_PROXY_KEY",  # sk-1234
+    base_url="LITELLM_PROXY_BASE",  # http://0.0.0.0:4000
+)
+
+response = client.chat.completions.create(
+    model="gemini-2.5-flash",
+    messages=[
+        {
+            "role": "system",
+            "content": [
+                {
+                    "type": "text",
+                    "text": "You are an AI assistant tasked with analyzing legal documents.",
+                },
+                {
+                    "type": "text",
+                    "text": "Here is the full text of a complex legal agreement" * 400,
+                    "cache_control": {"type": "ephemeral"},
+                },
+            ],
+        },
+        {
+            "role": "user",
+            "content": "what are the key terms and conditions in this agreement?",
+        },
+    ],
+)
+
+print(response.usage)
+```
+
+</TabItem>
+</Tabs>
+
+#### Vertex AI
+
+For Vertex AI, use the `vertex_ai/` prefix:
+
+<Tabs>
+<TabItem value="sdk" label="SDK">
+
+```python
+from litellm import completion
+
+response = completion(
+    model="vertex_ai/gemini-2.5-flash",
+    vertex_project="my-gcp-project",
+    vertex_location="us-central1",
+    messages=[
+        {
+            "role": "system",
+            "content": [
+                {
+                    "type": "text",
+                    "text": "You are an AI assistant tasked with analyzing legal documents.",
+                },
+                {
+                    "type": "text",
+                    "text": "Here is the full text of a complex legal agreement" * 400,
+                    "cache_control": {"type": "ephemeral"},
+                },
+            ],
+        },
+        {
+            "role": "user",
+            "content": "what are the key terms and conditions in this agreement?",
+        },
+    ],
+)
+
+print(response.usage)
+```
+</TabItem>
+<TabItem value="proxy" label="PROXY">
+
+1. Setup config.yaml
+
+```yaml
+model_list:
+    - model_name: gemini-2.5-flash
+      litellm_params:
+        model: vertex_ai/gemini-2.5-flash
+        vertex_project: my-gcp-project
+        vertex_location: us-central1
+```
+
+2. Start proxy
+
+```bash
+litellm --config /path/to/config.yaml
+```
+
+3. Test it!
+
+```python
+from openai import OpenAI
+
+client = OpenAI(
+    api_key="LITELLM_PROXY_KEY",  # sk-1234
+    base_url="LITELLM_PROXY_BASE",  # http://0.0.0.0:4000
+)
+
+response = client.chat.completions.create(
+    model="gemini-2.5-flash",
+    messages=[
+        {
+            "role": "system",
+            "content": [
+                {
+                    "type": "text",
+                    "text": "You are an AI assistant tasked with analyzing legal documents.",
+                },
+                {
+                    "type": "text",
+                    "text": "Here is the full text of a complex legal agreement" * 400,
+                    "cache_control": {"type": "ephemeral"},
+                },
+            ],
+        },
+        {
+            "role": "user",
+            "content": "what are the key terms and conditions in this agreement?",
+        },
+    ],
+)
+
+print(response.usage)
+```
+
+</TabItem>
+</Tabs>
+
 ### Deepeek Example 

 Works the same as OpenAI. 
--- a/docs/my-website/docs/data_security.md
+++ b/docs/my-website/docs/data_security.md
@ -128,8 +128,6 @@ We'll review all reports promptly. Note that we don't currently offer a bug boun

 Legal Entity Name: Berrie AI Incorporated

-Company Phone Number: 7708783106 
-
 Point of contact email address for security incidents: krrish@berri.ai

 Point of contact email address for general security-related questions: krrish@berri.ai 
--- a/docs/my-website/docs/debugging/local_debugging.md
+++ b/docs/my-website/docs/debugging/local_debugging.md
@ -67,6 +67,6 @@ response = completion("command-nightly", messages, logger_fn=my_custom_logging_f

 ## Still Seeing Issues? 

-Text us @ +17708783106 or Join the [Discord](https://discord.com/invite/wuPM9dRgDw). 
+Join the [Discord](https://discord.com/invite/wuPM9dRgDw). 

 We promise to help you in `lite`ning speed ❤️
--- a/docs/my-website/docs/enterprise.md
+++ b/docs/my-website/docs/enterprise.md
@ -4,7 +4,7 @@ import Image from '@theme/IdealImage';

 :::info
 - ✨ SSO is free for up to 5 users. After that, an enterprise license is required. [Get Started with Enterprise here](https://www.litellm.ai/enterprise)
- Who is Enterprise for? Companies giving access to 100+ users **OR** 10+ AI use-cases. If you're not sure, [get in touch with us](https://calendly.com/d/cx9p-5yf-2nm/litellm-introductions) to discuss your needs.
+- Who is Enterprise for? Companies giving access to 100+ users **OR** 10+ AI use-cases. If you're not sure, [get in touch with us](https://enterprise.litellm.ai/demo) to discuss your needs.
 :::

 For companies that need SSO, user management and professional support for LiteLLM Proxy
@ -36,7 +36,7 @@ Manage Yourself - you can deploy our Docker Image or build a custom image from o

 ### What’s the cost of the Self-Managed Enterprise edition?

-Self-Managed Enterprise deployments require our team to understand your exact needs. [Get in touch with us to learn more](https://calendly.com/d/cx9p-5yf-2nm/litellm-introductions)
+Self-Managed Enterprise deployments require our team to understand your exact needs. [Get in touch with us to learn more](https://enterprise.litellm.ai/demo)


 ### How does deployment with Enterprise License work? 
@ -106,7 +106,7 @@ Professional Support can assist with LLM/Provider integrations, deployment, upgr

 Pricing is based on usage. We can figure out a price that works for your team, on the call. 

-[**Contact Us to learn more**](https://calendly.com/d/cx9p-5yf-2nm/litellm-introductions)
+[**Contact Us to learn more**](https://enterprise.litellm.ai/demo)



--- a/docs/my-website/docs/fine_tuning.md
+++ b/docs/my-website/docs/fine_tuning.md
@ -6,7 +6,7 @@ import TabItem from '@theme/TabItem';

 :::info

-This is an Enterprise only endpoint [Get Started with Enterprise here](https://calendly.com/d/cx9p-5yf-2nm/litellm-introductions)
+This is an Enterprise only endpoint [Get Started with Enterprise here](https://enterprise.litellm.ai/demo)

 :::

--- a/docs/my-website/docs/learn/sdk_quickstart.md
+++ b/docs/my-website/docs/learn/sdk_quickstart.md
@ -11,7 +11,7 @@ Use this path if you are integrating LiteLLM directly into application code.
 ## 1. Install LiteLLM

 ```bash
-pip install litellm
+pip install litellm==1.82.6
 ```

 ## 2. Set Provider Credentials
--- a/docs/my-website/docs/migration.md
+++ b/docs/my-website/docs/migration.md
@ -31,5 +31,4 @@ When we have breaking changes (i.e. going from 1.x.x to 2.x.x), we will document
 **How can we communicate changes better?**
 Tell us
 - [Discord](https://discord.com/invite/wuPM9dRgDw)
- Email (krrish@berri.ai/ishaan@berri.ai)
- Text us (+17708783106)
+- Email (support@berri.ai)
--- a/docs/my-website/docs/observability/arize_integration.md
+++ b/docs/my-website/docs/observability/arize_integration.md
@ -194,5 +194,4 @@ print(response)

 - [Schedule Demo 👋](https://calendly.com/d/4mp-gd3-k5k/berriai-1-1-onboarding-litellm-hosted-version)
 - [Community Discord 💭](https://discord.gg/wuPM9dRgDw)
- Our numbers 📞 +1 (770) 8783-106 / +1 (412) 618-6238
 - Our emails ✉️ ishaan@berri.ai / krrish@berri.ai
--- a/docs/my-website/docs/observability/gcs_bucket_integration.md
+++ b/docs/my-website/docs/observability/gcs_bucket_integration.md
@ -6,7 +6,7 @@ Log LLM Logs to [Google Cloud Storage Buckets](https://cloud.google.com/storage?

 :::info

-✨ This is an Enterprise only feature [Get Started with Enterprise here](https://calendly.com/d/cx9p-5yf-2nm/litellm-introductions)
+✨ This is an Enterprise only feature [Get Started with Enterprise here](https://enterprise.litellm.ai/demo)

 :::

@ -79,5 +79,4 @@ curl --location 'http://0.0.0.0:4000/chat/completions' \

 - [Schedule Demo 👋](https://calendly.com/d/4mp-gd3-k5k/berriai-1-1-onboarding-litellm-hosted-version)
 - [Community Discord 💭](https://discord.gg/wuPM9dRgDw)
- Our numbers 📞 +1 (770) 8783-106 / ‭+1 (412) 618-6238‬
 - Our emails ✉️ ishaan@berri.ai / krrish@berri.ai
--- a/docs/my-website/docs/observability/langfuse_integration.md
+++ b/docs/my-website/docs/observability/langfuse_integration.md
@ -342,5 +342,4 @@ Be aware that if you are continuing an existing trace, and you set `update_trace

 - [Schedule Demo 👋](https://calendly.com/d/4mp-gd3-k5k/berriai-1-1-onboarding-litellm-hosted-version)
 - [Community Discord 💭](https://discord.gg/wuPM9dRgDw)
- Our numbers 📞 +1 (770) 8783-106 / ‭+1 (412) 618-6238‬
 - Our emails ✉️ ishaan@berri.ai / krrish@berri.ai
--- a/docs/my-website/docs/observability/langsmith_integration.md
+++ b/docs/my-website/docs/observability/langsmith_integration.md
@ -225,5 +225,4 @@ environment_variables:

 - [Schedule Demo 👋](https://calendly.com/d/4mp-gd3-k5k/berriai-1-1-onboarding-litellm-hosted-version)
 - [Community Discord 💭](https://discord.gg/wuPM9dRgDw)
- Our numbers 📞 +1 (770) 8783-106 / ‭+1 (412) 618-6238‬
 - Our emails ✉️ ishaan@berri.ai / krrish@berri.ai
--- a/docs/my-website/docs/observability/logfire_integration.md
+++ b/docs/my-website/docs/observability/logfire_integration.md
@ -63,5 +63,4 @@ response = litellm.completion(

 - [Schedule Demo 👋](https://calendly.com/d/4mp-gd3-k5k/berriai-1-1-onboarding-litellm-hosted-version)
 - [Community Discord 💭](https://discord.gg/wuPM9dRgDw)
- Our numbers 📞 +1 (770) 8783-106 / ‭+1 (412) 618-6238‬
 - Our emails ✉️ ishaan@berri.ai / krrish@berri.ai
--- a/docs/my-website/docs/observability/lunary_integration.md
+++ b/docs/my-website/docs/observability/lunary_integration.md
@ -176,5 +176,4 @@ You can find more details about the different ways of making requests to the Lit

 - [Schedule Demo 👋](https://calendly.com/d/4mp-gd3-k5k/berriai-1-1-onboarding-litellm-hosted-version)
 - [Community Discord 💭](https://discord.gg/wuPM9dRgDw)
- Our numbers 📞 +1 (770) 8783-106 / ‭+1 (412) 618-6238‬
 - Our emails ✉️ ishaan@berri.ai / krrish@berri.ai
--- a/docs/my-website/docs/observability/opik_integration.md
+++ b/docs/my-website/docs/observability/opik_integration.md
@ -261,5 +261,4 @@ All requests made with this key will automatically be tracked in the "TestProjec

 - [Schedule Demo 👋](https://calendly.com/d/4mp-gd3-k5k/berriai-1-1-onboarding-litellm-hosted-version)
 - [Community Discord 💭](https://discord.gg/wuPM9dRgDw)
- Our numbers 📞 +1 (770) 8783-106 / ‭+1 (412) 618-6238‬
 - Our emails ✉️ ishaan@berri.ai / krrish@berri.ai
--- a/docs/my-website/docs/observability/phoenix_integration.md
+++ b/docs/my-website/docs/observability/phoenix_integration.md
@ -127,5 +127,4 @@ Depending on which Phoenix Cloud version or deployment you are using, you should

 - [Schedule Demo 👋](https://calendly.com/d/4mp-gd3-k5k/berriai-1-1-onboarding-litellm-hosted-version)
 - [Community Discord 💭](https://discord.gg/wuPM9dRgDw)
- Our numbers 📞 +1 (770) 8783-106 / ‭+1 (412) 618-6238‬
 - Our emails ✉️ ishaan@berri.ai / krrish@berri.ai
--- a/docs/my-website/docs/observability/promptlayer_integration.md
+++ b/docs/my-website/docs/observability/promptlayer_integration.md
@ -84,5 +84,4 @@ Credits to [Nick Bradford](https://github.com/nsbradford), from [Vim-GPT](https:

 - [Schedule Demo 👋](https://calendly.com/d/4mp-gd3-k5k/berriai-1-1-onboarding-litellm-hosted-version)
 - [Community Discord 💭](https://discord.gg/wuPM9dRgDw)
- Our numbers 📞 +1 (770) 8783-106 / ‭+1 (412) 618-6238‬
 - Our emails ✉️ ishaan@berri.ai / krrish@berri.ai
--- a/docs/my-website/docs/observability/slack_integration.md
+++ b/docs/my-website/docs/observability/slack_integration.md
@ -101,5 +101,4 @@ response = litellm.completion(

 - [Schedule Demo 👋](https://calendly.com/d/4mp-gd3-k5k/berriai-1-1-onboarding-litellm-hosted-version)
 - [Community Discord 💭](https://discord.gg/wuPM9dRgDw)
- Our numbers 📞 +1 (770) 8783-106 / ‭+1 (412) 618-6238‬
 - Our emails ✉️ ishaan@berri.ai / krrish@berri.ai
--- a/docs/my-website/docs/observability/sumologic_integration.md
+++ b/docs/my-website/docs/observability/sumologic_integration.md
@ -328,5 +328,4 @@ If you get authentication errors, regenerate the HTTP Source URL in Sumo Logic:

 - [Schedule Demo 👋](https://calendly.com/d/4mp-gd3-k5k/berriai-1-1-onboarding-litellm-hosted-version)
 - [Community Discord 💭](https://discord.gg/wuPM9dRgDw)
- Our numbers 📞 +1 (770) 8783-106 / ‭+1 (412) 618-6238‬
 - Our emails ✉️ ishaan@berri.ai / krrish@berri.ai
--- a/docs/my-website/docs/observability/supabase_integration.md
+++ b/docs/my-website/docs/observability/supabase_integration.md
@ -105,5 +105,4 @@ litellm.modify_integration("supabase",{"table_name": "litellm_logs"})

 - [Schedule Demo 👋](https://calendly.com/d/4mp-gd3-k5k/berriai-1-1-onboarding-litellm-hosted-version)
 - [Community Discord 💭](https://discord.gg/wuPM9dRgDw)
- Our numbers 📞 +1 (770) 8783-106 / ‭+1 (412) 618-6238‬
 - Our emails ✉️ ishaan@berri.ai / krrish@berri.ai
--- a/docs/my-website/docs/observability/wandb_integration.md
+++ b/docs/my-website/docs/observability/wandb_integration.md
@ -57,5 +57,4 @@ response = litellm.completion(

 - [Schedule Demo 👋](https://calendly.com/d/4mp-gd3-k5k/berriai-1-1-onboarding-litellm-hosted-version)
 - [Community Discord 💭](https://discord.gg/wuPM9dRgDw)
- Our numbers 📞 +1 (770) 8783-106 / ‭+1 (412) 618-6238‬
 - Our emails ✉️ ishaan@berri.ai / krrish@berri.ai
--- a/docs/my-website/docs/prompt_management.md
+++ b/docs/my-website/docs/prompt_management.md
@ -0,0 +1,48 @@
+---
+title: Prompt Management with Responses API
+---
+
+# Prompt Management with Responses API
+
+Use LiteLLM Prompt Management with `/v1/responses` by passing `prompt_id` and optional `prompt_variables`.
+
+## Basic Usage
+
+```bash
+curl -X POST "http://localhost:4000/v1/responses" \
+  -H "Authorization: Bearer sk-1234" \
+  -H "Content-Type: application/json" \
+  -d '{
+    "model": "gpt-4o",
+    "prompt_id": "my-responses-prompt",
+    "prompt_variables": {"topic": "large language models"},
+    "input": []
+  }'
+```
+
+## Multi-turn Follow-up in `input`
+
+To send follow-up turns in one request, pass message history in `input`.
+
+```bash
+curl -X POST "http://localhost:4000/v1/responses" \
+  -H "Authorization: Bearer sk-1234" \
+  -H "Content-Type: application/json" \
+  -d '{
+    "model": "gpt-4o",
+    "prompt_id": "my-responses-prompt",
+    "prompt_variables": {"topic": "large language models"},
+    "input": [
+      {"role": "user", "content": "Topic is LLMs. Start short."},
+      {"role": "assistant", "content": "Sure, go ahead."},
+      {"role": "user", "content": "Now give me 3 bullets and include pricing caveat."}
+    ]
+  }'
+```
+
+## Notes
+
+- Prompt template messages are merged with your `input` messages.
+- Prompt variable substitution applies to prompt message content.
+- Prompt variables are not substituted into tool call payload fields.
+- For follow-ups with `previous_response_id`, include `prompt_id` again if you want prompt management applied on that turn.
--- a/docs/my-website/docs/providers/gemini.md
+++ b/docs/my-website/docs/providers/gemini.md
@ -11,6 +11,7 @@ import TabItem from '@theme/TabItem';
 | Provider Doc | [Google AI Studio ↗](https://aistudio.google.com/) |
 | API Endpoint for Provider | https://generativelanguage.googleapis.com |
 | Supported OpenAI Endpoints | `/chat/completions`, [`/embeddings`](../embedding/supported_embedding#gemini-ai-embedding-models), `/completions`, [`/videos`](./gemini/videos.md), [`/images/edits`](../image_edits.md) |
+| Lyria (music) | [Cost map & notes](./gemini/music.md) |
 | Pass-through Endpoint | [Supported](../pass_through/google_ai_studio.md) |

 <br />
@ -54,6 +55,7 @@ response = completion(
 - stream
 - tools
 - tool_choice
+- include_server_side_tool_invocations
 - functions
 - response_format
 - n
@ -856,7 +858,112 @@ curl -X POST 'http://0.0.0.0:4000/chat/completions' \
 </TabItem>
 </Tabs>

-### URL Context 
+### Context Circulation (Server-Side Tool Combination)
+
+Context circulation allows Gemini 3+ models to combine **built-in tools** (like Google Search) with **your custom functions** in the same request. Without it, Gemini returns an error if you try to use both.
+
+When enabled, Gemini can execute Google Search server-side, use those results to decide whether to call your custom functions, and return the full chain of reasoning.
+
+**How it works:**
+1. You pass `include_server_side_tool_invocations=True` along with both Google Search and your function tools
+2. Gemini executes server-side tools internally and returns `toolCall`/`toolResponse` parts alongside any `functionCall` parts
+3. LiteLLM extracts the server-side invocations into `provider_specific_fields["server_side_tool_invocations"]`
+4. On subsequent turns, include the full assistant message in your conversation history — LiteLLM re-injects the server-side parts automatically
+
+<Tabs>
+<TabItem value="sdk" label="SDK">
+
+```python
+from litellm import completion
+
+response = completion(
+    model="gemini/gemini-3-flash-preview",
+    messages=[{"role": "user", "content": "What's the weather in Buenos Aires? If it's raining, schedule a meeting."}],
+    tools=[
+        {"type": "web_search_preview"},  # Google Search (server-side)
+        {
+            "type": "function",
+            "function": {
+                "name": "schedule_meeting",
+                "description": "Schedule a meeting",
+                "parameters": {
+                    "type": "object",
+                    "properties": {"reason": {"type": "string"}},
+                    "required": ["reason"],
+                },
+            },
+        },
+    ],
+    include_server_side_tool_invocations=True,
+)
+
+msg = response.choices[0].message
+
+# Server-side tool results are in provider_specific_fields
+psf = msg.provider_specific_fields or {}
+for invocation in psf.get("server_side_tool_invocations", []):
+    print(invocation["tool_type"])  # e.g. "GOOGLE_SEARCH_WEB"
+    print(invocation["id"])
+    print(invocation["args"])       # e.g. {"queries": ["weather Buenos Aires"]}
+    print(invocation["response"])   # Search results from Google
+
+# For multi-turn: keep the first call's `messages` and `tools` lists in variables, then append the full message to history
+messages.append(msg)
+messages.append({"role": "user", "content": "Thanks!"})
+# LiteLLM automatically re-injects the server-side parts + thought signatures
+response2 = completion(
+    model="gemini/gemini-3-flash-preview",
+    messages=messages,
+    tools=tools,
+    include_server_side_tool_invocations=True,
+)
+```
+
+</TabItem>
+<TabItem value="proxy" label="PROXY">
+
+1. Setup config.yaml
+```yaml
+model_list:
+  - model_name: gemini-3-flash
+    litellm_params:
+      model: gemini/gemini-3-flash-preview
+      api_key: os.environ/GEMINI_API_KEY
+```
+
+2. Start Proxy
+```bash
+$ litellm --config /path/to/config.yaml
+```
+
+3. Make Request
+```bash
+curl -X POST 'http://0.0.0.0:4000/chat/completions' \
+-H 'Content-Type: application/json' \
+-H 'Authorization: Bearer sk-1234' \
+-d '{
+  "model": "gemini-3-flash",
+  "messages": [{"role": "user", "content": "What is the weather in Buenos Aires?"}],
+  "tools": [
+    {"type": "web_search_preview"},
+    {"type": "function", "function": {"name": "schedule_meeting", "description": "Schedule a meeting", "parameters": {"type": "object", "properties": {"reason": {"type": "string"}}}}}
+  ],
+  "include_server_side_tool_invocations": true
+}'
+```
+
+</TabItem>
+</Tabs>
+
+:::info
+
+- Context circulation requires **Gemini 3+** models
+- Server-side tool invocations (`toolCall`/`toolResponse`) are **not** included in `tool_calls` — they are in `provider_specific_fields["server_side_tool_invocations"]` because they were already executed by Google, not by your code
+- `thought_signatures` are automatically preserved alongside server-side invocations for multi-turn coherence
+
+:::
+
+### URL Context

 <Tabs>
 <TabItem value="sdk" label="SDK">
--- a/docs/my-website/docs/providers/gemini/music.md
+++ b/docs/my-website/docs/providers/gemini/music.md
@ -0,0 +1,28 @@
+# Gemini — Lyria (music generation)
+
+Google Lyria 3 preview models are listed in LiteLLM’s [model cost map](https://github.com/BerriAI/litellm/blob/main/model_prices_and_context_window.json) under the `gemini/` provider for metadata and spend tracking.
+
+| Property | Details |
+|----------|---------|
+| Provider route | `gemini/` |
+| Models | `gemini/lyria-3-clip-preview`, `gemini/lyria-3-pro-preview` |
+| Provider docs | [Gemini API pricing / models ↗](https://ai.google.dev/gemini-api/docs/pricing) |
+
+## Models
+
+| Model | Notes |
+|-------|--------|
+| `gemini/lyria-3-clip-preview` | ~30s clip; paid tier is priced per generated song in Google’s pricing |
+| `gemini/lyria-3-pro-preview` | Full song; paid tier is priced per generated song in Google’s pricing |
+
+Input context limit in the cost map: **131,072** tokens. For modalities, limits, and features, see [Google’s Gemini API docs ↗](https://ai.google.dev/gemini-api/docs/models).
+
+## LiteLLM behavior
+
+- **Cost map**: Per-song paid pricing is stored as `output_cost_per_image` on those entries (a flat price per generation unit). Token-based completion cost calculations may not reflect music billing until a dedicated cost-calculation path exists.
+- **API calls**: Use the Gemini API as documented by Google. LiteLLM does not ship a separate `music_generation` helper like Veo’s `video_generation`.
+
+## Auth
+
+Same as other Gemini API models: `GEMINI_API_KEY` or `GOOGLE_API_KEY`.
+
--- a/docs/my-website/docs/providers/openai.md
+++ b/docs/my-website/docs/providers/openai.md
@ -581,6 +581,90 @@ curl -X POST 'http://0.0.0.0:4000/chat/completions' \

 See [OpenAI Reasoning documentation](https://platform.openai.com/docs/guides/reasoning) for more details on organization verification requirements.

+### Multi-turn Conversations with `reasoning_items`
+
+For multi-turn conversations you need `reasoning_items`: structured blocks that include the `encrypted_content` token OpenAI uses to restore reasoning state on the next request. Pass `include=["reasoning.encrypted_content"]` on every call where you want that token returned.
+
+<Tabs>
+<TabItem value="non-streaming" label="Non-Streaming">
+
+```python showLineNumbers title="Non-streaming: round-trip reasoning_items"
+import litellm
+
+messages = [{"role": "user", "content": "Solve this step by step: 2 + 2"}]
+
+# Turn 1 — get reasoning_items (with encrypted_content)
+response = litellm.completion(
+    model="openai/responses/gpt-5-mini",
+    messages=messages,
+    reasoning_effort="low",
+    include=["reasoning.encrypted_content"],
+)
+
+assistant_msg = response.choices[0].message
+
+# Turn 2 — pass reasoning_items back; LiteLLM converts to the correct Responses API format
+messages.append({
+    "role": "assistant",
+    "content": assistant_msg.content,
+    "reasoning_items": assistant_msg.reasoning_items,
+})
+messages.append({"role": "user", "content": "Now summarize your reasoning."})
+
+response2 = litellm.completion(
+    model="openai/responses/gpt-5-mini",
+    messages=messages,
+    reasoning_effort="low",
+    include=["reasoning.encrypted_content"],
+)
+```
+
+</TabItem>
+<TabItem value="streaming" label="Streaming">
+
+`reasoning_items` (with `encrypted_content`) arrive on the final chunk when the full response completes:
+
+```python showLineNumbers title="Streaming: collect and round-trip reasoning_items"
+import litellm
+
+messages = [{"role": "user", "content": "Solve this step by step: 2 + 2"}]
+
+collected_content = []
+collected_reasoning_items = []
+
+stream = litellm.completion(
+    model="openai/responses/gpt-5-mini",
+    messages=messages,
+    stream=True,
+    reasoning_effort="low",
+    include=["reasoning.encrypted_content"],
+)
+
+for chunk in stream:
+    delta = chunk.choices[0].delta
+    if delta.content:
+        collected_content.append(delta.content)
+    if getattr(delta, "reasoning_items", None):
+        collected_reasoning_items.extend(delta.reasoning_items)
+
+messages.append({
+    "role": "assistant",
+    "content": "".join(collected_content),
+    "reasoning_items": collected_reasoning_items or None,
+})
+messages.append({"role": "user", "content": "Continue the conversation."})
+
+response2 = litellm.completion(
+    model="openai/responses/gpt-5-mini",
+    messages=messages,
+    reasoning_effort="low",
+    include=["reasoning.encrypted_content"],
+)
+```
+
+</TabItem>
+</Tabs>
+
 ### Verbosity Control for GPT-5 Models

 The `verbosity` parameter controls the length and detail of responses from GPT-5 family models. It accepts three values: `"low"`, `"medium"`, or `"high"`.
--- a/Show More
+++ b/Show More