Fix: tag budget reset must drop stale management-cache entry (#27568)

Squash-merged by litellm-agent from oss-agent-shin's PR.
2026-05-09 17:18:55 -07:00 · 2026-05-09 17:18:55 -07:00 · 9f68d2bb77
commit 9f68d2bb77
parent c7739c9ed5
44 changed files with 727 additions and 3602 deletions
--- a/AGENTS.md
+++ b/AGENTS.md
@ -241,10 +241,27 @@ When opening issues or pull requests, follow these templates:

 ### Running the proxy server

-Start the proxy with a config file:
+Create a minimal config file and start the proxy:
+
+```yaml
+# config.yaml
+model_list:
+  - model_name: fake-openai-endpoint
+    litellm_params:
+      model: openai/fake-model
+      api_key: fake-key
+      api_base: https://fake-api.example.com
+
+general_settings:
+  master_key: sk-1234
+
+litellm_settings:
+  drop_params: True
+  telemetry: False
+```

 ```bash
-uv run litellm --config dev_config.yaml --port 4000
+uv run litellm --config config.yaml --port 4000
 ```

 The proxy takes ~15-20 seconds to fully start (it runs Prisma migrations on boot). Wait for `/health` to return before sending requests. Without a PostgreSQL `DATABASE_URL`, the proxy connects to a default Neon dev database embedded in the `litellm-proxy-extras` package.
--- a/CLAUDE.md
+++ b/CLAUDE.md
@ -146,7 +146,7 @@ LiteLLM is a unified interface for 100+ LLM providers with two main components:
 - **Bound large result sets.** Prisma materializes full results in memory. For results over ~10 MB, paginate with `take`/`skip` or `cursor`/`take`, always with an explicit `order`. Prefer cursor-based pagination (`skip` is O(n)). Don't paginate naturally small result sets.
 - **Limit fetched columns on wide tables.** Use `select` to fetch only needed fields — returns a partial object, so downstream code must not access unselected fields.
 - **Check index coverage.** For new or modified queries, check `schema.prisma` for a supporting index. Prefer extending an existing index (e.g. `@@index([a])` → `@@index([a, b])`) over adding a new one, unless it's a `@@unique`. Only add indexes for large/frequent queries.
- **Keep schema files in sync.** Apply schema changes to all `schema.prisma` copies (`schema.prisma`, `litellm/proxy/`, `litellm-proxy-extras/`, `litellm-js/spend-logs/` for SpendLogs) with a migration under `litellm-proxy-extras/litellm_proxy_extras/migrations/`.
+- **Keep schema files in sync.** Apply schema changes to all `schema.prisma` copies (`schema.prisma`, `litellm/proxy/`, `litellm-proxy-extras/`) with a migration under `litellm-proxy-extras/litellm_proxy_extras/migrations/`.

 ### Setup Wizard (`litellm/setup_wizard.py`)
 - The wizard is implemented as a single `SetupWizard` class with `@staticmethod` methods — keep it that way. No module-level functions except `run_setup_wizard()` (the public entrypoint) and pure helpers (color, ANSI).
--- a/deploy/Dockerfile.ghcr_base
+++ b/deploy/Dockerfile.ghcr_base
@ -1,18 +0,0 @@
-# Use the provided base image
-FROM ghcr.io/berriai/litellm:main-latest@sha256:7c311546c25e7bb6e8cafede9fcd3d0d622ac636b5c9418befaa32e85dfb0186
-
-# Set the working directory to /app
-WORKDIR /app
-
-# Copy the configuration file into the container at /app
-COPY config.yaml .
-
-# Make sure your docker/entrypoint.sh is executable
-# Convert Windows line endings to Unix
-RUN sed -i 's/\r$//' docker/entrypoint.sh && chmod +x docker/entrypoint.sh
-
-# Expose the necessary port
-EXPOSE 4000/tcp
-
-# Override the CMD instruction with your desired command and arguments
-CMD ["--port", "4000", "--config", "config.yaml", "--detailed_debug", "--run_gunicorn"]
--- a/deploy/kubernetes/kub.yaml
+++ b/deploy/kubernetes/kub.yaml
@ -1,56 +0,0 @@
-apiVersion: apps/v1
-kind: Deployment
-metadata:
-  name: litellm-deployment
-spec:
-  replicas: 3
-  selector:
-    matchLabels:
-      app: litellm
-  template:
-    metadata:
-      labels:
-        app: litellm
-    spec:
-      containers:
-        - name: litellm-container
-          image: ghcr.io/berriai/litellm:main-latest
-          imagePullPolicy: Always
-          env:
-            - name: AZURE_API_KEY
-              value: "d6f****"
-            - name: AZURE_API_BASE
-              value: "https://openai"
-            - name: LITELLM_MASTER_KEY
-              value: "sk-1234"
-            - name: DATABASE_URL
-              value: "postgresql://ishaan*********"
-          args:
-            - "--config"
-            - "/app/proxy_config.yaml"  # Update the path to mount the config file
-          volumeMounts:                 # Define volume mount for proxy_config.yaml
-            - name: config-volume
-              mountPath: /app
-              readOnly: true
-          livenessProbe:
-            httpGet:
-              path: /health/liveliness
-              port: 4000
-            initialDelaySeconds: 120
-            periodSeconds: 15
-            successThreshold: 1
-            failureThreshold: 3
-            timeoutSeconds: 10
-          readinessProbe:
-            httpGet:
-              path: /health/readiness
-              port: 4000
-            initialDelaySeconds: 120
-            periodSeconds: 15
-            successThreshold: 1
-            failureThreshold: 3
-            timeoutSeconds: 10
-      volumes:  # Define volume to mount proxy_config.yaml
-        - name: config-volume
-          configMap:
-            name: litellm-config  
--- a/deploy/kubernetes/service.yaml
+++ b/deploy/kubernetes/service.yaml
@ -1,12 +0,0 @@
-apiVersion: v1
-kind: Service
-metadata:
-  name: litellm-service
-spec:
-  selector:
-    app: litellm
-  ports:
-    - protocol: TCP
-      port: 4000
-      targetPort: 4000
-  type: LoadBalancer
--- a/dev_config.yaml
+++ b/dev_config.yaml
@ -1,13 +0,0 @@
-model_list:
-  - model_name: fake-openai-endpoint
-    litellm_params:
-      model: openai/fake-model
-      api_key: fake-key
-      api_base: https://exampleopenaiendpoint-production.up.railway.app/
-
-general_settings:
-  master_key: sk-1234
-
-litellm_settings:
-  drop_params: True
-  telemetry: False
--- a/docker/Dockerfile.alpine
+++ b/docker/Dockerfile.alpine
@ -1,68 +0,0 @@
-# Base image for building
-ARG LITELLM_BUILD_IMAGE=python:3.11-alpine@sha256:f07e2ace46f560f09a6eeec7b4913b80ee99546e749ef82342a419a326620856
-
-# Runtime image
-ARG LITELLM_RUNTIME_IMAGE=python:3.11-alpine@sha256:f07e2ace46f560f09a6eeec7b4913b80ee99546e749ef82342a419a326620856
-ARG UV_IMAGE=ghcr.io/astral-sh/uv:0.11.7@sha256:240fb85ab0f263ef12f492d8476aa3a2e4e1e333f7d67fbdd923d00a506a516a
-
-FROM $UV_IMAGE AS uvbin
-
-FROM $LITELLM_BUILD_IMAGE AS builder
-
-WORKDIR /app
-
-COPY --from=uvbin /uv /usr/local/bin/uv
-COPY --from=uvbin /uvx /usr/local/bin/uvx
-
-RUN apk add --no-cache gcc python3-dev musl-dev nodejs npm libsndfile
-
-ENV PRISMA_BINARY_CACHE_DIR=/app/.cache/prisma-python/binaries \
-    UV_PROJECT_ENVIRONMENT=/app/.venv \
-    UV_LINK_MODE=copy \
-    XDG_CACHE_HOME=/app/.cache \
-    PATH="/app/.venv/bin:${PATH}"
-
-# Copy dependency metadata first for layer caching
-COPY pyproject.toml uv.lock ./
-COPY enterprise/pyproject.toml enterprise/
-COPY litellm-proxy-extras/pyproject.toml litellm-proxy-extras/
-
-# Install third-party dependencies (cached unless pyproject.toml/uv.lock change)
-RUN uv sync --frozen --no-install-project --no-install-workspace --no-default-groups --no-editable \
-    --extra proxy \
-    --extra proxy-runtime \
-    --extra extra_proxy \
-    --extra semantic-router \
-    --python python3
-
-# Copy full source tree
-COPY . .
-
-# Install project and workspace packages (fast - deps already cached)
-RUN uv sync --frozen --no-default-groups --no-editable \
-    --extra proxy \
-    --extra proxy-runtime \
-    --extra extra_proxy \
-    --extra semantic-router \
-    --python python3
-
-RUN prisma generate --schema=./schema.prisma
-
-RUN sed -i 's/\r$//' docker/entrypoint.sh && chmod +x docker/entrypoint.sh && \
-    sed -i 's/\r$//' docker/prod_entrypoint.sh && chmod +x docker/prod_entrypoint.sh
-
-FROM $LITELLM_RUNTIME_IMAGE AS runtime
-
-RUN apk upgrade --no-cache && apk add --no-cache libsndfile nodejs npm
-
-WORKDIR /app
-ENV PRISMA_BINARY_CACHE_DIR=/app/.cache/prisma-python/binaries \
-    XDG_CACHE_HOME=/app/.cache \
-    PATH="/app/.venv/bin:${PATH}"
-
-COPY --from=builder /app /app
-
-EXPOSE 4000/tcp
-
-ENTRYPOINT ["docker/prod_entrypoint.sh"]
-CMD ["--port", "4000"]
--- a/docker/Dockerfile.custom_ui
+++ b/docker/Dockerfile.custom_ui
@ -1,86 +0,0 @@
-# Use the provided base image
-# NOTE: This is a dev/branch-specific tag. Update digest when the base image is rebuilt.
-FROM ghcr.io/berriai/litellm:litellm_fwd_server_root_path-dev
-
-# Set the working directory to /app
-WORKDIR /app
-
-# Install Node.js and npm (adjust version as needed)
-RUN apt-get update && apt-get upgrade -y \
-        libxml2 \
-        libexpat1 \
-        openssl \
-        libssl3 \
-        git \
-        libkrb5-3 \
-        libglib2.0-0 \
-        wget \
-        libaom3 \
-        libxslt1.1 \
-        libgnutls30 \
-        libc6 && \
-    apt-get install -y --no-install-recommends nodejs npm && \
-    npm install -g npm@11.12.1 tar@7.5.11 glob@11.1.0 @isaacs/brace-expansion@5.0.1 minimatch@10.2.4 diff@8.0.3 && \
-    GLOBAL="$(npm root -g)" && \
-    find "$GLOBAL/npm" -type d -name "tar" -path "*/node_modules/tar" | while read d; do \
-        rm -rf "$d" && cp -rL "$GLOBAL/tar" "$d"; \
-    done && \
-    find "$GLOBAL/npm" -type d -name "glob" -path "*/node_modules/glob" | while read d; do \
-        rm -rf "$d" && cp -rL "$GLOBAL/glob" "$d"; \
-    done && \
-    find "$GLOBAL/npm" -type d -name "brace-expansion" -path "*/node_modules/@isaacs/brace-expansion" | while read d; do \
-        rm -rf "$d" && cp -rL "$GLOBAL/@isaacs/brace-expansion" "$d"; \
-    done && \
-    find "$GLOBAL/npm" -type d -name "minimatch" -path "*/node_modules/minimatch" | while read d; do \
-        rm -rf "$d" && cp -rL "$GLOBAL/minimatch" "$d"; \
-    done && \
-    find "$GLOBAL/npm" -type d -name "diff" -path "*/node_modules/diff" | while read d; do \
-        rm -rf "$d" && cp -rL "$GLOBAL/diff" "$d"; \
-    done && \
-    find /usr/local/lib /usr/lib -path "*/node_modules/npm/package.json" -exec \
-        sed -i 's/"tar": "\^7\.5\.[0-9]*"/"tar": "^7.5.10"/g; s/"minimatch": "\^10\.[0-9.]*"/"minimatch": "^10.2.4"/g' {} + 2>/dev/null && \
-    npm cache clean --force && \
-    apt-get purge -y npm
-
-# Copy the UI source into the container
-COPY ./ui/litellm-dashboard /app/ui/litellm-dashboard
-
-# Set an environment variable for UI_BASE_PATH
-# This can be overridden at build time
-# set UI_BASE_PATH to "<your server root path>/ui"
-ENV UI_BASE_PATH="/prod/ui"
-
-# Build the UI with the specified UI_BASE_PATH
-WORKDIR /app/ui/litellm-dashboard
-RUN npm ci
-RUN UI_BASE_PATH=$UI_BASE_PATH npm run build
-
-# Create the destination directory
-RUN mkdir -p /app/litellm/proxy/_experimental/out
-
-# Move the built files to the appropriate location
-# Assuming the build output is in ./out directory
-RUN rm -rf /app/litellm/proxy/_experimental/out/* && \
-    mv ./out/* /app/litellm/proxy/_experimental/out/
-
-# Switch back to the main app directory
-WORKDIR /app
-
-# Make sure your docker/entrypoint.sh is executable
-# Convert Windows line endings to Unix for entrypoint scripts
-RUN sed -i 's/\r$//' docker/entrypoint.sh && chmod +x docker/entrypoint.sh
-RUN sed -i 's/\r$//' docker/prod_entrypoint.sh && chmod +x docker/prod_entrypoint.sh
-
-# Run as non-root user
-RUN groupadd --gid 1000 appuser && useradd --uid 1000 --gid 1000 --no-create-home appuser \
-    && chown -R appuser:appuser /app
-USER appuser
-
-# Expose the necessary port
-EXPOSE 4000/tcp
-
-HEALTHCHECK --interval=30s --timeout=5s --start-period=10s --retries=3 \
-  CMD ["python", "-c", "import urllib.request; urllib.request.urlopen('http://localhost:4000/health')"]
-
-# Override the CMD instruction with your desired command and arguments
-CMD ["--port", "4000", "--config", "config.yaml", "--detailed_debug"]
--- a/docker/Dockerfile.dev
+++ b/docker/Dockerfile.dev
@ -1,121 +0,0 @@
-# Base image for building
-ARG LITELLM_BUILD_IMAGE=python:3.13-slim@sha256:739e7213785e88c0f702dcdc12c0973afcbd606dbf021a589cab77d6b00b579d
-
-# Runtime image
-ARG LITELLM_RUNTIME_IMAGE=python:3.13-slim@sha256:739e7213785e88c0f702dcdc12c0973afcbd606dbf021a589cab77d6b00b579d
-ARG UV_IMAGE=ghcr.io/astral-sh/uv:0.11.7@sha256:240fb85ab0f263ef12f492d8476aa3a2e4e1e333f7d67fbdd923d00a506a516a
-
-FROM $UV_IMAGE AS uvbin
-
-FROM $LITELLM_BUILD_IMAGE AS builder
-
-WORKDIR /app
-USER root
-
-COPY --from=uvbin /uv /usr/local/bin/uv
-COPY --from=uvbin /uvx /usr/local/bin/uvx
-
-RUN apt-get update && apt-get install -y --no-install-recommends \
-    gcc \
-    g++ \
-    python3-dev \
-    libssl-dev \
-    pkg-config \
-    nodejs \
-    npm \
-    && rm -rf /var/lib/apt/lists/*
-
-ENV PRISMA_BINARY_CACHE_DIR=/app/.cache/prisma-python/binaries \
-    UV_PROJECT_ENVIRONMENT=/app/.venv \
-    UV_LINK_MODE=copy \
-    XDG_CACHE_HOME=/app/.cache \
-    PATH="/app/.venv/bin:${PATH}"
-
-# Copy dependency metadata first for layer caching
-COPY pyproject.toml uv.lock ./
-COPY enterprise/pyproject.toml enterprise/
-COPY litellm-proxy-extras/pyproject.toml litellm-proxy-extras/
-
-# Install third-party dependencies (cached unless pyproject.toml/uv.lock change)
-RUN uv sync --frozen --no-install-project --no-install-workspace --no-default-groups --no-editable \
-    --extra proxy \
-    --extra proxy-runtime \
-    --extra extra_proxy \
-    --extra semantic-router \
-    --python python
-
-# Copy full source tree
-COPY . .
-
-# Build Admin UI before final sync
-RUN sed -i 's/\r$//' docker/build_admin_ui.sh && chmod +x docker/build_admin_ui.sh && ./docker/build_admin_ui.sh
-
-# Install project and workspace packages (fast - deps already cached)
-RUN uv sync --frozen --no-default-groups --no-editable \
-    --extra proxy \
-    --extra proxy-runtime \
-    --extra extra_proxy \
-    --extra semantic-router \
-    --python python
-
-RUN prisma generate --schema=./schema.prisma
-
-RUN sed -i 's/\r$//' docker/entrypoint.sh && chmod +x docker/entrypoint.sh && \
-    sed -i 's/\r$//' docker/prod_entrypoint.sh && chmod +x docker/prod_entrypoint.sh
-
-FROM $LITELLM_RUNTIME_IMAGE AS runtime
-
-USER root
-
-RUN apt-get update && apt-get upgrade -y \
-        libxml2 \
-        libexpat1 \
-        openssl \
-        libssl3 \
-        git \
-        libkrb5-3 \
-        libglib2.0-0 \
-        wget \
-        libaom3 \
-        libxslt1.1 \
-        libgnutls30 \
-        libc6 \
-    && apt-get install -y --no-install-recommends \
-        libssl3 \
-        libatomic1 \
-        nodejs \
-        npm \
-    && rm -rf /var/lib/apt/lists/* \
-    && npm install -g npm@11.12.1 tar@7.5.11 glob@11.1.0 @isaacs/brace-expansion@5.0.1 minimatch@10.2.4 diff@8.0.3 \
-    && GLOBAL="$(npm root -g)" \
-    && find "$GLOBAL/npm" -type d -name "tar" -path "*/node_modules/tar" | while read d; do \
-          rm -rf "$d" && cp -rL "$GLOBAL/tar" "$d"; \
-       done \
-    && find "$GLOBAL/npm" -type d -name "glob" -path "*/node_modules/glob" | while read d; do \
-          rm -rf "$d" && cp -rL "$GLOBAL/glob" "$d"; \
-       done \
-    && find "$GLOBAL/npm" -type d -name "brace-expansion" -path "*/node_modules/@isaacs/brace-expansion" | while read d; do \
-          rm -rf "$d" && cp -rL "$GLOBAL/@isaacs/brace-expansion" "$d"; \
-       done \
-    && find "$GLOBAL/npm" -type d -name "minimatch" -path "*/node_modules/minimatch" | while read d; do \
-          rm -rf "$d" && cp -rL "$GLOBAL/minimatch" "$d"; \
-       done \
-    && find "$GLOBAL/npm" -type d -name "diff" -path "*/node_modules/diff" | while read d; do \
-          rm -rf "$d" && cp -rL "$GLOBAL/diff" "$d"; \
-       done \
-    && find /usr/local/lib /usr/lib -path "*/node_modules/npm/package.json" -exec \
-        sed -i 's/"tar": "\^7\.5\.[0-9]*"/"tar": "^7.5.10"/g; s/"minimatch": "\^10\.[0-9.]*"/"minimatch": "^10.2.4"/g' {} + 2>/dev/null \
-    && npm cache clean --force \
-    && apt-get purge -y npm
-
-WORKDIR /app
-ENV PRISMA_BINARY_CACHE_DIR=/app/.cache/prisma-python/binaries \
-    XDG_CACHE_HOME=/app/.cache \
-    PATH="/app/.venv/bin:${PATH}"
-
-COPY --from=builder /app /app
-
-EXPOSE 4000/tcp
-
-ENTRYPOINT ["docker/prod_entrypoint.sh"]
-CMD ["--port", "4000"]
--- a/docker/Dockerfile.health_check
+++ b/docker/Dockerfile.health_check
@ -1,30 +0,0 @@
-ARG UV_IMAGE=ghcr.io/astral-sh/uv:0.11.7@sha256:240fb85ab0f263ef12f492d8476aa3a2e4e1e333f7d67fbdd923d00a506a516a
-FROM $UV_IMAGE AS uvbin
-
-FROM python:3.13-slim@sha256:739e7213785e88c0f702dcdc12c0973afcbd606dbf021a589cab77d6b00b579d
-
-WORKDIR /app
-
-# Copy the uv binary and the health check script.
-COPY --from=uvbin /uv /usr/local/bin/uv
-COPY pyproject.toml uv.lock /app/
-COPY scripts/health_check/health_check_client.py /app/health_check_client.py
-
-# Resolve and install the health-check dependencies from the project lockfile
-# so the runtime image stays self-contained and reproducible.
-RUN uv export --frozen --no-default-groups --only-group healthcheck --no-emit-project --no-hashes --output-file /tmp/health-check-requirements.txt \
-  && uv pip install --system -r /tmp/health-check-requirements.txt \
-  && rm /tmp/health-check-requirements.txt \
-  && rm /app/pyproject.toml /app/uv.lock \
-  && chmod +x /app/health_check_client.py
-
-# Run as non-root user
-RUN groupadd --gid 1000 appuser && useradd --uid 1000 --gid 1000 --no-create-home appuser
-USER appuser
-
-# Health check
-HEALTHCHECK --interval=30s --timeout=5s --start-period=5s --retries=3 \
-  CMD ["python", "/app/health_check_client.py", "--help"]
-
-# Set entrypoint
-ENTRYPOINT ["python", "/app/health_check_client.py"]
--- a/index.yaml
+++ b/index.yaml
@ -1,108 +0,0 @@
-apiVersion: v1
-entries:
-  litellm-helm:
-  - apiVersion: v2
-    appVersion: v1.43.18
-    created: "2024-08-19T23:58:25.331689+08:00"
-    dependencies:
-    - condition: db.deployStandalone
-      name: postgresql
-      repository: oci://registry-1.docker.io/bitnamicharts
-      version: '>=13.3.0'
-    - condition: redis.enabled
-      name: redis
-      repository: oci://registry-1.docker.io/bitnamicharts
-      version: '>=18.0.0'
-    description: Call all LLM APIs using the OpenAI format
-    digest: 0411df3dc42868be8af3ad3e00cb252790e6bd7ad15f5b77f1ca5214573a8531
-    name: litellm-helm
-    type: application
-    urls:
-    - https://berriai.github.io/litellm/litellm-helm-0.2.3.tgz
-    version: 0.2.3
-  postgresql:
-  - annotations:
-      category: Database
-      images: |
-        - name: os-shell
-          image: docker.io/bitnami/os-shell:12-debian-12-r16
-        - name: postgres-exporter
-          image: docker.io/bitnami/postgres-exporter:0.15.0-debian-12-r14
-        - name: postgresql
-          image: docker.io/bitnami/postgresql:16.2.0-debian-12-r6
-      licenses: Apache-2.0
-    apiVersion: v2
-    appVersion: 16.2.0
-    created: "2024-08-19T23:58:25.335716+08:00"
-    dependencies:
-    - name: common
-      repository: oci://registry-1.docker.io/bitnamicharts
-      tags:
-      - bitnami-common
-      version: 2.x.x
-    description: PostgreSQL (Postgres) is an open source object-relational database
-      known for reliability and data integrity. ACID-compliant, it supports foreign
-      keys, joins, views, triggers and stored procedures.
-    digest: 3c8125526b06833df32e2f626db34aeaedb29d38f03d15349db6604027d4a167
-    home: https://bitnami.com
-    icon: https://bitnami.com/assets/stacks/postgresql/img/postgresql-stack-220x234.png
-    keywords:
-    - postgresql
-    - postgres
-    - database
-    - sql
-    - replication
-    - cluster
-    maintainers:
-    - name: VMware, Inc.
-      url: https://github.com/bitnami/charts
-    name: postgresql
-    sources:
-    - https://github.com/bitnami/charts/tree/main/bitnami/postgresql
-    urls:
-    - https://berriai.github.io/litellm/charts/postgresql-14.3.1.tgz
-    version: 14.3.1
-  redis:
-  - annotations:
-      category: Database
-      images: |
-        - name: kubectl
-          image: docker.io/bitnami/kubectl:1.29.2-debian-12-r3
-        - name: os-shell
-          image: docker.io/bitnami/os-shell:12-debian-12-r16
-        - name: redis
-          image: docker.io/bitnami/redis:7.2.4-debian-12-r9
-        - name: redis-exporter
-          image: docker.io/bitnami/redis-exporter:1.58.0-debian-12-r4
-        - name: redis-sentinel
-          image: docker.io/bitnami/redis-sentinel:7.2.4-debian-12-r7
-      licenses: Apache-2.0
-    apiVersion: v2
-    appVersion: 7.2.4
-    created: "2024-08-19T23:58:25.339392+08:00"
-    dependencies:
-    - name: common
-      repository: oci://registry-1.docker.io/bitnamicharts
-      tags:
-      - bitnami-common
-      version: 2.x.x
-    description: Redis(R) is an open source, advanced key-value store. It is often
-      referred to as a data structure server since keys can contain strings, hashes,
-      lists, sets and sorted sets.
-    digest: b2fa1835f673a18002ca864c54fadac3c33789b26f6c5e58e2851b0b14a8f984
-    home: https://bitnami.com
-    icon: https://bitnami.com/assets/stacks/redis/img/redis-stack-220x234.png
-    keywords:
-    - redis
-    - keyvalue
-    - database
-    maintainers:
-    - name: VMware, Inc.
-      url: https://github.com/bitnami/charts
-    name: redis
-    sources:
-    - https://github.com/bitnami/charts/tree/main/bitnami/redis
-    urls:
-    - https://berriai.github.io/litellm/charts/redis-18.19.1.tgz
-    version: 18.19.1
-generated: "2024-08-19T23:58:25.322532+08:00"
--- a/litellm-js/proxy/.npmrc
+++ b/litellm-js/proxy/.npmrc
@ -1,5 +0,0 @@
-# Supply-chain hardening
-# Packages needing lifecycle scripts: npm rebuild <pkg>
-ignore-scripts=true
-# Protects local npm install only — npm ci (used in CI) ignores this
-min-release-age=3
--- a/litellm-js/proxy/README.md
+++ b/litellm-js/proxy/README.md
@ -1,8 +0,0 @@
-```
-npm install
-npm run dev
-```
-
-```
-npm run deploy
-```
--- a/litellm-js/proxy/package-lock.json
+++ b/litellm-js/proxy/package-lock.json
--- a/litellm-js/proxy/package.json
+++ b/litellm-js/proxy/package.json
@ -1,14 +0,0 @@
-{
-  "scripts": {
-    "dev": "wrangler dev src/index.ts",
-    "deploy": "wrangler deploy --minify src/index.ts"
-  },
-  "dependencies": {
-    "hono": "4.12.16",
-    "openai": "4.29.2"
-  },
-  "devDependencies": {
-    "@cloudflare/workers-types": "4.20260501.1",
-    "wrangler": "4.87.0"
-  }
-}
--- a/litellm-js/proxy/src/index.ts
+++ b/litellm-js/proxy/src/index.ts
@ -1,59 +0,0 @@
-import { Hono } from 'hono'
-import { Context } from 'hono';
-import { bearerAuth } from 'hono/bearer-auth'
-import OpenAI from "openai";
-
-const openai = new OpenAI({
-  apiKey: "sk-1234",
-  baseURL: "https://openai-endpoint.ishaanjaffer0324.workers.dev"
-});
-
-async function call_proxy() {
-  const completion = await openai.chat.completions.create({
-    messages: [{ role: "system", content: "You are a helpful assistant." }],
-    model: "gpt-3.5-turbo",
-  });
-
-  return completion
-}
-
-const app = new Hono()
-
-// Middleware for API Key Authentication
-const apiKeyAuth = async (c: Context, next: Function) => {
-  const apiKey = c.req.header('Authorization');
-  if (!apiKey || apiKey !== 'Bearer sk-1234') {
-    return c.text('Unauthorized', 401);
-  }
-  await next();
-};
-
-
-app.use('/*', apiKeyAuth)
-
-
-app.get('/', (c) => {
-  return c.text('Hello Hono!')
-})
-
-
-
-
-// Handler for chat completions
-const chatCompletionHandler = async (c: Context) => {
-  // Assuming your logic for handling chat completion goes here
-  // For demonstration, just returning a simple JSON response
-  const response = await call_proxy()
-  return c.json(response);
-};
-
-// Register the above handler for different POST routes with the apiKeyAuth middleware
-app.post('/v1/chat/completions', chatCompletionHandler);
-app.post('/chat/completions', chatCompletionHandler);
-
-// Example showing how you might handle dynamic segments within the URL
-// Here, using ':model*' to capture the rest of the path as a parameter 'model'
-app.post('/openai/deployments/:model*/chat/completions', chatCompletionHandler);
-
-
-export default app
--- a/litellm-js/proxy/tsconfig.json
+++ b/litellm-js/proxy/tsconfig.json
@ -1,17 +0,0 @@
-{
-  "compilerOptions": {
-    "target": "ESNext",
-    "module": "ESNext",
-    "moduleResolution": "Bundler",
-    "strict": true,
-    "lib": [
-      "ESNext"
-    ],
-    "types": [
-      "@cloudflare/workers-types"
-    ],
-    "jsx": "react-jsx",
-    "jsxImportSource": "hono/jsx",
-    "skipLibCheck": true
-  },
-}
--- a/litellm-js/proxy/wrangler.toml
+++ b/litellm-js/proxy/wrangler.toml
@ -1,18 +0,0 @@
-name = "my-app"
-compatibility_date = "2023-12-01"
-
-# [vars]
-# MY_VAR = "my-variable"
-
-# [[kv_namespaces]]
-# binding = "MY_KV_NAMESPACE"
-# id = "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"
-
-# [[r2_buckets]]
-# binding = "MY_BUCKET"
-# bucket_name = "my-bucket"
-
-# [[d1_databases]]
-# binding = "DB"
-# database_name = "my-database"
-# database_id = ""
--- a/litellm-js/spend-logs/.npmrc
+++ b/litellm-js/spend-logs/.npmrc
@ -1,5 +0,0 @@
-# Supply-chain hardening
-# Packages needing lifecycle scripts: npm rebuild <pkg>
-ignore-scripts=true
-# Protects local npm install only — npm ci (used in CI) ignores this
-min-release-age=3
--- a/litellm-js/spend-logs/Dockerfile
+++ b/litellm-js/spend-logs/Dockerfile
@ -1,26 +0,0 @@
-# Use the specific Node.js v20.11.0 image
-FROM node:20.18.1-alpine3.20
-
-# Set the working directory inside the container
-WORKDIR /app
-
-# Copy package.json and package-lock.json to the working directory
-COPY ./litellm-js/spend-logs/package*.json ./
-
-# Install dependencies
-RUN npm ci
-
-# Install Prisma globally
-RUN npm install -g prisma
-
-# Copy the rest of the application code
-COPY ./litellm-js/spend-logs .
-
-# Generate Prisma client
-RUN npx prisma generate
-
-# Expose the port that the Node.js server will run on
-EXPOSE 3000
-
-# Command to run the Node.js app with npm run dev
-CMD ["npm", "run", "dev"]
--- a/litellm-js/spend-logs/README.md
+++ b/litellm-js/spend-logs/README.md
@ -1,8 +0,0 @@
-```
-npm install
-npm run dev
-```
-
-```
-open http://localhost:3000
-```
--- a/litellm-js/spend-logs/package-lock.json
+++ b/litellm-js/spend-logs/package-lock.json
@ -1,597 +0,0 @@
-{
-  "name": "spend-logs",
-  "lockfileVersion": 3,
-  "requires": true,
-  "packages": {
-    "": {
-      "dependencies": {
-        "@hono/node-server": "1.19.13",
-        "hono": "4.12.16"
-      },
-      "devDependencies": {
-        "@types/node": "20.19.25",
-        "tsx": "4.20.6"
-      }
-    },
-    "node_modules/@esbuild/aix-ppc64": {
-      "version": "0.25.12",
-      "resolved": "https://registry.npmjs.org/@esbuild/aix-ppc64/-/aix-ppc64-0.25.12.tgz",
-      "integrity": "sha512-Hhmwd6CInZ3dwpuGTF8fJG6yoWmsToE+vYgD4nytZVxcu1ulHpUQRAB1UJ8+N1Am3Mz4+xOByoQoSZf4D+CpkA==",
-      "cpu": [
-        "ppc64"
-      ],
-      "dev": true,
-      "license": "MIT",
-      "optional": true,
-      "os": [
-        "aix"
-      ],
-      "engines": {
-        "node": ">=18"
-      }
-    },
-    "node_modules/@esbuild/android-arm": {
-      "version": "0.25.12",
-      "resolved": "https://registry.npmjs.org/@esbuild/android-arm/-/android-arm-0.25.12.tgz",
-      "integrity": "sha512-VJ+sKvNA/GE7Ccacc9Cha7bpS8nyzVv0jdVgwNDaR4gDMC/2TTRc33Ip8qrNYUcpkOHUT5OZ0bUcNNVZQ9RLlg==",
-      "cpu": [
-        "arm"
-      ],
-      "dev": true,
-      "license": "MIT",
-      "optional": true,
-      "os": [
-        "android"
-      ],
-      "engines": {
-        "node": ">=18"
-      }
-    },
-    "node_modules/@esbuild/android-arm64": {
-      "version": "0.25.12",
-      "resolved": "https://registry.npmjs.org/@esbuild/android-arm64/-/android-arm64-0.25.12.tgz",
-      "integrity": "sha512-6AAmLG7zwD1Z159jCKPvAxZd4y/VTO0VkprYy+3N2FtJ8+BQWFXU+OxARIwA46c5tdD9SsKGZ/1ocqBS/gAKHg==",
-      "cpu": [
-        "arm64"
-      ],
-      "dev": true,
-      "license": "MIT",
-      "optional": true,
-      "os": [
-        "android"
-      ],
-      "engines": {
-        "node": ">=18"
-      }
-    },
-    "node_modules/@esbuild/android-x64": {
-      "version": "0.25.12",
-      "resolved": "https://registry.npmjs.org/@esbuild/android-x64/-/android-x64-0.25.12.tgz",
-      "integrity": "sha512-5jbb+2hhDHx5phYR2By8GTWEzn6I9UqR11Kwf22iKbNpYrsmRB18aX/9ivc5cabcUiAT/wM+YIZ6SG9QO6a8kg==",
-      "cpu": [
-        "x64"
-      ],
-      "dev": true,
-      "license": "MIT",
-      "optional": true,
-      "os": [
-        "android"
-      ],
-      "engines": {
-        "node": ">=18"
-      }
-    },
-    "node_modules/@esbuild/darwin-arm64": {
-      "version": "0.25.12",
-      "resolved": "https://registry.npmjs.org/@esbuild/darwin-arm64/-/darwin-arm64-0.25.12.tgz",
-      "integrity": "sha512-N3zl+lxHCifgIlcMUP5016ESkeQjLj/959RxxNYIthIg+CQHInujFuXeWbWMgnTo4cp5XVHqFPmpyu9J65C1Yg==",
-      "cpu": [
-        "arm64"
-      ],
-      "dev": true,
-      "license": "MIT",
-      "optional": true,
-      "os": [
-        "darwin"
-      ],
-      "engines": {
-        "node": ">=18"
-      }
-    },
-    "node_modules/@esbuild/darwin-x64": {
-      "version": "0.25.12",
-      "resolved": "https://registry.npmjs.org/@esbuild/darwin-x64/-/darwin-x64-0.25.12.tgz",
-      "integrity": "sha512-HQ9ka4Kx21qHXwtlTUVbKJOAnmG1ipXhdWTmNXiPzPfWKpXqASVcWdnf2bnL73wgjNrFXAa3yYvBSd9pzfEIpA==",
-      "cpu": [
-        "x64"
-      ],
-      "dev": true,
-      "license": "MIT",
-      "optional": true,
-      "os": [
-        "darwin"
-      ],
-      "engines": {
-        "node": ">=18"
-      }
-    },
-    "node_modules/@esbuild/freebsd-arm64": {
-      "version": "0.25.12",
-      "resolved": "https://registry.npmjs.org/@esbuild/freebsd-arm64/-/freebsd-arm64-0.25.12.tgz",
-      "integrity": "sha512-gA0Bx759+7Jve03K1S0vkOu5Lg/85dou3EseOGUes8flVOGxbhDDh/iZaoek11Y8mtyKPGF3vP8XhnkDEAmzeg==",
-      "cpu": [
-        "arm64"
-      ],
-      "dev": true,
-      "license": "MIT",
-      "optional": true,
-      "os": [
-        "freebsd"
-      ],
-      "engines": {
-        "node": ">=18"
-      }
-    },
-    "node_modules/@esbuild/freebsd-x64": {
-      "version": "0.25.12",
-      "resolved": "https://registry.npmjs.org/@esbuild/freebsd-x64/-/freebsd-x64-0.25.12.tgz",
-      "integrity": "sha512-TGbO26Yw2xsHzxtbVFGEXBFH0FRAP7gtcPE7P5yP7wGy7cXK2oO7RyOhL5NLiqTlBh47XhmIUXuGciXEqYFfBQ==",
-      "cpu": [
-        "x64"
-      ],
-      "dev": true,
-      "license": "MIT",
-      "optional": true,
-      "os": [
-        "freebsd"
-      ],
-      "engines": {
-        "node": ">=18"
-      }
-    },
-    "node_modules/@esbuild/linux-arm": {
-      "version": "0.25.12",
-      "resolved": "https://registry.npmjs.org/@esbuild/linux-arm/-/linux-arm-0.25.12.tgz",
-      "integrity": "sha512-lPDGyC1JPDou8kGcywY0YILzWlhhnRjdof3UlcoqYmS9El818LLfJJc3PXXgZHrHCAKs/Z2SeZtDJr5MrkxtOw==",
-      "cpu": [
-        "arm"
-      ],
-      "dev": true,
-      "license": "MIT",
-      "optional": true,
-      "os": [
-        "linux"
-      ],
-      "engines": {
-        "node": ">=18"
-      }
-    },
-    "node_modules/@esbuild/linux-arm64": {
-      "version": "0.25.12",
-      "resolved": "https://registry.npmjs.org/@esbuild/linux-arm64/-/linux-arm64-0.25.12.tgz",
-      "integrity": "sha512-8bwX7a8FghIgrupcxb4aUmYDLp8pX06rGh5HqDT7bB+8Rdells6mHvrFHHW2JAOPZUbnjUpKTLg6ECyzvas2AQ==",
-      "cpu": [
-        "arm64"
-      ],
-      "dev": true,
-      "license": "MIT",
-      "optional": true,
-      "os": [
-        "linux"
-      ],
-      "engines": {
-        "node": ">=18"
-      }
-    },
-    "node_modules/@esbuild/linux-ia32": {
-      "version": "0.25.12",
-      "resolved": "https://registry.npmjs.org/@esbuild/linux-ia32/-/linux-ia32-0.25.12.tgz",
-      "integrity": "sha512-0y9KrdVnbMM2/vG8KfU0byhUN+EFCny9+8g202gYqSSVMonbsCfLjUO+rCci7pM0WBEtz+oK/PIwHkzxkyharA==",
-      "cpu": [
-        "ia32"
-      ],
-      "dev": true,
-      "license": "MIT",
-      "optional": true,
-      "os": [
-        "linux"
-      ],
-      "engines": {
-        "node": ">=18"
-      }
-    },
-    "node_modules/@esbuild/linux-loong64": {
-      "version": "0.25.12",
-      "resolved": "https://registry.npmjs.org/@esbuild/linux-loong64/-/linux-loong64-0.25.12.tgz",
-      "integrity": "sha512-h///Lr5a9rib/v1GGqXVGzjL4TMvVTv+s1DPoxQdz7l/AYv6LDSxdIwzxkrPW438oUXiDtwM10o9PmwS/6Z0Ng==",
-      "cpu": [
-        "loong64"
-      ],
-      "dev": true,
-      "license": "MIT",
-      "optional": true,
-      "os": [
-        "linux"
-      ],
-      "engines": {
-        "node": ">=18"
-      }
-    },
-    "node_modules/@esbuild/linux-mips64el": {
-      "version": "0.25.12",
-      "resolved": "https://registry.npmjs.org/@esbuild/linux-mips64el/-/linux-mips64el-0.25.12.tgz",
-      "integrity": "sha512-iyRrM1Pzy9GFMDLsXn1iHUm18nhKnNMWscjmp4+hpafcZjrr2WbT//d20xaGljXDBYHqRcl8HnxbX6uaA/eGVw==",
-      "cpu": [
-        "mips64el"
-      ],
-      "dev": true,
-      "license": "MIT",
-      "optional": true,
-      "os": [
-        "linux"
-      ],
-      "engines": {
-        "node": ">=18"
-      }
-    },
-    "node_modules/@esbuild/linux-ppc64": {
-      "version": "0.25.12",
-      "resolved": "https://registry.npmjs.org/@esbuild/linux-ppc64/-/linux-ppc64-0.25.12.tgz",
-      "integrity": "sha512-9meM/lRXxMi5PSUqEXRCtVjEZBGwB7P/D4yT8UG/mwIdze2aV4Vo6U5gD3+RsoHXKkHCfSxZKzmDssVlRj1QQA==",
-      "cpu": [
-        "ppc64"
-      ],
-      "dev": true,
-      "license": "MIT",
-      "optional": true,
-      "os": [
-        "linux"
-      ],
-      "engines": {
-        "node": ">=18"
-      }
-    },
-    "node_modules/@esbuild/linux-riscv64": {
-      "version": "0.25.12",
-      "resolved": "https://registry.npmjs.org/@esbuild/linux-riscv64/-/linux-riscv64-0.25.12.tgz",
-      "integrity": "sha512-Zr7KR4hgKUpWAwb1f3o5ygT04MzqVrGEGXGLnj15YQDJErYu/BGg+wmFlIDOdJp0PmB0lLvxFIOXZgFRrdjR0w==",
-      "cpu": [
-        "riscv64"
-      ],
-      "dev": true,
-      "license": "MIT",
-      "optional": true,
-      "os": [
-        "linux"
-      ],
-      "engines": {
-        "node": ">=18"
-      }
-    },
-    "node_modules/@esbuild/linux-s390x": {
-      "version": "0.25.12",
-      "resolved": "https://registry.npmjs.org/@esbuild/linux-s390x/-/linux-s390x-0.25.12.tgz",
-      "integrity": "sha512-MsKncOcgTNvdtiISc/jZs/Zf8d0cl/t3gYWX8J9ubBnVOwlk65UIEEvgBORTiljloIWnBzLs4qhzPkJcitIzIg==",
-      "cpu": [
-        "s390x"
-      ],
-      "dev": true,
-      "license": "MIT",
-      "optional": true,
-      "os": [
-        "linux"
-      ],
-      "engines": {
-        "node": ">=18"
-      }
-    },
-    "node_modules/@esbuild/linux-x64": {
-      "version": "0.25.12",
-      "resolved": "https://registry.npmjs.org/@esbuild/linux-x64/-/linux-x64-0.25.12.tgz",
-      "integrity": "sha512-uqZMTLr/zR/ed4jIGnwSLkaHmPjOjJvnm6TVVitAa08SLS9Z0VM8wIRx7gWbJB5/J54YuIMInDquWyYvQLZkgw==",
-      "cpu": [
-        "x64"
-      ],
-      "dev": true,
-      "license": "MIT",
-      "optional": true,
-      "os": [
-        "linux"
-      ],
-      "engines": {
-        "node": ">=18"
-      }
-    },
-    "node_modules/@esbuild/netbsd-arm64": {
-      "version": "0.25.12",
-      "resolved": "https://registry.npmjs.org/@esbuild/netbsd-arm64/-/netbsd-arm64-0.25.12.tgz",
-      "integrity": "sha512-xXwcTq4GhRM7J9A8Gv5boanHhRa/Q9KLVmcyXHCTaM4wKfIpWkdXiMog/KsnxzJ0A1+nD+zoecuzqPmCRyBGjg==",
-      "cpu": [
-        "arm64"
-      ],
-      "dev": true,
-      "license": "MIT",
-      "optional": true,
-      "os": [
-        "netbsd"
-      ],
-      "engines": {
-        "node": ">=18"
-      }
-    },
-    "node_modules/@esbuild/netbsd-x64": {
-      "version": "0.25.12",
-      "resolved": "https://registry.npmjs.org/@esbuild/netbsd-x64/-/netbsd-x64-0.25.12.tgz",
-      "integrity": "sha512-Ld5pTlzPy3YwGec4OuHh1aCVCRvOXdH8DgRjfDy/oumVovmuSzWfnSJg+VtakB9Cm0gxNO9BzWkj6mtO1FMXkQ==",
-      "cpu": [
-        "x64"
-      ],
-      "dev": true,
-      "license": "MIT",
-      "optional": true,
-      "os": [
-        "netbsd"
-      ],
-      "engines": {
-        "node": ">=18"
-      }
-    },
-    "node_modules/@esbuild/openbsd-arm64": {
-      "version": "0.25.12",
-      "resolved": "https://registry.npmjs.org/@esbuild/openbsd-arm64/-/openbsd-arm64-0.25.12.tgz",
-      "integrity": "sha512-fF96T6KsBo/pkQI950FARU9apGNTSlZGsv1jZBAlcLL1MLjLNIWPBkj5NlSz8aAzYKg+eNqknrUJ24QBybeR5A==",
-      "cpu": [
-        "arm64"
-      ],
-      "dev": true,
-      "license": "MIT",
-      "optional": true,
-      "os": [
-        "openbsd"
-      ],
-      "engines": {
-        "node": ">=18"
-      }
-    },
-    "node_modules/@esbuild/openbsd-x64": {
-      "version": "0.25.12",
-      "resolved": "https://registry.npmjs.org/@esbuild/openbsd-x64/-/openbsd-x64-0.25.12.tgz",
-      "integrity": "sha512-MZyXUkZHjQxUvzK7rN8DJ3SRmrVrke8ZyRusHlP+kuwqTcfWLyqMOE3sScPPyeIXN/mDJIfGXvcMqCgYKekoQw==",
-      "cpu": [
-        "x64"
-      ],
-      "dev": true,
-      "license": "MIT",
-      "optional": true,
-      "os": [
-        "openbsd"
-      ],
-      "engines": {
-        "node": ">=18"
-      }
-    },
-    "node_modules/@esbuild/openharmony-arm64": {
-      "version": "0.25.12",
-      "resolved": "https://registry.npmjs.org/@esbuild/openharmony-arm64/-/openharmony-arm64-0.25.12.tgz",
-      "integrity": "sha512-rm0YWsqUSRrjncSXGA7Zv78Nbnw4XL6/dzr20cyrQf7ZmRcsovpcRBdhD43Nuk3y7XIoW2OxMVvwuRvk9XdASg==",
-      "cpu": [
-        "arm64"
-      ],
-      "dev": true,
-      "license": "MIT",
-      "optional": true,
-      "os": [
-        "openharmony"
-      ],
-      "engines": {
-        "node": ">=18"
-      }
-    },
-    "node_modules/@esbuild/sunos-x64": {
-      "version": "0.25.12",
-      "resolved": "https://registry.npmjs.org/@esbuild/sunos-x64/-/sunos-x64-0.25.12.tgz",
-      "integrity": "sha512-3wGSCDyuTHQUzt0nV7bocDy72r2lI33QL3gkDNGkod22EsYl04sMf0qLb8luNKTOmgF/eDEDP5BFNwoBKH441w==",
-      "cpu": [
-        "x64"
-      ],
-      "dev": true,
-      "license": "MIT",
-      "optional": true,
-      "os": [
-        "sunos"
-      ],
-      "engines": {
-        "node": ">=18"
-      }
-    },
-    "node_modules/@esbuild/win32-arm64": {
-      "version": "0.25.12",
-      "resolved": "https://registry.npmjs.org/@esbuild/win32-arm64/-/win32-arm64-0.25.12.tgz",
-      "integrity": "sha512-rMmLrur64A7+DKlnSuwqUdRKyd3UE7oPJZmnljqEptesKM8wx9J8gx5u0+9Pq0fQQW8vqeKebwNXdfOyP+8Bsg==",
-      "cpu": [
-        "arm64"
-      ],
-      "dev": true,
-      "license": "MIT",
-      "optional": true,
-      "os": [
-        "win32"
-      ],
-      "engines": {
-        "node": ">=18"
-      }
-    },
-    "node_modules/@esbuild/win32-ia32": {
-      "version": "0.25.12",
-      "resolved": "https://registry.npmjs.org/@esbuild/win32-ia32/-/win32-ia32-0.25.12.tgz",
-      "integrity": "sha512-HkqnmmBoCbCwxUKKNPBixiWDGCpQGVsrQfJoVGYLPT41XWF8lHuE5N6WhVia2n4o5QK5M4tYr21827fNhi4byQ==",
-      "cpu": [
-        "ia32"
-      ],
-      "dev": true,
-      "license": "MIT",
-      "optional": true,
-      "os": [
-        "win32"
-      ],
-      "engines": {
-        "node": ">=18"
-      }
-    },
-    "node_modules/@esbuild/win32-x64": {
-      "version": "0.25.12",
-      "resolved": "https://registry.npmjs.org/@esbuild/win32-x64/-/win32-x64-0.25.12.tgz",
-      "integrity": "sha512-alJC0uCZpTFrSL0CCDjcgleBXPnCrEAhTBILpeAp7M/OFgoqtAetfBzX0xM00MUsVVPpVjlPuMbREqnZCXaTnA==",
-      "cpu": [
-        "x64"
-      ],
-      "dev": true,
-      "license": "MIT",
-      "optional": true,
-      "os": [
-        "win32"
-      ],
-      "engines": {
-        "node": ">=18"
-      }
-    },
-    "node_modules/@hono/node-server": {
-      "version": "1.19.13",
-      "resolved": "https://registry.npmjs.org/@hono/node-server/-/node-server-1.19.13.tgz",
-      "integrity": "sha512-TsQLe4i2gvoTtrHje625ngThGBySOgSK3Xo2XRYOdqGN1teR8+I7vchQC46uLJi8OF62YTYA3AhSpumtkhsaKQ==",
-      "license": "MIT",
-      "engines": {
-        "node": ">=18.14.1"
-      },
-      "peerDependencies": {
-        "hono": "^4"
-      }
-    },
-    "node_modules/@types/node": {
-      "version": "20.19.25",
-      "resolved": "https://registry.npmjs.org/@types/node/-/node-20.19.25.tgz",
-      "integrity": "sha512-ZsJzA5thDQMSQO788d7IocwwQbI8B5OPzmqNvpf3NY/+MHDAS759Wo0gd2WQeXYt5AAAQjzcrTVC6SKCuYgoCQ==",
-      "dev": true,
-      "license": "MIT",
-      "dependencies": {
-        "undici-types": "~6.21.0"
-      }
-    },
-    "node_modules/esbuild": {
-      "version": "0.25.12",
-      "resolved": "https://registry.npmjs.org/esbuild/-/esbuild-0.25.12.tgz",
-      "integrity": "sha512-bbPBYYrtZbkt6Os6FiTLCTFxvq4tt3JKall1vRwshA3fdVztsLAatFaZobhkBC8/BrPetoa0oksYoKXoG4ryJg==",
-      "dev": true,
-      "hasInstallScript": true,
-      "license": "MIT",
-      "bin": {
-        "esbuild": "bin/esbuild"
-      },
-      "engines": {
-        "node": ">=18"
-      },
-      "optionalDependencies": {
-        "@esbuild/aix-ppc64": "0.25.12",
-        "@esbuild/android-arm": "0.25.12",
-        "@esbuild/android-arm64": "0.25.12",
-        "@esbuild/android-x64": "0.25.12",
-        "@esbuild/darwin-arm64": "0.25.12",
-        "@esbuild/darwin-x64": "0.25.12",
-        "@esbuild/freebsd-arm64": "0.25.12",
-        "@esbuild/freebsd-x64": "0.25.12",
-        "@esbuild/linux-arm": "0.25.12",
-        "@esbuild/linux-arm64": "0.25.12",
-        "@esbuild/linux-ia32": "0.25.12",
-        "@esbuild/linux-loong64": "0.25.12",
-        "@esbuild/linux-mips64el": "0.25.12",
-        "@esbuild/linux-ppc64": "0.25.12",
-        "@esbuild/linux-riscv64": "0.25.12",
-        "@esbuild/linux-s390x": "0.25.12",
-        "@esbuild/linux-x64": "0.25.12",
-        "@esbuild/netbsd-arm64": "0.25.12",
-        "@esbuild/netbsd-x64": "0.25.12",
-        "@esbuild/openbsd-arm64": "0.25.12",
-        "@esbuild/openbsd-x64": "0.25.12",
-        "@esbuild/openharmony-arm64": "0.25.12",
-        "@esbuild/sunos-x64": "0.25.12",
-        "@esbuild/win32-arm64": "0.25.12",
-        "@esbuild/win32-ia32": "0.25.12",
-        "@esbuild/win32-x64": "0.25.12"
-      }
-    },
-    "node_modules/fsevents": {
-      "version": "2.3.3",
-      "resolved": "https://registry.npmjs.org/fsevents/-/fsevents-2.3.3.tgz",
-      "integrity": "sha512-5xoDfX+fL7faATnagmWPpbFtwh/R77WmMMqqHGS65C3vvB0YHrgF+B1YmZ3441tMj5n63k0212XNoJwzlhffQw==",
-      "dev": true,
-      "hasInstallScript": true,
-      "license": "MIT",
-      "optional": true,
-      "os": [
-        "darwin"
-      ],
-      "engines": {
-        "node": "^8.16.0 || ^10.6.0 || >=11.0.0"
-      }
-    },
-    "node_modules/get-tsconfig": {
-      "version": "4.14.0",
-      "resolved": "https://registry.npmjs.org/get-tsconfig/-/get-tsconfig-4.14.0.tgz",
-      "integrity": "sha512-yTb+8DXzDREzgvYmh6s9vHsSVCHeC0G3PI5bEXNBHtmshPnO+S5O7qgLEOn0I5QvMy6kpZN8K1NKGyilLb93wA==",
-      "dev": true,
-      "license": "MIT",
-      "dependencies": {
-        "resolve-pkg-maps": "^1.0.0"
-      },
-      "funding": {
-        "url": "https://github.com/privatenumber/get-tsconfig?sponsor=1"
-      }
-    },
-    "node_modules/hono": {
-      "version": "4.12.16",
-      "resolved": "https://registry.npmjs.org/hono/-/hono-4.12.16.tgz",
-      "integrity": "sha512-jN0ZewiNAWSe5khM3EyCmBb250+b40wWbwNILNfEvq84VREWwOIkuUsFONk/3i3nqkz7Oe1PcpM2mwQEK2L9Kg==",
-      "license": "MIT",
-      "engines": {
-        "node": ">=16.9.0"
-      }
-    },
-    "node_modules/resolve-pkg-maps": {
-      "version": "1.0.0",
-      "resolved": "https://registry.npmjs.org/resolve-pkg-maps/-/resolve-pkg-maps-1.0.0.tgz",
-      "integrity": "sha512-seS2Tj26TBVOC2NIc2rOe2y2ZO7efxITtLZcGSOnHHNOQ7CkiUBfw0Iw2ck6xkIhPwLhKNLS8BO+hEpngQlqzw==",
-      "dev": true,
-      "license": "MIT",
-      "funding": {
-        "url": "https://github.com/privatenumber/resolve-pkg-maps?sponsor=1"
-      }
-    },
-    "node_modules/tsx": {
-      "version": "4.20.6",
-      "resolved": "https://registry.npmjs.org/tsx/-/tsx-4.20.6.tgz",
-      "integrity": "sha512-ytQKuwgmrrkDTFP4LjR0ToE2nqgy886GpvRSpU0JAnrdBYppuY5rLkRUYPU1yCryb24SsKBTL/hlDQAEFVwtZg==",
-      "dev": true,
-      "license": "MIT",
-      "dependencies": {
-        "esbuild": "~0.25.0",
-        "get-tsconfig": "^4.7.5"
-      },
-      "bin": {
-        "tsx": "dist/cli.mjs"
-      },
-      "engines": {
-        "node": ">=18.0.0"
-      },
-      "optionalDependencies": {
-        "fsevents": "~2.3.3"
-      }
-    },
-    "node_modules/undici-types": {
-      "version": "6.21.0",
-      "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-6.21.0.tgz",
-      "integrity": "sha512-iwDZqg0QAGrg9Rav5H4n0M64c3mkR59cJ6wQp+7C4nI0gsmExaedaYLNO44eT4AtBBwjbTiGPMlt2Md0T9H9JQ==",
-      "dev": true,
-      "license": "MIT"
-    }
-  }
-}
--- a/litellm-js/spend-logs/package.json
+++ b/litellm-js/spend-logs/package.json
@ -1,13 +0,0 @@
-{
-  "scripts": {
-    "dev": "tsx watch src/index.ts"
-  },
-  "dependencies": {
-    "@hono/node-server": "1.19.13",
-    "hono": "4.12.16"
-  },
-  "devDependencies": {
-    "@types/node": "20.19.25",
-    "tsx": "4.20.6"
-  }
-}
--- a/litellm-js/spend-logs/schema.prisma
+++ b/litellm-js/spend-logs/schema.prisma
@ -1,29 +0,0 @@
-generator client {
-  provider        = "prisma-client-js"
-}
-
-datasource client {
-  provider = "postgresql"
-  url      = env("DATABASE_URL")
-}
-
-model LiteLLM_SpendLogs {
-  request_id        String   @id
-  call_type         String
-  api_key           String   @default("")
-  spend             Float    @default(0.0)
-  total_tokens      Int      @default(0)
-  prompt_tokens     Int      @default(0)
-  completion_tokens Int      @default(0)
-  startTime         DateTime
-  endTime           DateTime
-  model             String   @default("")
-  api_base          String   @default("")
-  user              String   @default("")
-  metadata          Json     @default("{}")
-  cache_hit         String   @default("")
-  cache_key         String   @default("")
-  request_tags      Json     @default("[]")
-  team_id           String?
-  end_user          String?
-}
--- a/litellm-js/spend-logs/src/_types.ts
+++ b/litellm-js/spend-logs/src/_types.ts
@ -1,32 +0,0 @@
-export type LiteLLM_IncrementSpend = {
-    key_transactions: Array<LiteLLM_IncrementObject>, // [{"key": spend},..]
-    user_transactions: Array<LiteLLM_IncrementObject>, 
-    team_transactions: Array<LiteLLM_IncrementObject>,
-    spend_logs_transactions: Array<LiteLLM_SpendLogs>
-}
-
-export type LiteLLM_IncrementObject = {
-    key: string,
-    spend: number
-}
-
-export type LiteLLM_SpendLogs = {
-    request_id: string; // @id means it's a unique identifier
-    call_type: string;
-    api_key: string; // @default("") means it defaults to an empty string if not provided
-    spend: number; // Float in Prisma corresponds to number in TypeScript
-    total_tokens: number; // Int in Prisma corresponds to number in TypeScript
-    prompt_tokens: number;
-    completion_tokens: number;
-    startTime: Date; // DateTime in Prisma corresponds to Date in TypeScript
-    endTime: Date;
-    model: string; // @default("") means it defaults to an empty string if not provided
-    api_base: string;
-    user: string;
-    metadata: any; // Json type in Prisma is represented by any in TypeScript; could also use a more specific type if the structure of JSON is known
-    cache_hit: string;
-    cache_key: string;
-    request_tags: any; // Similarly, this could be an array or a more specific type depending on the expected structure
-    team_id?: string | null; // ? indicates it's optional and can be undefined, but could also be null if not provided
-    end_user?: string | null;
-};
--- a/litellm-js/spend-logs/src/index.ts
+++ b/litellm-js/spend-logs/src/index.ts
@ -1,84 +0,0 @@
-import { serve } from '@hono/node-server'
-import { Hono } from 'hono'
-import { PrismaClient } from '@prisma/client'
-import {LiteLLM_SpendLogs, LiteLLM_IncrementSpend, LiteLLM_IncrementObject} from './_types'
-
-const app = new Hono()
-const prisma = new PrismaClient()
-// In-memory storage for logs
-let spend_logs: LiteLLM_SpendLogs[] = [];
-const key_logs: LiteLLM_IncrementObject[] = [];
-const user_logs: LiteLLM_IncrementObject[] = [];
-const transaction_logs: LiteLLM_IncrementObject[] = [];
-
-
-app.get('/', (c) => {
-  return c.text('Hello Hono!')
-})
-
-const MIN_LOGS = 1; // Minimum number of logs needed to initiate a flush
-const FLUSH_INTERVAL = 5000; // Time in ms to wait before trying to flush again
-const BATCH_SIZE = 100; // Preferred size of each batch to write to the database
-const MAX_LOGS_PER_INTERVAL = 1000; // Maximum number of logs to flush in a single interval
-
-const flushLogsToDb = async () => {
-  if (spend_logs.length >= MIN_LOGS) {
-    // Limit the logs to process in this interval to MAX_LOGS_PER_INTERVAL or less
-    const logsToProcess = spend_logs.slice(0, MAX_LOGS_PER_INTERVAL);
-  
-    for (let i = 0; i < logsToProcess.length; i += BATCH_SIZE) {
-      // Create subarray for current batch, ensuring it doesn't exceed the BATCH_SIZE
-      const batch = logsToProcess.slice(i, i + BATCH_SIZE);
-
-      // Convert datetime strings to Date objects
-      const batchWithDates = batch.map(entry => ({
-        ...entry,
-        startTime: new Date(entry.startTime),
-        endTime: new Date(entry.endTime),
-        // Repeat for any other DateTime fields you may have
-      }));
-
-      await prisma.liteLLM_SpendLogs.createMany({
-        data: batchWithDates,
-      });
-
-      console.log(`Flushed ${batch.length} logs to the DB.`);
-    }
-
-    // Remove the processed logs from spend_logs
-    spend_logs = spend_logs.slice(logsToProcess.length);
-    
-    console.log(`${logsToProcess.length} logs processed. Remaining in queue: ${spend_logs.length}`);
-  } else {
-    // This will ensure it doesn't falsely claim "No logs to flush." when it's merely below the MIN_LOGS threshold.
-    if(spend_logs.length > 0) {
-      console.log(`Accumulating logs. Currently at ${spend_logs.length}, waiting for at least ${MIN_LOGS}.`);
-    } else {
-      console.log("No logs to flush.");
-    }
-  }
-};
-
-// Setup interval for attempting to flush the logs
-setInterval(flushLogsToDb, FLUSH_INTERVAL);
-
-// Route to receive log messages
-app.post('/spend/update', async (c) => {
-  const incomingLogs = await c.req.json<LiteLLM_SpendLogs[]>();
-  
-  spend_logs.push(...incomingLogs);
-
-  console.log(`Received and stored ${incomingLogs.length} logs. Total logs in memory: ${spend_logs.length}`);
-  
-  return c.json({ message: `Successfully stored ${incomingLogs.length} logs` });
-});
-
-
-
-const port = 3000
-console.log(`Server is running on port ${port}`)
-
-serve({
-  fetch: app.fetch,
-  port
-})
--- a/litellm-js/spend-logs/tsconfig.json
+++ b/litellm-js/spend-logs/tsconfig.json
@ -1,13 +0,0 @@
-{
-  "compilerOptions": {
-    "target": "ESNext",
-    "module": "ESNext",
-    "moduleResolution": "Bundler",
-    "strict": true,
-    "types": [
-      "node"
-    ],
-    "jsx": "react-jsx",
-    "jsxImportSource": "hono/jsx",
-  }
-}
--- a/litellm/proxy/auth/auth_checks.py
+++ b/litellm/proxy/auth/auth_checks.py
@ -2849,7 +2849,7 @@ def _can_object_call_model(
            object_type=object_type
        ),
        param="model",
-        code=status.HTTP_401_UNAUTHORIZED,
+        code=status.HTTP_403_FORBIDDEN,
    )


@ -3082,7 +3082,7 @@ async def can_user_call_model(
            message=f"User not allowed to access model. No default model access, only team models allowed. Tried to access {model}",
            type=ProxyErrorTypes.key_model_access_denied,
            param="model",
-            code=status.HTTP_401_UNAUTHORIZED,
+            code=status.HTTP_403_FORBIDDEN,
        )

    return _can_object_call_model(
@ -3625,7 +3625,7 @@ async def _check_team_member_model_access(
            message=f"Team member not allowed to access model. User={valid_token.user_id}, Team={team_object.team_id}, Model={model}. Allowed member models = {member_allowed_models}",
            type=ProxyErrorTypes.team_model_access_denied,
            param="model",
-            code=status.HTTP_401_UNAUTHORIZED,
+            code=status.HTTP_403_FORBIDDEN,
        )


--- a/litellm/proxy/auth/auth_exception_handler.py
+++ b/litellm/proxy/auth/auth_exception_handler.py
@ -123,7 +123,7 @@ class UserAPIKeyAuthExceptionHandler:
                    message=e.message,
                    type=ProxyErrorTypes.budget_exceeded,
                    param=None,
-                    code=400,
+                    code=getattr(e, "status_code", status.HTTP_429_TOO_MANY_REQUESTS),
                )
            if isinstance(e, HTTPException):
                raise ProxyException(
--- a/litellm/proxy/auth/user_api_key_auth.py
+++ b/litellm/proxy/auth/user_api_key_auth.py
@ -1107,7 +1107,7 @@ async def _user_api_key_auth_builder(  # noqa: PLR0915
                    raise ProxyException(
                        message=f"Authentication Error - Expired Key. Key Expiry time {expiry_time} and current time {current_time}",
                        type=ProxyErrorTypes.expired_key,
-                        code=400,
+                        code=status.HTTP_401_UNAUTHORIZED,
                        param=abbreviate_api_key(api_key=api_key),
                    )
            valid_token = update_valid_token_with_end_user_params(
@ -1432,7 +1432,7 @@ async def _user_api_key_auth_builder(  # noqa: PLR0915
                    raise ProxyException(
                        message=f"Authentication Error - Expired Key. Key Expiry time {expiry_time} and current time {current_time}",
                        type=ProxyErrorTypes.expired_key,
-                        code=400,
+                        code=status.HTTP_401_UNAUTHORIZED,
                        param=abbreviate_api_key(api_key=api_key),
                    )

@ -2417,7 +2417,7 @@ async def _run_post_custom_auth_checks(
            raise ProxyException(
                message=f"Authentication Error - Expired Key. Key Expiry time {expiry_time} and current time {current_time}",
                type=ProxyErrorTypes.expired_key,
-                code=400,
+                code=status.HTTP_401_UNAUTHORIZED,
                param=(
                    abbreviate_api_key(api_key=valid_token.token)
                    if valid_token.token
--- a/litellm/proxy/common_utils/reset_budget_job.py
+++ b/litellm/proxy/common_utils/reset_budget_job.py
@ -2,7 +2,7 @@ import asyncio
 import json
 import time
 from datetime import datetime, timezone
-from typing import Any, List, Literal, Optional, Union
+from typing import Any, Callable, List, Literal, Optional, Union

 import litellm
 from litellm._logging import verbose_proxy_logger
@ -83,93 +83,139 @@ class ResetBudgetJob:
                "Failed to reset spend counter %s: %s", counter_key, e
            )

+    @staticmethod
+    async def _invalidate_user_api_key_cache_entry(cache_key: str) -> None:
+        """Drop a stale management-cache entry so the next read fetches from DB.
+
+        Some entity types (notably tags and end-users) are not handled by
+        SpendCounterReseed.from_db, so when a spend counter expires the
+        budget check falls back to ``cached_obj.spend``. If that cached
+        object lingers in ``user_api_key_cache`` past a budget reset, the
+        stale ``.spend`` keeps the entity blocked indefinitely. Deleting
+        the cache entry forces the next auth-time fetch to reload the
+        zeroed row from Postgres.
+        """
+        try:
+            from litellm.proxy.proxy_server import user_api_key_cache
+
+            await user_api_key_cache.async_delete_cache(key=cache_key)
+        except Exception as e:
+            verbose_proxy_logger.warning(
+                "Failed to invalidate user_api_key_cache entry %s: %s",
+                cache_key,
+                e,
+            )
+
+    async def _cascade_reset_spend_for_budget_link(
+        self,
+        budgets_to_reset: List[LiteLLM_BudgetTableFull],
+        table: Any,
+        counter_key_fn: Callable[[Any], str],
+        log_subject: str,
+        extra_where: Optional[dict] = None,
+        cache_key_fn: Optional[Callable[[Any], str]] = None,
+    ):
+        """
+        Generic cascade: zero spend on rows whose budget_id is in the reset set.
+
+        ``cache_key_fn`` is optional: when provided, after the DB update each
+        matching row's entry in ``user_api_key_cache`` is also dropped. This
+        is required for entities whose spend counter is read with the cached
+        object's ``.spend`` as fallback (tags, end-users) — otherwise the
+        stale cached object pins enforcement to the pre-reset spend until
+        its TTL expires.
+        """
+        budget_ids = [b.budget_id for b in budgets_to_reset if b.budget_id is not None]
+        if not budget_ids:
+            return
+
+        where: dict = {"budget_id": {"in": budget_ids}}
+        if extra_where:
+            where.update(extra_where)
+
+        try:
+            rows = await table.find_many(where=where)
+        except Exception as e:
+            rows = []
+            verbose_proxy_logger.warning(
+                "Failed to fetch %s for counter invalidation: %s", log_subject, e
+            )
+
+        update_result = await table.update_many(where=where, data={"spend": 0})
+
+        for row in rows:
+            await self._invalidate_spend_counter(counter_key_fn(row))
+            if cache_key_fn is not None:
+                await self._invalidate_user_api_key_cache_entry(cache_key_fn(row))
+
+        return update_result
+
    async def reset_budget_for_litellm_team_members(
        self, budgets_to_reset: List[LiteLLM_BudgetTableFull]
    ):
        """
        Resets the budget for all LiteLLM Team Members if their budget has expired
        """
-        budget_ids = [
-            budget.budget_id
-            for budget in budgets_to_reset
-            if budget.budget_id is not None
-        ]
-
-        try:
-            memberships = await self.prisma_client.db.litellm_teammembership.find_many(
-                where={"budget_id": {"in": budget_ids}}
-            )
-        except Exception as e:
-            memberships = []
-            verbose_proxy_logger.warning(
-                "Failed to fetch team memberships for counter invalidation: %s", e
-            )
-
-        update_result = await self.prisma_client.db.litellm_teammembership.update_many(
-            where={"budget_id": {"in": budget_ids}},
-            data={
-                "spend": 0,
-            },
+        return await self._cascade_reset_spend_for_budget_link(
+            budgets_to_reset=budgets_to_reset,
+            table=self.prisma_client.db.litellm_teammembership,
+            counter_key_fn=lambda m: f"spend:team_member:{m.user_id}:{m.team_id}",
+            log_subject="team memberships",
        )

-        for m in memberships:
-            await self._invalidate_spend_counter(
-                f"spend:team_member:{m.user_id}:{m.team_id}"
-            )
-
-        return update_result
-
    async def reset_budget_for_keys_linked_to_budgets(
        self, budgets_to_reset: List[LiteLLM_BudgetTableFull]
    ):
        """
        Resets the spend for keys linked to budget tiers that are being reset.

-        This handles keys that have budget_id but no budget_duration set on the key
-        itself. Keys with budget_id rely on their linked budget tier's reset schedule
-        rather than having their own budget_duration.
-
-        Keys that have their own budget_duration are already handled by
-        reset_budget_for_litellm_keys() and are excluded here to avoid
-        double-resetting.
+        Excludes keys with their own budget_duration; those are reset by
+        reset_budget_for_litellm_keys() to avoid double-resetting.
        """
-        budget_ids = [
-            budget.budget_id
-            for budget in budgets_to_reset
-            if budget.budget_id is not None
-        ]
-        if not budget_ids:
-            return
-
-        where_clause: dict = {
-            "budget_id": {"in": budget_ids},
-            "budget_duration": None,  # only keys without their own reset schedule
-            "spend": {"gt": 0},  # only reset keys that have accumulated spend
-        }
-
-        try:
-            keys = await self.prisma_client.db.litellm_verificationtoken.find_many(
-                where=where_clause
-            )
-        except Exception as e:
-            keys = []
-            verbose_proxy_logger.warning(
-                "Failed to fetch keys for counter invalidation: %s", e
-            )
-
-        update_result = (
-            await self.prisma_client.db.litellm_verificationtoken.update_many(
-                where=where_clause,
-                data={
-                    "spend": 0,
-                },
-            )
+        return await self._cascade_reset_spend_for_budget_link(
+            budgets_to_reset=budgets_to_reset,
+            table=self.prisma_client.db.litellm_verificationtoken,
+            counter_key_fn=lambda k: f"spend:key:{k.token}",
+            log_subject="keys",
+            extra_where={"budget_duration": None, "spend": {"gt": 0}},
        )

-        for k in keys:
-            await self._invalidate_spend_counter(f"spend:key:{k.token}")
+    async def reset_budget_for_orgs_linked_to_budgets(
+        self, budgets_to_reset: List[LiteLLM_BudgetTableFull]
+    ):
+        """
+        Resets the spend for orgs linked to budget tiers that are being reset.
+        """
+        return await self._cascade_reset_spend_for_budget_link(
+            budgets_to_reset=budgets_to_reset,
+            table=self.prisma_client.db.litellm_organizationtable,
+            counter_key_fn=lambda o: f"spend:org:{o.organization_id}",
+            log_subject="orgs",
+            extra_where={"spend": {"gt": 0}},
+        )

-        return update_result
+    async def reset_budget_for_tags_linked_to_budgets(
+        self, budgets_to_reset: List[LiteLLM_BudgetTableFull]
+    ):
+        """
+        Resets the spend for tags linked to budget tiers that are being reset.
+
+        Also drops each tag's ``user_api_key_cache`` entry so the next
+        ``_tag_max_budget_check`` reloads the zeroed row from the DB.
+        ``SpendCounterReseed.from_db`` intentionally returns ``None`` for
+        tags, so the budget check falls back to the cached
+        ``LiteLLM_TagTable.spend`` once the spend counter expires; without
+        this invalidation, that stale ``.spend`` keeps the tag over-budget
+        indefinitely.
+        """
+        return await self._cascade_reset_spend_for_budget_link(
+            budgets_to_reset=budgets_to_reset,
+            table=self.prisma_client.db.litellm_tagtable,
+            counter_key_fn=lambda t: f"spend:tag:{t.tag_name}",
+            log_subject="tags",
+            extra_where={"spend": {"gt": 0}},
+            cache_key_fn=lambda t: f"tag:{t.tag_name}",
+        )

    async def reset_budget_for_litellm_budget_table(self):
        """
@ -237,6 +283,14 @@ class ResetBudgetJob:
                    budgets_to_reset=budgets_to_reset
                )

+                await self.reset_budget_for_orgs_linked_to_budgets(
+                    budgets_to_reset=budgets_to_reset
+                )
+
+                await self.reset_budget_for_tags_linked_to_budgets(
+                    budgets_to_reset=budgets_to_reset
+                )
+
            if endusers_to_reset is not None and len(endusers_to_reset) > 0:
                for enduser in endusers_to_reset:
                    try:
--- a/litellm/proxy/proxy_server.py
+++ b/litellm/proxy/proxy_server.py
@ -211,6 +211,7 @@ from litellm import Router
 from litellm._logging import verbose_proxy_logger, verbose_router_logger
 from litellm.caching.caching import DualCache, RedisCache
 from litellm.caching.redis_cluster_cache import RedisClusterCache
+from litellm.proxy.common_utils.timezone_utils import get_budget_reset_time
 from litellm.proxy.common_utils.user_api_key_cache import UserApiKeyCache
 from litellm.constants import (
    _REALTIME_BODY_CACHE_SIZE,
@ -6750,27 +6751,64 @@ class ProxyStartupEvent:
                "budget_duration not set on Proxy. budget_duration is required to use max_budget."
            )

-        # add proxy budget to db in the user table
        asyncio.create_task(
-            generate_key_helper_fn(  # type: ignore
-                request_type="user",
-                table_name="user",
-                user_id=litellm_proxy_budget_name,
-                duration=None,
-                models=[],
-                aliases={},
-                config={},
-                spend=0,
-                max_budget=litellm.max_budget,
-                budget_duration=litellm.budget_duration,
-                query_type="update_data",
-                update_key_values={
-                    "max_budget": litellm.max_budget,
-                    "budget_duration": litellm.budget_duration,
-                },
-            )
+            cls._upsert_proxy_budget_with_reset_at_backfill(litellm_proxy_budget_name)
        )

+    @classmethod
+    async def _upsert_proxy_budget_with_reset_at_backfill(
+        cls, litellm_proxy_budget_name: str
+    ) -> None:
+        """
+        Upsert the proxy admin user row with the configured max_budget /
+        budget_duration, then backfill budget_reset_at if currently NULL.
+
+        The backfill uses `WHERE budget_reset_at IS NULL` so it only fires
+        when the row pre-existed without a reset schedule (e.g. row created
+        via a different path before the proxy budget was configured). On
+        subsequent restarts it no-ops, so an active reset window is never
+        slid forward.
+        """
+        await generate_key_helper_fn(  # type: ignore
+            request_type="user",
+            table_name="user",
+            user_id=litellm_proxy_budget_name,
+            duration=None,
+            models=[],
+            aliases={},
+            config={},
+            spend=0,
+            max_budget=litellm.max_budget,
+            budget_duration=litellm.budget_duration,
+            query_type="update_data",
+            update_key_values={
+                "max_budget": litellm.max_budget,
+                "budget_duration": litellm.budget_duration,
+            },
+        )
+
+        # Without this, the upsert leaves budget_reset_at=NULL on rows that
+        # took the UPDATE path, and reset_budget_for_litellm_users never
+        # matches them (NULL < now() is unknown in SQL) — so the proxy-wide
+        # spend cap blocks forever once it's hit.
+        if prisma_client is not None and litellm.budget_duration is not None:
+            try:
+                await prisma_client.db.litellm_usertable.update_many(
+                    where={
+                        "user_id": litellm_proxy_budget_name,
+                        "budget_reset_at": None,
+                    },
+                    data={
+                        "budget_reset_at": get_budget_reset_time(
+                            budget_duration=litellm.budget_duration
+                        )
+                    },
+                )
+            except Exception as e:
+                verbose_proxy_logger.warning(
+                    "Failed to backfill budget_reset_at on proxy admin row: %s", e
+                )
+
    @classmethod
    async def _warm_global_spend_cache(
        cls,
--- a/pyproject.toml
+++ b/pyproject.toml
@ -22,7 +22,7 @@ dependencies = [
    "importlib-metadata>=8.0.0,<9.0",
    "tokenizers>=0.21.0,<1.0",
    "click>=8.0.0,<9.0",
-    "jinja2>=3.1.0,<4.0",
+    "jinja2>=3.1.6,<4.0",
    "aiohttp>=3.10,<4.0",
    "pydantic>=2.10.0,<3.0.0",
    "jsonschema>=4.0.0,<5.0",
--- a/tests/litellm_utils_tests/test_proxy_budget_reset.py
+++ b/tests/litellm_utils_tests/test_proxy_budget_reset.py
@ -233,6 +233,12 @@ async def test_reset_budget_endusers_partial_failure():
    prisma_client.db.litellm_verificationtoken.update_many = AsyncMock(
        return_value={"count": 0}
    )
+    # Mock db.litellm_organizationtable.update_many (used by reset_budget_for_orgs_linked_to_budgets)
+    prisma_client.db.litellm_organizationtable.update_many = AsyncMock(
+        return_value={"count": 0}
+    )
+    # Mock db.litellm_tagtable.update_many (used by reset_budget_for_tags_linked_to_budgets)
+    prisma_client.db.litellm_tagtable.update_many = AsyncMock(return_value={"count": 0})

    proxy_logging_obj = MagicMock()
    proxy_logging_obj.service_logging_obj = MagicMock()
@ -400,6 +406,12 @@ async def test_reset_budget_continues_other_categories_on_failure():
    prisma_client.db.litellm_verificationtoken.update_many = AsyncMock(
        return_value={"count": 0}
    )
+    # Mock db.litellm_organizationtable.update_many (used by reset_budget_for_orgs_linked_to_budgets)
+    prisma_client.db.litellm_organizationtable.update_many = AsyncMock(
+        return_value={"count": 0}
+    )
+    # Mock db.litellm_tagtable.update_many (used by reset_budget_for_tags_linked_to_budgets)
+    prisma_client.db.litellm_tagtable.update_many = AsyncMock(return_value={"count": 0})

    proxy_logging_obj = MagicMock()
    proxy_logging_obj.service_logging_obj = MagicMock()
@ -884,6 +896,12 @@ async def test_service_logger_endusers_success():
    prisma_client.db.litellm_verificationtoken.update_many = AsyncMock(
        return_value={"count": 0}
    )
+    # Mock db.litellm_organizationtable.update_many (used by reset_budget_for_orgs_linked_to_budgets)
+    prisma_client.db.litellm_organizationtable.update_many = AsyncMock(
+        return_value={"count": 0}
+    )
+    # Mock db.litellm_tagtable.update_many (used by reset_budget_for_tags_linked_to_budgets)
+    prisma_client.db.litellm_tagtable.update_many = AsyncMock(return_value={"count": 0})

    proxy_logging_obj = MagicMock()
    proxy_logging_obj.service_logging_obj = MagicMock()
@ -966,6 +984,12 @@ async def test_service_logger_endusers_failure():
    prisma_client.db.litellm_verificationtoken.update_many = AsyncMock(
        return_value={"count": 0}
    )
+    # Mock db.litellm_organizationtable.update_many (used by reset_budget_for_orgs_linked_to_budgets)
+    prisma_client.db.litellm_organizationtable.update_many = AsyncMock(
+        return_value={"count": 0}
+    )
+    # Mock db.litellm_tagtable.update_many (used by reset_budget_for_tags_linked_to_budgets)
+    prisma_client.db.litellm_tagtable.update_many = AsyncMock(return_value={"count": 0})

    proxy_logging_obj = MagicMock()
    proxy_logging_obj.service_logging_obj = MagicMock()
@ -1060,6 +1084,10 @@ async def test_reset_budget_for_litellm_team_members_called():
    prisma_client.db.litellm_verificationtoken.update_many = AsyncMock(
        return_value={"count": 0}
    )
+    prisma_client.db.litellm_organizationtable.update_many = AsyncMock(
+        return_value={"count": 0}
+    )
+    prisma_client.db.litellm_tagtable.update_many = AsyncMock(return_value={"count": 0})

    proxy_logging_obj = MagicMock()
    proxy_logging_obj.service_logging_obj = MagicMock()
--- a/tests/otel_tests/test_e2e_budgeting.py
+++ b/tests/otel_tests/test_e2e_budgeting.py
@ -25,8 +25,8 @@ async def make_calls_until_budget_exceeded(session, key: str, call_function, **k

        # Check error structure and values that should be consistent
        assert (
-            error_dict["code"] == "400"
-        ), f"Expected error code 400, got: {error_dict['code']}"
+            error_dict["code"] == "429"
+        ), f"Expected error code 429, got: {error_dict['code']}"
        assert (
            error_dict["type"] == "budget_exceeded"
        ), f"Expected error type budget_exceeded, got: {error_dict['type']}"
--- a/tests/otel_tests/test_e2e_model_access.py
+++ b/tests/otel_tests/test_e2e_model_access.py
@ -99,7 +99,7 @@ async def test_model_access_patterns(key_models, test_model, expect_success):
            # Assert error structure and values
            assert _error_body["type"] == "key_model_access_denied"
            assert _error_body["param"] == "model"
-            assert _error_body["code"] == "401"
+            assert _error_body["code"] == "403"
            assert "key not allowed to access model" in _error_body["message"]


@ -297,7 +297,7 @@ def _validate_model_access_exception(
    # Assert error structure and values
    assert _error_body["type"] == expected_type
    assert _error_body["param"] == "model"
-    assert _error_body["code"] == "401"
+    assert _error_body["code"] == "403"
    if expected_type == "key_model_access_denied":
        assert "key not allowed to access model" in _error_body["message"]
    elif expected_type == "team_model_access_denied":
--- a/tests/test_litellm/proxy/auth/test_auth_checks.py
+++ b/tests/test_litellm/proxy/auth/test_auth_checks.py
@ -12,6 +12,7 @@ from datetime import datetime, timedelta

 import httpx
 import pytest
+from fastapi import status

 import litellm
 from litellm.proxy._types import (
@ -31,6 +32,7 @@ from litellm.proxy._types import (
 )
 from litellm.proxy.auth.auth_checks import (
    ExperimentalUIJWTToken,
+    _can_object_call_model,
    _can_object_call_vector_stores,
    _check_end_user_budget,
    _check_team_member_budget,
@ -206,6 +208,52 @@ def test_get_key_object_from_ui_hash_key_invalid():
    assert key_object is None


+@pytest.mark.parametrize(
+    "object_type,expected_error_type",
+    [
+        ("key", ProxyErrorTypes.key_model_access_denied),
+        ("team", ProxyErrorTypes.team_model_access_denied),
+        ("user", ProxyErrorTypes.user_model_access_denied),
+        ("org", ProxyErrorTypes.org_model_access_denied),
+        ("project", ProxyErrorTypes.project_model_access_denied),
+    ],
+)
+def test_can_object_call_model_denials_return_forbidden(
+    object_type, expected_error_type
+):
+    with pytest.raises(ProxyException) as exc_info:
+        _can_object_call_model(
+            model="restricted-model",
+            llm_router=None,
+            models=["allowed-model"],
+            object_type=object_type,
+        )
+
+    assert exc_info.value.type == expected_error_type
+    assert int(exc_info.value.code) == status.HTTP_403_FORBIDDEN
+
+
+@pytest.mark.asyncio
+async def test_can_user_call_model_no_default_models_returns_forbidden():
+    from litellm.proxy._types import SpecialModelNames
+    from litellm.proxy.auth.auth_checks import can_user_call_model
+
+    user_object = LiteLLM_UserTable(
+        user_id="test-user",
+        models=[SpecialModelNames.no_default_models.value],
+    )
+
+    with pytest.raises(ProxyException) as exc_info:
+        await can_user_call_model(
+            model="restricted-model",
+            llm_router=None,
+            user_object=user_object,
+        )
+
+    assert exc_info.value.type == ProxyErrorTypes.key_model_access_denied
+    assert int(exc_info.value.code) == status.HTTP_403_FORBIDDEN
+
+
@pytest.mark.asyncio
 async def test_get_key_object_should_reconnect_once_on_db_connection_error():
    mock_prisma_client = MagicMock()
@ -1144,6 +1192,7 @@ async def test_check_team_member_model_access_denied_model():
                proxy_logging_obj=MagicMock(),
            )
        assert exc_info.value.type == ProxyErrorTypes.team_model_access_denied
+        assert int(exc_info.value.code) == status.HTTP_403_FORBIDDEN


@pytest.mark.asyncio
--- a/tests/test_litellm/proxy/auth/test_auth_exception_handler.py
+++ b/tests/test_litellm/proxy/auth/test_auth_exception_handler.py
@ -140,6 +140,7 @@ async def test_handle_authentication_error_budget_exceeded():
        )

    assert exc_info.value.type == ProxyErrorTypes.budget_exceeded
+    assert int(exc_info.value.code) == status.HTTP_429_TOO_MANY_REQUESTS


@pytest.mark.asyncio
--- a/tests/test_litellm/proxy/auth/test_user_api_key_auth.py
+++ b/tests/test_litellm/proxy/auth/test_user_api_key_auth.py
@ -1,6 +1,7 @@
 import json
 import os
 import sys
+from datetime import datetime, timedelta
 from types import SimpleNamespace
 from unittest.mock import ANY, AsyncMock, MagicMock, patch

@ -9,6 +10,7 @@ sys.path.insert(
 )  # Adds the parent directory to the system path

 import pytest
+from fastapi import status

 import litellm
 import litellm.proxy.proxy_server
@ -178,6 +180,26 @@ async def test_custom_auth_does_not_enforce_key_model_access_by_default():
        mock_can_key.assert_not_awaited()


+@pytest.mark.asyncio
+async def test_post_custom_auth_expired_key_returns_unauthorized():
+    expired_token = UserAPIKeyAuth(
+        token="test_token",
+        expires=datetime.now() - timedelta(minutes=1),
+    )
+
+    with pytest.raises(ProxyException) as exc_info:
+        await _run_post_custom_auth_checks(
+            valid_token=expired_token,
+            request=MagicMock(),
+            request_data={},
+            route="/v1/chat/completions",
+            parent_otel_span=None,
+        )
+
+    assert exc_info.value.type == ProxyErrorTypes.expired_key
+    assert int(exc_info.value.code) == status.HTTP_401_UNAUTHORIZED
+
+
@pytest.mark.asyncio
 async def test_custom_auth_honors_key_level_model_access_restriction_allowed_with_opt_in():
    valid_token = UserAPIKeyAuth(token="test_token", models=["gpt-4o-mini"])
@ -934,6 +956,7 @@ async def test_proxy_admin_expired_key_from_cache():
            assert (
                exc_info.value.type == ProxyErrorTypes.expired_key
            ), f"Expected expired_key error type, got {exc_info.value.type}"
+            assert int(exc_info.value.code) == status.HTTP_401_UNAUTHORIZED
            assert "Expired Key" in str(
                exc_info.value.message
            ), f"Exception message should mention 'Expired Key', got: {exc_info.value.message}"
--- a/tests/test_litellm/proxy/common_utils/test_reset_budget_job.py
+++ b/tests/test_litellm/proxy/common_utils/test_reset_budget_job.py
@ -39,6 +39,46 @@ class MockLiteLLMVerificationToken:
        return {"count": 1}


+class MockLiteLLMOrganizationTable:
+    def __init__(self):
+        self.update_many_calls: List[Dict[str, Any]] = []
+        self.find_many_calls: List[Dict[str, Any]] = []
+        self._find_many_results: List[Any] = []
+
+    def set_find_many_results(self, results: List[Any]):
+        self._find_many_results = results
+
+    async def find_many(self, where: Dict[str, Any]) -> List[Any]:
+        self.find_many_calls.append({"where": where})
+        return self._find_many_results
+
+    async def update_many(
+        self, where: Dict[str, Any], data: Dict[str, Any]
+    ) -> Dict[str, Any]:
+        self.update_many_calls.append({"where": where, "data": data})
+        return {"count": 1}
+
+
+class MockLiteLLMTagTable:
+    def __init__(self):
+        self.update_many_calls: List[Dict[str, Any]] = []
+        self.find_many_calls: List[Dict[str, Any]] = []
+        self._find_many_results: List[Any] = []
+
+    def set_find_many_results(self, results: List[Any]):
+        self._find_many_results = results
+
+    async def find_many(self, where: Dict[str, Any]) -> List[Any]:
+        self.find_many_calls.append({"where": where})
+        return self._find_many_results
+
+    async def update_many(
+        self, where: Dict[str, Any], data: Dict[str, Any]
+    ) -> Dict[str, Any]:
+        self.update_many_calls.append({"where": where, "data": data})
+        return {"count": 1}
+
+
 class MockLiteLLMEndUserTable:
    def __init__(self):
        self.find_many_calls: List[Dict[str, Any]] = []
@ -57,6 +97,8 @@ class MockDB:
        self.litellm_teammembership = MockLiteLLMTeamMembership()
        self.litellm_verificationtoken = MockLiteLLMVerificationToken()
        self.litellm_endusertable = MockLiteLLMEndUserTable()
+        self.litellm_organizationtable = MockLiteLLMOrganizationTable()
+        self.litellm_tagtable = MockLiteLLMTagTable()


 class MockPrismaClient:
@ -459,6 +501,100 @@ def test_reset_budget_for_keys_linked_to_budgets_empty(
    assert len(calls) == 0


+def test_reset_budget_for_orgs_linked_to_budgets(reset_budget_job, mock_prisma_client):
+    """
+    Test that when a budget tier is reset, orgs linked to that budget
+    (via budget_id) also get their spend reset.
+    """
+    now = datetime.now(timezone.utc)
+
+    test_budget = type(
+        "LiteLLM_BudgetTableFull",
+        (),
+        {
+            "max_budget": 100.0,
+            "budget_duration": "30d",
+            "budget_reset_at": now - timedelta(hours=1),
+            "budget_id": "30d-org-budget",
+            "created_at": now - timedelta(days=30),
+        },
+    )
+
+    asyncio.run(
+        reset_budget_job.reset_budget_for_orgs_linked_to_budgets(
+            budgets_to_reset=[test_budget]
+        )
+    )
+
+    calls = mock_prisma_client.db.litellm_organizationtable.update_many_calls
+    assert len(calls) == 1
+    call = calls[0]
+    assert call["where"]["budget_id"] == {"in": ["30d-org-budget"]}
+    assert call["where"]["spend"] == {"gt": 0}
+    assert call["data"]["spend"] == 0
+
+
+def test_reset_budget_for_orgs_linked_to_budgets_empty(
+    reset_budget_job, mock_prisma_client
+):
+    """
+    Test that when there are no budgets to reset, no update is performed
+    on the organization table.
+    """
+    asyncio.run(
+        reset_budget_job.reset_budget_for_orgs_linked_to_budgets(budgets_to_reset=[])
+    )
+    calls = mock_prisma_client.db.litellm_organizationtable.update_many_calls
+    assert len(calls) == 0
+
+
+def test_reset_budget_for_tags_linked_to_budgets(reset_budget_job, mock_prisma_client):
+    """
+    Test that when a budget tier is reset, tags linked to that budget
+    (via budget_id) also get their spend reset.
+    """
+    now = datetime.now(timezone.utc)
+
+    test_budget = type(
+        "LiteLLM_BudgetTableFull",
+        (),
+        {
+            "max_budget": 50.0,
+            "budget_duration": "30d",
+            "budget_reset_at": now - timedelta(hours=1),
+            "budget_id": "30d-tag-budget",
+            "created_at": now - timedelta(days=30),
+        },
+    )
+
+    asyncio.run(
+        reset_budget_job.reset_budget_for_tags_linked_to_budgets(
+            budgets_to_reset=[test_budget]
+        )
+    )
+
+    calls = mock_prisma_client.db.litellm_tagtable.update_many_calls
+    assert len(calls) == 1
+    call = calls[0]
+    assert call["where"]["budget_id"] == {"in": ["30d-tag-budget"]}
+    assert call["where"]["spend"] == {"gt": 0}
+    assert call["data"]["spend"] == 0
+
+
+def test_reset_budget_for_tags_linked_to_budgets_empty(
+    reset_budget_job, mock_prisma_client
+):
+    """
+    Test that when there are no budgets to reset, no update is performed
+    on the tag table.
+    """
+    asyncio.run(
+        reset_budget_job.reset_budget_for_tags_linked_to_budgets(budgets_to_reset=[])
+    )
+    calls = mock_prisma_client.db.litellm_tagtable.update_many_calls
+    assert len(calls) == 0
+
+
@pytest.mark.parametrize(
    "budget_duration, expected_day, expected_month",
    [
@ -618,6 +754,75 @@ def test_budget_table_reset_also_resets_linked_keys(
    assert calls[0]["data"]["spend"] == 0


+def test_budget_table_reset_also_resets_linked_orgs(
+    reset_budget_job, mock_prisma_client
+):
+    """
+    Integration-style test: when reset_budget_for_litellm_budget_table runs,
+    it should also reset spend for orgs linked to the expiring budget tiers
+    (in addition to end-users, team members, and keys).
+    """
+    now = datetime.now(timezone.utc)
+
+    test_budget = type(
+        "LiteLLM_BudgetTableFull",
+        (),
+        {
+            "max_budget": 100.0,
+            "budget_duration": "30d",
+            "budget_reset_at": now - timedelta(hours=1),
+            "budget_id": "30d-org-budget",
+            "created_at": now - timedelta(days=30),
+        },
+    )
+
+    mock_prisma_client.data["budget"] = [test_budget]
+
+    asyncio.run(reset_budget_job.reset_budget_for_litellm_budget_table())
+
+    calls = mock_prisma_client.db.litellm_organizationtable.update_many_calls
+    assert len(calls) == 1, (
+        "Expected reset_budget_for_litellm_budget_table to also reset orgs "
+        f"linked to expiring budgets, but got {len(calls)} update_many calls"
+    )
+    assert calls[0]["where"]["budget_id"] == {"in": ["30d-org-budget"]}
+    assert calls[0]["data"]["spend"] == 0
+
+
+def test_budget_table_reset_also_resets_linked_tags(
+    reset_budget_job, mock_prisma_client
+):
+    """
+    Integration-style test: when reset_budget_for_litellm_budget_table runs,
+    it should also reset spend for tags linked to the expiring budget tiers.
+    """
+    now = datetime.now(timezone.utc)
+
+    test_budget = type(
+        "LiteLLM_BudgetTableFull",
+        (),
+        {
+            "max_budget": 50.0,
+            "budget_duration": "30d",
+            "budget_reset_at": now - timedelta(hours=1),
+            "budget_id": "30d-tag-budget",
+            "created_at": now - timedelta(days=30),
+        },
+    )
+
+    mock_prisma_client.data["budget"] = [test_budget]
+
+    asyncio.run(reset_budget_job.reset_budget_for_litellm_budget_table())
+
+    calls = mock_prisma_client.db.litellm_tagtable.update_many_calls
+    assert len(calls) == 1, (
+        "Expected reset_budget_for_litellm_budget_table to also reset tags "
+        f"linked to expiring budgets, but got {len(calls)} update_many calls"
+    )
+    assert calls[0]["where"]["budget_id"] == {"in": ["30d-tag-budget"]}
+    assert calls[0]["data"]["spend"] == 0
+
+
 def test_reset_budget_resets_endusers_with_null_budget_id(
    reset_budget_job, mock_prisma_client
 ):
@ -1057,16 +1262,26 @@ def test_reset_budget_windows_query_error_does_not_break_team_path(monkeypatch):


 def _make_counter_invalidation_job(monkeypatch):
-    """Stub spend_counter_cache so we can observe invalidation calls."""
+    """Stub spend_counter_cache (and user_api_key_cache) so we can observe
+    invalidation calls.
+
+    Both caches are looked up via ``from litellm.proxy.proxy_server import
+    <name>`` inside the reset job, so we publish them on a fake module.
+    """
    spend_counter_cache = MagicMock()
    spend_counter_cache.in_memory_cache.set_cache = MagicMock()
    spend_counter_cache.redis_cache = MagicMock()
    spend_counter_cache.redis_cache.async_set_cache = AsyncMock()

+    user_api_key_cache = MagicMock()
+    user_api_key_cache.async_delete_cache = AsyncMock()
+
    fake_module = types.ModuleType("litellm.proxy.proxy_server")
    fake_module.spend_counter_cache = spend_counter_cache
+    fake_module.user_api_key_cache = user_api_key_cache
    monkeypatch.setitem(sys.modules, "litellm.proxy.proxy_server", fake_module)

+    spend_counter_cache.user_api_key_cache = user_api_key_cache
    return spend_counter_cache


@ -1205,3 +1420,136 @@ def test_reset_budget_for_keys_linked_to_budgets_invalidates_redis_counter(monke
    counter_cache.in_memory_cache.set_cache.assert_any_call(
        key="spend:key:sk-linked", value=0.0, ttl=60
    )
+
+
+def test_reset_budget_for_orgs_linked_to_budgets_invalidates_redis_counter(monkeypatch):
+    """Resetting orgs via budget tier must clear each linked org's counter."""
+    counter_cache = _make_counter_invalidation_job(monkeypatch)
+
+    expired_budget = type("B", (), {"budget_id": "budget-1"})
+    linked_org = type("Org", (), {"organization_id": "org-acme"})
+
+    prisma_client = MagicMock()
+    prisma_client.db.litellm_organizationtable.find_many = AsyncMock(
+        return_value=[linked_org]
+    )
+    prisma_client.db.litellm_organizationtable.update_many = AsyncMock(
+        return_value={"count": 1}
+    )
+
+    job = ResetBudgetJob(proxy_logging_obj=MagicMock(), prisma_client=prisma_client)
+    asyncio.run(job.reset_budget_for_orgs_linked_to_budgets([expired_budget]))
+
+    counter_cache.in_memory_cache.set_cache.assert_any_call(
+        key="spend:org:org-acme", value=0.0, ttl=60
+    )
+    counter_cache.redis_cache.async_set_cache.assert_any_await(
+        key="spend:org:org-acme", value=0.0, ttl=60
+    )
+
+
+def test_reset_budget_for_tags_linked_to_budgets_invalidates_redis_counter(monkeypatch):
+    """Resetting tags via budget tier must clear each linked tag's counter."""
+    counter_cache = _make_counter_invalidation_job(monkeypatch)
+
+    expired_budget = type("B", (), {"budget_id": "budget-1"})
+    linked_tag = type("Tag", (), {"tag_name": "tenant-42"})
+
+    prisma_client = MagicMock()
+    prisma_client.db.litellm_tagtable.find_many = AsyncMock(return_value=[linked_tag])
+    prisma_client.db.litellm_tagtable.update_many = AsyncMock(return_value={"count": 1})
+
+    job = ResetBudgetJob(proxy_logging_obj=MagicMock(), prisma_client=prisma_client)
+    asyncio.run(job.reset_budget_for_tags_linked_to_budgets([expired_budget]))
+
+    counter_cache.in_memory_cache.set_cache.assert_any_call(
+        key="spend:tag:tenant-42", value=0.0, ttl=60
+    )
+    counter_cache.redis_cache.async_set_cache.assert_any_await(
+        key="spend:tag:tenant-42", value=0.0, ttl=60
+    )
+
+
+def test_reset_budget_for_tags_linked_to_budgets_invalidates_management_cache(
+    monkeypatch,
+):
+    """Regression guard for the bug where tag spend stayed frozen across cycles.
+
+    ``SpendCounterReseed.from_db`` returns ``None`` for ``spend:tag:*`` keys,
+    so once the spend counter expires the tag budget check falls back to the
+    cached ``LiteLLM_TagTable.spend``. If we don't drop the management cache
+    entry on reset, that cached object lingers (TTL 60s) with the pre-reset
+    spend, and ``_tag_max_budget_check`` keeps rejecting requests even though
+    the DB row has been zeroed.
+    """
+    counter_cache = _make_counter_invalidation_job(monkeypatch)
+
+    expired_budget = type("B", (), {"budget_id": "budget-1"})
+    linked_tag = type("Tag", (), {"tag_name": "tenant-42"})
+
+    prisma_client = MagicMock()
+    prisma_client.db.litellm_tagtable.find_many = AsyncMock(return_value=[linked_tag])
+    prisma_client.db.litellm_tagtable.update_many = AsyncMock(return_value={"count": 1})
+
+    job = ResetBudgetJob(proxy_logging_obj=MagicMock(), prisma_client=prisma_client)
+    asyncio.run(job.reset_budget_for_tags_linked_to_budgets([expired_budget]))
+
+    counter_cache.user_api_key_cache.async_delete_cache.assert_any_await(
+        key="tag:tenant-42"
+    )
+
+
+def test_reset_budget_for_tags_linked_to_budgets_invalidates_each_tag_management_cache(
+    monkeypatch,
+):
+    """When multiple tags share the expired budget tier, every one of them
+    has its ``user_api_key_cache`` entry dropped — not just the first."""
+    counter_cache = _make_counter_invalidation_job(monkeypatch)
+
+    expired_budget = type("B", (), {"budget_id": "budget-1"})
+    linked_tags = [
+        type("Tag", (), {"tag_name": "tenant-a"}),
+        type("Tag", (), {"tag_name": "tenant-b"}),
+        type("Tag", (), {"tag_name": "tenant-c"}),
+    ]
+
+    prisma_client = MagicMock()
+    prisma_client.db.litellm_tagtable.find_many = AsyncMock(return_value=linked_tags)
+    prisma_client.db.litellm_tagtable.update_many = AsyncMock(return_value={"count": 3})
+
+    job = ResetBudgetJob(proxy_logging_obj=MagicMock(), prisma_client=prisma_client)
+    asyncio.run(job.reset_budget_for_tags_linked_to_budgets([expired_budget]))
+
+    deleted_keys = {
+        call.kwargs.get("key")
+        for call in counter_cache.user_api_key_cache.async_delete_cache.await_args_list
+    }
+    assert deleted_keys == {"tag:tenant-a", "tag:tenant-b", "tag:tenant-c"}
+
+
+def test_reset_budget_for_keys_linked_to_budgets_does_not_touch_management_cache(
+    monkeypatch,
+):
+    """Cache invalidation is opt-in: keys / orgs / team-members rely on
+    ``SpendCounterReseed.from_db`` (which DOES handle their counter keys),
+    so the cache_key_fn hook is intentionally not wired for them. This test
+    locks in that no-op so a future refactor doesn't accidentally start
+    clobbering the key cache (which would cost an extra DB round-trip per
+    reset cycle without fixing anything)."""
+    counter_cache = _make_counter_invalidation_job(monkeypatch)
+
+    expired_budget = type("B", (), {"budget_id": "budget-1"})
+    linked_key = type("Key", (), {"token": "sk-linked"})
+
+    prisma_client = MagicMock()
+    prisma_client.db.litellm_verificationtoken.find_many = AsyncMock(
+        return_value=[linked_key]
+    )
+    prisma_client.db.litellm_verificationtoken.update_many = AsyncMock(
+        return_value={"count": 1}
+    )
+
+    job = ResetBudgetJob(proxy_logging_obj=MagicMock(), prisma_client=prisma_client)
+    asyncio.run(job.reset_budget_for_keys_linked_to_budgets([expired_budget]))
+
+    counter_cache.user_api_key_cache.async_delete_cache.assert_not_awaited()
--- a/tests/test_litellm/proxy/test_proxy_server.py
+++ b/tests/test_litellm/proxy/test_proxy_server.py
@ -1728,6 +1728,67 @@ async def test_add_proxy_budget_to_db_only_creates_user_no_keys():
        assert call_args.kwargs["query_type"] == "update_data"


+@pytest.mark.asyncio
+async def test_add_proxy_budget_to_db_backfills_budget_reset_at():
+    """
+    Test that _upsert_proxy_budget_with_reset_at_backfill issues a conditional
+    update_many with `WHERE budget_reset_at IS NULL` to backfill the column on
+    rows that pre-existed without a reset schedule. Without this, the proxy
+    admin row stays at NULL and reset_budget_for_litellm_users never matches
+    it (NULL < now() is unknown in SQL), so the global proxy budget never
+    resets.
+    """
+    from unittest.mock import AsyncMock, MagicMock, patch
+
+    import litellm
+    from litellm.proxy.proxy_server import ProxyStartupEvent
+
+    litellm.budget_duration = "30d"
+    litellm.max_budget = 100.0
+    litellm_proxy_budget_name = "litellm-proxy-budget"
+
+    mock_prisma = MagicMock()
+    mock_prisma.db.litellm_usertable.update_many = AsyncMock(return_value={"count": 1})
+
+    mock_generate_key_helper = AsyncMock(
+        return_value={
+            "user_id": litellm_proxy_budget_name,
+            "max_budget": 100.0,
+            "budget_duration": "30d",
+            "spend": 0,
+            "models": [],
+        }
+    )
+
+    with (
+        patch(
+            "litellm.proxy.proxy_server.generate_key_helper_fn",
+            mock_generate_key_helper,
+        ),
+        patch("litellm.proxy.proxy_server.prisma_client", mock_prisma),
+    ):
+        await ProxyStartupEvent._upsert_proxy_budget_with_reset_at_backfill(
+            litellm_proxy_budget_name
+        )
+
+    # Upsert ran with the configured budget
+    mock_generate_key_helper.assert_called_once()
+
+    # Backfill update_many ran with the conditional WHERE
+    mock_prisma.db.litellm_usertable.update_many.assert_called_once()
+    backfill_call = mock_prisma.db.litellm_usertable.update_many.call_args
+    assert backfill_call.kwargs["where"]["user_id"] == litellm_proxy_budget_name
+    assert backfill_call.kwargs["where"]["budget_reset_at"] is None
+
+    # The backfilled value must be a real future datetime — anything else and
+    # reset_budget_for_litellm_users would still skip the row.
+    from datetime import datetime, timezone
+
+    backfilled_reset_at = backfill_call.kwargs["data"]["budget_reset_at"]
+    assert isinstance(backfilled_reset_at, datetime)
+    assert backfilled_reset_at > datetime.now(timezone.utc)
+
+
@pytest.mark.asyncio
 async def test_custom_ui_sso_sign_in_handler_config_loading():
    """
--- a/tests/test_openai_endpoints.py
+++ b/tests/test_openai_endpoints.py
@ -303,7 +303,7 @@ async def test_chat_completion():
            api_key=key_gen["key"],
            api_version="2024-02-15-preview",
        )
-        with pytest.raises(openai.AuthenticationError) as e:
+        with pytest.raises(openai.PermissionDeniedError) as e:
            response = await azure_client.chat.completions.create(
                model="gpt-4",
                messages=[{"role": "user", "content": "Hello!"}],
--- a/tests/test_users.py
+++ b/tests/test_users.py
@ -302,14 +302,14 @@ async def test_user_model_access():
            model="good-model",
        )

-        with pytest.raises(openai.AuthenticationError):
+        with pytest.raises(openai.PermissionDeniedError):
            await chat_completion(
                session=session,
                key=key,
                model="bedrock/anthropic.claude-3-sonnet-20240229-v1:0",
            )

-        with pytest.raises(openai.AuthenticationError):
+        with pytest.raises(openai.PermissionDeniedError):
            await chat_completion(
                session=session,
                key=key,
--- a/uv.lock
+++ b/uv.lock
@ -3405,7 +3405,7 @@ requires-dist = [
    { name = "gunicorn", marker = "extra == 'proxy'", specifier = "==23.0.0" },
    { name = "httpx", specifier = ">=0.28.0,<1.0" },
    { name = "importlib-metadata", specifier = ">=8.0.0,<9.0" },
-    { name = "jinja2", specifier = ">=3.1.0,<4.0" },
+    { name = "jinja2", specifier = ">=3.1.6,<4.0" },
    { name = "jsonschema", specifier = ">=4.0.0,<5.0" },
    { name = "langfuse", marker = "extra == 'proxy-runtime'", specifier = "==2.59.7" },
    { name = "litellm-enterprise", marker = "extra == 'proxy'", editable = "enterprise" },