Fix: tag budget reset must drop stale management-cache entry (#27568)

Squash-merged by litellm-agent from oss-agent-shin's PR.
This commit is contained in:
oss-agent-shin 2026-05-09 17:18:55 -07:00 committed by GitHub
parent c7739c9ed5
commit 9f68d2bb77
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
44 changed files with 727 additions and 3602 deletions

View File

@ -241,10 +241,27 @@ When opening issues or pull requests, follow these templates:
### Running the proxy server
Start the proxy with a config file:
Create a minimal config file and start the proxy:
```yaml
# config.yaml
model_list:
- model_name: fake-openai-endpoint
litellm_params:
model: openai/fake-model
api_key: fake-key
api_base: https://fake-api.example.com
general_settings:
master_key: sk-1234
litellm_settings:
drop_params: True
telemetry: False
```
```bash
uv run litellm --config dev_config.yaml --port 4000
uv run litellm --config config.yaml --port 4000
```
The proxy takes ~15-20 seconds to fully start (it runs Prisma migrations on boot). Wait for `/health` to return before sending requests. Without a PostgreSQL `DATABASE_URL`, the proxy connects to a default Neon dev database embedded in the `litellm-proxy-extras` package.

View File

@ -146,7 +146,7 @@ LiteLLM is a unified interface for 100+ LLM providers with two main components:
- **Bound large result sets.** Prisma materializes full results in memory. For results over ~10 MB, paginate with `take`/`skip` or `cursor`/`take`, always with an explicit `order`. Prefer cursor-based pagination (`skip` is O(n)). Don't paginate naturally small result sets.
- **Limit fetched columns on wide tables.** Use `select` to fetch only needed fields — returns a partial object, so downstream code must not access unselected fields.
- **Check index coverage.** For new or modified queries, check `schema.prisma` for a supporting index. Prefer extending an existing index (e.g. `@@index([a])` → `@@index([a, b])`) over adding a new one, unless it's a `@@unique`. Only add indexes for large/frequent queries.
- **Keep schema files in sync.** Apply schema changes to all `schema.prisma` copies (`schema.prisma`, `litellm/proxy/`, `litellm-proxy-extras/`, `litellm-js/spend-logs/` for SpendLogs) with a migration under `litellm-proxy-extras/litellm_proxy_extras/migrations/`.
- **Keep schema files in sync.** Apply schema changes to all `schema.prisma` copies (`schema.prisma`, `litellm/proxy/`, `litellm-proxy-extras/`) with a migration under `litellm-proxy-extras/litellm_proxy_extras/migrations/`.
### Setup Wizard (`litellm/setup_wizard.py`)
- The wizard is implemented as a single `SetupWizard` class with `@staticmethod` methods — keep it that way. No module-level functions except `run_setup_wizard()` (the public entrypoint) and pure helpers (color, ANSI).

View File

@ -1,18 +0,0 @@
# Thin overlay image: adds a local config.yaml on top of the published LiteLLM image.
# Use the provided base image (pinned by digest so rebuilds are reproducible)
FROM ghcr.io/berriai/litellm:main-latest@sha256:7c311546c25e7bb6e8cafede9fcd3d0d622ac636b5c9418befaa32e85dfb0186
# Set the working directory to /app
WORKDIR /app
# Copy the configuration file into the container at /app
COPY config.yaml .
# Make sure your docker/entrypoint.sh is executable
# Convert Windows line endings to Unix
# NOTE(review): nothing in this file copies docker/entrypoint.sh, so it must already
# exist under /app in the base image — confirm against the base image.
RUN sed -i 's/\r$//' docker/entrypoint.sh && chmod +x docker/entrypoint.sh
# Expose the necessary port
EXPOSE 4000/tcp
# Override the CMD instruction with your desired command and arguments
# These are arguments only (no executable); presumably they are appended to the
# base image's ENTRYPOINT — confirm against the base image.
CMD ["--port", "4000", "--config", "config.yaml", "--detailed_debug", "--run_gunicorn"]

View File

@ -1,56 +0,0 @@
# Deployment running three replicas of the LiteLLM proxy, configured from the
# `litellm-config` ConfigMap.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: litellm-deployment
spec:
  replicas: 3
  selector:
    matchLabels:
      app: litellm
  template:
    metadata:
      labels:
        app: litellm
    spec:
      containers:
        - name: litellm-container
          image: ghcr.io/berriai/litellm:main-latest
          imagePullPolicy: Always
          # NOTE(review): credentials are inlined as plain values here; consider
          # sourcing them from a Secret (valueFrom.secretKeyRef) instead.
          env:
            - name: AZURE_API_KEY
              value: "d6f****"
            - name: AZURE_API_BASE
              value: "https://openai"
            - name: LITELLM_MASTER_KEY
              value: "sk-1234"
            - name: DATABASE_URL
              value: "postgresql://ishaan*********"
          args:
            - "--config"
            - "/app/proxy_config.yaml" # Must match the mountPath of the config file below
          volumeMounts: # Define volume mount for proxy_config.yaml
            - name: config-volume
              # Mount only the config file. Mounting the volume at /app itself
              # would shadow everything the image ships in /app.
              mountPath: /app/proxy_config.yaml
              # Assumes the ConfigMap stores the file under the key
              # `proxy_config.yaml` — TODO confirm against litellm-config.
              subPath: proxy_config.yaml
              readOnly: true
          livenessProbe:
            httpGet:
              path: /health/liveliness
              port: 4000
            initialDelaySeconds: 120
            periodSeconds: 15
            successThreshold: 1
            failureThreshold: 3
            timeoutSeconds: 10
          readinessProbe:
            httpGet:
              path: /health/readiness
              port: 4000
            initialDelaySeconds: 120
            periodSeconds: 15
            successThreshold: 1
            failureThreshold: 3
            timeoutSeconds: 10
      volumes: # Define volume to mount proxy_config.yaml
        - name: config-volume
          configMap:
            name: litellm-config

View File

@ -1,12 +0,0 @@
apiVersion: v1
kind: Service
metadata:
name: litellm-service
spec:
selector:
app: litellm
ports:
- protocol: TCP
port: 4000
targetPort: 4000
type: LoadBalancer

View File

@ -1,13 +0,0 @@
model_list:
- model_name: fake-openai-endpoint
litellm_params:
model: openai/fake-model
api_key: fake-key
api_base: https://exampleopenaiendpoint-production.up.railway.app/
general_settings:
master_key: sk-1234
litellm_settings:
drop_params: True
telemetry: False

View File

@ -1,68 +0,0 @@
# Multi-stage Alpine build: dependencies are resolved with uv in a builder stage,
# then the whole /app tree (source + virtualenv + caches) is copied into a runtime stage.
# Base image for building
ARG LITELLM_BUILD_IMAGE=python:3.11-alpine@sha256:f07e2ace46f560f09a6eeec7b4913b80ee99546e749ef82342a419a326620856
# Runtime image
ARG LITELLM_RUNTIME_IMAGE=python:3.11-alpine@sha256:f07e2ace46f560f09a6eeec7b4913b80ee99546e749ef82342a419a326620856
# Pinned uv image; used only as a source for the uv/uvx binaries copied below
ARG UV_IMAGE=ghcr.io/astral-sh/uv:0.11.7@sha256:240fb85ab0f263ef12f492d8476aa3a2e4e1e333f7d67fbdd923d00a506a516a
FROM $UV_IMAGE AS uvbin
# ---- Builder stage ----
FROM $LITELLM_BUILD_IMAGE AS builder
WORKDIR /app
COPY --from=uvbin /uv /usr/local/bin/uv
COPY --from=uvbin /uvx /usr/local/bin/uvx
# Build-time system packages (compiler, headers, Node.js/npm, libsndfile)
RUN apk add --no-cache gcc python3-dev musl-dev nodejs npm libsndfile
# Keep the virtualenv and all caches under /app so the runtime stage's
# `COPY --from=builder /app /app` carries them over
ENV PRISMA_BINARY_CACHE_DIR=/app/.cache/prisma-python/binaries \
UV_PROJECT_ENVIRONMENT=/app/.venv \
UV_LINK_MODE=copy \
XDG_CACHE_HOME=/app/.cache \
PATH="/app/.venv/bin:${PATH}"
# Copy dependency metadata first for layer caching
COPY pyproject.toml uv.lock ./
COPY enterprise/pyproject.toml enterprise/
COPY litellm-proxy-extras/pyproject.toml litellm-proxy-extras/
# Install third-party dependencies (cached unless pyproject.toml/uv.lock change)
RUN uv sync --frozen --no-install-project --no-install-workspace --no-default-groups --no-editable \
--extra proxy \
--extra proxy-runtime \
--extra extra_proxy \
--extra semantic-router \
--python python3
# Copy full source tree
COPY . .
# Install project and workspace packages (fast - deps already cached)
RUN uv sync --frozen --no-default-groups --no-editable \
--extra proxy \
--extra proxy-runtime \
--extra extra_proxy \
--extra semantic-router \
--python python3
# Generate the Prisma client from the repository-root schema
RUN prisma generate --schema=./schema.prisma
# Strip Windows line endings from the entrypoint scripts and make them executable
RUN sed -i 's/\r$//' docker/entrypoint.sh && chmod +x docker/entrypoint.sh && \
sed -i 's/\r$//' docker/prod_entrypoint.sh && chmod +x docker/prod_entrypoint.sh
# ---- Runtime stage ----
FROM $LITELLM_RUNTIME_IMAGE AS runtime
RUN apk upgrade --no-cache && apk add --no-cache libsndfile nodejs npm
WORKDIR /app
# Same cache/venv locations as the builder so the copied /app tree resolves unchanged
ENV PRISMA_BINARY_CACHE_DIR=/app/.cache/prisma-python/binaries \
XDG_CACHE_HOME=/app/.cache \
PATH="/app/.venv/bin:${PATH}"
COPY --from=builder /app /app
EXPOSE 4000/tcp
# NOTE(review): no USER instruction in this stage, so the container runs as the
# base image's default user — confirm this is intended.
# Relative ENTRYPOINT path; resolved against WORKDIR /app
ENTRYPOINT ["docker/prod_entrypoint.sh"]
CMD ["--port", "4000"]

View File

@ -1,86 +0,0 @@
# Use the provided base image
# NOTE: This is a dev/branch-specific tag. Update digest when the base image is rebuilt.
FROM ghcr.io/berriai/litellm:litellm_fwd_server_root_path-dev
# Set the working directory to /app
WORKDIR /app
# Install Node.js and npm (adjust version as needed)
RUN apt-get update && apt-get upgrade -y \
libxml2 \
libexpat1 \
openssl \
libssl3 \
git \
libkrb5-3 \
libglib2.0-0 \
wget \
libaom3 \
libxslt1.1 \
libgnutls30 \
libc6 && \
apt-get install -y --no-install-recommends nodejs npm && \
npm install -g npm@11.12.1 tar@7.5.11 glob@11.1.0 @isaacs/brace-expansion@5.0.1 minimatch@10.2.4 diff@8.0.3 && \
GLOBAL="$(npm root -g)" && \
find "$GLOBAL/npm" -type d -name "tar" -path "*/node_modules/tar" | while read d; do \
rm -rf "$d" && cp -rL "$GLOBAL/tar" "$d"; \
done && \
find "$GLOBAL/npm" -type d -name "glob" -path "*/node_modules/glob" | while read d; do \
rm -rf "$d" && cp -rL "$GLOBAL/glob" "$d"; \
done && \
find "$GLOBAL/npm" -type d -name "brace-expansion" -path "*/node_modules/@isaacs/brace-expansion" | while read d; do \
rm -rf "$d" && cp -rL "$GLOBAL/@isaacs/brace-expansion" "$d"; \
done && \
find "$GLOBAL/npm" -type d -name "minimatch" -path "*/node_modules/minimatch" | while read d; do \
rm -rf "$d" && cp -rL "$GLOBAL/minimatch" "$d"; \
done && \
find "$GLOBAL/npm" -type d -name "diff" -path "*/node_modules/diff" | while read d; do \
rm -rf "$d" && cp -rL "$GLOBAL/diff" "$d"; \
done && \
find /usr/local/lib /usr/lib -path "*/node_modules/npm/package.json" -exec \
sed -i 's/"tar": "\^7\.5\.[0-9]*"/"tar": "^7.5.10"/g; s/"minimatch": "\^10\.[0-9.]*"/"minimatch": "^10.2.4"/g' {} + 2>/dev/null && \
npm cache clean --force && \
apt-get purge -y npm
# Copy the UI source into the container
COPY ./ui/litellm-dashboard /app/ui/litellm-dashboard
# Set an environment variable for UI_BASE_PATH
# This can be overridden at build time
# set UI_BASE_PATH to "<your server root path>/ui"
ENV UI_BASE_PATH="/prod/ui"
# Build the UI with the specified UI_BASE_PATH
WORKDIR /app/ui/litellm-dashboard
RUN npm ci
RUN UI_BASE_PATH=$UI_BASE_PATH npm run build
# Create the destination directory
RUN mkdir -p /app/litellm/proxy/_experimental/out
# Move the built files to the appropriate location
# Assuming the build output is in ./out directory
RUN rm -rf /app/litellm/proxy/_experimental/out/* && \
mv ./out/* /app/litellm/proxy/_experimental/out/
# Switch back to the main app directory
WORKDIR /app
# Make sure your docker/entrypoint.sh is executable
# Convert Windows line endings to Unix for entrypoint scripts
RUN sed -i 's/\r$//' docker/entrypoint.sh && chmod +x docker/entrypoint.sh
RUN sed -i 's/\r$//' docker/prod_entrypoint.sh && chmod +x docker/prod_entrypoint.sh
# Run as non-root user
RUN groupadd --gid 1000 appuser && useradd --uid 1000 --gid 1000 --no-create-home appuser \
&& chown -R appuser:appuser /app
USER appuser
# Expose the necessary port
EXPOSE 4000/tcp
# Probe the liveliness endpoint (the same path the Kubernetes livenessProbe uses)
# instead of /health. In LiteLLM, /health is the authenticated, model-calling
# health check, so an unauthenticated probe against it is unreliable.
# urlopen raises on connection errors and HTTP error statuses, which makes python
# exit non-zero and marks the container unhealthy.
HEALTHCHECK --interval=30s --timeout=5s --start-period=10s --retries=3 \
    CMD ["python", "-c", "import urllib.request; urllib.request.urlopen('http://localhost:4000/health/liveliness')"]
# Override the CMD instruction with your desired command and arguments
CMD ["--port", "4000", "--config", "config.yaml", "--detailed_debug"]

View File

@ -1,121 +0,0 @@
# Base image for building
ARG LITELLM_BUILD_IMAGE=python:3.13-slim@sha256:739e7213785e88c0f702dcdc12c0973afcbd606dbf021a589cab77d6b00b579d
# Runtime image
ARG LITELLM_RUNTIME_IMAGE=python:3.13-slim@sha256:739e7213785e88c0f702dcdc12c0973afcbd606dbf021a589cab77d6b00b579d
ARG UV_IMAGE=ghcr.io/astral-sh/uv:0.11.7@sha256:240fb85ab0f263ef12f492d8476aa3a2e4e1e333f7d67fbdd923d00a506a516a
FROM $UV_IMAGE AS uvbin
FROM $LITELLM_BUILD_IMAGE AS builder
WORKDIR /app
USER root
COPY --from=uvbin /uv /usr/local/bin/uv
COPY --from=uvbin /uvx /usr/local/bin/uvx
RUN apt-get update && apt-get install -y --no-install-recommends \
gcc \
g++ \
python3-dev \
libssl-dev \
pkg-config \
nodejs \
npm \
&& rm -rf /var/lib/apt/lists/*
ENV PRISMA_BINARY_CACHE_DIR=/app/.cache/prisma-python/binaries \
UV_PROJECT_ENVIRONMENT=/app/.venv \
UV_LINK_MODE=copy \
XDG_CACHE_HOME=/app/.cache \
PATH="/app/.venv/bin:${PATH}"
# Copy dependency metadata first for layer caching
COPY pyproject.toml uv.lock ./
COPY enterprise/pyproject.toml enterprise/
COPY litellm-proxy-extras/pyproject.toml litellm-proxy-extras/
# Install third-party dependencies (cached unless pyproject.toml/uv.lock change)
RUN uv sync --frozen --no-install-project --no-install-workspace --no-default-groups --no-editable \
--extra proxy \
--extra proxy-runtime \
--extra extra_proxy \
--extra semantic-router \
--python python
# Copy full source tree
COPY . .
# Build Admin UI before final sync
RUN sed -i 's/\r$//' docker/build_admin_ui.sh && chmod +x docker/build_admin_ui.sh && ./docker/build_admin_ui.sh
# Install project and workspace packages (fast - deps already cached)
RUN uv sync --frozen --no-default-groups --no-editable \
--extra proxy \
--extra proxy-runtime \
--extra extra_proxy \
--extra semantic-router \
--python python
RUN prisma generate --schema=./schema.prisma
RUN sed -i 's/\r$//' docker/entrypoint.sh && chmod +x docker/entrypoint.sh && \
sed -i 's/\r$//' docker/prod_entrypoint.sh && chmod +x docker/prod_entrypoint.sh
FROM $LITELLM_RUNTIME_IMAGE AS runtime
USER root
RUN apt-get update && apt-get upgrade -y \
libxml2 \
libexpat1 \
openssl \
libssl3 \
git \
libkrb5-3 \
libglib2.0-0 \
wget \
libaom3 \
libxslt1.1 \
libgnutls30 \
libc6 \
&& apt-get install -y --no-install-recommends \
libssl3 \
libatomic1 \
nodejs \
npm \
&& rm -rf /var/lib/apt/lists/* \
&& npm install -g npm@11.12.1 tar@7.5.11 glob@11.1.0 @isaacs/brace-expansion@5.0.1 minimatch@10.2.4 diff@8.0.3 \
&& GLOBAL="$(npm root -g)" \
&& find "$GLOBAL/npm" -type d -name "tar" -path "*/node_modules/tar" | while read d; do \
rm -rf "$d" && cp -rL "$GLOBAL/tar" "$d"; \
done \
&& find "$GLOBAL/npm" -type d -name "glob" -path "*/node_modules/glob" | while read d; do \
rm -rf "$d" && cp -rL "$GLOBAL/glob" "$d"; \
done \
&& find "$GLOBAL/npm" -type d -name "brace-expansion" -path "*/node_modules/@isaacs/brace-expansion" | while read d; do \
rm -rf "$d" && cp -rL "$GLOBAL/@isaacs/brace-expansion" "$d"; \
done \
&& find "$GLOBAL/npm" -type d -name "minimatch" -path "*/node_modules/minimatch" | while read d; do \
rm -rf "$d" && cp -rL "$GLOBAL/minimatch" "$d"; \
done \
&& find "$GLOBAL/npm" -type d -name "diff" -path "*/node_modules/diff" | while read d; do \
rm -rf "$d" && cp -rL "$GLOBAL/diff" "$d"; \
done \
&& find /usr/local/lib /usr/lib -path "*/node_modules/npm/package.json" -exec \
sed -i 's/"tar": "\^7\.5\.[0-9]*"/"tar": "^7.5.10"/g; s/"minimatch": "\^10\.[0-9.]*"/"minimatch": "^10.2.4"/g' {} + 2>/dev/null \
&& npm cache clean --force \
&& apt-get purge -y npm
WORKDIR /app
ENV PRISMA_BINARY_CACHE_DIR=/app/.cache/prisma-python/binaries \
XDG_CACHE_HOME=/app/.cache \
PATH="/app/.venv/bin:${PATH}"
COPY --from=builder /app /app
EXPOSE 4000/tcp
ENTRYPOINT ["docker/prod_entrypoint.sh"]
CMD ["--port", "4000"]

View File

@ -1,30 +0,0 @@
ARG UV_IMAGE=ghcr.io/astral-sh/uv:0.11.7@sha256:240fb85ab0f263ef12f492d8476aa3a2e4e1e333f7d67fbdd923d00a506a516a
FROM $UV_IMAGE AS uvbin
FROM python:3.13-slim@sha256:739e7213785e88c0f702dcdc12c0973afcbd606dbf021a589cab77d6b00b579d
WORKDIR /app
# Copy the uv binary and the health check script.
COPY --from=uvbin /uv /usr/local/bin/uv
COPY pyproject.toml uv.lock /app/
COPY scripts/health_check/health_check_client.py /app/health_check_client.py
# Resolve and install the health-check dependencies from the project lockfile
# so the runtime image stays self-contained and reproducible.
RUN uv export --frozen --no-default-groups --only-group healthcheck --no-emit-project --no-hashes --output-file /tmp/health-check-requirements.txt \
&& uv pip install --system -r /tmp/health-check-requirements.txt \
&& rm /tmp/health-check-requirements.txt \
&& rm /app/pyproject.toml /app/uv.lock \
&& chmod +x /app/health_check_client.py
# Run as non-root user
RUN groupadd --gid 1000 appuser && useradd --uid 1000 --gid 1000 --no-create-home appuser
USER appuser
# Health check
HEALTHCHECK --interval=30s --timeout=5s --start-period=5s --retries=3 \
CMD ["python", "/app/health_check_client.py", "--help"]
# Set entrypoint
ENTRYPOINT ["python", "/app/health_check_client.py"]

View File

@ -1,108 +0,0 @@
apiVersion: v1
entries:
litellm-helm:
- apiVersion: v2
appVersion: v1.43.18
created: "2024-08-19T23:58:25.331689+08:00"
dependencies:
- condition: db.deployStandalone
name: postgresql
repository: oci://registry-1.docker.io/bitnamicharts
version: '>=13.3.0'
- condition: redis.enabled
name: redis
repository: oci://registry-1.docker.io/bitnamicharts
version: '>=18.0.0'
description: Call all LLM APIs using the OpenAI format
digest: 0411df3dc42868be8af3ad3e00cb252790e6bd7ad15f5b77f1ca5214573a8531
name: litellm-helm
type: application
urls:
- https://berriai.github.io/litellm/litellm-helm-0.2.3.tgz
version: 0.2.3
postgresql:
- annotations:
category: Database
images: |
- name: os-shell
image: docker.io/bitnami/os-shell:12-debian-12-r16
- name: postgres-exporter
image: docker.io/bitnami/postgres-exporter:0.15.0-debian-12-r14
- name: postgresql
image: docker.io/bitnami/postgresql:16.2.0-debian-12-r6
licenses: Apache-2.0
apiVersion: v2
appVersion: 16.2.0
created: "2024-08-19T23:58:25.335716+08:00"
dependencies:
- name: common
repository: oci://registry-1.docker.io/bitnamicharts
tags:
- bitnami-common
version: 2.x.x
description: PostgreSQL (Postgres) is an open source object-relational database
known for reliability and data integrity. ACID-compliant, it supports foreign
keys, joins, views, triggers and stored procedures.
digest: 3c8125526b06833df32e2f626db34aeaedb29d38f03d15349db6604027d4a167
home: https://bitnami.com
icon: https://bitnami.com/assets/stacks/postgresql/img/postgresql-stack-220x234.png
keywords:
- postgresql
- postgres
- database
- sql
- replication
- cluster
maintainers:
- name: VMware, Inc.
url: https://github.com/bitnami/charts
name: postgresql
sources:
- https://github.com/bitnami/charts/tree/main/bitnami/postgresql
urls:
- https://berriai.github.io/litellm/charts/postgresql-14.3.1.tgz
version: 14.3.1
redis:
- annotations:
category: Database
images: |
- name: kubectl
image: docker.io/bitnami/kubectl:1.29.2-debian-12-r3
- name: os-shell
image: docker.io/bitnami/os-shell:12-debian-12-r16
- name: redis
image: docker.io/bitnami/redis:7.2.4-debian-12-r9
- name: redis-exporter
image: docker.io/bitnami/redis-exporter:1.58.0-debian-12-r4
- name: redis-sentinel
image: docker.io/bitnami/redis-sentinel:7.2.4-debian-12-r7
licenses: Apache-2.0
apiVersion: v2
appVersion: 7.2.4
created: "2024-08-19T23:58:25.339392+08:00"
dependencies:
- name: common
repository: oci://registry-1.docker.io/bitnamicharts
tags:
- bitnami-common
version: 2.x.x
description: Redis(R) is an open source, advanced key-value store. It is often
referred to as a data structure server since keys can contain strings, hashes,
lists, sets and sorted sets.
digest: b2fa1835f673a18002ca864c54fadac3c33789b26f6c5e58e2851b0b14a8f984
home: https://bitnami.com
icon: https://bitnami.com/assets/stacks/redis/img/redis-stack-220x234.png
keywords:
- redis
- keyvalue
- database
maintainers:
- name: VMware, Inc.
url: https://github.com/bitnami/charts
name: redis
sources:
- https://github.com/bitnami/charts/tree/main/bitnami/redis
urls:
- https://berriai.github.io/litellm/charts/redis-18.19.1.tgz
version: 18.19.1
generated: "2024-08-19T23:58:25.322532+08:00"

View File

@ -1,5 +0,0 @@
# Supply-chain hardening
# Packages needing lifecycle scripts: npm rebuild <pkg>
ignore-scripts=true
# Protects local npm install only — npm ci (used in CI) ignores this
min-release-age=3

View File

@ -1,8 +0,0 @@
```
npm install
npm run dev
```
```
npm run deploy
```

File diff suppressed because it is too large Load Diff

View File

@ -1,14 +0,0 @@
{
"scripts": {
"dev": "wrangler dev src/index.ts",
"deploy": "wrangler deploy --minify src/index.ts"
},
"dependencies": {
"hono": "4.12.16",
"openai": "4.29.2"
},
"devDependencies": {
"@cloudflare/workers-types": "4.20260501.1",
"wrangler": "4.87.0"
}
}

View File

@ -1,59 +0,0 @@
import { Hono } from 'hono'
import { Context } from 'hono';
import { bearerAuth } from 'hono/bearer-auth'
import OpenAI from "openai";
const openai = new OpenAI({
apiKey: "sk-1234",
baseURL: "https://openai-endpoint.ishaanjaffer0324.workers.dev"
});
/**
 * Sends a fixed chat-completion request (a single system message, model
 * "gpt-3.5-turbo") through the module-level `openai` client and resolves
 * with the completion it returns.
 */
async function call_proxy() {
  return await openai.chat.completions.create({
    messages: [{ role: "system", content: "You are a helpful assistant." }],
    model: "gpt-3.5-turbo",
  });
}
const app = new Hono()
// Middleware for API key authentication.
// Lets the request continue only when the Authorization header is exactly
// 'Bearer sk-1234'; any other value (or a missing header) gets a 401 text response.
const apiKeyAuth = async (c: Context, next: Function) => {
  const authHeader = c.req.header('Authorization');
  const authorized = authHeader === 'Bearer sk-1234';
  if (authorized) {
    await next();
    return;
  }
  return c.text('Unauthorized', 401);
};
app.use('/*', apiKeyAuth)
app.get('/', (c) => {
return c.text('Hello Hono!')
})
// Handler for chat completions.
// Ignores the incoming request body: it forwards the fixed request issued by
// call_proxy() and returns that result to the client as JSON.
const chatCompletionHandler = async (c: Context) => {
  return c.json(await call_proxy());
};
// Register the above handler for different POST routes with the apiKeyAuth middleware
app.post('/v1/chat/completions', chatCompletionHandler);
app.post('/chat/completions', chatCompletionHandler);
// Example showing how you might handle dynamic segments within the URL
// Here, using ':model*' to capture the rest of the path as a parameter 'model'
app.post('/openai/deployments/:model*/chat/completions', chatCompletionHandler);
export default app

View File

@ -1,17 +0,0 @@
{
"compilerOptions": {
"target": "ESNext",
"module": "ESNext",
"moduleResolution": "Bundler",
"strict": true,
"lib": [
"ESNext"
],
"types": [
"@cloudflare/workers-types"
],
"jsx": "react-jsx",
"jsxImportSource": "hono/jsx",
"skipLibCheck": true
},
}

View File

@ -1,18 +0,0 @@
name = "my-app"
compatibility_date = "2023-12-01"
# [vars]
# MY_VAR = "my-variable"
# [[kv_namespaces]]
# binding = "MY_KV_NAMESPACE"
# id = "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"
# [[r2_buckets]]
# binding = "MY_BUCKET"
# bucket_name = "my-bucket"
# [[d1_databases]]
# binding = "DB"
# database_name = "my-database"
# database_id = ""

View File

@ -1,5 +0,0 @@
# Supply-chain hardening
# Packages needing lifecycle scripts: npm rebuild <pkg>
ignore-scripts=true
# Protects local npm install only — npm ci (used in CI) ignores this
min-release-age=3

View File

@ -1,26 +0,0 @@
# Use the pinned Node.js 20.18.1 image (Alpine 3.20 variant)
FROM node:20.18.1-alpine3.20
# Set the working directory inside the container
WORKDIR /app
# Copy package.json and package-lock.json to the working directory
# (manifests are copied before the sources so the install layer below stays cached)
COPY ./litellm-js/spend-logs/package*.json ./
# Install dependencies
RUN npm ci
# Install Prisma globally
# NOTE(review): no version is pinned here, so each rebuild may pick up a different Prisma release.
RUN npm install -g prisma
# Copy the rest of the application code
COPY ./litellm-js/spend-logs .
# Generate Prisma client
RUN npx prisma generate
# Expose the port that the Node.js server will run on
EXPOSE 3000
# Command to run the Node.js app with npm run dev
# NOTE(review): this starts the package's `dev` script and no USER is set — confirm
# both are intended if this image is used outside local development.
CMD ["npm", "run", "dev"]

View File

@ -1,8 +0,0 @@
```
npm install
npm run dev
```
```
open http://localhost:3000
```

View File

@ -1,597 +0,0 @@
{
"name": "spend-logs",
"lockfileVersion": 3,
"requires": true,
"packages": {
"": {
"dependencies": {
"@hono/node-server": "1.19.13",
"hono": "4.12.16"
},
"devDependencies": {
"@types/node": "20.19.25",
"tsx": "4.20.6"
}
},
"node_modules/@esbuild/aix-ppc64": {
"version": "0.25.12",
"resolved": "https://registry.npmjs.org/@esbuild/aix-ppc64/-/aix-ppc64-0.25.12.tgz",
"integrity": "sha512-Hhmwd6CInZ3dwpuGTF8fJG6yoWmsToE+vYgD4nytZVxcu1ulHpUQRAB1UJ8+N1Am3Mz4+xOByoQoSZf4D+CpkA==",
"cpu": [
"ppc64"
],
"dev": true,
"license": "MIT",
"optional": true,
"os": [
"aix"
],
"engines": {
"node": ">=18"
}
},
"node_modules/@esbuild/android-arm": {
"version": "0.25.12",
"resolved": "https://registry.npmjs.org/@esbuild/android-arm/-/android-arm-0.25.12.tgz",
"integrity": "sha512-VJ+sKvNA/GE7Ccacc9Cha7bpS8nyzVv0jdVgwNDaR4gDMC/2TTRc33Ip8qrNYUcpkOHUT5OZ0bUcNNVZQ9RLlg==",
"cpu": [
"arm"
],
"dev": true,
"license": "MIT",
"optional": true,
"os": [
"android"
],
"engines": {
"node": ">=18"
}
},
"node_modules/@esbuild/android-arm64": {
"version": "0.25.12",
"resolved": "https://registry.npmjs.org/@esbuild/android-arm64/-/android-arm64-0.25.12.tgz",
"integrity": "sha512-6AAmLG7zwD1Z159jCKPvAxZd4y/VTO0VkprYy+3N2FtJ8+BQWFXU+OxARIwA46c5tdD9SsKGZ/1ocqBS/gAKHg==",
"cpu": [
"arm64"
],
"dev": true,
"license": "MIT",
"optional": true,
"os": [
"android"
],
"engines": {
"node": ">=18"
}
},
"node_modules/@esbuild/android-x64": {
"version": "0.25.12",
"resolved": "https://registry.npmjs.org/@esbuild/android-x64/-/android-x64-0.25.12.tgz",
"integrity": "sha512-5jbb+2hhDHx5phYR2By8GTWEzn6I9UqR11Kwf22iKbNpYrsmRB18aX/9ivc5cabcUiAT/wM+YIZ6SG9QO6a8kg==",
"cpu": [
"x64"
],
"dev": true,
"license": "MIT",
"optional": true,
"os": [
"android"
],
"engines": {
"node": ">=18"
}
},
"node_modules/@esbuild/darwin-arm64": {
"version": "0.25.12",
"resolved": "https://registry.npmjs.org/@esbuild/darwin-arm64/-/darwin-arm64-0.25.12.tgz",
"integrity": "sha512-N3zl+lxHCifgIlcMUP5016ESkeQjLj/959RxxNYIthIg+CQHInujFuXeWbWMgnTo4cp5XVHqFPmpyu9J65C1Yg==",
"cpu": [
"arm64"
],
"dev": true,
"license": "MIT",
"optional": true,
"os": [
"darwin"
],
"engines": {
"node": ">=18"
}
},
"node_modules/@esbuild/darwin-x64": {
"version": "0.25.12",
"resolved": "https://registry.npmjs.org/@esbuild/darwin-x64/-/darwin-x64-0.25.12.tgz",
"integrity": "sha512-HQ9ka4Kx21qHXwtlTUVbKJOAnmG1ipXhdWTmNXiPzPfWKpXqASVcWdnf2bnL73wgjNrFXAa3yYvBSd9pzfEIpA==",
"cpu": [
"x64"
],
"dev": true,
"license": "MIT",
"optional": true,
"os": [
"darwin"
],
"engines": {
"node": ">=18"
}
},
"node_modules/@esbuild/freebsd-arm64": {
"version": "0.25.12",
"resolved": "https://registry.npmjs.org/@esbuild/freebsd-arm64/-/freebsd-arm64-0.25.12.tgz",
"integrity": "sha512-gA0Bx759+7Jve03K1S0vkOu5Lg/85dou3EseOGUes8flVOGxbhDDh/iZaoek11Y8mtyKPGF3vP8XhnkDEAmzeg==",
"cpu": [
"arm64"
],
"dev": true,
"license": "MIT",
"optional": true,
"os": [
"freebsd"
],
"engines": {
"node": ">=18"
}
},
"node_modules/@esbuild/freebsd-x64": {
"version": "0.25.12",
"resolved": "https://registry.npmjs.org/@esbuild/freebsd-x64/-/freebsd-x64-0.25.12.tgz",
"integrity": "sha512-TGbO26Yw2xsHzxtbVFGEXBFH0FRAP7gtcPE7P5yP7wGy7cXK2oO7RyOhL5NLiqTlBh47XhmIUXuGciXEqYFfBQ==",
"cpu": [
"x64"
],
"dev": true,
"license": "MIT",
"optional": true,
"os": [
"freebsd"
],
"engines": {
"node": ">=18"
}
},
"node_modules/@esbuild/linux-arm": {
"version": "0.25.12",
"resolved": "https://registry.npmjs.org/@esbuild/linux-arm/-/linux-arm-0.25.12.tgz",
"integrity": "sha512-lPDGyC1JPDou8kGcywY0YILzWlhhnRjdof3UlcoqYmS9El818LLfJJc3PXXgZHrHCAKs/Z2SeZtDJr5MrkxtOw==",
"cpu": [
"arm"
],
"dev": true,
"license": "MIT",
"optional": true,
"os": [
"linux"
],
"engines": {
"node": ">=18"
}
},
"node_modules/@esbuild/linux-arm64": {
"version": "0.25.12",
"resolved": "https://registry.npmjs.org/@esbuild/linux-arm64/-/linux-arm64-0.25.12.tgz",
"integrity": "sha512-8bwX7a8FghIgrupcxb4aUmYDLp8pX06rGh5HqDT7bB+8Rdells6mHvrFHHW2JAOPZUbnjUpKTLg6ECyzvas2AQ==",
"cpu": [
"arm64"
],
"dev": true,
"license": "MIT",
"optional": true,
"os": [
"linux"
],
"engines": {
"node": ">=18"
}
},
"node_modules/@esbuild/linux-ia32": {
"version": "0.25.12",
"resolved": "https://registry.npmjs.org/@esbuild/linux-ia32/-/linux-ia32-0.25.12.tgz",
"integrity": "sha512-0y9KrdVnbMM2/vG8KfU0byhUN+EFCny9+8g202gYqSSVMonbsCfLjUO+rCci7pM0WBEtz+oK/PIwHkzxkyharA==",
"cpu": [
"ia32"
],
"dev": true,
"license": "MIT",
"optional": true,
"os": [
"linux"
],
"engines": {
"node": ">=18"
}
},
"node_modules/@esbuild/linux-loong64": {
"version": "0.25.12",
"resolved": "https://registry.npmjs.org/@esbuild/linux-loong64/-/linux-loong64-0.25.12.tgz",
"integrity": "sha512-h///Lr5a9rib/v1GGqXVGzjL4TMvVTv+s1DPoxQdz7l/AYv6LDSxdIwzxkrPW438oUXiDtwM10o9PmwS/6Z0Ng==",
"cpu": [
"loong64"
],
"dev": true,
"license": "MIT",
"optional": true,
"os": [
"linux"
],
"engines": {
"node": ">=18"
}
},
"node_modules/@esbuild/linux-mips64el": {
"version": "0.25.12",
"resolved": "https://registry.npmjs.org/@esbuild/linux-mips64el/-/linux-mips64el-0.25.12.tgz",
"integrity": "sha512-iyRrM1Pzy9GFMDLsXn1iHUm18nhKnNMWscjmp4+hpafcZjrr2WbT//d20xaGljXDBYHqRcl8HnxbX6uaA/eGVw==",
"cpu": [
"mips64el"
],
"dev": true,
"license": "MIT",
"optional": true,
"os": [
"linux"
],
"engines": {
"node": ">=18"
}
},
"node_modules/@esbuild/linux-ppc64": {
"version": "0.25.12",
"resolved": "https://registry.npmjs.org/@esbuild/linux-ppc64/-/linux-ppc64-0.25.12.tgz",
"integrity": "sha512-9meM/lRXxMi5PSUqEXRCtVjEZBGwB7P/D4yT8UG/mwIdze2aV4Vo6U5gD3+RsoHXKkHCfSxZKzmDssVlRj1QQA==",
"cpu": [
"ppc64"
],
"dev": true,
"license": "MIT",
"optional": true,
"os": [
"linux"
],
"engines": {
"node": ">=18"
}
},
"node_modules/@esbuild/linux-riscv64": {
"version": "0.25.12",
"resolved": "https://registry.npmjs.org/@esbuild/linux-riscv64/-/linux-riscv64-0.25.12.tgz",
"integrity": "sha512-Zr7KR4hgKUpWAwb1f3o5ygT04MzqVrGEGXGLnj15YQDJErYu/BGg+wmFlIDOdJp0PmB0lLvxFIOXZgFRrdjR0w==",
"cpu": [
"riscv64"
],
"dev": true,
"license": "MIT",
"optional": true,
"os": [
"linux"
],
"engines": {
"node": ">=18"
}
},
"node_modules/@esbuild/linux-s390x": {
"version": "0.25.12",
"resolved": "https://registry.npmjs.org/@esbuild/linux-s390x/-/linux-s390x-0.25.12.tgz",
"integrity": "sha512-MsKncOcgTNvdtiISc/jZs/Zf8d0cl/t3gYWX8J9ubBnVOwlk65UIEEvgBORTiljloIWnBzLs4qhzPkJcitIzIg==",
"cpu": [
"s390x"
],
"dev": true,
"license": "MIT",
"optional": true,
"os": [
"linux"
],
"engines": {
"node": ">=18"
}
},
"node_modules/@esbuild/linux-x64": {
"version": "0.25.12",
"resolved": "https://registry.npmjs.org/@esbuild/linux-x64/-/linux-x64-0.25.12.tgz",
"integrity": "sha512-uqZMTLr/zR/ed4jIGnwSLkaHmPjOjJvnm6TVVitAa08SLS9Z0VM8wIRx7gWbJB5/J54YuIMInDquWyYvQLZkgw==",
"cpu": [
"x64"
],
"dev": true,
"license": "MIT",
"optional": true,
"os": [
"linux"
],
"engines": {
"node": ">=18"
}
},
"node_modules/@esbuild/netbsd-arm64": {
"version": "0.25.12",
"resolved": "https://registry.npmjs.org/@esbuild/netbsd-arm64/-/netbsd-arm64-0.25.12.tgz",
"integrity": "sha512-xXwcTq4GhRM7J9A8Gv5boanHhRa/Q9KLVmcyXHCTaM4wKfIpWkdXiMog/KsnxzJ0A1+nD+zoecuzqPmCRyBGjg==",
"cpu": [
"arm64"
],
"dev": true,
"license": "MIT",
"optional": true,
"os": [
"netbsd"
],
"engines": {
"node": ">=18"
}
},
"node_modules/@esbuild/netbsd-x64": {
"version": "0.25.12",
"resolved": "https://registry.npmjs.org/@esbuild/netbsd-x64/-/netbsd-x64-0.25.12.tgz",
"integrity": "sha512-Ld5pTlzPy3YwGec4OuHh1aCVCRvOXdH8DgRjfDy/oumVovmuSzWfnSJg+VtakB9Cm0gxNO9BzWkj6mtO1FMXkQ==",
"cpu": [
"x64"
],
"dev": true,
"license": "MIT",
"optional": true,
"os": [
"netbsd"
],
"engines": {
"node": ">=18"
}
},
"node_modules/@esbuild/openbsd-arm64": {
"version": "0.25.12",
"resolved": "https://registry.npmjs.org/@esbuild/openbsd-arm64/-/openbsd-arm64-0.25.12.tgz",
"integrity": "sha512-fF96T6KsBo/pkQI950FARU9apGNTSlZGsv1jZBAlcLL1MLjLNIWPBkj5NlSz8aAzYKg+eNqknrUJ24QBybeR5A==",
"cpu": [
"arm64"
],
"dev": true,
"license": "MIT",
"optional": true,
"os": [
"openbsd"
],
"engines": {
"node": ">=18"
}
},
"node_modules/@esbuild/openbsd-x64": {
"version": "0.25.12",
"resolved": "https://registry.npmjs.org/@esbuild/openbsd-x64/-/openbsd-x64-0.25.12.tgz",
"integrity": "sha512-MZyXUkZHjQxUvzK7rN8DJ3SRmrVrke8ZyRusHlP+kuwqTcfWLyqMOE3sScPPyeIXN/mDJIfGXvcMqCgYKekoQw==",
"cpu": [
"x64"
],
"dev": true,
"license": "MIT",
"optional": true,
"os": [
"openbsd"
],
"engines": {
"node": ">=18"
}
},
"node_modules/@esbuild/openharmony-arm64": {
"version": "0.25.12",
"resolved": "https://registry.npmjs.org/@esbuild/openharmony-arm64/-/openharmony-arm64-0.25.12.tgz",
"integrity": "sha512-rm0YWsqUSRrjncSXGA7Zv78Nbnw4XL6/dzr20cyrQf7ZmRcsovpcRBdhD43Nuk3y7XIoW2OxMVvwuRvk9XdASg==",
"cpu": [
"arm64"
],
"dev": true,
"license": "MIT",
"optional": true,
"os": [
"openharmony"
],
"engines": {
"node": ">=18"
}
},
"node_modules/@esbuild/sunos-x64": {
"version": "0.25.12",
"resolved": "https://registry.npmjs.org/@esbuild/sunos-x64/-/sunos-x64-0.25.12.tgz",
"integrity": "sha512-3wGSCDyuTHQUzt0nV7bocDy72r2lI33QL3gkDNGkod22EsYl04sMf0qLb8luNKTOmgF/eDEDP5BFNwoBKH441w==",
"cpu": [
"x64"
],
"dev": true,
"license": "MIT",
"optional": true,
"os": [
"sunos"
],
"engines": {
"node": ">=18"
}
},
"node_modules/@esbuild/win32-arm64": {
"version": "0.25.12",
"resolved": "https://registry.npmjs.org/@esbuild/win32-arm64/-/win32-arm64-0.25.12.tgz",
"integrity": "sha512-rMmLrur64A7+DKlnSuwqUdRKyd3UE7oPJZmnljqEptesKM8wx9J8gx5u0+9Pq0fQQW8vqeKebwNXdfOyP+8Bsg==",
"cpu": [
"arm64"
],
"dev": true,
"license": "MIT",
"optional": true,
"os": [
"win32"
],
"engines": {
"node": ">=18"
}
},
"node_modules/@esbuild/win32-ia32": {
"version": "0.25.12",
"resolved": "https://registry.npmjs.org/@esbuild/win32-ia32/-/win32-ia32-0.25.12.tgz",
"integrity": "sha512-HkqnmmBoCbCwxUKKNPBixiWDGCpQGVsrQfJoVGYLPT41XWF8lHuE5N6WhVia2n4o5QK5M4tYr21827fNhi4byQ==",
"cpu": [
"ia32"
],
"dev": true,
"license": "MIT",
"optional": true,
"os": [
"win32"
],
"engines": {
"node": ">=18"
}
},
"node_modules/@esbuild/win32-x64": {
"version": "0.25.12",
"resolved": "https://registry.npmjs.org/@esbuild/win32-x64/-/win32-x64-0.25.12.tgz",
"integrity": "sha512-alJC0uCZpTFrSL0CCDjcgleBXPnCrEAhTBILpeAp7M/OFgoqtAetfBzX0xM00MUsVVPpVjlPuMbREqnZCXaTnA==",
"cpu": [
"x64"
],
"dev": true,
"license": "MIT",
"optional": true,
"os": [
"win32"
],
"engines": {
"node": ">=18"
}
},
"node_modules/@hono/node-server": {
"version": "1.19.13",
"resolved": "https://registry.npmjs.org/@hono/node-server/-/node-server-1.19.13.tgz",
"integrity": "sha512-TsQLe4i2gvoTtrHje625ngThGBySOgSK3Xo2XRYOdqGN1teR8+I7vchQC46uLJi8OF62YTYA3AhSpumtkhsaKQ==",
"license": "MIT",
"engines": {
"node": ">=18.14.1"
},
"peerDependencies": {
"hono": "^4"
}
},
"node_modules/@types/node": {
"version": "20.19.25",
"resolved": "https://registry.npmjs.org/@types/node/-/node-20.19.25.tgz",
"integrity": "sha512-ZsJzA5thDQMSQO788d7IocwwQbI8B5OPzmqNvpf3NY/+MHDAS759Wo0gd2WQeXYt5AAAQjzcrTVC6SKCuYgoCQ==",
"dev": true,
"license": "MIT",
"dependencies": {
"undici-types": "~6.21.0"
}
},
"node_modules/esbuild": {
"version": "0.25.12",
"resolved": "https://registry.npmjs.org/esbuild/-/esbuild-0.25.12.tgz",
"integrity": "sha512-bbPBYYrtZbkt6Os6FiTLCTFxvq4tt3JKall1vRwshA3fdVztsLAatFaZobhkBC8/BrPetoa0oksYoKXoG4ryJg==",
"dev": true,
"hasInstallScript": true,
"license": "MIT",
"bin": {
"esbuild": "bin/esbuild"
},
"engines": {
"node": ">=18"
},
"optionalDependencies": {
"@esbuild/aix-ppc64": "0.25.12",
"@esbuild/android-arm": "0.25.12",
"@esbuild/android-arm64": "0.25.12",
"@esbuild/android-x64": "0.25.12",
"@esbuild/darwin-arm64": "0.25.12",
"@esbuild/darwin-x64": "0.25.12",
"@esbuild/freebsd-arm64": "0.25.12",
"@esbuild/freebsd-x64": "0.25.12",
"@esbuild/linux-arm": "0.25.12",
"@esbuild/linux-arm64": "0.25.12",
"@esbuild/linux-ia32": "0.25.12",
"@esbuild/linux-loong64": "0.25.12",
"@esbuild/linux-mips64el": "0.25.12",
"@esbuild/linux-ppc64": "0.25.12",
"@esbuild/linux-riscv64": "0.25.12",
"@esbuild/linux-s390x": "0.25.12",
"@esbuild/linux-x64": "0.25.12",
"@esbuild/netbsd-arm64": "0.25.12",
"@esbuild/netbsd-x64": "0.25.12",
"@esbuild/openbsd-arm64": "0.25.12",
"@esbuild/openbsd-x64": "0.25.12",
"@esbuild/openharmony-arm64": "0.25.12",
"@esbuild/sunos-x64": "0.25.12",
"@esbuild/win32-arm64": "0.25.12",
"@esbuild/win32-ia32": "0.25.12",
"@esbuild/win32-x64": "0.25.12"
}
},
"node_modules/fsevents": {
"version": "2.3.3",
"resolved": "https://registry.npmjs.org/fsevents/-/fsevents-2.3.3.tgz",
"integrity": "sha512-5xoDfX+fL7faATnagmWPpbFtwh/R77WmMMqqHGS65C3vvB0YHrgF+B1YmZ3441tMj5n63k0212XNoJwzlhffQw==",
"dev": true,
"hasInstallScript": true,
"license": "MIT",
"optional": true,
"os": [
"darwin"
],
"engines": {
"node": "^8.16.0 || ^10.6.0 || >=11.0.0"
}
},
"node_modules/get-tsconfig": {
"version": "4.14.0",
"resolved": "https://registry.npmjs.org/get-tsconfig/-/get-tsconfig-4.14.0.tgz",
"integrity": "sha512-yTb+8DXzDREzgvYmh6s9vHsSVCHeC0G3PI5bEXNBHtmshPnO+S5O7qgLEOn0I5QvMy6kpZN8K1NKGyilLb93wA==",
"dev": true,
"license": "MIT",
"dependencies": {
"resolve-pkg-maps": "^1.0.0"
},
"funding": {
"url": "https://github.com/privatenumber/get-tsconfig?sponsor=1"
}
},
"node_modules/hono": {
"version": "4.12.16",
"resolved": "https://registry.npmjs.org/hono/-/hono-4.12.16.tgz",
"integrity": "sha512-jN0ZewiNAWSe5khM3EyCmBb250+b40wWbwNILNfEvq84VREWwOIkuUsFONk/3i3nqkz7Oe1PcpM2mwQEK2L9Kg==",
"license": "MIT",
"engines": {
"node": ">=16.9.0"
}
},
"node_modules/resolve-pkg-maps": {
"version": "1.0.0",
"resolved": "https://registry.npmjs.org/resolve-pkg-maps/-/resolve-pkg-maps-1.0.0.tgz",
"integrity": "sha512-seS2Tj26TBVOC2NIc2rOe2y2ZO7efxITtLZcGSOnHHNOQ7CkiUBfw0Iw2ck6xkIhPwLhKNLS8BO+hEpngQlqzw==",
"dev": true,
"license": "MIT",
"funding": {
"url": "https://github.com/privatenumber/resolve-pkg-maps?sponsor=1"
}
},
"node_modules/tsx": {
"version": "4.20.6",
"resolved": "https://registry.npmjs.org/tsx/-/tsx-4.20.6.tgz",
"integrity": "sha512-ytQKuwgmrrkDTFP4LjR0ToE2nqgy886GpvRSpU0JAnrdBYppuY5rLkRUYPU1yCryb24SsKBTL/hlDQAEFVwtZg==",
"dev": true,
"license": "MIT",
"dependencies": {
"esbuild": "~0.25.0",
"get-tsconfig": "^4.7.5"
},
"bin": {
"tsx": "dist/cli.mjs"
},
"engines": {
"node": ">=18.0.0"
},
"optionalDependencies": {
"fsevents": "~2.3.3"
}
},
"node_modules/undici-types": {
"version": "6.21.0",
"resolved": "https://registry.npmjs.org/undici-types/-/undici-types-6.21.0.tgz",
"integrity": "sha512-iwDZqg0QAGrg9Rav5H4n0M64c3mkR59cJ6wQp+7C4nI0gsmExaedaYLNO44eT4AtBBwjbTiGPMlt2Md0T9H9JQ==",
"dev": true,
"license": "MIT"
}
}
}

View File

@ -1,13 +0,0 @@
{
"scripts": {
"dev": "tsx watch src/index.ts"
},
"dependencies": {
"@hono/node-server": "1.19.13",
"hono": "4.12.16"
},
"devDependencies": {
"@types/node": "20.19.25",
"tsx": "4.20.6"
}
}

View File

@ -1,29 +0,0 @@
// Client generator: produces the Prisma JavaScript/TypeScript client.
generator client {
provider = "prisma-client-js"
}
// PostgreSQL datasource; the connection string is read from the DATABASE_URL
// environment variable.
datasource client {
provider = "postgresql"
url = env("DATABASE_URL")
}
// Spend-log record. request_id is the primary key; startTime, endTime and
// call_type are required with no default; team_id and end_user are nullable;
// every other column has a default.
model LiteLLM_SpendLogs {
request_id String @id
call_type String
api_key String @default("")
spend Float @default(0.0)
total_tokens Int @default(0)
prompt_tokens Int @default(0)
completion_tokens Int @default(0)
startTime DateTime
endTime DateTime
model String @default("")
api_base String @default("")
user String @default("")
// JSON columns: metadata defaults to an empty object, request_tags to an empty array.
metadata Json @default("{}")
cache_hit String @default("")
cache_key String @default("")
request_tags Json @default("[]")
team_id String?
end_user String?
}

View File

@ -1,32 +0,0 @@
/**
 * Bundle of spend increments, one list per entity kind, plus the raw
 * spend-log rows that accompany them.
 */
export type LiteLLM_IncrementSpend = {
  /** Per-key increments, e.g. [{"key": spend}, ...]. */
  key_transactions: LiteLLM_IncrementObject[];
  user_transactions: LiteLLM_IncrementObject[];
  team_transactions: LiteLLM_IncrementObject[];
  spend_logs_transactions: LiteLLM_SpendLogs[];
};
/** A single spend increment: the entity key and the amount to add. */
export type LiteLLM_IncrementObject = {
  key: string;
  spend: number;
};
/**
 * TypeScript shape of one spend-log row. Prisma scalar types map as:
 * String -> string, Float/Int -> number, DateTime -> Date, Json -> any.
 */
export type LiteLLM_SpendLogs = {
  /** Unique identifier of the row (@id in the schema). */
  request_id: string;
  call_type: string;
  /** Defaults to "" in the schema when not provided. */
  api_key: string;
  spend: number;
  total_tokens: number;
  prompt_tokens: number;
  completion_tokens: number;
  startTime: Date;
  endTime: Date;
  /** Defaults to "" in the schema when not provided. */
  model: string;
  api_base: string;
  user: string;
  /** Free-form JSON; a narrower type could be used if the structure is known. */
  metadata: any;
  cache_hit: string;
  cache_key: string;
  /** Free-form JSON; could be narrowed to an array type if the structure is known. */
  request_tags: any;
  /** Optional: may be omitted (undefined) or explicitly null. */
  team_id?: string | null;
  /** Optional: may be omitted (undefined) or explicitly null. */
  end_user?: string | null;
};

View File

@ -1,84 +0,0 @@
import { serve } from '@hono/node-server'
import { Hono } from 'hono'
import { PrismaClient } from '@prisma/client'
import {LiteLLM_SpendLogs, LiteLLM_IncrementSpend, LiteLLM_IncrementObject} from './_types'
// Hono application; the routes below are registered on it.
const app = new Hono()
// Prisma client used by flushLogsToDb to write spend logs to the database.
const prisma = new PrismaClient()
// In-memory storage for logs
// spend_logs is the queue: POST /spend/update appends to it and flushLogsToDb drains it.
let spend_logs: LiteLLM_SpendLogs[] = [];
// NOTE(review): the three arrays below are declared but never read or written
// anywhere else in this file — confirm whether they are still needed.
const key_logs: LiteLLM_IncrementObject[] = [];
const user_logs: LiteLLM_IncrementObject[] = [];
const transaction_logs: LiteLLM_IncrementObject[] = [];
// Root route: replies with a fixed plain-text greeting.
app.get('/', (c) => {
return c.text('Hello Hono!')
})
// Tuning knobs for the periodic flush performed by flushLogsToDb.
const MIN_LOGS = 1; // Minimum number of logs needed to initiate a flush
const FLUSH_INTERVAL = 5000; // Time in ms to wait before trying to flush again
const BATCH_SIZE = 100; // Preferred size of each batch to write to the database
const MAX_LOGS_PER_INTERVAL = 1000; // Maximum number of logs to flush in a single interval
// True while a flush is in progress. setInterval fires on a fixed schedule and
// does not wait for the previous async callback to settle, so without this
// guard two overlapping flushes could read the same head of the queue and
// insert the same rows twice.
let isFlushing = false;

/**
 * Write queued spend logs to the database in batches.
 *
 * At most MAX_LOGS_PER_INTERVAL logs are written per call, BATCH_SIZE at a
 * time. Each batch is removed from the queue only after its insert succeeds,
 * so a failure part-way through never re-inserts batches that were already
 * written. On failure the error is logged and the remaining logs stay queued
 * for the next interval; the returned promise never rejects, which keeps the
 * setInterval callback from producing an unhandled rejection.
 */
const flushLogsToDb = async () => {
  if (isFlushing) {
    // A previous flush is still running; let it finish.
    return;
  }

  if (spend_logs.length < MIN_LOGS) {
    // Distinguish "below threshold" from "nothing queued" in the log output.
    if (spend_logs.length > 0) {
      console.log(`Accumulating logs. Currently at ${spend_logs.length}, waiting for at least ${MIN_LOGS}.`);
    } else {
      console.log("No logs to flush.");
    }
    return;
  }

  isFlushing = true;
  let processed = 0;
  try {
    // Fix the amount of work up front so logs arriving during the flush are
    // left for the next interval.
    const quota = Math.min(spend_logs.length, MAX_LOGS_PER_INTERVAL);
    while (processed < quota) {
      const batch = spend_logs.slice(0, Math.min(BATCH_SIZE, quota - processed));
      // Convert datetime strings to Date objects
      const batchWithDates = batch.map(entry => ({
        ...entry,
        startTime: new Date(entry.startTime),
        endTime: new Date(entry.endTime),
      }));
      await prisma.liteLLM_SpendLogs.createMany({
        data: batchWithDates,
      });
      // Drop the batch only now that it is persisted. Logs pushed while the
      // insert was awaited sit at the tail and are preserved by the slice.
      spend_logs = spend_logs.slice(batch.length);
      processed += batch.length;
      console.log(`Flushed ${batch.length} logs to the DB.`);
    }
    console.log(`${processed} logs processed. Remaining in queue: ${spend_logs.length}`);
  } catch (err) {
    console.error(
      `Failed to flush logs to the DB after ${processed} logs; ${spend_logs.length} remain queued for the next attempt.`,
      err,
    );
  } finally {
    isFlushing = false;
  }
};
// Setup interval for attempting to flush the logs
setInterval(flushLogsToDb, FLUSH_INTERVAL);
// Route to receive log messages
// Expects a JSON array of spend logs; each entry is appended to the in-memory
// queue that flushLogsToDb drains. Non-array bodies are rejected with HTTP 400.
app.post('/spend/update', async (c) => {
  const incomingLogs = await c.req.json<LiteLLM_SpendLogs[]>();
  if (!Array.isArray(incomingLogs)) {
    // Spreading a non-iterable would throw and surface as a 500; report the
    // client error explicitly instead.
    return c.json({ message: 'Request body must be a JSON array of spend logs' }, 400);
  }
  // Append one by one rather than push(...incomingLogs): spreading a very large
  // array into call arguments can exceed the engine's argument limit.
  for (const log of incomingLogs) {
    spend_logs.push(log);
  }
  console.log(`Received and stored ${incomingLogs.length} logs. Total logs in memory: ${spend_logs.length}`);
  return c.json({ message: `Successfully stored ${incomingLogs.length} logs` });
});
// Port the HTTP server listens on.
const port = 3000
console.log(`Server is running on port ${port}`)
// Start the Node server and hand incoming requests to the Hono app.
serve({ fetch: app.fetch, port })

View File

@ -1,13 +0,0 @@
{
"compilerOptions": {
"target": "ESNext",
"module": "ESNext",
"moduleResolution": "Bundler",
"strict": true,
"types": [
"node"
],
"jsx": "react-jsx",
"jsxImportSource": "hono/jsx",
}
}

View File

@ -2849,7 +2849,7 @@ def _can_object_call_model(
object_type=object_type
),
param="model",
code=status.HTTP_401_UNAUTHORIZED,
code=status.HTTP_403_FORBIDDEN,
)
@ -3082,7 +3082,7 @@ async def can_user_call_model(
message=f"User not allowed to access model. No default model access, only team models allowed. Tried to access {model}",
type=ProxyErrorTypes.key_model_access_denied,
param="model",
code=status.HTTP_401_UNAUTHORIZED,
code=status.HTTP_403_FORBIDDEN,
)
return _can_object_call_model(
@ -3625,7 +3625,7 @@ async def _check_team_member_model_access(
message=f"Team member not allowed to access model. User={valid_token.user_id}, Team={team_object.team_id}, Model={model}. Allowed member models = {member_allowed_models}",
type=ProxyErrorTypes.team_model_access_denied,
param="model",
code=status.HTTP_401_UNAUTHORIZED,
code=status.HTTP_403_FORBIDDEN,
)

View File

@ -123,7 +123,7 @@ class UserAPIKeyAuthExceptionHandler:
message=e.message,
type=ProxyErrorTypes.budget_exceeded,
param=None,
code=400,
code=getattr(e, "status_code", status.HTTP_429_TOO_MANY_REQUESTS),
)
if isinstance(e, HTTPException):
raise ProxyException(

View File

@ -1107,7 +1107,7 @@ async def _user_api_key_auth_builder( # noqa: PLR0915
raise ProxyException(
message=f"Authentication Error - Expired Key. Key Expiry time {expiry_time} and current time {current_time}",
type=ProxyErrorTypes.expired_key,
code=400,
code=status.HTTP_401_UNAUTHORIZED,
param=abbreviate_api_key(api_key=api_key),
)
valid_token = update_valid_token_with_end_user_params(
@ -1432,7 +1432,7 @@ async def _user_api_key_auth_builder( # noqa: PLR0915
raise ProxyException(
message=f"Authentication Error - Expired Key. Key Expiry time {expiry_time} and current time {current_time}",
type=ProxyErrorTypes.expired_key,
code=400,
code=status.HTTP_401_UNAUTHORIZED,
param=abbreviate_api_key(api_key=api_key),
)
@ -2417,7 +2417,7 @@ async def _run_post_custom_auth_checks(
raise ProxyException(
message=f"Authentication Error - Expired Key. Key Expiry time {expiry_time} and current time {current_time}",
type=ProxyErrorTypes.expired_key,
code=400,
code=status.HTTP_401_UNAUTHORIZED,
param=(
abbreviate_api_key(api_key=valid_token.token)
if valid_token.token

View File

@ -2,7 +2,7 @@ import asyncio
import json
import time
from datetime import datetime, timezone
from typing import Any, List, Literal, Optional, Union
from typing import Any, Callable, List, Literal, Optional, Union
import litellm
from litellm._logging import verbose_proxy_logger
@ -83,93 +83,139 @@ class ResetBudgetJob:
"Failed to reset spend counter %s: %s", counter_key, e
)
@staticmethod
async def _invalidate_user_api_key_cache_entry(cache_key: str) -> None:
    """Best-effort eviction of ``cache_key`` from ``user_api_key_cache``.

    Tags and end-users are not covered by ``SpendCounterReseed.from_db``, so
    once their spend counter expires the budget check falls back to
    ``cached_obj.spend``. If that cached object outlives a budget reset, its
    stale ``.spend`` keeps the entity blocked indefinitely. Removing the
    entry makes the next auth-time fetch reload the zeroed row from Postgres.

    Args:
        cache_key: Key of the management-cache entry to delete.

    Any exception (including a failed import) is logged as a warning and
    swallowed; this method never raises.
    """
    try:
        # Imported lazily, at call time rather than at module import.
        from litellm.proxy.proxy_server import user_api_key_cache

        await user_api_key_cache.async_delete_cache(key=cache_key)
    except Exception as exc:
        verbose_proxy_logger.warning(
            "Failed to invalidate user_api_key_cache entry %s: %s", cache_key, exc
        )
async def _cascade_reset_spend_for_budget_link(
    self,
    budgets_to_reset: List[LiteLLM_BudgetTableFull],
    table: Any,
    counter_key_fn: Callable[[Any], str],
    log_subject: str,
    extra_where: Optional[dict] = None,
    cache_key_fn: Optional[Callable[[Any], str]] = None,
):
    """
    Zero ``spend`` on every row of ``table`` linked to a budget being reset.

    Args:
        budgets_to_reset: Budgets whose ``budget_id`` selects the rows.
            Entries with ``budget_id`` of ``None`` are ignored.
        table: Table accessor exposing async ``find_many`` / ``update_many``.
        counter_key_fn: Maps a fetched row to its spend-counter key.
        log_subject: Noun used in the warning if the row fetch fails.
        extra_where: Extra filter clauses merged into the ``budget_id`` filter.
        cache_key_fn: Optional. Maps a row to its ``user_api_key_cache`` key;
            when given, that entry is dropped after the DB update. Needed
            for entities whose spend check falls back to the cached
            object's ``.spend`` (tags, end-users) — otherwise the stale
            cached object pins enforcement to the pre-reset spend until
            its TTL expires.

    Returns:
        The ``update_many`` result, or ``None`` when no budget ids apply.
    """
    linked_ids = []
    for budget in budgets_to_reset:
        if budget.budget_id is not None:
            linked_ids.append(budget.budget_id)
    if len(linked_ids) == 0:
        return

    # Caller-supplied clauses are merged last, as with dict.update().
    row_filter: dict = {"budget_id": {"in": linked_ids}, **(extra_where or {})}

    # Rows are fetched before the update because the filter may depend on
    # the current spend. A fetch failure is non-fatal: the DB reset still
    # runs, only the per-row invalidation is skipped.
    try:
        affected_rows = await table.find_many(where=row_filter)
    except Exception as exc:
        affected_rows = []
        verbose_proxy_logger.warning(
            "Failed to fetch %s for counter invalidation: %s", log_subject, exc
        )

    outcome = await table.update_many(where=row_filter, data={"spend": 0})

    for entry in affected_rows:
        await self._invalidate_spend_counter(counter_key_fn(entry))
        if cache_key_fn is None:
            continue
        await self._invalidate_user_api_key_cache_entry(cache_key_fn(entry))
    return outcome
async def reset_budget_for_litellm_team_members(
self, budgets_to_reset: List[LiteLLM_BudgetTableFull]
):
"""
Resets the budget for all LiteLLM Team Members if their budget has expired
"""
budget_ids = [
budget.budget_id
for budget in budgets_to_reset
if budget.budget_id is not None
]
try:
memberships = await self.prisma_client.db.litellm_teammembership.find_many(
where={"budget_id": {"in": budget_ids}}
)
except Exception as e:
memberships = []
verbose_proxy_logger.warning(
"Failed to fetch team memberships for counter invalidation: %s", e
)
update_result = await self.prisma_client.db.litellm_teammembership.update_many(
where={"budget_id": {"in": budget_ids}},
data={
"spend": 0,
},
return await self._cascade_reset_spend_for_budget_link(
budgets_to_reset=budgets_to_reset,
table=self.prisma_client.db.litellm_teammembership,
counter_key_fn=lambda m: f"spend:team_member:{m.user_id}:{m.team_id}",
log_subject="team memberships",
)
for m in memberships:
await self._invalidate_spend_counter(
f"spend:team_member:{m.user_id}:{m.team_id}"
)
return update_result
async def reset_budget_for_keys_linked_to_budgets(
self, budgets_to_reset: List[LiteLLM_BudgetTableFull]
):
"""
Resets the spend for keys linked to budget tiers that are being reset.
This handles keys that have budget_id but no budget_duration set on the key
itself. Keys with budget_id rely on their linked budget tier's reset schedule
rather than having their own budget_duration.
Keys that have their own budget_duration are already handled by
reset_budget_for_litellm_keys() and are excluded here to avoid
double-resetting.
Excludes keys with their own budget_duration; those are reset by
reset_budget_for_litellm_keys() to avoid double-resetting.
"""
budget_ids = [
budget.budget_id
for budget in budgets_to_reset
if budget.budget_id is not None
]
if not budget_ids:
return
where_clause: dict = {
"budget_id": {"in": budget_ids},
"budget_duration": None, # only keys without their own reset schedule
"spend": {"gt": 0}, # only reset keys that have accumulated spend
}
try:
keys = await self.prisma_client.db.litellm_verificationtoken.find_many(
where=where_clause
)
except Exception as e:
keys = []
verbose_proxy_logger.warning(
"Failed to fetch keys for counter invalidation: %s", e
)
update_result = (
await self.prisma_client.db.litellm_verificationtoken.update_many(
where=where_clause,
data={
"spend": 0,
},
)
return await self._cascade_reset_spend_for_budget_link(
budgets_to_reset=budgets_to_reset,
table=self.prisma_client.db.litellm_verificationtoken,
counter_key_fn=lambda k: f"spend:key:{k.token}",
log_subject="keys",
extra_where={"budget_duration": None, "spend": {"gt": 0}},
)
for k in keys:
await self._invalidate_spend_counter(f"spend:key:{k.token}")
async def reset_budget_for_orgs_linked_to_budgets(
    self, budgets_to_reset: List[LiteLLM_BudgetTableFull]
):
    """
    Zero the spend of organizations linked to the budget tiers being reset.

    Only organizations with ``spend > 0`` are selected. Returns whatever
    ``_cascade_reset_spend_for_budget_link`` returns.
    """

    def _org_counter_key(org: Any) -> str:
        return f"spend:org:{org.organization_id}"

    org_table = self.prisma_client.db.litellm_organizationtable
    return await self._cascade_reset_spend_for_budget_link(
        budgets_to_reset=budgets_to_reset,
        table=org_table,
        counter_key_fn=_org_counter_key,
        log_subject="orgs",
        extra_where={"spend": {"gt": 0}},
    )
return update_result
async def reset_budget_for_tags_linked_to_budgets(
    self, budgets_to_reset: List[LiteLLM_BudgetTableFull]
):
    """
    Zero the spend of tags linked to the budget tiers being reset.

    Only tags with ``spend > 0`` are selected. Each tag's
    ``user_api_key_cache`` entry (``tag:<tag_name>``) is dropped as well, so
    the next ``_tag_max_budget_check`` reloads the zeroed row from the DB.
    ``SpendCounterReseed.from_db`` intentionally returns ``None`` for tags,
    so once the spend counter expires the budget check falls back to the
    cached ``LiteLLM_TagTable.spend``; without this invalidation that stale
    ``.spend`` keeps the tag over-budget indefinitely.
    """

    def _tag_counter_key(tag: Any) -> str:
        return f"spend:tag:{tag.tag_name}"

    def _tag_cache_key(tag: Any) -> str:
        return f"tag:{tag.tag_name}"

    tag_table = self.prisma_client.db.litellm_tagtable
    return await self._cascade_reset_spend_for_budget_link(
        budgets_to_reset=budgets_to_reset,
        table=tag_table,
        counter_key_fn=_tag_counter_key,
        log_subject="tags",
        extra_where={"spend": {"gt": 0}},
        cache_key_fn=_tag_cache_key,
    )
async def reset_budget_for_litellm_budget_table(self):
"""
@ -237,6 +283,14 @@ class ResetBudgetJob:
budgets_to_reset=budgets_to_reset
)
await self.reset_budget_for_orgs_linked_to_budgets(
budgets_to_reset=budgets_to_reset
)
await self.reset_budget_for_tags_linked_to_budgets(
budgets_to_reset=budgets_to_reset
)
if endusers_to_reset is not None and len(endusers_to_reset) > 0:
for enduser in endusers_to_reset:
try:

View File

@ -211,6 +211,7 @@ from litellm import Router
from litellm._logging import verbose_proxy_logger, verbose_router_logger
from litellm.caching.caching import DualCache, RedisCache
from litellm.caching.redis_cluster_cache import RedisClusterCache
from litellm.proxy.common_utils.timezone_utils import get_budget_reset_time
from litellm.proxy.common_utils.user_api_key_cache import UserApiKeyCache
from litellm.constants import (
_REALTIME_BODY_CACHE_SIZE,
@ -6750,27 +6751,64 @@ class ProxyStartupEvent:
"budget_duration not set on Proxy. budget_duration is required to use max_budget."
)
# add proxy budget to db in the user table
asyncio.create_task(
generate_key_helper_fn( # type: ignore
request_type="user",
table_name="user",
user_id=litellm_proxy_budget_name,
duration=None,
models=[],
aliases={},
config={},
spend=0,
max_budget=litellm.max_budget,
budget_duration=litellm.budget_duration,
query_type="update_data",
update_key_values={
"max_budget": litellm.max_budget,
"budget_duration": litellm.budget_duration,
},
)
cls._upsert_proxy_budget_with_reset_at_backfill(litellm_proxy_budget_name)
)
@classmethod
async def _upsert_proxy_budget_with_reset_at_backfill(
    cls, litellm_proxy_budget_name: str
) -> None:
    """
    Upsert the proxy admin user row, then backfill a missing reset time.

    Step 1 writes the configured ``max_budget`` / ``budget_duration`` to the
    user row identified by ``litellm_proxy_budget_name``.

    Step 2 sets ``budget_reset_at`` only where it is currently NULL. That
    covers a row that pre-existed without a reset schedule (e.g. created
    via a different path before the proxy budget was configured). On later
    restarts the filter matches nothing, so an active reset window is never
    slid forward.

    A failure in step 2 is logged as a warning and swallowed.
    """
    budget_fields = {
        "max_budget": litellm.max_budget,
        "budget_duration": litellm.budget_duration,
    }
    await generate_key_helper_fn(  # type: ignore
        request_type="user",
        table_name="user",
        user_id=litellm_proxy_budget_name,
        duration=None,
        models=[],
        aliases={},
        config={},
        spend=0,
        max_budget=budget_fields["max_budget"],
        budget_duration=budget_fields["budget_duration"],
        query_type="update_data",
        update_key_values=budget_fields,
    )

    # Nothing to backfill without a DB client or a configured duration.
    if prisma_client is None or litellm.budget_duration is None:
        return

    # Without this, the upsert leaves budget_reset_at=NULL on rows that
    # took the UPDATE path, and reset_budget_for_litellm_users never
    # matches them (NULL < now() is unknown in SQL) — so the proxy-wide
    # spend cap blocks forever once it's hit.
    try:
        await prisma_client.db.litellm_usertable.update_many(
            where={
                "user_id": litellm_proxy_budget_name,
                "budget_reset_at": None,
            },
            data={
                "budget_reset_at": get_budget_reset_time(
                    budget_duration=litellm.budget_duration
                )
            },
        )
    except Exception as exc:
        verbose_proxy_logger.warning(
            "Failed to backfill budget_reset_at on proxy admin row: %s", exc
        )
@classmethod
async def _warm_global_spend_cache(
cls,

View File

@ -22,7 +22,7 @@ dependencies = [
"importlib-metadata>=8.0.0,<9.0",
"tokenizers>=0.21.0,<1.0",
"click>=8.0.0,<9.0",
"jinja2>=3.1.0,<4.0",
"jinja2>=3.1.6,<4.0",
"aiohttp>=3.10,<4.0",
"pydantic>=2.10.0,<3.0.0",
"jsonschema>=4.0.0,<5.0",

View File

@ -233,6 +233,12 @@ async def test_reset_budget_endusers_partial_failure():
prisma_client.db.litellm_verificationtoken.update_many = AsyncMock(
return_value={"count": 0}
)
# Mock db.litellm_organizationtable.update_many (used by reset_budget_for_orgs_linked_to_budgets)
prisma_client.db.litellm_organizationtable.update_many = AsyncMock(
return_value={"count": 0}
)
# Mock db.litellm_tagtable.update_many (used by reset_budget_for_tags_linked_to_budgets)
prisma_client.db.litellm_tagtable.update_many = AsyncMock(return_value={"count": 0})
proxy_logging_obj = MagicMock()
proxy_logging_obj.service_logging_obj = MagicMock()
@ -400,6 +406,12 @@ async def test_reset_budget_continues_other_categories_on_failure():
prisma_client.db.litellm_verificationtoken.update_many = AsyncMock(
return_value={"count": 0}
)
# Mock db.litellm_organizationtable.update_many (used by reset_budget_for_orgs_linked_to_budgets)
prisma_client.db.litellm_organizationtable.update_many = AsyncMock(
return_value={"count": 0}
)
# Mock db.litellm_tagtable.update_many (used by reset_budget_for_tags_linked_to_budgets)
prisma_client.db.litellm_tagtable.update_many = AsyncMock(return_value={"count": 0})
proxy_logging_obj = MagicMock()
proxy_logging_obj.service_logging_obj = MagicMock()
@ -884,6 +896,12 @@ async def test_service_logger_endusers_success():
prisma_client.db.litellm_verificationtoken.update_many = AsyncMock(
return_value={"count": 0}
)
# Mock db.litellm_organizationtable.update_many (used by reset_budget_for_orgs_linked_to_budgets)
prisma_client.db.litellm_organizationtable.update_many = AsyncMock(
return_value={"count": 0}
)
# Mock db.litellm_tagtable.update_many (used by reset_budget_for_tags_linked_to_budgets)
prisma_client.db.litellm_tagtable.update_many = AsyncMock(return_value={"count": 0})
proxy_logging_obj = MagicMock()
proxy_logging_obj.service_logging_obj = MagicMock()
@ -966,6 +984,12 @@ async def test_service_logger_endusers_failure():
prisma_client.db.litellm_verificationtoken.update_many = AsyncMock(
return_value={"count": 0}
)
# Mock db.litellm_organizationtable.update_many (used by reset_budget_for_orgs_linked_to_budgets)
prisma_client.db.litellm_organizationtable.update_many = AsyncMock(
return_value={"count": 0}
)
# Mock db.litellm_tagtable.update_many (used by reset_budget_for_tags_linked_to_budgets)
prisma_client.db.litellm_tagtable.update_many = AsyncMock(return_value={"count": 0})
proxy_logging_obj = MagicMock()
proxy_logging_obj.service_logging_obj = MagicMock()
@ -1060,6 +1084,10 @@ async def test_reset_budget_for_litellm_team_members_called():
prisma_client.db.litellm_verificationtoken.update_many = AsyncMock(
return_value={"count": 0}
)
prisma_client.db.litellm_organizationtable.update_many = AsyncMock(
return_value={"count": 0}
)
prisma_client.db.litellm_tagtable.update_many = AsyncMock(return_value={"count": 0})
proxy_logging_obj = MagicMock()
proxy_logging_obj.service_logging_obj = MagicMock()

View File

@ -25,8 +25,8 @@ async def make_calls_until_budget_exceeded(session, key: str, call_function, **k
# Check error structure and values that should be consistent
assert (
error_dict["code"] == "400"
), f"Expected error code 400, got: {error_dict['code']}"
error_dict["code"] == "429"
), f"Expected error code 429, got: {error_dict['code']}"
assert (
error_dict["type"] == "budget_exceeded"
), f"Expected error type budget_exceeded, got: {error_dict['type']}"

View File

@ -99,7 +99,7 @@ async def test_model_access_patterns(key_models, test_model, expect_success):
# Assert error structure and values
assert _error_body["type"] == "key_model_access_denied"
assert _error_body["param"] == "model"
assert _error_body["code"] == "401"
assert _error_body["code"] == "403"
assert "key not allowed to access model" in _error_body["message"]
@ -297,7 +297,7 @@ def _validate_model_access_exception(
# Assert error structure and values
assert _error_body["type"] == expected_type
assert _error_body["param"] == "model"
assert _error_body["code"] == "401"
assert _error_body["code"] == "403"
if expected_type == "key_model_access_denied":
assert "key not allowed to access model" in _error_body["message"]
elif expected_type == "team_model_access_denied":

View File

@ -12,6 +12,7 @@ from datetime import datetime, timedelta
import httpx
import pytest
from fastapi import status
import litellm
from litellm.proxy._types import (
@ -31,6 +32,7 @@ from litellm.proxy._types import (
)
from litellm.proxy.auth.auth_checks import (
ExperimentalUIJWTToken,
_can_object_call_model,
_can_object_call_vector_stores,
_check_end_user_budget,
_check_team_member_budget,
@ -206,6 +208,52 @@ def test_get_key_object_from_ui_hash_key_invalid():
assert key_object is None
@pytest.mark.parametrize(
    "object_type,expected_error_type",
    [
        ("key", ProxyErrorTypes.key_model_access_denied),
        ("team", ProxyErrorTypes.team_model_access_denied),
        ("user", ProxyErrorTypes.user_model_access_denied),
        ("org", ProxyErrorTypes.org_model_access_denied),
        ("project", ProxyErrorTypes.project_model_access_denied),
    ],
)
def test_can_object_call_model_denials_return_forbidden(
    object_type, expected_error_type
):
    """A model outside the allow-list is denied with the per-object error type and HTTP 403."""
    call_kwargs = dict(
        model="restricted-model",
        llm_router=None,
        models=["allowed-model"],
        object_type=object_type,
    )

    with pytest.raises(ProxyException) as denied:
        _can_object_call_model(**call_kwargs)

    raised = denied.value
    assert raised.type == expected_error_type
    assert int(raised.code) == status.HTTP_403_FORBIDDEN
@pytest.mark.asyncio
async def test_can_user_call_model_no_default_models_returns_forbidden():
    """A user limited to ``no_default_models`` is denied with key_model_access_denied and HTTP 403."""
    from litellm.proxy._types import SpecialModelNames
    from litellm.proxy.auth.auth_checks import can_user_call_model

    restricted_user = LiteLLM_UserTable(
        user_id="test-user",
        models=[SpecialModelNames.no_default_models.value],
    )

    with pytest.raises(ProxyException) as denied:
        await can_user_call_model(
            model="restricted-model",
            llm_router=None,
            user_object=restricted_user,
        )

    raised = denied.value
    assert raised.type == ProxyErrorTypes.key_model_access_denied
    assert int(raised.code) == status.HTTP_403_FORBIDDEN
@pytest.mark.asyncio
async def test_get_key_object_should_reconnect_once_on_db_connection_error():
mock_prisma_client = MagicMock()
@ -1144,6 +1192,7 @@ async def test_check_team_member_model_access_denied_model():
proxy_logging_obj=MagicMock(),
)
assert exc_info.value.type == ProxyErrorTypes.team_model_access_denied
assert int(exc_info.value.code) == status.HTTP_403_FORBIDDEN
@pytest.mark.asyncio

View File

@ -140,6 +140,7 @@ async def test_handle_authentication_error_budget_exceeded():
)
assert exc_info.value.type == ProxyErrorTypes.budget_exceeded
assert int(exc_info.value.code) == status.HTTP_429_TOO_MANY_REQUESTS
@pytest.mark.asyncio

View File

@ -1,6 +1,7 @@
import json
import os
import sys
from datetime import datetime, timedelta
from types import SimpleNamespace
from unittest.mock import ANY, AsyncMock, MagicMock, patch
@ -9,6 +10,7 @@ sys.path.insert(
) # Adds the parent directory to the system path
import pytest
from fastapi import status
import litellm
import litellm.proxy.proxy_server
@ -178,6 +180,26 @@ async def test_custom_auth_does_not_enforce_key_model_access_by_default():
mock_can_key.assert_not_awaited()
@pytest.mark.asyncio
async def test_post_custom_auth_expired_key_returns_unauthorized():
    """A token that expired a minute ago is rejected with expired_key and HTTP 401."""
    one_minute_ago = datetime.now() - timedelta(minutes=1)
    expired_token = UserAPIKeyAuth(token="test_token", expires=one_minute_ago)

    with pytest.raises(ProxyException) as rejected:
        await _run_post_custom_auth_checks(
            valid_token=expired_token,
            request=MagicMock(),
            request_data={},
            route="/v1/chat/completions",
            parent_otel_span=None,
        )

    raised = rejected.value
    assert raised.type == ProxyErrorTypes.expired_key
    assert int(raised.code) == status.HTTP_401_UNAUTHORIZED
@pytest.mark.asyncio
async def test_custom_auth_honors_key_level_model_access_restriction_allowed_with_opt_in():
valid_token = UserAPIKeyAuth(token="test_token", models=["gpt-4o-mini"])
@ -934,6 +956,7 @@ async def test_proxy_admin_expired_key_from_cache():
assert (
exc_info.value.type == ProxyErrorTypes.expired_key
), f"Expected expired_key error type, got {exc_info.value.type}"
assert int(exc_info.value.code) == status.HTTP_401_UNAUTHORIZED
assert "Expired Key" in str(
exc_info.value.message
), f"Exception message should mention 'Expired Key', got: {exc_info.value.message}"

View File

@ -39,6 +39,46 @@ class MockLiteLLMVerificationToken:
return {"count": 1}
class MockLiteLLMOrganizationTable:
    """In-memory stand-in for the organization table that records every call."""

    def __init__(self):
        # Call logs, in invocation order.
        self.update_many_calls: List[Dict[str, Any]] = []
        self.find_many_calls: List[Dict[str, Any]] = []
        # Canned rows handed back by find_many.
        self._find_many_results: List[Any] = []

    def set_find_many_results(self, results: List[Any]):
        """Set the rows that subsequent ``find_many`` calls return."""
        self._find_many_results = results

    async def find_many(self, where: Dict[str, Any]) -> List[Any]:
        """Record the filter and return the canned rows."""
        recorded = {"where": where}
        self.find_many_calls.append(recorded)
        return self._find_many_results

    async def update_many(
        self, where: Dict[str, Any], data: Dict[str, Any]
    ) -> Dict[str, Any]:
        """Record the filter and payload; always report one updated row."""
        recorded = {"where": where, "data": data}
        self.update_many_calls.append(recorded)
        return {"count": 1}
class MockLiteLLMTagTable:
    """In-memory stand-in for the tag table that records every call."""

    def __init__(self):
        # Call logs, in invocation order.
        self.update_many_calls: List[Dict[str, Any]] = []
        self.find_many_calls: List[Dict[str, Any]] = []
        # Canned rows handed back by find_many.
        self._find_many_results: List[Any] = []

    def set_find_many_results(self, results: List[Any]):
        """Set the rows that subsequent ``find_many`` calls return."""
        self._find_many_results = results

    async def find_many(self, where: Dict[str, Any]) -> List[Any]:
        """Record the filter and return the canned rows."""
        recorded = {"where": where}
        self.find_many_calls.append(recorded)
        return self._find_many_results

    async def update_many(
        self, where: Dict[str, Any], data: Dict[str, Any]
    ) -> Dict[str, Any]:
        """Record the filter and payload; always report one updated row."""
        recorded = {"where": where, "data": data}
        self.update_many_calls.append(recorded)
        return {"count": 1}
class MockLiteLLMEndUserTable:
def __init__(self):
self.find_many_calls: List[Dict[str, Any]] = []
@ -57,6 +97,8 @@ class MockDB:
self.litellm_teammembership = MockLiteLLMTeamMembership()
self.litellm_verificationtoken = MockLiteLLMVerificationToken()
self.litellm_endusertable = MockLiteLLMEndUserTable()
self.litellm_organizationtable = MockLiteLLMOrganizationTable()
self.litellm_tagtable = MockLiteLLMTagTable()
class MockPrismaClient:
@ -459,6 +501,100 @@ def test_reset_budget_for_keys_linked_to_budgets_empty(
assert len(calls) == 0
def test_reset_budget_for_orgs_linked_to_budgets(reset_budget_job, mock_prisma_client):
    """
    Resetting a budget tier must also zero the spend of every organization
    attached to that tier through ``budget_id``.
    """
    current_time = datetime.now(timezone.utc)
    budget_attrs = {
        "max_budget": 100.0,
        "budget_duration": "30d",
        "budget_reset_at": current_time - timedelta(hours=1),
        "budget_id": "30d-org-budget",
        "created_at": current_time - timedelta(days=30),
    }
    expiring_budget = type("LiteLLM_BudgetTableFull", (), budget_attrs)

    asyncio.run(
        reset_budget_job.reset_budget_for_orgs_linked_to_budgets(
            budgets_to_reset=[expiring_budget]
        )
    )

    recorded = mock_prisma_client.db.litellm_organizationtable.update_many_calls
    assert len(recorded) == 1
    where_clause = recorded[0]["where"]
    # Only orgs on the expiring tier that actually accrued spend are targeted.
    assert where_clause["budget_id"] == {"in": ["30d-org-budget"]}
    assert where_clause["spend"] == {"gt": 0}
    assert recorded[0]["data"]["spend"] == 0
def test_reset_budget_for_orgs_linked_to_budgets_empty(
    reset_budget_job, mock_prisma_client
):
    """
    With an empty list of budgets to reset, the organization table must not
    receive any ``update_many`` call.
    """
    reset_coro = reset_budget_job.reset_budget_for_orgs_linked_to_budgets(
        budgets_to_reset=[]
    )
    asyncio.run(reset_coro)

    recorded = mock_prisma_client.db.litellm_organizationtable.update_many_calls
    assert len(recorded) == 0
def test_reset_budget_for_tags_linked_to_budgets(reset_budget_job, mock_prisma_client):
    """
    Resetting a budget tier must also zero the spend of every tag attached
    to that tier through ``budget_id``.
    """
    current_time = datetime.now(timezone.utc)
    budget_attrs = {
        "max_budget": 50.0,
        "budget_duration": "30d",
        "budget_reset_at": current_time - timedelta(hours=1),
        "budget_id": "30d-tag-budget",
        "created_at": current_time - timedelta(days=30),
    }
    expiring_budget = type("LiteLLM_BudgetTableFull", (), budget_attrs)

    asyncio.run(
        reset_budget_job.reset_budget_for_tags_linked_to_budgets(
            budgets_to_reset=[expiring_budget]
        )
    )

    recorded = mock_prisma_client.db.litellm_tagtable.update_many_calls
    assert len(recorded) == 1
    where_clause = recorded[0]["where"]
    # Only tags on the expiring tier that actually accrued spend are targeted.
    assert where_clause["budget_id"] == {"in": ["30d-tag-budget"]}
    assert where_clause["spend"] == {"gt": 0}
    assert recorded[0]["data"]["spend"] == 0
def test_reset_budget_for_tags_linked_to_budgets_empty(
    reset_budget_job, mock_prisma_client
):
    """
    With an empty list of budgets to reset, the tag table must not receive
    any ``update_many`` call.
    """
    reset_coro = reset_budget_job.reset_budget_for_tags_linked_to_budgets(
        budgets_to_reset=[]
    )
    asyncio.run(reset_coro)

    recorded = mock_prisma_client.db.litellm_tagtable.update_many_calls
    assert len(recorded) == 0
@pytest.mark.parametrize(
"budget_duration, expected_day, expected_month",
[
@ -618,6 +754,75 @@ def test_budget_table_reset_also_resets_linked_keys(
assert calls[0]["data"]["spend"] == 0
def test_budget_table_reset_also_resets_linked_orgs(
    reset_budget_job, mock_prisma_client
):
    """
    Integration-style check: running reset_budget_for_litellm_budget_table
    must reset spend for orgs linked to the expiring budget tiers, on top of
    end-users, team members, and keys.
    """
    current_time = datetime.now(timezone.utc)
    budget_attrs = {
        "max_budget": 100.0,
        "budget_duration": "30d",
        "budget_reset_at": current_time - timedelta(hours=1),
        "budget_id": "30d-org-budget",
        "created_at": current_time - timedelta(days=30),
    }
    expiring_budget = type("LiteLLM_BudgetTableFull", (), budget_attrs)
    # Seed the mock client's budget data with the single expiring tier.
    mock_prisma_client.data["budget"] = [expiring_budget]

    asyncio.run(reset_budget_job.reset_budget_for_litellm_budget_table())

    org_updates = mock_prisma_client.db.litellm_organizationtable.update_many_calls
    assert len(org_updates) == 1, (
        "Expected reset_budget_for_litellm_budget_table to also reset orgs "
        f"linked to expiring budgets, but got {len(org_updates)} update_many calls"
    )
    first_update = org_updates[0]
    assert first_update["where"]["budget_id"] == {"in": ["30d-org-budget"]}
    assert first_update["data"]["spend"] == 0
def test_budget_table_reset_also_resets_linked_tags(
    reset_budget_job, mock_prisma_client
):
    """
    Integration-style check: running reset_budget_for_litellm_budget_table
    must reset spend for tags linked to the expiring budget tiers.
    """
    current_time = datetime.now(timezone.utc)
    budget_attrs = {
        "max_budget": 50.0,
        "budget_duration": "30d",
        "budget_reset_at": current_time - timedelta(hours=1),
        "budget_id": "30d-tag-budget",
        "created_at": current_time - timedelta(days=30),
    }
    expiring_budget = type("LiteLLM_BudgetTableFull", (), budget_attrs)
    # Seed the mock client's budget data with the single expiring tier.
    mock_prisma_client.data["budget"] = [expiring_budget]

    asyncio.run(reset_budget_job.reset_budget_for_litellm_budget_table())

    tag_updates = mock_prisma_client.db.litellm_tagtable.update_many_calls
    assert len(tag_updates) == 1, (
        "Expected reset_budget_for_litellm_budget_table to also reset tags "
        f"linked to expiring budgets, but got {len(tag_updates)} update_many calls"
    )
    first_update = tag_updates[0]
    assert first_update["where"]["budget_id"] == {"in": ["30d-tag-budget"]}
    assert first_update["data"]["spend"] == 0
def test_reset_budget_resets_endusers_with_null_budget_id(
reset_budget_job, mock_prisma_client
):
@ -1057,16 +1262,26 @@ def test_reset_budget_windows_query_error_does_not_break_team_path(monkeypatch):
def _make_counter_invalidation_job(monkeypatch):
"""Stub spend_counter_cache so we can observe invalidation calls."""
"""Stub spend_counter_cache (and user_api_key_cache) so we can observe
invalidation calls.
Both caches are looked up via ``from litellm.proxy.proxy_server import
<name>`` inside the reset job, so we publish them on a fake module.
"""
spend_counter_cache = MagicMock()
spend_counter_cache.in_memory_cache.set_cache = MagicMock()
spend_counter_cache.redis_cache = MagicMock()
spend_counter_cache.redis_cache.async_set_cache = AsyncMock()
user_api_key_cache = MagicMock()
user_api_key_cache.async_delete_cache = AsyncMock()
fake_module = types.ModuleType("litellm.proxy.proxy_server")
fake_module.spend_counter_cache = spend_counter_cache
fake_module.user_api_key_cache = user_api_key_cache
monkeypatch.setitem(sys.modules, "litellm.proxy.proxy_server", fake_module)
spend_counter_cache.user_api_key_cache = user_api_key_cache
return spend_counter_cache
@ -1205,3 +1420,136 @@ def test_reset_budget_for_keys_linked_to_budgets_invalidates_redis_counter(monke
counter_cache.in_memory_cache.set_cache.assert_any_call(
key="spend:key:sk-linked", value=0.0, ttl=60
)
def test_reset_budget_for_orgs_linked_to_budgets_invalidates_redis_counter(monkeypatch):
    """A budget-tier reset must zero the spend counter of each linked org."""
    counter_cache = _make_counter_invalidation_job(monkeypatch)

    expired_budget = type("B", (), {"budget_id": "budget-1"})
    linked_org = type("Org", (), {"organization_id": "org-acme"})

    prisma_client = MagicMock()
    org_table = prisma_client.db.litellm_organizationtable
    org_table.find_many = AsyncMock(return_value=[linked_org])
    org_table.update_many = AsyncMock(return_value={"count": 1})

    job = ResetBudgetJob(proxy_logging_obj=MagicMock(), prisma_client=prisma_client)
    asyncio.run(job.reset_budget_for_orgs_linked_to_budgets([expired_budget]))

    # The same zeroing write is expected on both the in-memory and Redis layers.
    expected_write = dict(key="spend:org:org-acme", value=0.0, ttl=60)
    counter_cache.in_memory_cache.set_cache.assert_any_call(**expected_write)
    counter_cache.redis_cache.async_set_cache.assert_any_await(**expected_write)
def test_reset_budget_for_tags_linked_to_budgets_invalidates_redis_counter(monkeypatch):
    """A budget-tier reset must zero the spend counter of each linked tag."""
    counter_cache = _make_counter_invalidation_job(monkeypatch)

    expired_budget = type("B", (), {"budget_id": "budget-1"})
    linked_tag = type("Tag", (), {"tag_name": "tenant-42"})

    prisma_client = MagicMock()
    tag_table = prisma_client.db.litellm_tagtable
    tag_table.find_many = AsyncMock(return_value=[linked_tag])
    tag_table.update_many = AsyncMock(return_value={"count": 1})

    job = ResetBudgetJob(proxy_logging_obj=MagicMock(), prisma_client=prisma_client)
    asyncio.run(job.reset_budget_for_tags_linked_to_budgets([expired_budget]))

    # The same zeroing write is expected on both the in-memory and Redis layers.
    expected_write = dict(key="spend:tag:tenant-42", value=0.0, ttl=60)
    counter_cache.in_memory_cache.set_cache.assert_any_call(**expected_write)
    counter_cache.redis_cache.async_set_cache.assert_any_await(**expected_write)
def test_reset_budget_for_tags_linked_to_budgets_invalidates_management_cache(
    monkeypatch,
):
    """Regression guard: tag spend must not stay frozen across budget cycles.

    ``SpendCounterReseed.from_db`` returns ``None`` for ``spend:tag:*`` keys,
    so after the spend counter expires the tag budget check falls back to the
    cached ``LiteLLM_TagTable.spend``. Unless the reset drops the management
    cache entry, that cached object (TTL 60s) keeps the pre-reset spend and
    ``_tag_max_budget_check`` goes on returning HTTP 400 even though the DB
    row has already been zeroed.
    """
    counter_cache = _make_counter_invalidation_job(monkeypatch)

    expired_budget = type("B", (), {"budget_id": "budget-1"})
    linked_tag = type("Tag", (), {"tag_name": "tenant-42"})

    prisma_client = MagicMock()
    tag_table = prisma_client.db.litellm_tagtable
    tag_table.find_many = AsyncMock(return_value=[linked_tag])
    tag_table.update_many = AsyncMock(return_value={"count": 1})

    job = ResetBudgetJob(proxy_logging_obj=MagicMock(), prisma_client=prisma_client)
    asyncio.run(job.reset_budget_for_tags_linked_to_budgets([expired_budget]))

    delete_stub = counter_cache.user_api_key_cache.async_delete_cache
    delete_stub.assert_any_await(key="tag:tenant-42")
def test_reset_budget_for_tags_linked_to_budgets_invalidates_each_tag_management_cache(
    monkeypatch,
):
    """Every tag sharing the expired budget tier must have its
    ``user_api_key_cache`` entry dropped, not only the first one."""
    counter_cache = _make_counter_invalidation_job(monkeypatch)

    expired_budget = type("B", (), {"budget_id": "budget-1"})
    linked_tags = [
        type("Tag", (), {"tag_name": tag_name})
        for tag_name in ("tenant-a", "tenant-b", "tenant-c")
    ]

    prisma_client = MagicMock()
    tag_table = prisma_client.db.litellm_tagtable
    tag_table.find_many = AsyncMock(return_value=linked_tags)
    tag_table.update_many = AsyncMock(return_value={"count": 3})

    job = ResetBudgetJob(proxy_logging_obj=MagicMock(), prisma_client=prisma_client)
    asyncio.run(job.reset_budget_for_tags_linked_to_budgets([expired_budget]))

    # Collect the ``key=`` keyword of every awaited delete; order is irrelevant.
    awaited = counter_cache.user_api_key_cache.async_delete_cache.await_args_list
    deleted_keys = {entry.kwargs.get("key") for entry in awaited}
    assert deleted_keys == {"tag:tenant-a", "tag:tenant-b", "tag:tenant-c"}
def test_reset_budget_for_keys_linked_to_budgets_does_not_touch_management_cache(
    monkeypatch,
):
    """Management-cache invalidation is opt-in.

    Keys / orgs / team-members rely on ``SpendCounterReseed.from_db`` (which
    DOES handle their counter keys), so the cache_key_fn hook is deliberately
    left unwired for them. This test pins that no-op so a later refactor does
    not start clobbering the key cache, which would cost an extra DB
    round-trip per reset cycle without fixing anything.
    """
    counter_cache = _make_counter_invalidation_job(monkeypatch)

    expired_budget = type("B", (), {"budget_id": "budget-1"})
    linked_key = type("Key", (), {"token": "sk-linked"})

    prisma_client = MagicMock()
    token_table = prisma_client.db.litellm_verificationtoken
    token_table.find_many = AsyncMock(return_value=[linked_key])
    token_table.update_many = AsyncMock(return_value={"count": 1})

    job = ResetBudgetJob(proxy_logging_obj=MagicMock(), prisma_client=prisma_client)
    asyncio.run(job.reset_budget_for_keys_linked_to_budgets([expired_budget]))

    delete_stub = counter_cache.user_api_key_cache.async_delete_cache
    delete_stub.assert_not_awaited()

View File

@ -1728,6 +1728,67 @@ async def test_add_proxy_budget_to_db_only_creates_user_no_keys():
assert call_args.kwargs["query_type"] == "update_data"
@pytest.mark.asyncio
async def test_add_proxy_budget_to_db_backfills_budget_reset_at():
    """
    Test that _upsert_proxy_budget_with_reset_at_backfill issues a conditional
    update_many with `WHERE budget_reset_at IS NULL` to backfill the column on
    rows that pre-existed without a reset schedule. Without this, the proxy
    admin row stays at NULL and reset_budget_for_litellm_users never matches
    it (NULL < now() is unknown in SQL), so the global proxy budget never
    resets.
    """
    from datetime import datetime, timezone
    from unittest.mock import AsyncMock, MagicMock, patch

    import litellm
    from litellm.proxy.proxy_server import ProxyStartupEvent

    litellm_proxy_budget_name = "litellm-proxy-budget"

    mock_prisma = MagicMock()
    mock_prisma.db.litellm_usertable.update_many = AsyncMock(return_value={"count": 1})

    mock_generate_key_helper = AsyncMock(
        return_value={
            "user_id": litellm_proxy_budget_name,
            "max_budget": 100.0,
            "budget_duration": "30d",
            "spend": 0,
            "models": [],
        }
    )

    with (
        # Scope the global budget settings to this test; assigning them
        # directly on the litellm module would leak into later tests.
        patch.object(litellm, "budget_duration", "30d"),
        patch.object(litellm, "max_budget", 100.0),
        patch(
            "litellm.proxy.proxy_server.generate_key_helper_fn",
            mock_generate_key_helper,
        ),
        patch("litellm.proxy.proxy_server.prisma_client", mock_prisma),
    ):
        await ProxyStartupEvent._upsert_proxy_budget_with_reset_at_backfill(
            litellm_proxy_budget_name
        )

    # Upsert ran with the configured budget
    mock_generate_key_helper.assert_called_once()

    # Backfill update_many ran with the conditional WHERE
    mock_prisma.db.litellm_usertable.update_many.assert_called_once()
    backfill_call = mock_prisma.db.litellm_usertable.update_many.call_args
    assert backfill_call.kwargs["where"]["user_id"] == litellm_proxy_budget_name
    assert backfill_call.kwargs["where"]["budget_reset_at"] is None

    # The backfilled value must be a real future datetime — anything else and
    # reset_budget_for_litellm_users would still skip the row.
    backfilled_reset_at = backfill_call.kwargs["data"]["budget_reset_at"]
    assert isinstance(backfilled_reset_at, datetime)
    assert backfilled_reset_at > datetime.now(timezone.utc)
@pytest.mark.asyncio
async def test_custom_ui_sso_sign_in_handler_config_loading():
"""

View File

@ -303,7 +303,7 @@ async def test_chat_completion():
api_key=key_gen["key"],
api_version="2024-02-15-preview",
)
with pytest.raises(openai.AuthenticationError) as e:
with pytest.raises(openai.PermissionDeniedError) as e:
response = await azure_client.chat.completions.create(
model="gpt-4",
messages=[{"role": "user", "content": "Hello!"}],

View File

@ -302,14 +302,14 @@ async def test_user_model_access():
model="good-model",
)
with pytest.raises(openai.AuthenticationError):
with pytest.raises(openai.PermissionDeniedError):
await chat_completion(
session=session,
key=key,
model="bedrock/anthropic.claude-3-sonnet-20240229-v1:0",
)
with pytest.raises(openai.AuthenticationError):
with pytest.raises(openai.PermissionDeniedError):
await chat_completion(
session=session,
key=key,

2
uv.lock generated
View File

@ -3405,7 +3405,7 @@ requires-dist = [
{ name = "gunicorn", marker = "extra == 'proxy'", specifier = "==23.0.0" },
{ name = "httpx", specifier = ">=0.28.0,<1.0" },
{ name = "importlib-metadata", specifier = ">=8.0.0,<9.0" },
{ name = "jinja2", specifier = ">=3.1.0,<4.0" },
{ name = "jinja2", specifier = ">=3.1.6,<4.0" },
{ name = "jsonschema", specifier = ">=4.0.0,<5.0" },
{ name = "langfuse", marker = "extra == 'proxy-runtime'", specifier = "==2.59.7" },
{ name = "litellm-enterprise", marker = "extra == 'proxy'", editable = "enterprise" },