Fix: tag budget reset must drop stale management-cache entry (#27568)
Squash-merged by litellm-agent from oss-agent-shin's PR.
This commit is contained in:
parent
c7739c9ed5
commit
9f68d2bb77
21
AGENTS.md
21
AGENTS.md
@ -241,10 +241,27 @@ When opening issues or pull requests, follow these templates:
|
||||
|
||||
### Running the proxy server
|
||||
|
||||
Start the proxy with a config file:
|
||||
Create a minimal config file and start the proxy:
|
||||
|
||||
```yaml
|
||||
# config.yaml
|
||||
model_list:
|
||||
- model_name: fake-openai-endpoint
|
||||
litellm_params:
|
||||
model: openai/fake-model
|
||||
api_key: fake-key
|
||||
api_base: https://fake-api.example.com
|
||||
|
||||
general_settings:
|
||||
master_key: sk-1234
|
||||
|
||||
litellm_settings:
|
||||
drop_params: True
|
||||
telemetry: False
|
||||
```
|
||||
|
||||
```bash
|
||||
uv run litellm --config dev_config.yaml --port 4000
|
||||
uv run litellm --config config.yaml --port 4000
|
||||
```
|
||||
|
||||
The proxy takes ~15-20 seconds to fully start (it runs Prisma migrations on boot). Wait for `/health` to respond successfully before sending requests. If no PostgreSQL `DATABASE_URL` is set, the proxy connects to a default Neon dev database embedded in the `litellm-proxy-extras` package.
|
||||
|
||||
@ -146,7 +146,7 @@ LiteLLM is a unified interface for 100+ LLM providers with two main components:
|
||||
- **Bound large result sets.** Prisma materializes full results in memory. For results over ~10 MB, paginate with `take`/`skip` or `cursor`/`take`, always with an explicit `order`. Prefer cursor-based pagination (`skip` is O(n)). Don't paginate naturally small result sets.
|
||||
- **Limit fetched columns on wide tables.** Use `select` to fetch only needed fields — returns a partial object, so downstream code must not access unselected fields.
|
||||
- **Check index coverage.** For new or modified queries, check `schema.prisma` for a supporting index. Prefer extending an existing index (e.g. `@@index([a])` → `@@index([a, b])`) over adding a new one, unless it's a `@@unique`. Only add indexes for large/frequent queries.
|
||||
- **Keep schema files in sync.** Apply schema changes to all `schema.prisma` copies (`schema.prisma`, `litellm/proxy/`, `litellm-proxy-extras/`, `litellm-js/spend-logs/` for SpendLogs) with a migration under `litellm-proxy-extras/litellm_proxy_extras/migrations/`.
|
||||
- **Keep schema files in sync.** Apply schema changes to all `schema.prisma` copies (`schema.prisma`, `litellm/proxy/`, `litellm-proxy-extras/`) with a migration under `litellm-proxy-extras/litellm_proxy_extras/migrations/`.
|
||||
|
||||
### Setup Wizard (`litellm/setup_wizard.py`)
|
||||
- The wizard is implemented as a single `SetupWizard` class with `@staticmethod` methods — keep it that way. No module-level functions except `run_setup_wizard()` (the public entrypoint) and pure helpers (color, ANSI).
|
||||
|
||||
@ -1,18 +0,0 @@
|
||||
# Use the provided base image
|
||||
FROM ghcr.io/berriai/litellm:main-latest@sha256:7c311546c25e7bb6e8cafede9fcd3d0d622ac636b5c9418befaa32e85dfb0186
|
||||
|
||||
# Set the working directory to /app
|
||||
WORKDIR /app
|
||||
|
||||
# Copy the configuration file into the container at /app
|
||||
COPY config.yaml .
|
||||
|
||||
# Make sure your docker/entrypoint.sh is executable
|
||||
# Convert Windows line endings to Unix
|
||||
RUN sed -i 's/\r$//' docker/entrypoint.sh && chmod +x docker/entrypoint.sh
|
||||
|
||||
# Expose the necessary port
|
||||
EXPOSE 4000/tcp
|
||||
|
||||
# Override the CMD instruction with your desired command and arguments
|
||||
CMD ["--port", "4000", "--config", "config.yaml", "--detailed_debug", "--run_gunicorn"]
|
||||
@ -1,56 +0,0 @@
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: litellm-deployment
|
||||
spec:
|
||||
replicas: 3
|
||||
selector:
|
||||
matchLabels:
|
||||
app: litellm
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app: litellm
|
||||
spec:
|
||||
containers:
|
||||
- name: litellm-container
|
||||
image: ghcr.io/berriai/litellm:main-latest
|
||||
imagePullPolicy: Always
|
||||
env:
|
||||
- name: AZURE_API_KEY
|
||||
value: "d6f****"
|
||||
- name: AZURE_API_BASE
|
||||
value: "https://openai"
|
||||
- name: LITELLM_MASTER_KEY
|
||||
value: "sk-1234"
|
||||
- name: DATABASE_URL
|
||||
value: "postgresql://ishaan*********"
|
||||
args:
|
||||
- "--config"
|
||||
- "/app/proxy_config.yaml" # Update the path to mount the config file
|
||||
volumeMounts: # Define volume mount for proxy_config.yaml
|
||||
- name: config-volume
|
||||
mountPath: /app
|
||||
readOnly: true
|
||||
livenessProbe:
|
||||
httpGet:
|
||||
path: /health/liveliness
|
||||
port: 4000
|
||||
initialDelaySeconds: 120
|
||||
periodSeconds: 15
|
||||
successThreshold: 1
|
||||
failureThreshold: 3
|
||||
timeoutSeconds: 10
|
||||
readinessProbe:
|
||||
httpGet:
|
||||
path: /health/readiness
|
||||
port: 4000
|
||||
initialDelaySeconds: 120
|
||||
periodSeconds: 15
|
||||
successThreshold: 1
|
||||
failureThreshold: 3
|
||||
timeoutSeconds: 10
|
||||
volumes: # Define volume to mount proxy_config.yaml
|
||||
- name: config-volume
|
||||
configMap:
|
||||
name: litellm-config
|
||||
@ -1,12 +0,0 @@
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: litellm-service
|
||||
spec:
|
||||
selector:
|
||||
app: litellm
|
||||
ports:
|
||||
- protocol: TCP
|
||||
port: 4000
|
||||
targetPort: 4000
|
||||
type: LoadBalancer
|
||||
@ -1,13 +0,0 @@
|
||||
model_list:
|
||||
- model_name: fake-openai-endpoint
|
||||
litellm_params:
|
||||
model: openai/fake-model
|
||||
api_key: fake-key
|
||||
api_base: https://exampleopenaiendpoint-production.up.railway.app/
|
||||
|
||||
general_settings:
|
||||
master_key: sk-1234
|
||||
|
||||
litellm_settings:
|
||||
drop_params: True
|
||||
telemetry: False
|
||||
@ -1,68 +0,0 @@
|
||||
# Base image for building
|
||||
ARG LITELLM_BUILD_IMAGE=python:3.11-alpine@sha256:f07e2ace46f560f09a6eeec7b4913b80ee99546e749ef82342a419a326620856
|
||||
|
||||
# Runtime image
|
||||
ARG LITELLM_RUNTIME_IMAGE=python:3.11-alpine@sha256:f07e2ace46f560f09a6eeec7b4913b80ee99546e749ef82342a419a326620856
|
||||
ARG UV_IMAGE=ghcr.io/astral-sh/uv:0.11.7@sha256:240fb85ab0f263ef12f492d8476aa3a2e4e1e333f7d67fbdd923d00a506a516a
|
||||
|
||||
FROM $UV_IMAGE AS uvbin
|
||||
|
||||
FROM $LITELLM_BUILD_IMAGE AS builder
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
COPY --from=uvbin /uv /usr/local/bin/uv
|
||||
COPY --from=uvbin /uvx /usr/local/bin/uvx
|
||||
|
||||
RUN apk add --no-cache gcc python3-dev musl-dev nodejs npm libsndfile
|
||||
|
||||
ENV PRISMA_BINARY_CACHE_DIR=/app/.cache/prisma-python/binaries \
|
||||
UV_PROJECT_ENVIRONMENT=/app/.venv \
|
||||
UV_LINK_MODE=copy \
|
||||
XDG_CACHE_HOME=/app/.cache \
|
||||
PATH="/app/.venv/bin:${PATH}"
|
||||
|
||||
# Copy dependency metadata first for layer caching
|
||||
COPY pyproject.toml uv.lock ./
|
||||
COPY enterprise/pyproject.toml enterprise/
|
||||
COPY litellm-proxy-extras/pyproject.toml litellm-proxy-extras/
|
||||
|
||||
# Install third-party dependencies (cached unless pyproject.toml/uv.lock change)
|
||||
RUN uv sync --frozen --no-install-project --no-install-workspace --no-default-groups --no-editable \
|
||||
--extra proxy \
|
||||
--extra proxy-runtime \
|
||||
--extra extra_proxy \
|
||||
--extra semantic-router \
|
||||
--python python3
|
||||
|
||||
# Copy full source tree
|
||||
COPY . .
|
||||
|
||||
# Install project and workspace packages (fast - deps already cached)
|
||||
RUN uv sync --frozen --no-default-groups --no-editable \
|
||||
--extra proxy \
|
||||
--extra proxy-runtime \
|
||||
--extra extra_proxy \
|
||||
--extra semantic-router \
|
||||
--python python3
|
||||
|
||||
RUN prisma generate --schema=./schema.prisma
|
||||
|
||||
RUN sed -i 's/\r$//' docker/entrypoint.sh && chmod +x docker/entrypoint.sh && \
|
||||
sed -i 's/\r$//' docker/prod_entrypoint.sh && chmod +x docker/prod_entrypoint.sh
|
||||
|
||||
FROM $LITELLM_RUNTIME_IMAGE AS runtime
|
||||
|
||||
RUN apk upgrade --no-cache && apk add --no-cache libsndfile nodejs npm
|
||||
|
||||
WORKDIR /app
|
||||
ENV PRISMA_BINARY_CACHE_DIR=/app/.cache/prisma-python/binaries \
|
||||
XDG_CACHE_HOME=/app/.cache \
|
||||
PATH="/app/.venv/bin:${PATH}"
|
||||
|
||||
COPY --from=builder /app /app
|
||||
|
||||
EXPOSE 4000/tcp
|
||||
|
||||
ENTRYPOINT ["docker/prod_entrypoint.sh"]
|
||||
CMD ["--port", "4000"]
|
||||
@ -1,86 +0,0 @@
|
||||
# Use the provided base image
|
||||
# NOTE: This is a dev/branch-specific tag. Update digest when the base image is rebuilt.
|
||||
FROM ghcr.io/berriai/litellm:litellm_fwd_server_root_path-dev
|
||||
|
||||
# Set the working directory to /app
|
||||
WORKDIR /app
|
||||
|
||||
# Install Node.js and npm (adjust version as needed)
|
||||
RUN apt-get update && apt-get upgrade -y \
|
||||
libxml2 \
|
||||
libexpat1 \
|
||||
openssl \
|
||||
libssl3 \
|
||||
git \
|
||||
libkrb5-3 \
|
||||
libglib2.0-0 \
|
||||
wget \
|
||||
libaom3 \
|
||||
libxslt1.1 \
|
||||
libgnutls30 \
|
||||
libc6 && \
|
||||
apt-get install -y --no-install-recommends nodejs npm && \
|
||||
npm install -g npm@11.12.1 tar@7.5.11 glob@11.1.0 @isaacs/brace-expansion@5.0.1 minimatch@10.2.4 diff@8.0.3 && \
|
||||
GLOBAL="$(npm root -g)" && \
|
||||
find "$GLOBAL/npm" -type d -name "tar" -path "*/node_modules/tar" | while read d; do \
|
||||
rm -rf "$d" && cp -rL "$GLOBAL/tar" "$d"; \
|
||||
done && \
|
||||
find "$GLOBAL/npm" -type d -name "glob" -path "*/node_modules/glob" | while read d; do \
|
||||
rm -rf "$d" && cp -rL "$GLOBAL/glob" "$d"; \
|
||||
done && \
|
||||
find "$GLOBAL/npm" -type d -name "brace-expansion" -path "*/node_modules/@isaacs/brace-expansion" | while read d; do \
|
||||
rm -rf "$d" && cp -rL "$GLOBAL/@isaacs/brace-expansion" "$d"; \
|
||||
done && \
|
||||
find "$GLOBAL/npm" -type d -name "minimatch" -path "*/node_modules/minimatch" | while read d; do \
|
||||
rm -rf "$d" && cp -rL "$GLOBAL/minimatch" "$d"; \
|
||||
done && \
|
||||
find "$GLOBAL/npm" -type d -name "diff" -path "*/node_modules/diff" | while read d; do \
|
||||
rm -rf "$d" && cp -rL "$GLOBAL/diff" "$d"; \
|
||||
done && \
|
||||
find /usr/local/lib /usr/lib -path "*/node_modules/npm/package.json" -exec \
|
||||
sed -i 's/"tar": "\^7\.5\.[0-9]*"/"tar": "^7.5.10"/g; s/"minimatch": "\^10\.[0-9.]*"/"minimatch": "^10.2.4"/g' {} + 2>/dev/null && \
|
||||
npm cache clean --force && \
|
||||
apt-get purge -y npm
|
||||
|
||||
# Copy the UI source into the container
|
||||
COPY ./ui/litellm-dashboard /app/ui/litellm-dashboard
|
||||
|
||||
# Set an environment variable for UI_BASE_PATH
|
||||
# This can be overridden at build time
|
||||
# set UI_BASE_PATH to "<your server root path>/ui"
|
||||
ENV UI_BASE_PATH="/prod/ui"
|
||||
|
||||
# Build the UI with the specified UI_BASE_PATH
|
||||
WORKDIR /app/ui/litellm-dashboard
|
||||
RUN npm ci
|
||||
RUN UI_BASE_PATH=$UI_BASE_PATH npm run build
|
||||
|
||||
# Create the destination directory
|
||||
RUN mkdir -p /app/litellm/proxy/_experimental/out
|
||||
|
||||
# Move the built files to the appropriate location
|
||||
# Assuming the build output is in ./out directory
|
||||
RUN rm -rf /app/litellm/proxy/_experimental/out/* && \
|
||||
mv ./out/* /app/litellm/proxy/_experimental/out/
|
||||
|
||||
# Switch back to the main app directory
|
||||
WORKDIR /app
|
||||
|
||||
# Make sure your docker/entrypoint.sh is executable
|
||||
# Convert Windows line endings to Unix for entrypoint scripts
|
||||
RUN sed -i 's/\r$//' docker/entrypoint.sh && chmod +x docker/entrypoint.sh
|
||||
RUN sed -i 's/\r$//' docker/prod_entrypoint.sh && chmod +x docker/prod_entrypoint.sh
|
||||
|
||||
# Run as non-root user
|
||||
RUN groupadd --gid 1000 appuser && useradd --uid 1000 --gid 1000 --no-create-home appuser \
|
||||
&& chown -R appuser:appuser /app
|
||||
USER appuser
|
||||
|
||||
# Expose the necessary port
|
||||
EXPOSE 4000/tcp
|
||||
|
||||
HEALTHCHECK --interval=30s --timeout=5s --start-period=10s --retries=3 \
|
||||
CMD ["python", "-c", "import urllib.request; urllib.request.urlopen('http://localhost:4000/health')"]
|
||||
|
||||
# Override the CMD instruction with your desired command and arguments
|
||||
CMD ["--port", "4000", "--config", "config.yaml", "--detailed_debug"]
|
||||
@ -1,121 +0,0 @@
|
||||
# Base image for building
|
||||
ARG LITELLM_BUILD_IMAGE=python:3.13-slim@sha256:739e7213785e88c0f702dcdc12c0973afcbd606dbf021a589cab77d6b00b579d
|
||||
|
||||
# Runtime image
|
||||
ARG LITELLM_RUNTIME_IMAGE=python:3.13-slim@sha256:739e7213785e88c0f702dcdc12c0973afcbd606dbf021a589cab77d6b00b579d
|
||||
ARG UV_IMAGE=ghcr.io/astral-sh/uv:0.11.7@sha256:240fb85ab0f263ef12f492d8476aa3a2e4e1e333f7d67fbdd923d00a506a516a
|
||||
|
||||
FROM $UV_IMAGE AS uvbin
|
||||
|
||||
FROM $LITELLM_BUILD_IMAGE AS builder
|
||||
|
||||
WORKDIR /app
|
||||
USER root
|
||||
|
||||
COPY --from=uvbin /uv /usr/local/bin/uv
|
||||
COPY --from=uvbin /uvx /usr/local/bin/uvx
|
||||
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
gcc \
|
||||
g++ \
|
||||
python3-dev \
|
||||
libssl-dev \
|
||||
pkg-config \
|
||||
nodejs \
|
||||
npm \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
ENV PRISMA_BINARY_CACHE_DIR=/app/.cache/prisma-python/binaries \
|
||||
UV_PROJECT_ENVIRONMENT=/app/.venv \
|
||||
UV_LINK_MODE=copy \
|
||||
XDG_CACHE_HOME=/app/.cache \
|
||||
PATH="/app/.venv/bin:${PATH}"
|
||||
|
||||
# Copy dependency metadata first for layer caching
|
||||
COPY pyproject.toml uv.lock ./
|
||||
COPY enterprise/pyproject.toml enterprise/
|
||||
COPY litellm-proxy-extras/pyproject.toml litellm-proxy-extras/
|
||||
|
||||
# Install third-party dependencies (cached unless pyproject.toml/uv.lock change)
|
||||
RUN uv sync --frozen --no-install-project --no-install-workspace --no-default-groups --no-editable \
|
||||
--extra proxy \
|
||||
--extra proxy-runtime \
|
||||
--extra extra_proxy \
|
||||
--extra semantic-router \
|
||||
--python python
|
||||
|
||||
# Copy full source tree
|
||||
COPY . .
|
||||
|
||||
# Build Admin UI before final sync
|
||||
RUN sed -i 's/\r$//' docker/build_admin_ui.sh && chmod +x docker/build_admin_ui.sh && ./docker/build_admin_ui.sh
|
||||
|
||||
# Install project and workspace packages (fast - deps already cached)
|
||||
RUN uv sync --frozen --no-default-groups --no-editable \
|
||||
--extra proxy \
|
||||
--extra proxy-runtime \
|
||||
--extra extra_proxy \
|
||||
--extra semantic-router \
|
||||
--python python
|
||||
|
||||
RUN prisma generate --schema=./schema.prisma
|
||||
|
||||
RUN sed -i 's/\r$//' docker/entrypoint.sh && chmod +x docker/entrypoint.sh && \
|
||||
sed -i 's/\r$//' docker/prod_entrypoint.sh && chmod +x docker/prod_entrypoint.sh
|
||||
|
||||
FROM $LITELLM_RUNTIME_IMAGE AS runtime
|
||||
|
||||
USER root
|
||||
|
||||
RUN apt-get update && apt-get upgrade -y \
|
||||
libxml2 \
|
||||
libexpat1 \
|
||||
openssl \
|
||||
libssl3 \
|
||||
git \
|
||||
libkrb5-3 \
|
||||
libglib2.0-0 \
|
||||
wget \
|
||||
libaom3 \
|
||||
libxslt1.1 \
|
||||
libgnutls30 \
|
||||
libc6 \
|
||||
&& apt-get install -y --no-install-recommends \
|
||||
libssl3 \
|
||||
libatomic1 \
|
||||
nodejs \
|
||||
npm \
|
||||
&& rm -rf /var/lib/apt/lists/* \
|
||||
&& npm install -g npm@11.12.1 tar@7.5.11 glob@11.1.0 @isaacs/brace-expansion@5.0.1 minimatch@10.2.4 diff@8.0.3 \
|
||||
&& GLOBAL="$(npm root -g)" \
|
||||
&& find "$GLOBAL/npm" -type d -name "tar" -path "*/node_modules/tar" | while read d; do \
|
||||
rm -rf "$d" && cp -rL "$GLOBAL/tar" "$d"; \
|
||||
done \
|
||||
&& find "$GLOBAL/npm" -type d -name "glob" -path "*/node_modules/glob" | while read d; do \
|
||||
rm -rf "$d" && cp -rL "$GLOBAL/glob" "$d"; \
|
||||
done \
|
||||
&& find "$GLOBAL/npm" -type d -name "brace-expansion" -path "*/node_modules/@isaacs/brace-expansion" | while read d; do \
|
||||
rm -rf "$d" && cp -rL "$GLOBAL/@isaacs/brace-expansion" "$d"; \
|
||||
done \
|
||||
&& find "$GLOBAL/npm" -type d -name "minimatch" -path "*/node_modules/minimatch" | while read d; do \
|
||||
rm -rf "$d" && cp -rL "$GLOBAL/minimatch" "$d"; \
|
||||
done \
|
||||
&& find "$GLOBAL/npm" -type d -name "diff" -path "*/node_modules/diff" | while read d; do \
|
||||
rm -rf "$d" && cp -rL "$GLOBAL/diff" "$d"; \
|
||||
done \
|
||||
&& find /usr/local/lib /usr/lib -path "*/node_modules/npm/package.json" -exec \
|
||||
sed -i 's/"tar": "\^7\.5\.[0-9]*"/"tar": "^7.5.10"/g; s/"minimatch": "\^10\.[0-9.]*"/"minimatch": "^10.2.4"/g' {} + 2>/dev/null \
|
||||
&& npm cache clean --force \
|
||||
&& apt-get purge -y npm
|
||||
|
||||
WORKDIR /app
|
||||
ENV PRISMA_BINARY_CACHE_DIR=/app/.cache/prisma-python/binaries \
|
||||
XDG_CACHE_HOME=/app/.cache \
|
||||
PATH="/app/.venv/bin:${PATH}"
|
||||
|
||||
COPY --from=builder /app /app
|
||||
|
||||
EXPOSE 4000/tcp
|
||||
|
||||
ENTRYPOINT ["docker/prod_entrypoint.sh"]
|
||||
CMD ["--port", "4000"]
|
||||
@ -1,30 +0,0 @@
|
||||
ARG UV_IMAGE=ghcr.io/astral-sh/uv:0.11.7@sha256:240fb85ab0f263ef12f492d8476aa3a2e4e1e333f7d67fbdd923d00a506a516a
|
||||
FROM $UV_IMAGE AS uvbin
|
||||
|
||||
FROM python:3.13-slim@sha256:739e7213785e88c0f702dcdc12c0973afcbd606dbf021a589cab77d6b00b579d
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
# Copy the uv binary, the project dependency metadata (pyproject.toml, uv.lock), and the health check script.
|
||||
COPY --from=uvbin /uv /usr/local/bin/uv
|
||||
COPY pyproject.toml uv.lock /app/
|
||||
COPY scripts/health_check/health_check_client.py /app/health_check_client.py
|
||||
|
||||
# Resolve and install the health-check dependencies from the project lockfile
|
||||
# so the runtime image stays self-contained and reproducible.
|
||||
RUN uv export --frozen --no-default-groups --only-group healthcheck --no-emit-project --no-hashes --output-file /tmp/health-check-requirements.txt \
|
||||
&& uv pip install --system -r /tmp/health-check-requirements.txt \
|
||||
&& rm /tmp/health-check-requirements.txt \
|
||||
&& rm /app/pyproject.toml /app/uv.lock \
|
||||
&& chmod +x /app/health_check_client.py
|
||||
|
||||
# Run as non-root user
|
||||
RUN groupadd --gid 1000 appuser && useradd --uid 1000 --gid 1000 --no-create-home appuser
|
||||
USER appuser
|
||||
|
||||
# Health check
|
||||
HEALTHCHECK --interval=30s --timeout=5s --start-period=5s --retries=3 \
|
||||
CMD ["python", "/app/health_check_client.py", "--help"]
|
||||
|
||||
# Set entrypoint
|
||||
ENTRYPOINT ["python", "/app/health_check_client.py"]
|
||||
108
index.yaml
108
index.yaml
@ -1,108 +0,0 @@
|
||||
apiVersion: v1
|
||||
entries:
|
||||
litellm-helm:
|
||||
- apiVersion: v2
|
||||
appVersion: v1.43.18
|
||||
created: "2024-08-19T23:58:25.331689+08:00"
|
||||
dependencies:
|
||||
- condition: db.deployStandalone
|
||||
name: postgresql
|
||||
repository: oci://registry-1.docker.io/bitnamicharts
|
||||
version: '>=13.3.0'
|
||||
- condition: redis.enabled
|
||||
name: redis
|
||||
repository: oci://registry-1.docker.io/bitnamicharts
|
||||
version: '>=18.0.0'
|
||||
description: Call all LLM APIs using the OpenAI format
|
||||
digest: 0411df3dc42868be8af3ad3e00cb252790e6bd7ad15f5b77f1ca5214573a8531
|
||||
name: litellm-helm
|
||||
type: application
|
||||
urls:
|
||||
- https://berriai.github.io/litellm/litellm-helm-0.2.3.tgz
|
||||
version: 0.2.3
|
||||
postgresql:
|
||||
- annotations:
|
||||
category: Database
|
||||
images: |
|
||||
- name: os-shell
|
||||
image: docker.io/bitnami/os-shell:12-debian-12-r16
|
||||
- name: postgres-exporter
|
||||
image: docker.io/bitnami/postgres-exporter:0.15.0-debian-12-r14
|
||||
- name: postgresql
|
||||
image: docker.io/bitnami/postgresql:16.2.0-debian-12-r6
|
||||
licenses: Apache-2.0
|
||||
apiVersion: v2
|
||||
appVersion: 16.2.0
|
||||
created: "2024-08-19T23:58:25.335716+08:00"
|
||||
dependencies:
|
||||
- name: common
|
||||
repository: oci://registry-1.docker.io/bitnamicharts
|
||||
tags:
|
||||
- bitnami-common
|
||||
version: 2.x.x
|
||||
description: PostgreSQL (Postgres) is an open source object-relational database
|
||||
known for reliability and data integrity. ACID-compliant, it supports foreign
|
||||
keys, joins, views, triggers and stored procedures.
|
||||
digest: 3c8125526b06833df32e2f626db34aeaedb29d38f03d15349db6604027d4a167
|
||||
home: https://bitnami.com
|
||||
icon: https://bitnami.com/assets/stacks/postgresql/img/postgresql-stack-220x234.png
|
||||
keywords:
|
||||
- postgresql
|
||||
- postgres
|
||||
- database
|
||||
- sql
|
||||
- replication
|
||||
- cluster
|
||||
maintainers:
|
||||
- name: VMware, Inc.
|
||||
url: https://github.com/bitnami/charts
|
||||
name: postgresql
|
||||
sources:
|
||||
- https://github.com/bitnami/charts/tree/main/bitnami/postgresql
|
||||
urls:
|
||||
- https://berriai.github.io/litellm/charts/postgresql-14.3.1.tgz
|
||||
version: 14.3.1
|
||||
redis:
|
||||
- annotations:
|
||||
category: Database
|
||||
images: |
|
||||
- name: kubectl
|
||||
image: docker.io/bitnami/kubectl:1.29.2-debian-12-r3
|
||||
- name: os-shell
|
||||
image: docker.io/bitnami/os-shell:12-debian-12-r16
|
||||
- name: redis
|
||||
image: docker.io/bitnami/redis:7.2.4-debian-12-r9
|
||||
- name: redis-exporter
|
||||
image: docker.io/bitnami/redis-exporter:1.58.0-debian-12-r4
|
||||
- name: redis-sentinel
|
||||
image: docker.io/bitnami/redis-sentinel:7.2.4-debian-12-r7
|
||||
licenses: Apache-2.0
|
||||
apiVersion: v2
|
||||
appVersion: 7.2.4
|
||||
created: "2024-08-19T23:58:25.339392+08:00"
|
||||
dependencies:
|
||||
- name: common
|
||||
repository: oci://registry-1.docker.io/bitnamicharts
|
||||
tags:
|
||||
- bitnami-common
|
||||
version: 2.x.x
|
||||
description: Redis(R) is an open source, advanced key-value store. It is often
|
||||
referred to as a data structure server since keys can contain strings, hashes,
|
||||
lists, sets and sorted sets.
|
||||
digest: b2fa1835f673a18002ca864c54fadac3c33789b26f6c5e58e2851b0b14a8f984
|
||||
home: https://bitnami.com
|
||||
icon: https://bitnami.com/assets/stacks/redis/img/redis-stack-220x234.png
|
||||
keywords:
|
||||
- redis
|
||||
- keyvalue
|
||||
- database
|
||||
maintainers:
|
||||
- name: VMware, Inc.
|
||||
url: https://github.com/bitnami/charts
|
||||
name: redis
|
||||
sources:
|
||||
- https://github.com/bitnami/charts/tree/main/bitnami/redis
|
||||
urls:
|
||||
- https://berriai.github.io/litellm/charts/redis-18.19.1.tgz
|
||||
version: 18.19.1
|
||||
generated: "2024-08-19T23:58:25.322532+08:00"
|
||||
@ -1,5 +0,0 @@
|
||||
# Supply-chain hardening
|
||||
# Packages needing lifecycle scripts: npm rebuild <pkg>
|
||||
ignore-scripts=true
|
||||
# Protects local npm install only — npm ci (used in CI) ignores this
|
||||
min-release-age=3
|
||||
@ -1,8 +0,0 @@
|
||||
```
|
||||
npm install
|
||||
npm run dev
|
||||
```
|
||||
|
||||
```
|
||||
npm run deploy
|
||||
```
|
||||
2054
litellm-js/proxy/package-lock.json
generated
2054
litellm-js/proxy/package-lock.json
generated
File diff suppressed because it is too large
Load Diff
@ -1,14 +0,0 @@
|
||||
{
|
||||
"scripts": {
|
||||
"dev": "wrangler dev src/index.ts",
|
||||
"deploy": "wrangler deploy --minify src/index.ts"
|
||||
},
|
||||
"dependencies": {
|
||||
"hono": "4.12.16",
|
||||
"openai": "4.29.2"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@cloudflare/workers-types": "4.20260501.1",
|
||||
"wrangler": "4.87.0"
|
||||
}
|
||||
}
|
||||
@ -1,59 +0,0 @@
|
||||
import { Hono } from 'hono'
|
||||
import { Context } from 'hono';
|
||||
import { bearerAuth } from 'hono/bearer-auth'
|
||||
import OpenAI from "openai";
|
||||
|
||||
const openai = new OpenAI({
|
||||
apiKey: "sk-1234",
|
||||
baseURL: "https://openai-endpoint.ishaanjaffer0324.workers.dev"
|
||||
});
|
||||
|
||||
async function call_proxy() {
|
||||
const completion = await openai.chat.completions.create({
|
||||
messages: [{ role: "system", content: "You are a helpful assistant." }],
|
||||
model: "gpt-3.5-turbo",
|
||||
});
|
||||
|
||||
return completion
|
||||
}
|
||||
|
||||
const app = new Hono()
|
||||
|
||||
// Middleware for API Key Authentication
|
||||
const apiKeyAuth = async (c: Context, next: Function) => {
|
||||
const apiKey = c.req.header('Authorization');
|
||||
if (!apiKey || apiKey !== 'Bearer sk-1234') {
|
||||
return c.text('Unauthorized', 401);
|
||||
}
|
||||
await next();
|
||||
};
|
||||
|
||||
|
||||
app.use('/*', apiKeyAuth)
|
||||
|
||||
|
||||
app.get('/', (c) => {
|
||||
return c.text('Hello Hono!')
|
||||
})
|
||||
|
||||
|
||||
|
||||
|
||||
// Handler for chat completions
|
||||
const chatCompletionHandler = async (c: Context) => {
|
||||
// Call the upstream OpenAI-compatible endpoint via call_proxy() (it sends a fixed demo prompt; the incoming request body is not read)
|
||||
// and return the resulting completion as the JSON response
|
||||
const response = await call_proxy()
|
||||
return c.json(response);
|
||||
};
|
||||
|
||||
// Register the above handler for different POST routes with the apiKeyAuth middleware
|
||||
app.post('/v1/chat/completions', chatCompletionHandler);
|
||||
app.post('/chat/completions', chatCompletionHandler);
|
||||
|
||||
// Example showing how you might handle dynamic segments within the URL
|
||||
// Here, using ':model*' to capture the rest of the path as a parameter 'model'
|
||||
app.post('/openai/deployments/:model*/chat/completions', chatCompletionHandler);
|
||||
|
||||
|
||||
export default app
|
||||
@ -1,17 +0,0 @@
|
||||
{
|
||||
"compilerOptions": {
|
||||
"target": "ESNext",
|
||||
"module": "ESNext",
|
||||
"moduleResolution": "Bundler",
|
||||
"strict": true,
|
||||
"lib": [
|
||||
"ESNext"
|
||||
],
|
||||
"types": [
|
||||
"@cloudflare/workers-types"
|
||||
],
|
||||
"jsx": "react-jsx",
|
||||
"jsxImportSource": "hono/jsx",
|
||||
"skipLibCheck": true
|
||||
},
|
||||
}
|
||||
@ -1,18 +0,0 @@
|
||||
name = "my-app"
|
||||
compatibility_date = "2023-12-01"
|
||||
|
||||
# [vars]
|
||||
# MY_VAR = "my-variable"
|
||||
|
||||
# [[kv_namespaces]]
|
||||
# binding = "MY_KV_NAMESPACE"
|
||||
# id = "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"
|
||||
|
||||
# [[r2_buckets]]
|
||||
# binding = "MY_BUCKET"
|
||||
# bucket_name = "my-bucket"
|
||||
|
||||
# [[d1_databases]]
|
||||
# binding = "DB"
|
||||
# database_name = "my-database"
|
||||
# database_id = ""
|
||||
@ -1,5 +0,0 @@
|
||||
# Supply-chain hardening
|
||||
# Packages needing lifecycle scripts: npm rebuild <pkg>
|
||||
ignore-scripts=true
|
||||
# Protects local npm install only — npm ci (used in CI) ignores this
|
||||
min-release-age=3
|
||||
@ -1,26 +0,0 @@
|
||||
# Use the specific Node.js v20.18.1 image (Alpine 3.20)
|
||||
FROM node:20.18.1-alpine3.20
|
||||
|
||||
# Set the working directory inside the container
|
||||
WORKDIR /app
|
||||
|
||||
# Copy package.json and package-lock.json to the working directory
|
||||
COPY ./litellm-js/spend-logs/package*.json ./
|
||||
|
||||
# Install dependencies
|
||||
RUN npm ci
|
||||
|
||||
# Install Prisma globally
|
||||
RUN npm install -g prisma
|
||||
|
||||
# Copy the rest of the application code
|
||||
COPY ./litellm-js/spend-logs .
|
||||
|
||||
# Generate Prisma client
|
||||
RUN npx prisma generate
|
||||
|
||||
# Expose the port that the Node.js server will run on
|
||||
EXPOSE 3000
|
||||
|
||||
# Command to run the Node.js app with npm run dev
|
||||
CMD ["npm", "run", "dev"]
|
||||
@ -1,8 +0,0 @@
|
||||
```
|
||||
npm install
|
||||
npm run dev
|
||||
```
|
||||
|
||||
```
|
||||
open http://localhost:3000
|
||||
```
|
||||
597
litellm-js/spend-logs/package-lock.json
generated
597
litellm-js/spend-logs/package-lock.json
generated
@ -1,597 +0,0 @@
|
||||
{
|
||||
"name": "spend-logs",
|
||||
"lockfileVersion": 3,
|
||||
"requires": true,
|
||||
"packages": {
|
||||
"": {
|
||||
"dependencies": {
|
||||
"@hono/node-server": "1.19.13",
|
||||
"hono": "4.12.16"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@types/node": "20.19.25",
|
||||
"tsx": "4.20.6"
|
||||
}
|
||||
},
|
||||
"node_modules/@esbuild/aix-ppc64": {
|
||||
"version": "0.25.12",
|
||||
"resolved": "https://registry.npmjs.org/@esbuild/aix-ppc64/-/aix-ppc64-0.25.12.tgz",
|
||||
"integrity": "sha512-Hhmwd6CInZ3dwpuGTF8fJG6yoWmsToE+vYgD4nytZVxcu1ulHpUQRAB1UJ8+N1Am3Mz4+xOByoQoSZf4D+CpkA==",
|
||||
"cpu": [
|
||||
"ppc64"
|
||||
],
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"aix"
|
||||
],
|
||||
"engines": {
|
||||
"node": ">=18"
|
||||
}
|
||||
},
|
||||
"node_modules/@esbuild/android-arm": {
|
||||
"version": "0.25.12",
|
||||
"resolved": "https://registry.npmjs.org/@esbuild/android-arm/-/android-arm-0.25.12.tgz",
|
||||
"integrity": "sha512-VJ+sKvNA/GE7Ccacc9Cha7bpS8nyzVv0jdVgwNDaR4gDMC/2TTRc33Ip8qrNYUcpkOHUT5OZ0bUcNNVZQ9RLlg==",
|
||||
"cpu": [
|
||||
"arm"
|
||||
],
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"android"
|
||||
],
|
||||
"engines": {
|
||||
"node": ">=18"
|
||||
}
|
||||
},
|
||||
"node_modules/@esbuild/android-arm64": {
|
||||
"version": "0.25.12",
|
||||
"resolved": "https://registry.npmjs.org/@esbuild/android-arm64/-/android-arm64-0.25.12.tgz",
|
||||
"integrity": "sha512-6AAmLG7zwD1Z159jCKPvAxZd4y/VTO0VkprYy+3N2FtJ8+BQWFXU+OxARIwA46c5tdD9SsKGZ/1ocqBS/gAKHg==",
|
||||
"cpu": [
|
||||
"arm64"
|
||||
],
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"android"
|
||||
],
|
||||
"engines": {
|
||||
"node": ">=18"
|
||||
}
|
||||
},
|
||||
"node_modules/@esbuild/android-x64": {
|
||||
"version": "0.25.12",
|
||||
"resolved": "https://registry.npmjs.org/@esbuild/android-x64/-/android-x64-0.25.12.tgz",
|
||||
"integrity": "sha512-5jbb+2hhDHx5phYR2By8GTWEzn6I9UqR11Kwf22iKbNpYrsmRB18aX/9ivc5cabcUiAT/wM+YIZ6SG9QO6a8kg==",
|
||||
"cpu": [
|
||||
"x64"
|
||||
],
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"android"
|
||||
],
|
||||
"engines": {
|
||||
"node": ">=18"
|
||||
}
|
||||
},
|
||||
"node_modules/@esbuild/darwin-arm64": {
|
||||
"version": "0.25.12",
|
||||
"resolved": "https://registry.npmjs.org/@esbuild/darwin-arm64/-/darwin-arm64-0.25.12.tgz",
|
||||
"integrity": "sha512-N3zl+lxHCifgIlcMUP5016ESkeQjLj/959RxxNYIthIg+CQHInujFuXeWbWMgnTo4cp5XVHqFPmpyu9J65C1Yg==",
|
||||
"cpu": [
|
||||
"arm64"
|
||||
],
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"darwin"
|
||||
],
|
||||
"engines": {
|
||||
"node": ">=18"
|
||||
}
|
||||
},
|
||||
"node_modules/@esbuild/darwin-x64": {
|
||||
"version": "0.25.12",
|
||||
"resolved": "https://registry.npmjs.org/@esbuild/darwin-x64/-/darwin-x64-0.25.12.tgz",
|
||||
"integrity": "sha512-HQ9ka4Kx21qHXwtlTUVbKJOAnmG1ipXhdWTmNXiPzPfWKpXqASVcWdnf2bnL73wgjNrFXAa3yYvBSd9pzfEIpA==",
|
||||
"cpu": [
|
||||
"x64"
|
||||
],
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"darwin"
|
||||
],
|
||||
"engines": {
|
||||
"node": ">=18"
|
||||
}
|
||||
},
|
||||
"node_modules/@esbuild/freebsd-arm64": {
|
||||
"version": "0.25.12",
|
||||
"resolved": "https://registry.npmjs.org/@esbuild/freebsd-arm64/-/freebsd-arm64-0.25.12.tgz",
|
||||
"integrity": "sha512-gA0Bx759+7Jve03K1S0vkOu5Lg/85dou3EseOGUes8flVOGxbhDDh/iZaoek11Y8mtyKPGF3vP8XhnkDEAmzeg==",
|
||||
"cpu": [
|
||||
"arm64"
|
||||
],
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"freebsd"
|
||||
],
|
||||
"engines": {
|
||||
"node": ">=18"
|
||||
}
|
||||
},
|
||||
"node_modules/@esbuild/freebsd-x64": {
|
||||
"version": "0.25.12",
|
||||
"resolved": "https://registry.npmjs.org/@esbuild/freebsd-x64/-/freebsd-x64-0.25.12.tgz",
|
||||
"integrity": "sha512-TGbO26Yw2xsHzxtbVFGEXBFH0FRAP7gtcPE7P5yP7wGy7cXK2oO7RyOhL5NLiqTlBh47XhmIUXuGciXEqYFfBQ==",
|
||||
"cpu": [
|
||||
"x64"
|
||||
],
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"freebsd"
|
||||
],
|
||||
"engines": {
|
||||
"node": ">=18"
|
||||
}
|
||||
},
|
||||
"node_modules/@esbuild/linux-arm": {
|
||||
"version": "0.25.12",
|
||||
"resolved": "https://registry.npmjs.org/@esbuild/linux-arm/-/linux-arm-0.25.12.tgz",
|
||||
"integrity": "sha512-lPDGyC1JPDou8kGcywY0YILzWlhhnRjdof3UlcoqYmS9El818LLfJJc3PXXgZHrHCAKs/Z2SeZtDJr5MrkxtOw==",
|
||||
"cpu": [
|
||||
"arm"
|
||||
],
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"linux"
|
||||
],
|
||||
"engines": {
|
||||
"node": ">=18"
|
||||
}
|
||||
},
|
||||
"node_modules/@esbuild/linux-arm64": {
|
||||
"version": "0.25.12",
|
||||
"resolved": "https://registry.npmjs.org/@esbuild/linux-arm64/-/linux-arm64-0.25.12.tgz",
|
||||
"integrity": "sha512-8bwX7a8FghIgrupcxb4aUmYDLp8pX06rGh5HqDT7bB+8Rdells6mHvrFHHW2JAOPZUbnjUpKTLg6ECyzvas2AQ==",
|
||||
"cpu": [
|
||||
"arm64"
|
||||
],
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"linux"
|
||||
],
|
||||
"engines": {
|
||||
"node": ">=18"
|
||||
}
|
||||
},
|
||||
"node_modules/@esbuild/linux-ia32": {
|
||||
"version": "0.25.12",
|
||||
"resolved": "https://registry.npmjs.org/@esbuild/linux-ia32/-/linux-ia32-0.25.12.tgz",
|
||||
"integrity": "sha512-0y9KrdVnbMM2/vG8KfU0byhUN+EFCny9+8g202gYqSSVMonbsCfLjUO+rCci7pM0WBEtz+oK/PIwHkzxkyharA==",
|
||||
"cpu": [
|
||||
"ia32"
|
||||
],
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"linux"
|
||||
],
|
||||
"engines": {
|
||||
"node": ">=18"
|
||||
}
|
||||
},
|
||||
"node_modules/@esbuild/linux-loong64": {
|
||||
"version": "0.25.12",
|
||||
"resolved": "https://registry.npmjs.org/@esbuild/linux-loong64/-/linux-loong64-0.25.12.tgz",
|
||||
"integrity": "sha512-h///Lr5a9rib/v1GGqXVGzjL4TMvVTv+s1DPoxQdz7l/AYv6LDSxdIwzxkrPW438oUXiDtwM10o9PmwS/6Z0Ng==",
|
||||
"cpu": [
|
||||
"loong64"
|
||||
],
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"linux"
|
||||
],
|
||||
"engines": {
|
||||
"node": ">=18"
|
||||
}
|
||||
},
|
||||
"node_modules/@esbuild/linux-mips64el": {
|
||||
"version": "0.25.12",
|
||||
"resolved": "https://registry.npmjs.org/@esbuild/linux-mips64el/-/linux-mips64el-0.25.12.tgz",
|
||||
"integrity": "sha512-iyRrM1Pzy9GFMDLsXn1iHUm18nhKnNMWscjmp4+hpafcZjrr2WbT//d20xaGljXDBYHqRcl8HnxbX6uaA/eGVw==",
|
||||
"cpu": [
|
||||
"mips64el"
|
||||
],
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"linux"
|
||||
],
|
||||
"engines": {
|
||||
"node": ">=18"
|
||||
}
|
||||
},
|
||||
"node_modules/@esbuild/linux-ppc64": {
|
||||
"version": "0.25.12",
|
||||
"resolved": "https://registry.npmjs.org/@esbuild/linux-ppc64/-/linux-ppc64-0.25.12.tgz",
|
||||
"integrity": "sha512-9meM/lRXxMi5PSUqEXRCtVjEZBGwB7P/D4yT8UG/mwIdze2aV4Vo6U5gD3+RsoHXKkHCfSxZKzmDssVlRj1QQA==",
|
||||
"cpu": [
|
||||
"ppc64"
|
||||
],
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"linux"
|
||||
],
|
||||
"engines": {
|
||||
"node": ">=18"
|
||||
}
|
||||
},
|
||||
"node_modules/@esbuild/linux-riscv64": {
|
||||
"version": "0.25.12",
|
||||
"resolved": "https://registry.npmjs.org/@esbuild/linux-riscv64/-/linux-riscv64-0.25.12.tgz",
|
||||
"integrity": "sha512-Zr7KR4hgKUpWAwb1f3o5ygT04MzqVrGEGXGLnj15YQDJErYu/BGg+wmFlIDOdJp0PmB0lLvxFIOXZgFRrdjR0w==",
|
||||
"cpu": [
|
||||
"riscv64"
|
||||
],
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"linux"
|
||||
],
|
||||
"engines": {
|
||||
"node": ">=18"
|
||||
}
|
||||
},
|
||||
"node_modules/@esbuild/linux-s390x": {
|
||||
"version": "0.25.12",
|
||||
"resolved": "https://registry.npmjs.org/@esbuild/linux-s390x/-/linux-s390x-0.25.12.tgz",
|
||||
"integrity": "sha512-MsKncOcgTNvdtiISc/jZs/Zf8d0cl/t3gYWX8J9ubBnVOwlk65UIEEvgBORTiljloIWnBzLs4qhzPkJcitIzIg==",
|
||||
"cpu": [
|
||||
"s390x"
|
||||
],
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"linux"
|
||||
],
|
||||
"engines": {
|
||||
"node": ">=18"
|
||||
}
|
||||
},
|
||||
"node_modules/@esbuild/linux-x64": {
|
||||
"version": "0.25.12",
|
||||
"resolved": "https://registry.npmjs.org/@esbuild/linux-x64/-/linux-x64-0.25.12.tgz",
|
||||
"integrity": "sha512-uqZMTLr/zR/ed4jIGnwSLkaHmPjOjJvnm6TVVitAa08SLS9Z0VM8wIRx7gWbJB5/J54YuIMInDquWyYvQLZkgw==",
|
||||
"cpu": [
|
||||
"x64"
|
||||
],
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"linux"
|
||||
],
|
||||
"engines": {
|
||||
"node": ">=18"
|
||||
}
|
||||
},
|
||||
"node_modules/@esbuild/netbsd-arm64": {
|
||||
"version": "0.25.12",
|
||||
"resolved": "https://registry.npmjs.org/@esbuild/netbsd-arm64/-/netbsd-arm64-0.25.12.tgz",
|
||||
"integrity": "sha512-xXwcTq4GhRM7J9A8Gv5boanHhRa/Q9KLVmcyXHCTaM4wKfIpWkdXiMog/KsnxzJ0A1+nD+zoecuzqPmCRyBGjg==",
|
||||
"cpu": [
|
||||
"arm64"
|
||||
],
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"netbsd"
|
||||
],
|
||||
"engines": {
|
||||
"node": ">=18"
|
||||
}
|
||||
},
|
||||
"node_modules/@esbuild/netbsd-x64": {
|
||||
"version": "0.25.12",
|
||||
"resolved": "https://registry.npmjs.org/@esbuild/netbsd-x64/-/netbsd-x64-0.25.12.tgz",
|
||||
"integrity": "sha512-Ld5pTlzPy3YwGec4OuHh1aCVCRvOXdH8DgRjfDy/oumVovmuSzWfnSJg+VtakB9Cm0gxNO9BzWkj6mtO1FMXkQ==",
|
||||
"cpu": [
|
||||
"x64"
|
||||
],
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"netbsd"
|
||||
],
|
||||
"engines": {
|
||||
"node": ">=18"
|
||||
}
|
||||
},
|
||||
"node_modules/@esbuild/openbsd-arm64": {
|
||||
"version": "0.25.12",
|
||||
"resolved": "https://registry.npmjs.org/@esbuild/openbsd-arm64/-/openbsd-arm64-0.25.12.tgz",
|
||||
"integrity": "sha512-fF96T6KsBo/pkQI950FARU9apGNTSlZGsv1jZBAlcLL1MLjLNIWPBkj5NlSz8aAzYKg+eNqknrUJ24QBybeR5A==",
|
||||
"cpu": [
|
||||
"arm64"
|
||||
],
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"openbsd"
|
||||
],
|
||||
"engines": {
|
||||
"node": ">=18"
|
||||
}
|
||||
},
|
||||
"node_modules/@esbuild/openbsd-x64": {
|
||||
"version": "0.25.12",
|
||||
"resolved": "https://registry.npmjs.org/@esbuild/openbsd-x64/-/openbsd-x64-0.25.12.tgz",
|
||||
"integrity": "sha512-MZyXUkZHjQxUvzK7rN8DJ3SRmrVrke8ZyRusHlP+kuwqTcfWLyqMOE3sScPPyeIXN/mDJIfGXvcMqCgYKekoQw==",
|
||||
"cpu": [
|
||||
"x64"
|
||||
],
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"openbsd"
|
||||
],
|
||||
"engines": {
|
||||
"node": ">=18"
|
||||
}
|
||||
},
|
||||
"node_modules/@esbuild/openharmony-arm64": {
|
||||
"version": "0.25.12",
|
||||
"resolved": "https://registry.npmjs.org/@esbuild/openharmony-arm64/-/openharmony-arm64-0.25.12.tgz",
|
||||
"integrity": "sha512-rm0YWsqUSRrjncSXGA7Zv78Nbnw4XL6/dzr20cyrQf7ZmRcsovpcRBdhD43Nuk3y7XIoW2OxMVvwuRvk9XdASg==",
|
||||
"cpu": [
|
||||
"arm64"
|
||||
],
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"openharmony"
|
||||
],
|
||||
"engines": {
|
||||
"node": ">=18"
|
||||
}
|
||||
},
|
||||
"node_modules/@esbuild/sunos-x64": {
|
||||
"version": "0.25.12",
|
||||
"resolved": "https://registry.npmjs.org/@esbuild/sunos-x64/-/sunos-x64-0.25.12.tgz",
|
||||
"integrity": "sha512-3wGSCDyuTHQUzt0nV7bocDy72r2lI33QL3gkDNGkod22EsYl04sMf0qLb8luNKTOmgF/eDEDP5BFNwoBKH441w==",
|
||||
"cpu": [
|
||||
"x64"
|
||||
],
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"sunos"
|
||||
],
|
||||
"engines": {
|
||||
"node": ">=18"
|
||||
}
|
||||
},
|
||||
"node_modules/@esbuild/win32-arm64": {
|
||||
"version": "0.25.12",
|
||||
"resolved": "https://registry.npmjs.org/@esbuild/win32-arm64/-/win32-arm64-0.25.12.tgz",
|
||||
"integrity": "sha512-rMmLrur64A7+DKlnSuwqUdRKyd3UE7oPJZmnljqEptesKM8wx9J8gx5u0+9Pq0fQQW8vqeKebwNXdfOyP+8Bsg==",
|
||||
"cpu": [
|
||||
"arm64"
|
||||
],
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"win32"
|
||||
],
|
||||
"engines": {
|
||||
"node": ">=18"
|
||||
}
|
||||
},
|
||||
"node_modules/@esbuild/win32-ia32": {
|
||||
"version": "0.25.12",
|
||||
"resolved": "https://registry.npmjs.org/@esbuild/win32-ia32/-/win32-ia32-0.25.12.tgz",
|
||||
"integrity": "sha512-HkqnmmBoCbCwxUKKNPBixiWDGCpQGVsrQfJoVGYLPT41XWF8lHuE5N6WhVia2n4o5QK5M4tYr21827fNhi4byQ==",
|
||||
"cpu": [
|
||||
"ia32"
|
||||
],
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"win32"
|
||||
],
|
||||
"engines": {
|
||||
"node": ">=18"
|
||||
}
|
||||
},
|
||||
"node_modules/@esbuild/win32-x64": {
|
||||
"version": "0.25.12",
|
||||
"resolved": "https://registry.npmjs.org/@esbuild/win32-x64/-/win32-x64-0.25.12.tgz",
|
||||
"integrity": "sha512-alJC0uCZpTFrSL0CCDjcgleBXPnCrEAhTBILpeAp7M/OFgoqtAetfBzX0xM00MUsVVPpVjlPuMbREqnZCXaTnA==",
|
||||
"cpu": [
|
||||
"x64"
|
||||
],
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"win32"
|
||||
],
|
||||
"engines": {
|
||||
"node": ">=18"
|
||||
}
|
||||
},
|
||||
"node_modules/@hono/node-server": {
|
||||
"version": "1.19.13",
|
||||
"resolved": "https://registry.npmjs.org/@hono/node-server/-/node-server-1.19.13.tgz",
|
||||
"integrity": "sha512-TsQLe4i2gvoTtrHje625ngThGBySOgSK3Xo2XRYOdqGN1teR8+I7vchQC46uLJi8OF62YTYA3AhSpumtkhsaKQ==",
|
||||
"license": "MIT",
|
||||
"engines": {
|
||||
"node": ">=18.14.1"
|
||||
},
|
||||
"peerDependencies": {
|
||||
"hono": "^4"
|
||||
}
|
||||
},
|
||||
"node_modules/@types/node": {
|
||||
"version": "20.19.25",
|
||||
"resolved": "https://registry.npmjs.org/@types/node/-/node-20.19.25.tgz",
|
||||
"integrity": "sha512-ZsJzA5thDQMSQO788d7IocwwQbI8B5OPzmqNvpf3NY/+MHDAS759Wo0gd2WQeXYt5AAAQjzcrTVC6SKCuYgoCQ==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"undici-types": "~6.21.0"
|
||||
}
|
||||
},
|
||||
"node_modules/esbuild": {
|
||||
"version": "0.25.12",
|
||||
"resolved": "https://registry.npmjs.org/esbuild/-/esbuild-0.25.12.tgz",
|
||||
"integrity": "sha512-bbPBYYrtZbkt6Os6FiTLCTFxvq4tt3JKall1vRwshA3fdVztsLAatFaZobhkBC8/BrPetoa0oksYoKXoG4ryJg==",
|
||||
"dev": true,
|
||||
"hasInstallScript": true,
|
||||
"license": "MIT",
|
||||
"bin": {
|
||||
"esbuild": "bin/esbuild"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=18"
|
||||
},
|
||||
"optionalDependencies": {
|
||||
"@esbuild/aix-ppc64": "0.25.12",
|
||||
"@esbuild/android-arm": "0.25.12",
|
||||
"@esbuild/android-arm64": "0.25.12",
|
||||
"@esbuild/android-x64": "0.25.12",
|
||||
"@esbuild/darwin-arm64": "0.25.12",
|
||||
"@esbuild/darwin-x64": "0.25.12",
|
||||
"@esbuild/freebsd-arm64": "0.25.12",
|
||||
"@esbuild/freebsd-x64": "0.25.12",
|
||||
"@esbuild/linux-arm": "0.25.12",
|
||||
"@esbuild/linux-arm64": "0.25.12",
|
||||
"@esbuild/linux-ia32": "0.25.12",
|
||||
"@esbuild/linux-loong64": "0.25.12",
|
||||
"@esbuild/linux-mips64el": "0.25.12",
|
||||
"@esbuild/linux-ppc64": "0.25.12",
|
||||
"@esbuild/linux-riscv64": "0.25.12",
|
||||
"@esbuild/linux-s390x": "0.25.12",
|
||||
"@esbuild/linux-x64": "0.25.12",
|
||||
"@esbuild/netbsd-arm64": "0.25.12",
|
||||
"@esbuild/netbsd-x64": "0.25.12",
|
||||
"@esbuild/openbsd-arm64": "0.25.12",
|
||||
"@esbuild/openbsd-x64": "0.25.12",
|
||||
"@esbuild/openharmony-arm64": "0.25.12",
|
||||
"@esbuild/sunos-x64": "0.25.12",
|
||||
"@esbuild/win32-arm64": "0.25.12",
|
||||
"@esbuild/win32-ia32": "0.25.12",
|
||||
"@esbuild/win32-x64": "0.25.12"
|
||||
}
|
||||
},
|
||||
"node_modules/fsevents": {
|
||||
"version": "2.3.3",
|
||||
"resolved": "https://registry.npmjs.org/fsevents/-/fsevents-2.3.3.tgz",
|
||||
"integrity": "sha512-5xoDfX+fL7faATnagmWPpbFtwh/R77WmMMqqHGS65C3vvB0YHrgF+B1YmZ3441tMj5n63k0212XNoJwzlhffQw==",
|
||||
"dev": true,
|
||||
"hasInstallScript": true,
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"darwin"
|
||||
],
|
||||
"engines": {
|
||||
"node": "^8.16.0 || ^10.6.0 || >=11.0.0"
|
||||
}
|
||||
},
|
||||
"node_modules/get-tsconfig": {
|
||||
"version": "4.14.0",
|
||||
"resolved": "https://registry.npmjs.org/get-tsconfig/-/get-tsconfig-4.14.0.tgz",
|
||||
"integrity": "sha512-yTb+8DXzDREzgvYmh6s9vHsSVCHeC0G3PI5bEXNBHtmshPnO+S5O7qgLEOn0I5QvMy6kpZN8K1NKGyilLb93wA==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"resolve-pkg-maps": "^1.0.0"
|
||||
},
|
||||
"funding": {
|
||||
"url": "https://github.com/privatenumber/get-tsconfig?sponsor=1"
|
||||
}
|
||||
},
|
||||
"node_modules/hono": {
|
||||
"version": "4.12.16",
|
||||
"resolved": "https://registry.npmjs.org/hono/-/hono-4.12.16.tgz",
|
||||
"integrity": "sha512-jN0ZewiNAWSe5khM3EyCmBb250+b40wWbwNILNfEvq84VREWwOIkuUsFONk/3i3nqkz7Oe1PcpM2mwQEK2L9Kg==",
|
||||
"license": "MIT",
|
||||
"engines": {
|
||||
"node": ">=16.9.0"
|
||||
}
|
||||
},
|
||||
"node_modules/resolve-pkg-maps": {
|
||||
"version": "1.0.0",
|
||||
"resolved": "https://registry.npmjs.org/resolve-pkg-maps/-/resolve-pkg-maps-1.0.0.tgz",
|
||||
"integrity": "sha512-seS2Tj26TBVOC2NIc2rOe2y2ZO7efxITtLZcGSOnHHNOQ7CkiUBfw0Iw2ck6xkIhPwLhKNLS8BO+hEpngQlqzw==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"funding": {
|
||||
"url": "https://github.com/privatenumber/resolve-pkg-maps?sponsor=1"
|
||||
}
|
||||
},
|
||||
"node_modules/tsx": {
|
||||
"version": "4.20.6",
|
||||
"resolved": "https://registry.npmjs.org/tsx/-/tsx-4.20.6.tgz",
|
||||
"integrity": "sha512-ytQKuwgmrrkDTFP4LjR0ToE2nqgy886GpvRSpU0JAnrdBYppuY5rLkRUYPU1yCryb24SsKBTL/hlDQAEFVwtZg==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"esbuild": "~0.25.0",
|
||||
"get-tsconfig": "^4.7.5"
|
||||
},
|
||||
"bin": {
|
||||
"tsx": "dist/cli.mjs"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=18.0.0"
|
||||
},
|
||||
"optionalDependencies": {
|
||||
"fsevents": "~2.3.3"
|
||||
}
|
||||
},
|
||||
"node_modules/undici-types": {
|
||||
"version": "6.21.0",
|
||||
"resolved": "https://registry.npmjs.org/undici-types/-/undici-types-6.21.0.tgz",
|
||||
"integrity": "sha512-iwDZqg0QAGrg9Rav5H4n0M64c3mkR59cJ6wQp+7C4nI0gsmExaedaYLNO44eT4AtBBwjbTiGPMlt2Md0T9H9JQ==",
|
||||
"dev": true,
|
||||
"license": "MIT"
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -1,13 +0,0 @@
|
||||
{
|
||||
"scripts": {
|
||||
"dev": "tsx watch src/index.ts"
|
||||
},
|
||||
"dependencies": {
|
||||
"@hono/node-server": "1.19.13",
|
||||
"hono": "4.12.16"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@types/node": "20.19.25",
|
||||
"tsx": "4.20.6"
|
||||
}
|
||||
}
|
||||
@ -1,29 +0,0 @@
|
||||
generator client {
|
||||
provider = "prisma-client-js"
|
||||
}
|
||||
|
||||
datasource client {
|
||||
provider = "postgresql"
|
||||
url = env("DATABASE_URL")
|
||||
}
|
||||
|
||||
model LiteLLM_SpendLogs {
|
||||
request_id String @id
|
||||
call_type String
|
||||
api_key String @default("")
|
||||
spend Float @default(0.0)
|
||||
total_tokens Int @default(0)
|
||||
prompt_tokens Int @default(0)
|
||||
completion_tokens Int @default(0)
|
||||
startTime DateTime
|
||||
endTime DateTime
|
||||
model String @default("")
|
||||
api_base String @default("")
|
||||
user String @default("")
|
||||
metadata Json @default("{}")
|
||||
cache_hit String @default("")
|
||||
cache_key String @default("")
|
||||
request_tags Json @default("[]")
|
||||
team_id String?
|
||||
end_user String?
|
||||
}
|
||||
@ -1,32 +0,0 @@
|
||||
/**
 * Bundle of spend increments grouped by entity kind, together with the raw
 * spend-log rows that accompany them.
 */
export type LiteLLM_IncrementSpend = {
  // Each entry pairs an identifier with a spend amount: [{ key, spend }, ...]
  key_transactions: LiteLLM_IncrementObject[];
  user_transactions: LiteLLM_IncrementObject[];
  team_transactions: LiteLLM_IncrementObject[];
  // Full spend-log rows, one per request
  spend_logs_transactions: LiteLLM_SpendLogs[];
};
|
||||
|
||||
/**
 * A single spend increment: the identifier it applies to and the amount.
 */
export type LiteLLM_IncrementObject = {
  key: string;
  spend: number;
};
|
||||
|
||||
/**
 * TypeScript mirror of the `LiteLLM_SpendLogs` Prisma model.
 *
 * Prisma-to-TypeScript mapping used below: String -> string, Int/Float ->
 * number, DateTime -> Date, Json -> any, and `String?` -> optional field
 * that may also be null.
 */
export type LiteLLM_SpendLogs = {
  // Primary key (`@id` in the Prisma schema)
  request_id: string;
  call_type: string;
  // Defaults to "" in the schema when not provided
  api_key: string;
  spend: number;
  total_tokens: number;
  prompt_tokens: number;
  completion_tokens: number;
  startTime: Date;
  endTime: Date;
  // Defaults to "" in the schema when not provided
  model: string;
  api_base: string;
  user: string;
  // Free-form JSON; narrow the type if the structure becomes known
  metadata: any;
  cache_hit: string;
  cache_key: string;
  // JSON column (schema default "[]"); could be typed more precisely
  request_tags: any;
  // Nullable columns: may be omitted, undefined, or null
  team_id?: string | null;
  end_user?: string | null;
};
|
||||
@ -1,84 +0,0 @@
|
||||
import { serve } from '@hono/node-server'
|
||||
import { Hono } from 'hono'
|
||||
import { PrismaClient } from '@prisma/client'
|
||||
import {LiteLLM_SpendLogs, LiteLLM_IncrementSpend, LiteLLM_IncrementObject} from './_types'
|
||||
|
||||
const app = new Hono()
|
||||
const prisma = new PrismaClient()
|
||||
// In-memory storage for logs
|
||||
let spend_logs: LiteLLM_SpendLogs[] = [];
|
||||
const key_logs: LiteLLM_IncrementObject[] = [];
|
||||
const user_logs: LiteLLM_IncrementObject[] = [];
|
||||
const transaction_logs: LiteLLM_IncrementObject[] = [];
|
||||
|
||||
|
||||
app.get('/', (c) => {
|
||||
return c.text('Hello Hono!')
|
||||
})
|
||||
|
||||
const MIN_LOGS = 1; // Minimum number of logs needed to initiate a flush
|
||||
const FLUSH_INTERVAL = 5000; // Time in ms to wait before trying to flush again
|
||||
const BATCH_SIZE = 100; // Preferred size of each batch to write to the database
|
||||
const MAX_LOGS_PER_INTERVAL = 1000; // Maximum number of logs to flush in a single interval
|
||||
|
||||
// True while a flush is awaiting the database. The interval timer fires
// regardless of whether the previous flush has finished; without this guard
// two overlapping runs would read the same head of the queue, insert it twice,
// and then each trim the queue, dropping logs that were never written.
let flushInProgress = false;

/**
 * Write queued spend logs to the database in batches.
 *
 * At most MAX_LOGS_PER_INTERVAL logs are written per call, in chunks of up to
 * BATCH_SIZE. Nothing is written until at least MIN_LOGS are queued. A batch is
 * removed from the in-memory queue only after its insert succeeds, so a
 * failure leaves the unwritten logs queued for the next interval and never
 * re-inserts batches that were already stored.
 *
 * Errors are logged rather than thrown: this function is invoked from a timer,
 * where a rejected promise would otherwise go unhandled.
 */
const flushLogsToDb = async () => {
  if (flushInProgress) {
    console.log("Previous flush still in progress. Skipping this interval.");
    return;
  }

  if (spend_logs.length < MIN_LOGS) {
    // Distinguish "below threshold" from "queue is empty" in the output.
    if (spend_logs.length > 0) {
      console.log(`Accumulating logs. Currently at ${spend_logs.length}, waiting for at least ${MIN_LOGS}.`);
    } else {
      console.log("No logs to flush.");
    }
    return;
  }

  flushInProgress = true;
  let processed = 0;

  try {
    // Cap the work done in this interval; the queue may grow while we await.
    const limit = Math.min(spend_logs.length, MAX_LOGS_PER_INTERVAL);

    while (processed < limit) {
      const batch = spend_logs.slice(0, Math.min(BATCH_SIZE, limit - processed));

      // Convert datetime strings to Date objects
      const batchWithDates = batch.map(entry => ({
        ...entry,
        startTime: new Date(entry.startTime),
        endTime: new Date(entry.endTime),
      }));

      await prisma.liteLLM_SpendLogs.createMany({
        data: batchWithDates,
      });

      // New logs are appended at the tail, so the head is still this batch.
      spend_logs = spend_logs.slice(batch.length);
      processed += batch.length;

      console.log(`Flushed ${batch.length} logs to the DB.`);
    }

    console.log(`${processed} logs processed. Remaining in queue: ${spend_logs.length}`);
  } catch (err) {
    console.error(
      `Failed to flush logs to the DB after ${processed} logs. ${spend_logs.length} remain queued for retry.`,
      err
    );
  } finally {
    flushInProgress = false;
  }
};
|
||||
|
||||
// Setup interval for attempting to flush the logs
|
||||
setInterval(flushLogsToDb, FLUSH_INTERVAL);
|
||||
|
||||
// Route to receive log messages
|
||||
// POST /spend/update
// Accepts a JSON array of spend logs and appends them to the in-memory queue.
// Responds 400 when the body is not valid JSON or is not an array, instead of
// failing with a 500 or enqueueing malformed entries.
app.post('/spend/update', async (c) => {
  let payload: unknown;
  try {
    payload = await c.req.json();
  } catch {
    return c.json({ error: 'Request body must be valid JSON' }, 400);
  }

  if (!Array.isArray(payload)) {
    return c.json({ error: 'Request body must be a JSON array of spend logs' }, 400);
  }

  const incomingLogs = payload as LiteLLM_SpendLogs[];

  // Push one at a time: spreading a very large array into push() can exceed
  // the engine's argument-count limit and throw a RangeError.
  for (const log of incomingLogs) {
    spend_logs.push(log);
  }

  console.log(`Received and stored ${incomingLogs.length} logs. Total logs in memory: ${spend_logs.length}`);

  return c.json({ message: `Successfully stored ${incomingLogs.length} logs` });
});
|
||||
|
||||
|
||||
|
||||
const port = 3000
|
||||
console.log(`Server is running on port ${port}`)
|
||||
|
||||
serve({
|
||||
fetch: app.fetch,
|
||||
port
|
||||
})
|
||||
@ -1,13 +0,0 @@
|
||||
{
|
||||
"compilerOptions": {
|
||||
"target": "ESNext",
|
||||
"module": "ESNext",
|
||||
"moduleResolution": "Bundler",
|
||||
"strict": true,
|
||||
"types": [
|
||||
"node"
|
||||
],
|
||||
"jsx": "react-jsx",
|
||||
"jsxImportSource": "hono/jsx",
|
||||
}
|
||||
}
|
||||
@ -2849,7 +2849,7 @@ def _can_object_call_model(
|
||||
object_type=object_type
|
||||
),
|
||||
param="model",
|
||||
code=status.HTTP_401_UNAUTHORIZED,
|
||||
code=status.HTTP_403_FORBIDDEN,
|
||||
)
|
||||
|
||||
|
||||
@ -3082,7 +3082,7 @@ async def can_user_call_model(
|
||||
message=f"User not allowed to access model. No default model access, only team models allowed. Tried to access {model}",
|
||||
type=ProxyErrorTypes.key_model_access_denied,
|
||||
param="model",
|
||||
code=status.HTTP_401_UNAUTHORIZED,
|
||||
code=status.HTTP_403_FORBIDDEN,
|
||||
)
|
||||
|
||||
return _can_object_call_model(
|
||||
@ -3625,7 +3625,7 @@ async def _check_team_member_model_access(
|
||||
message=f"Team member not allowed to access model. User={valid_token.user_id}, Team={team_object.team_id}, Model={model}. Allowed member models = {member_allowed_models}",
|
||||
type=ProxyErrorTypes.team_model_access_denied,
|
||||
param="model",
|
||||
code=status.HTTP_401_UNAUTHORIZED,
|
||||
code=status.HTTP_403_FORBIDDEN,
|
||||
)
|
||||
|
||||
|
||||
|
||||
@ -123,7 +123,7 @@ class UserAPIKeyAuthExceptionHandler:
|
||||
message=e.message,
|
||||
type=ProxyErrorTypes.budget_exceeded,
|
||||
param=None,
|
||||
code=400,
|
||||
code=getattr(e, "status_code", status.HTTP_429_TOO_MANY_REQUESTS),
|
||||
)
|
||||
if isinstance(e, HTTPException):
|
||||
raise ProxyException(
|
||||
|
||||
@ -1107,7 +1107,7 @@ async def _user_api_key_auth_builder( # noqa: PLR0915
|
||||
raise ProxyException(
|
||||
message=f"Authentication Error - Expired Key. Key Expiry time {expiry_time} and current time {current_time}",
|
||||
type=ProxyErrorTypes.expired_key,
|
||||
code=400,
|
||||
code=status.HTTP_401_UNAUTHORIZED,
|
||||
param=abbreviate_api_key(api_key=api_key),
|
||||
)
|
||||
valid_token = update_valid_token_with_end_user_params(
|
||||
@ -1432,7 +1432,7 @@ async def _user_api_key_auth_builder( # noqa: PLR0915
|
||||
raise ProxyException(
|
||||
message=f"Authentication Error - Expired Key. Key Expiry time {expiry_time} and current time {current_time}",
|
||||
type=ProxyErrorTypes.expired_key,
|
||||
code=400,
|
||||
code=status.HTTP_401_UNAUTHORIZED,
|
||||
param=abbreviate_api_key(api_key=api_key),
|
||||
)
|
||||
|
||||
@ -2417,7 +2417,7 @@ async def _run_post_custom_auth_checks(
|
||||
raise ProxyException(
|
||||
message=f"Authentication Error - Expired Key. Key Expiry time {expiry_time} and current time {current_time}",
|
||||
type=ProxyErrorTypes.expired_key,
|
||||
code=400,
|
||||
code=status.HTTP_401_UNAUTHORIZED,
|
||||
param=(
|
||||
abbreviate_api_key(api_key=valid_token.token)
|
||||
if valid_token.token
|
||||
|
||||
@ -2,7 +2,7 @@ import asyncio
|
||||
import json
|
||||
import time
|
||||
from datetime import datetime, timezone
|
||||
from typing import Any, List, Literal, Optional, Union
|
||||
from typing import Any, Callable, List, Literal, Optional, Union
|
||||
|
||||
import litellm
|
||||
from litellm._logging import verbose_proxy_logger
|
||||
@ -83,93 +83,139 @@ class ResetBudgetJob:
|
||||
"Failed to reset spend counter %s: %s", counter_key, e
|
||||
)
|
||||
|
||||
@staticmethod
async def _invalidate_user_api_key_cache_entry(cache_key: str) -> None:
    """Evict one entry from ``user_api_key_cache`` so it is re-read from the DB.

    Tags and end-users are not covered by SpendCounterReseed.from_db. Once
    their spend counter expires, the budget check falls back to the cached
    object's ``.spend``. If that object outlives a budget reset, its stale
    spend keeps the entity blocked; removing it makes the next auth-time
    lookup load the zeroed row from Postgres.

    Args:
        cache_key: Key of the ``user_api_key_cache`` entry to delete.

    Failures are logged as warnings and never raised.
    """
    try:
        # Imported at call time rather than module import time
        # (presumably to avoid an import cycle with proxy_server; confirm).
        from litellm.proxy.proxy_server import (
            user_api_key_cache as management_cache,
        )

        await management_cache.async_delete_cache(key=cache_key)
    except Exception as exc:
        # Best effort: a failed eviction must not abort the reset job.
        verbose_proxy_logger.warning(
            "Failed to invalidate user_api_key_cache entry %s: %s", cache_key, exc
        )
|
||||
|
||||
async def _cascade_reset_spend_for_budget_link(
    self,
    budgets_to_reset: List[LiteLLM_BudgetTableFull],
    table: Any,
    counter_key_fn: Callable[[Any], str],
    log_subject: str,
    extra_where: Optional[dict] = None,
    cache_key_fn: Optional[Callable[[Any], str]] = None,
):
    """
    Zero ``spend`` on every row of ``table`` linked to a budget being reset.

    Args:
        budgets_to_reset: Budgets whose linked rows should be reset; entries
            with a ``None`` budget_id are ignored.
        table: Object exposing async ``find_many`` and ``update_many``.
        counter_key_fn: Maps a fetched row to its spend-counter key.
        log_subject: Human-readable row kind, used in the warning message.
        extra_where: Optional extra filter conditions merged into the
            ``budget_id`` filter.
        cache_key_fn: Optional; maps a row to its ``user_api_key_cache`` key.
            When given, that cache entry is dropped too. This is needed for
            entities whose spend check falls back to the cached object's
            ``.spend`` (tags, end-users), which would otherwise keep
            enforcing the pre-reset spend until the entry's TTL expires.

    Returns:
        The result of ``table.update_many``, or ``None`` when no budget in
        ``budgets_to_reset`` has a budget_id.
    """
    linked_ids = []
    for budget in budgets_to_reset:
        if budget.budget_id is not None:
            linked_ids.append(budget.budget_id)
    if len(linked_ids) == 0:
        return

    filters: dict = {"budget_id": {"in": linked_ids}}
    if extra_where:
        filters.update(extra_where)

    # Fetch the affected rows first so their counters can be invalidated.
    # A failed fetch only skips invalidation; the DB reset still runs.
    matched: list = []
    try:
        matched = await table.find_many(where=filters)
    except Exception as exc:
        verbose_proxy_logger.warning(
            "Failed to fetch %s for counter invalidation: %s", log_subject, exc
        )

    outcome = await table.update_many(where=filters, data={"spend": 0})

    drop_cached_object = cache_key_fn is not None
    for record in matched:
        await self._invalidate_spend_counter(counter_key_fn(record))
        if drop_cached_object:
            await self._invalidate_user_api_key_cache_entry(cache_key_fn(record))

    return outcome
|
||||
|
||||
async def reset_budget_for_litellm_team_members(
|
||||
self, budgets_to_reset: List[LiteLLM_BudgetTableFull]
|
||||
):
|
||||
"""
|
||||
Resets the budget for all LiteLLM Team Members if their budget has expired
|
||||
"""
|
||||
budget_ids = [
|
||||
budget.budget_id
|
||||
for budget in budgets_to_reset
|
||||
if budget.budget_id is not None
|
||||
]
|
||||
|
||||
try:
|
||||
memberships = await self.prisma_client.db.litellm_teammembership.find_many(
|
||||
where={"budget_id": {"in": budget_ids}}
|
||||
)
|
||||
except Exception as e:
|
||||
memberships = []
|
||||
verbose_proxy_logger.warning(
|
||||
"Failed to fetch team memberships for counter invalidation: %s", e
|
||||
)
|
||||
|
||||
update_result = await self.prisma_client.db.litellm_teammembership.update_many(
|
||||
where={"budget_id": {"in": budget_ids}},
|
||||
data={
|
||||
"spend": 0,
|
||||
},
|
||||
return await self._cascade_reset_spend_for_budget_link(
|
||||
budgets_to_reset=budgets_to_reset,
|
||||
table=self.prisma_client.db.litellm_teammembership,
|
||||
counter_key_fn=lambda m: f"spend:team_member:{m.user_id}:{m.team_id}",
|
||||
log_subject="team memberships",
|
||||
)
|
||||
|
||||
for m in memberships:
|
||||
await self._invalidate_spend_counter(
|
||||
f"spend:team_member:{m.user_id}:{m.team_id}"
|
||||
)
|
||||
|
||||
return update_result
|
||||
|
||||
async def reset_budget_for_keys_linked_to_budgets(
|
||||
self, budgets_to_reset: List[LiteLLM_BudgetTableFull]
|
||||
):
|
||||
"""
|
||||
Resets the spend for keys linked to budget tiers that are being reset.
|
||||
|
||||
This handles keys that have budget_id but no budget_duration set on the key
|
||||
itself. Keys with budget_id rely on their linked budget tier's reset schedule
|
||||
rather than having their own budget_duration.
|
||||
|
||||
Keys that have their own budget_duration are already handled by
|
||||
reset_budget_for_litellm_keys() and are excluded here to avoid
|
||||
double-resetting.
|
||||
Excludes keys with their own budget_duration; those are reset by
|
||||
reset_budget_for_litellm_keys() to avoid double-resetting.
|
||||
"""
|
||||
budget_ids = [
|
||||
budget.budget_id
|
||||
for budget in budgets_to_reset
|
||||
if budget.budget_id is not None
|
||||
]
|
||||
if not budget_ids:
|
||||
return
|
||||
|
||||
where_clause: dict = {
|
||||
"budget_id": {"in": budget_ids},
|
||||
"budget_duration": None, # only keys without their own reset schedule
|
||||
"spend": {"gt": 0}, # only reset keys that have accumulated spend
|
||||
}
|
||||
|
||||
try:
|
||||
keys = await self.prisma_client.db.litellm_verificationtoken.find_many(
|
||||
where=where_clause
|
||||
)
|
||||
except Exception as e:
|
||||
keys = []
|
||||
verbose_proxy_logger.warning(
|
||||
"Failed to fetch keys for counter invalidation: %s", e
|
||||
)
|
||||
|
||||
update_result = (
|
||||
await self.prisma_client.db.litellm_verificationtoken.update_many(
|
||||
where=where_clause,
|
||||
data={
|
||||
"spend": 0,
|
||||
},
|
||||
)
|
||||
return await self._cascade_reset_spend_for_budget_link(
|
||||
budgets_to_reset=budgets_to_reset,
|
||||
table=self.prisma_client.db.litellm_verificationtoken,
|
||||
counter_key_fn=lambda k: f"spend:key:{k.token}",
|
||||
log_subject="keys",
|
||||
extra_where={"budget_duration": None, "spend": {"gt": 0}},
|
||||
)
|
||||
|
||||
for k in keys:
|
||||
await self._invalidate_spend_counter(f"spend:key:{k.token}")
|
||||
async def reset_budget_for_orgs_linked_to_budgets(
    self, budgets_to_reset: List[LiteLLM_BudgetTableFull]
):
    """
    Zero the spend of organizations attached to budget tiers being reset.

    Delegates to ``_cascade_reset_spend_for_budget_link`` against the
    organization table, restricted to rows whose spend is above zero, and
    returns whatever that helper returns.
    """
    org_table = self.prisma_client.db.litellm_organizationtable

    def _org_counter_key(o):
        # Spend-counter key of a single organization row.
        return f"spend:org:{o.organization_id}"

    return await self._cascade_reset_spend_for_budget_link(
        budgets_to_reset=budgets_to_reset,
        table=org_table,
        counter_key_fn=_org_counter_key,
        log_subject="orgs",
        extra_where={"spend": {"gt": 0}},
    )
|
||||
|
||||
return update_result
|
||||
async def reset_budget_for_tags_linked_to_budgets(
    self, budgets_to_reset: List[LiteLLM_BudgetTableFull]
):
    """
    Zero the spend of tags attached to budget tiers being reset.

    In addition to the spend counter, each tag's ``user_api_key_cache``
    entry is dropped so that the next ``_tag_max_budget_check`` re-reads
    the zeroed row from the DB. ``SpendCounterReseed.from_db``
    intentionally returns ``None`` for tags, so once the spend counter
    expires the budget check falls back to the cached
    ``LiteLLM_TagTable.spend``; without this invalidation that stale
    ``.spend`` would keep the tag over-budget indefinitely.
    """
    tag_table = self.prisma_client.db.litellm_tagtable

    def _tag_counter_key(t):
        # Spend-counter key of a single tag row.
        return f"spend:tag:{t.tag_name}"

    def _tag_cache_key(t):
        # Management-cache key of a single tag row.
        return f"tag:{t.tag_name}"

    return await self._cascade_reset_spend_for_budget_link(
        budgets_to_reset=budgets_to_reset,
        table=tag_table,
        counter_key_fn=_tag_counter_key,
        log_subject="tags",
        extra_where={"spend": {"gt": 0}},
        cache_key_fn=_tag_cache_key,
    )
|
||||
|
||||
async def reset_budget_for_litellm_budget_table(self):
|
||||
"""
|
||||
@ -237,6 +283,14 @@ class ResetBudgetJob:
|
||||
budgets_to_reset=budgets_to_reset
|
||||
)
|
||||
|
||||
await self.reset_budget_for_orgs_linked_to_budgets(
|
||||
budgets_to_reset=budgets_to_reset
|
||||
)
|
||||
|
||||
await self.reset_budget_for_tags_linked_to_budgets(
|
||||
budgets_to_reset=budgets_to_reset
|
||||
)
|
||||
|
||||
if endusers_to_reset is not None and len(endusers_to_reset) > 0:
|
||||
for enduser in endusers_to_reset:
|
||||
try:
|
||||
|
||||
@ -211,6 +211,7 @@ from litellm import Router
|
||||
from litellm._logging import verbose_proxy_logger, verbose_router_logger
|
||||
from litellm.caching.caching import DualCache, RedisCache
|
||||
from litellm.caching.redis_cluster_cache import RedisClusterCache
|
||||
from litellm.proxy.common_utils.timezone_utils import get_budget_reset_time
|
||||
from litellm.proxy.common_utils.user_api_key_cache import UserApiKeyCache
|
||||
from litellm.constants import (
|
||||
_REALTIME_BODY_CACHE_SIZE,
|
||||
@ -6750,27 +6751,64 @@ class ProxyStartupEvent:
|
||||
"budget_duration not set on Proxy. budget_duration is required to use max_budget."
|
||||
)
|
||||
|
||||
# add proxy budget to db in the user table
|
||||
asyncio.create_task(
|
||||
generate_key_helper_fn( # type: ignore
|
||||
request_type="user",
|
||||
table_name="user",
|
||||
user_id=litellm_proxy_budget_name,
|
||||
duration=None,
|
||||
models=[],
|
||||
aliases={},
|
||||
config={},
|
||||
spend=0,
|
||||
max_budget=litellm.max_budget,
|
||||
budget_duration=litellm.budget_duration,
|
||||
query_type="update_data",
|
||||
update_key_values={
|
||||
"max_budget": litellm.max_budget,
|
||||
"budget_duration": litellm.budget_duration,
|
||||
},
|
||||
)
|
||||
cls._upsert_proxy_budget_with_reset_at_backfill(litellm_proxy_budget_name)
|
||||
)
|
||||
|
||||
@classmethod
async def _upsert_proxy_budget_with_reset_at_backfill(
    cls, litellm_proxy_budget_name: str
) -> None:
    """
    Write the configured max_budget / budget_duration onto the proxy admin
    user row, then fill in budget_reset_at when it is still NULL.

    The backfill filters on ``budget_reset_at IS NULL``, so it only fires
    for a row that already existed without a reset schedule (e.g. created
    through a different path before the proxy budget was configured). On
    later restarts it is a no-op, which means an active reset window is
    never slid forward.
    """
    configured_budget = {
        "max_budget": litellm.max_budget,
        "budget_duration": litellm.budget_duration,
    }
    await generate_key_helper_fn(  # type: ignore
        request_type="user",
        table_name="user",
        user_id=litellm_proxy_budget_name,
        duration=None,
        models=[],
        aliases={},
        config={},
        spend=0,
        query_type="update_data",
        update_key_values=dict(configured_budget),
        **configured_budget,
    )

    # Without this, the upsert leaves budget_reset_at=NULL on rows that
    # took the UPDATE path, and reset_budget_for_litellm_users never
    # matches them (NULL < now() is unknown in SQL) — so the proxy-wide
    # spend cap blocks forever once it's hit.
    if prisma_client is None or litellm.budget_duration is None:
        return

    only_unscheduled_admin_row = {
        "user_id": litellm_proxy_budget_name,
        "budget_reset_at": None,
    }
    try:
        next_reset_at = get_budget_reset_time(budget_duration=litellm.budget_duration)
        await prisma_client.db.litellm_usertable.update_many(
            where=only_unscheduled_admin_row,
            data={"budget_reset_at": next_reset_at},
        )
    except Exception as e:
        # Best effort: a failed backfill is logged, not raised.
        verbose_proxy_logger.warning(
            "Failed to backfill budget_reset_at on proxy admin row: %s", e
        )
|
||||
|
||||
@classmethod
|
||||
async def _warm_global_spend_cache(
|
||||
cls,
|
||||
|
||||
@ -22,7 +22,7 @@ dependencies = [
|
||||
"importlib-metadata>=8.0.0,<9.0",
|
||||
"tokenizers>=0.21.0,<1.0",
|
||||
"click>=8.0.0,<9.0",
|
||||
"jinja2>=3.1.0,<4.0",
|
||||
"jinja2>=3.1.6,<4.0",
|
||||
"aiohttp>=3.10,<4.0",
|
||||
"pydantic>=2.10.0,<3.0.0",
|
||||
"jsonschema>=4.0.0,<5.0",
|
||||
|
||||
@ -233,6 +233,12 @@ async def test_reset_budget_endusers_partial_failure():
|
||||
prisma_client.db.litellm_verificationtoken.update_many = AsyncMock(
|
||||
return_value={"count": 0}
|
||||
)
|
||||
# Mock db.litellm_organizationtable.update_many (used by reset_budget_for_orgs_linked_to_budgets)
|
||||
prisma_client.db.litellm_organizationtable.update_many = AsyncMock(
|
||||
return_value={"count": 0}
|
||||
)
|
||||
# Mock db.litellm_tagtable.update_many (used by reset_budget_for_tags_linked_to_budgets)
|
||||
prisma_client.db.litellm_tagtable.update_many = AsyncMock(return_value={"count": 0})
|
||||
|
||||
proxy_logging_obj = MagicMock()
|
||||
proxy_logging_obj.service_logging_obj = MagicMock()
|
||||
@ -400,6 +406,12 @@ async def test_reset_budget_continues_other_categories_on_failure():
|
||||
prisma_client.db.litellm_verificationtoken.update_many = AsyncMock(
|
||||
return_value={"count": 0}
|
||||
)
|
||||
# Mock db.litellm_organizationtable.update_many (used by reset_budget_for_orgs_linked_to_budgets)
|
||||
prisma_client.db.litellm_organizationtable.update_many = AsyncMock(
|
||||
return_value={"count": 0}
|
||||
)
|
||||
# Mock db.litellm_tagtable.update_many (used by reset_budget_for_tags_linked_to_budgets)
|
||||
prisma_client.db.litellm_tagtable.update_many = AsyncMock(return_value={"count": 0})
|
||||
|
||||
proxy_logging_obj = MagicMock()
|
||||
proxy_logging_obj.service_logging_obj = MagicMock()
|
||||
@ -884,6 +896,12 @@ async def test_service_logger_endusers_success():
|
||||
prisma_client.db.litellm_verificationtoken.update_many = AsyncMock(
|
||||
return_value={"count": 0}
|
||||
)
|
||||
# Mock db.litellm_organizationtable.update_many (used by reset_budget_for_orgs_linked_to_budgets)
|
||||
prisma_client.db.litellm_organizationtable.update_many = AsyncMock(
|
||||
return_value={"count": 0}
|
||||
)
|
||||
# Mock db.litellm_tagtable.update_many (used by reset_budget_for_tags_linked_to_budgets)
|
||||
prisma_client.db.litellm_tagtable.update_many = AsyncMock(return_value={"count": 0})
|
||||
|
||||
proxy_logging_obj = MagicMock()
|
||||
proxy_logging_obj.service_logging_obj = MagicMock()
|
||||
@ -966,6 +984,12 @@ async def test_service_logger_endusers_failure():
|
||||
prisma_client.db.litellm_verificationtoken.update_many = AsyncMock(
|
||||
return_value={"count": 0}
|
||||
)
|
||||
# Mock db.litellm_organizationtable.update_many (used by reset_budget_for_orgs_linked_to_budgets)
|
||||
prisma_client.db.litellm_organizationtable.update_many = AsyncMock(
|
||||
return_value={"count": 0}
|
||||
)
|
||||
# Mock db.litellm_tagtable.update_many (used by reset_budget_for_tags_linked_to_budgets)
|
||||
prisma_client.db.litellm_tagtable.update_many = AsyncMock(return_value={"count": 0})
|
||||
|
||||
proxy_logging_obj = MagicMock()
|
||||
proxy_logging_obj.service_logging_obj = MagicMock()
|
||||
@ -1060,6 +1084,10 @@ async def test_reset_budget_for_litellm_team_members_called():
|
||||
prisma_client.db.litellm_verificationtoken.update_many = AsyncMock(
|
||||
return_value={"count": 0}
|
||||
)
|
||||
prisma_client.db.litellm_organizationtable.update_many = AsyncMock(
|
||||
return_value={"count": 0}
|
||||
)
|
||||
prisma_client.db.litellm_tagtable.update_many = AsyncMock(return_value={"count": 0})
|
||||
|
||||
proxy_logging_obj = MagicMock()
|
||||
proxy_logging_obj.service_logging_obj = MagicMock()
|
||||
|
||||
@ -25,8 +25,8 @@ async def make_calls_until_budget_exceeded(session, key: str, call_function, **k
|
||||
|
||||
# Check error structure and values that should be consistent
|
||||
assert (
|
||||
error_dict["code"] == "400"
|
||||
), f"Expected error code 400, got: {error_dict['code']}"
|
||||
error_dict["code"] == "429"
|
||||
), f"Expected error code 429, got: {error_dict['code']}"
|
||||
assert (
|
||||
error_dict["type"] == "budget_exceeded"
|
||||
), f"Expected error type budget_exceeded, got: {error_dict['type']}"
|
||||
|
||||
@ -99,7 +99,7 @@ async def test_model_access_patterns(key_models, test_model, expect_success):
|
||||
# Assert error structure and values
|
||||
assert _error_body["type"] == "key_model_access_denied"
|
||||
assert _error_body["param"] == "model"
|
||||
assert _error_body["code"] == "401"
|
||||
assert _error_body["code"] == "403"
|
||||
assert "key not allowed to access model" in _error_body["message"]
|
||||
|
||||
|
||||
@ -297,7 +297,7 @@ def _validate_model_access_exception(
|
||||
# Assert error structure and values
|
||||
assert _error_body["type"] == expected_type
|
||||
assert _error_body["param"] == "model"
|
||||
assert _error_body["code"] == "401"
|
||||
assert _error_body["code"] == "403"
|
||||
if expected_type == "key_model_access_denied":
|
||||
assert "key not allowed to access model" in _error_body["message"]
|
||||
elif expected_type == "team_model_access_denied":
|
||||
|
||||
@ -12,6 +12,7 @@ from datetime import datetime, timedelta
|
||||
|
||||
import httpx
|
||||
import pytest
|
||||
from fastapi import status
|
||||
|
||||
import litellm
|
||||
from litellm.proxy._types import (
|
||||
@ -31,6 +32,7 @@ from litellm.proxy._types import (
|
||||
)
|
||||
from litellm.proxy.auth.auth_checks import (
|
||||
ExperimentalUIJWTToken,
|
||||
_can_object_call_model,
|
||||
_can_object_call_vector_stores,
|
||||
_check_end_user_budget,
|
||||
_check_team_member_budget,
|
||||
@ -206,6 +208,52 @@ def test_get_key_object_from_ui_hash_key_invalid():
|
||||
assert key_object is None
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "object_type,expected_error_type",
    [
        ("key", ProxyErrorTypes.key_model_access_denied),
        ("team", ProxyErrorTypes.team_model_access_denied),
        ("user", ProxyErrorTypes.user_model_access_denied),
        ("org", ProxyErrorTypes.org_model_access_denied),
        ("project", ProxyErrorTypes.project_model_access_denied),
    ],
)
def test_can_object_call_model_denials_return_forbidden(
    object_type, expected_error_type
):
    """A model-access denial carries the object-specific error type and a 403 code."""
    denied_request = dict(
        model="restricted-model",
        llm_router=None,
        models=["allowed-model"],
        object_type=object_type,
    )

    with pytest.raises(ProxyException) as exc_info:
        _can_object_call_model(**denied_request)

    raised = exc_info.value
    assert raised.type == expected_error_type
    assert int(raised.code) == status.HTTP_403_FORBIDDEN
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_can_user_call_model_no_default_models_returns_forbidden():
    """A user limited to the ``no_default_models`` marker is denied with a 403 code."""
    from litellm.proxy._types import SpecialModelNames
    from litellm.proxy.auth.auth_checks import can_user_call_model

    restricted_models = [SpecialModelNames.no_default_models.value]
    user_object = LiteLLM_UserTable(user_id="test-user", models=restricted_models)

    with pytest.raises(ProxyException) as exc_info:
        await can_user_call_model(
            model="restricted-model",
            llm_router=None,
            user_object=user_object,
        )

    raised = exc_info.value
    assert raised.type == ProxyErrorTypes.key_model_access_denied
    assert int(raised.code) == status.HTTP_403_FORBIDDEN
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_get_key_object_should_reconnect_once_on_db_connection_error():
|
||||
mock_prisma_client = MagicMock()
|
||||
@ -1144,6 +1192,7 @@ async def test_check_team_member_model_access_denied_model():
|
||||
proxy_logging_obj=MagicMock(),
|
||||
)
|
||||
assert exc_info.value.type == ProxyErrorTypes.team_model_access_denied
|
||||
assert int(exc_info.value.code) == status.HTTP_403_FORBIDDEN
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
|
||||
@ -140,6 +140,7 @@ async def test_handle_authentication_error_budget_exceeded():
|
||||
)
|
||||
|
||||
assert exc_info.value.type == ProxyErrorTypes.budget_exceeded
|
||||
assert int(exc_info.value.code) == status.HTTP_429_TOO_MANY_REQUESTS
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
|
||||
@ -1,6 +1,7 @@
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
from datetime import datetime, timedelta
|
||||
from types import SimpleNamespace
|
||||
from unittest.mock import ANY, AsyncMock, MagicMock, patch
|
||||
|
||||
@ -9,6 +10,7 @@ sys.path.insert(
|
||||
) # Adds the parent directory to the system path
|
||||
|
||||
import pytest
|
||||
from fastapi import status
|
||||
|
||||
import litellm
|
||||
import litellm.proxy.proxy_server
|
||||
@ -178,6 +180,26 @@ async def test_custom_auth_does_not_enforce_key_model_access_by_default():
|
||||
mock_can_key.assert_not_awaited()
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_post_custom_auth_expired_key_returns_unauthorized():
    """A token whose expiry lies in the past is rejected as expired_key with a 401 code."""
    one_minute_ago = datetime.now() - timedelta(minutes=1)
    expired_token = UserAPIKeyAuth(token="test_token", expires=one_minute_ago)

    check_kwargs = dict(
        valid_token=expired_token,
        request=MagicMock(),
        request_data={},
        route="/v1/chat/completions",
        parent_otel_span=None,
    )
    with pytest.raises(ProxyException) as exc_info:
        await _run_post_custom_auth_checks(**check_kwargs)

    raised = exc_info.value
    assert raised.type == ProxyErrorTypes.expired_key
    assert int(raised.code) == status.HTTP_401_UNAUTHORIZED
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_custom_auth_honors_key_level_model_access_restriction_allowed_with_opt_in():
|
||||
valid_token = UserAPIKeyAuth(token="test_token", models=["gpt-4o-mini"])
|
||||
@ -934,6 +956,7 @@ async def test_proxy_admin_expired_key_from_cache():
|
||||
assert (
|
||||
exc_info.value.type == ProxyErrorTypes.expired_key
|
||||
), f"Expected expired_key error type, got {exc_info.value.type}"
|
||||
assert int(exc_info.value.code) == status.HTTP_401_UNAUTHORIZED
|
||||
assert "Expired Key" in str(
|
||||
exc_info.value.message
|
||||
), f"Exception message should mention 'Expired Key', got: {exc_info.value.message}"
|
||||
|
||||
@ -39,6 +39,46 @@ class MockLiteLLMVerificationToken:
|
||||
return {"count": 1}
|
||||
|
||||
|
||||
class _MockBudgetLinkedTable:
    """
    Shared in-memory stand-in for a table whose rows link to a budget tier.

    Records every ``find_many`` / ``update_many`` call so tests can assert
    on the filters and payloads that were used. ``find_many`` returns the
    rows previously supplied through ``set_find_many_results`` (empty by
    default); ``update_many`` always reports ``{"count": 1}``.
    """

    def __init__(self):
        # One entry per update_many call: {"where": ..., "data": ...}
        self.update_many_calls: List[Dict[str, Any]] = []
        # One entry per find_many call: {"where": ...}
        self.find_many_calls: List[Dict[str, Any]] = []
        self._find_many_results: List[Any] = []

    def set_find_many_results(self, results: List[Any]):
        """Configure the rows returned by subsequent ``find_many`` calls."""
        self._find_many_results = results

    async def find_many(self, where: Dict[str, Any]) -> List[Any]:
        """Record the filter and return the configured rows."""
        self.find_many_calls.append({"where": where})
        return self._find_many_results

    async def update_many(
        self, where: Dict[str, Any], data: Dict[str, Any]
    ) -> Dict[str, Any]:
        """Record the filter and payload; report a single updated row."""
        self.update_many_calls.append({"where": where, "data": data})
        return {"count": 1}


class MockLiteLLMOrganizationTable(_MockBudgetLinkedTable):
    """Mock of the organization table; behavior comes from the shared base."""


class MockLiteLLMTagTable(_MockBudgetLinkedTable):
    """Mock of the tag table; behavior comes from the shared base."""
|
||||
|
||||
|
||||
class MockLiteLLMEndUserTable:
|
||||
def __init__(self):
|
||||
self.find_many_calls: List[Dict[str, Any]] = []
|
||||
@ -57,6 +97,8 @@ class MockDB:
|
||||
self.litellm_teammembership = MockLiteLLMTeamMembership()
|
||||
self.litellm_verificationtoken = MockLiteLLMVerificationToken()
|
||||
self.litellm_endusertable = MockLiteLLMEndUserTable()
|
||||
self.litellm_organizationtable = MockLiteLLMOrganizationTable()
|
||||
self.litellm_tagtable = MockLiteLLMTagTable()
|
||||
|
||||
|
||||
class MockPrismaClient:
|
||||
@ -459,6 +501,100 @@ def test_reset_budget_for_keys_linked_to_budgets_empty(
|
||||
assert len(calls) == 0
|
||||
|
||||
|
||||
def test_reset_budget_for_orgs_linked_to_budgets(reset_budget_job, mock_prisma_client):
    """
    Resetting a budget tier must also zero the spend of orgs that point at
    that tier through budget_id.
    """
    current_time = datetime.now(timezone.utc)
    tier_attrs = {
        "max_budget": 100.0,
        "budget_duration": "30d",
        "budget_reset_at": current_time - timedelta(hours=1),
        "budget_id": "30d-org-budget",
        "created_at": current_time - timedelta(days=30),
    }
    expired_tier = type("LiteLLM_BudgetTableFull", (), tier_attrs)

    asyncio.run(
        reset_budget_job.reset_budget_for_orgs_linked_to_budgets(
            budgets_to_reset=[expired_tier]
        )
    )

    recorded = mock_prisma_client.db.litellm_organizationtable.update_many_calls
    assert len(recorded) == 1
    where_clause, payload = recorded[0]["where"], recorded[0]["data"]
    assert where_clause["budget_id"] == {"in": ["30d-org-budget"]}
    assert where_clause["spend"] == {"gt": 0}
    assert payload["spend"] == 0
|
||||
|
||||
|
||||
def test_reset_budget_for_orgs_linked_to_budgets_empty(
    reset_budget_job, mock_prisma_client
):
    """
    With no budgets to reset, the organization table must not receive any
    update.
    """
    pending_reset = reset_budget_job.reset_budget_for_orgs_linked_to_budgets(
        budgets_to_reset=[]
    )
    asyncio.run(pending_reset)

    org_table = mock_prisma_client.db.litellm_organizationtable
    assert len(org_table.update_many_calls) == 0
|
||||
|
||||
|
||||
def test_reset_budget_for_tags_linked_to_budgets(reset_budget_job, mock_prisma_client):
    """
    Resetting a budget tier must also zero the spend of tags that point at
    that tier through budget_id.
    """
    current_time = datetime.now(timezone.utc)
    tier_attrs = {
        "max_budget": 50.0,
        "budget_duration": "30d",
        "budget_reset_at": current_time - timedelta(hours=1),
        "budget_id": "30d-tag-budget",
        "created_at": current_time - timedelta(days=30),
    }
    expired_tier = type("LiteLLM_BudgetTableFull", (), tier_attrs)

    asyncio.run(
        reset_budget_job.reset_budget_for_tags_linked_to_budgets(
            budgets_to_reset=[expired_tier]
        )
    )

    recorded = mock_prisma_client.db.litellm_tagtable.update_many_calls
    assert len(recorded) == 1
    where_clause, payload = recorded[0]["where"], recorded[0]["data"]
    assert where_clause["budget_id"] == {"in": ["30d-tag-budget"]}
    assert where_clause["spend"] == {"gt": 0}
    assert payload["spend"] == 0
|
||||
|
||||
|
||||
def test_reset_budget_for_tags_linked_to_budgets_empty(
    reset_budget_job, mock_prisma_client
):
    """
    With no budgets to reset, the tag table must not receive any update.
    """
    pending_reset = reset_budget_job.reset_budget_for_tags_linked_to_budgets(
        budgets_to_reset=[]
    )
    asyncio.run(pending_reset)

    tag_table = mock_prisma_client.db.litellm_tagtable
    assert len(tag_table.update_many_calls) == 0
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"budget_duration, expected_day, expected_month",
|
||||
[
|
||||
@ -618,6 +754,75 @@ def test_budget_table_reset_also_resets_linked_keys(
|
||||
assert calls[0]["data"]["spend"] == 0
|
||||
|
||||
|
||||
def test_budget_table_reset_also_resets_linked_orgs(
    reset_budget_job, mock_prisma_client
):
    """
    Integration-style check: running reset_budget_for_litellm_budget_table
    must zero the spend of orgs linked to the expiring budget tiers, on top
    of end-users, team members, and keys.
    """
    current_time = datetime.now(timezone.utc)
    tier_attrs = {
        "max_budget": 100.0,
        "budget_duration": "30d",
        "budget_reset_at": current_time - timedelta(hours=1),
        "budget_id": "30d-org-budget",
        "created_at": current_time - timedelta(days=30),
    }
    # Seed the mock client with a single already-expired tier.
    mock_prisma_client.data["budget"] = [
        type("LiteLLM_BudgetTableFull", (), tier_attrs)
    ]

    asyncio.run(reset_budget_job.reset_budget_for_litellm_budget_table())

    calls = mock_prisma_client.db.litellm_organizationtable.update_many_calls
    assert len(calls) == 1, (
        "Expected reset_budget_for_litellm_budget_table to also reset orgs "
        f"linked to expiring budgets, but got {len(calls)} update_many calls"
    )
    org_update = calls[0]
    assert org_update["where"]["budget_id"] == {"in": ["30d-org-budget"]}
    assert org_update["data"]["spend"] == 0
|
||||
|
||||
|
||||
def test_budget_table_reset_also_resets_linked_tags(
    reset_budget_job, mock_prisma_client
):
    """
    Integration-style check: running reset_budget_for_litellm_budget_table
    must zero the spend of tags linked to the expiring budget tiers.
    """
    current_time = datetime.now(timezone.utc)
    tier_attrs = {
        "max_budget": 50.0,
        "budget_duration": "30d",
        "budget_reset_at": current_time - timedelta(hours=1),
        "budget_id": "30d-tag-budget",
        "created_at": current_time - timedelta(days=30),
    }
    # Seed the mock client with a single already-expired tier.
    mock_prisma_client.data["budget"] = [
        type("LiteLLM_BudgetTableFull", (), tier_attrs)
    ]

    asyncio.run(reset_budget_job.reset_budget_for_litellm_budget_table())

    calls = mock_prisma_client.db.litellm_tagtable.update_many_calls
    assert len(calls) == 1, (
        "Expected reset_budget_for_litellm_budget_table to also reset tags "
        f"linked to expiring budgets, but got {len(calls)} update_many calls"
    )
    tag_update = calls[0]
    assert tag_update["where"]["budget_id"] == {"in": ["30d-tag-budget"]}
    assert tag_update["data"]["spend"] == 0
|
||||
|
||||
|
||||
def test_reset_budget_resets_endusers_with_null_budget_id(
|
||||
reset_budget_job, mock_prisma_client
|
||||
):
|
||||
@ -1057,16 +1262,26 @@ def test_reset_budget_windows_query_error_does_not_break_team_path(monkeypatch):
|
||||
|
||||
|
||||
def _make_counter_invalidation_job(monkeypatch):
|
||||
"""Stub spend_counter_cache so we can observe invalidation calls."""
|
||||
"""Stub spend_counter_cache (and user_api_key_cache) so we can observe
|
||||
invalidation calls.
|
||||
|
||||
Both caches are looked up via ``from litellm.proxy.proxy_server import
|
||||
<name>`` inside the reset job, so we publish them on a fake module.
|
||||
"""
|
||||
spend_counter_cache = MagicMock()
|
||||
spend_counter_cache.in_memory_cache.set_cache = MagicMock()
|
||||
spend_counter_cache.redis_cache = MagicMock()
|
||||
spend_counter_cache.redis_cache.async_set_cache = AsyncMock()
|
||||
|
||||
user_api_key_cache = MagicMock()
|
||||
user_api_key_cache.async_delete_cache = AsyncMock()
|
||||
|
||||
fake_module = types.ModuleType("litellm.proxy.proxy_server")
|
||||
fake_module.spend_counter_cache = spend_counter_cache
|
||||
fake_module.user_api_key_cache = user_api_key_cache
|
||||
monkeypatch.setitem(sys.modules, "litellm.proxy.proxy_server", fake_module)
|
||||
|
||||
spend_counter_cache.user_api_key_cache = user_api_key_cache
|
||||
return spend_counter_cache
|
||||
|
||||
|
||||
@ -1205,3 +1420,136 @@ def test_reset_budget_for_keys_linked_to_budgets_invalidates_redis_counter(monke
|
||||
counter_cache.in_memory_cache.set_cache.assert_any_call(
|
||||
key="spend:key:sk-linked", value=0.0, ttl=60
|
||||
)
|
||||
|
||||
|
||||
def test_reset_budget_for_orgs_linked_to_budgets_invalidates_redis_counter(monkeypatch):
    """A budget-tier reset must zero the spend counter of every linked org."""
    counter_cache = _make_counter_invalidation_job(monkeypatch)

    org_table = MagicMock()
    org_table.find_many = AsyncMock(
        return_value=[type("Org", (), {"organization_id": "org-acme"})]
    )
    org_table.update_many = AsyncMock(return_value={"count": 1})
    prisma_client = MagicMock()
    prisma_client.db.litellm_organizationtable = org_table

    job = ResetBudgetJob(proxy_logging_obj=MagicMock(), prisma_client=prisma_client)
    tiers = [type("B", (), {"budget_id": "budget-1"})]
    asyncio.run(job.reset_budget_for_orgs_linked_to_budgets(tiers))

    # Both cache layers must have been overwritten with a zero counter.
    zeroed = dict(key="spend:org:org-acme", value=0.0, ttl=60)
    counter_cache.in_memory_cache.set_cache.assert_any_call(**zeroed)
    counter_cache.redis_cache.async_set_cache.assert_any_await(**zeroed)
|
||||
|
||||
|
||||
def test_reset_budget_for_tags_linked_to_budgets_invalidates_redis_counter(monkeypatch):
    """A budget-tier reset must zero the spend counter of every linked tag."""
    counter_cache = _make_counter_invalidation_job(monkeypatch)

    tag_table = MagicMock()
    tag_table.find_many = AsyncMock(
        return_value=[type("Tag", (), {"tag_name": "tenant-42"})]
    )
    tag_table.update_many = AsyncMock(return_value={"count": 1})
    prisma_client = MagicMock()
    prisma_client.db.litellm_tagtable = tag_table

    job = ResetBudgetJob(proxy_logging_obj=MagicMock(), prisma_client=prisma_client)
    tiers = [type("B", (), {"budget_id": "budget-1"})]
    asyncio.run(job.reset_budget_for_tags_linked_to_budgets(tiers))

    # Both cache layers must have been overwritten with a zero counter.
    zeroed = dict(key="spend:tag:tenant-42", value=0.0, ttl=60)
    counter_cache.in_memory_cache.set_cache.assert_any_call(**zeroed)
    counter_cache.redis_cache.async_set_cache.assert_any_await(**zeroed)
|
||||
|
||||
|
||||
def test_reset_budget_for_tags_linked_to_budgets_invalidates_management_cache(
    monkeypatch,
):
    """Regression guard: tag spend must not stay frozen across reset cycles.

    ``SpendCounterReseed.from_db`` returns ``None`` for ``spend:tag:*``
    keys, so after the spend counter expires the tag budget check falls
    back to the cached ``LiteLLM_TagTable.spend``. Unless the reset drops
    the management cache entry, the cached object keeps the pre-reset
    spend and ``_tag_max_budget_check`` goes on rejecting requests as
    over budget even though the DB row has been zeroed.
    """
    counter_cache = _make_counter_invalidation_job(monkeypatch)

    tag_table = MagicMock()
    tag_table.find_many = AsyncMock(
        return_value=[type("Tag", (), {"tag_name": "tenant-42"})]
    )
    tag_table.update_many = AsyncMock(return_value={"count": 1})
    prisma_client = MagicMock()
    prisma_client.db.litellm_tagtable = tag_table

    job = ResetBudgetJob(proxy_logging_obj=MagicMock(), prisma_client=prisma_client)
    tiers = [type("B", (), {"budget_id": "budget-1"})]
    asyncio.run(job.reset_budget_for_tags_linked_to_budgets(tiers))

    delete_cache = counter_cache.user_api_key_cache.async_delete_cache
    delete_cache.assert_any_await(key="tag:tenant-42")
|
||||
|
||||
|
||||
def test_reset_budget_for_tags_linked_to_budgets_invalidates_each_tag_management_cache(
    monkeypatch,
):
    """Every tag sharing the expired budget tier gets its
    ``user_api_key_cache`` entry dropped, not only the first one."""
    counter_cache = _make_counter_invalidation_job(monkeypatch)

    tag_names = ("tenant-a", "tenant-b", "tenant-c")
    linked_tags = [type("Tag", (), {"tag_name": name}) for name in tag_names]

    tag_table = MagicMock()
    tag_table.find_many = AsyncMock(return_value=linked_tags)
    tag_table.update_many = AsyncMock(return_value={"count": 3})
    prisma_client = MagicMock()
    prisma_client.db.litellm_tagtable = tag_table

    job = ResetBudgetJob(proxy_logging_obj=MagicMock(), prisma_client=prisma_client)
    tiers = [type("B", (), {"budget_id": "budget-1"})]
    asyncio.run(job.reset_budget_for_tags_linked_to_budgets(tiers))

    # Collect the ``key`` keyword of every awaited delete call.
    deleted_keys = set()
    delete_cache = counter_cache.user_api_key_cache.async_delete_cache
    for awaited in delete_cache.await_args_list:
        deleted_keys.add(awaited.kwargs.get("key"))
    assert deleted_keys == {"tag:tenant-a", "tag:tenant-b", "tag:tenant-c"}
|
||||
|
||||
|
||||
def test_reset_budget_for_keys_linked_to_budgets_does_not_touch_management_cache(
    monkeypatch,
):
    """Management-cache invalidation is opt-in.

    Keys, orgs and team-members depend on ``SpendCounterReseed.from_db``
    (which DOES cover their counter keys), so the ``cache_key_fn`` hook is
    deliberately left unwired for them. This test pins that no-op: a later
    refactor must not start evicting the key cache, which would add a DB
    round-trip per reset cycle without fixing anything.
    """
    invalidation_job = _make_counter_invalidation_job(monkeypatch)

    # Lightweight stand-ins carrying just a ``budget_id`` / ``token``.
    budget_stub = type("B", (), {"budget_id": "budget-1"})
    key_stub = type("Key", (), {"token": "sk-linked"})

    db_client = MagicMock()
    token_table = db_client.db.litellm_verificationtoken
    token_table.find_many = AsyncMock(return_value=[key_stub])
    token_table.update_many = AsyncMock(return_value={"count": 1})

    reset_job = ResetBudgetJob(proxy_logging_obj=MagicMock(), prisma_client=db_client)
    asyncio.run(reset_job.reset_budget_for_keys_linked_to_budgets([budget_stub]))

    delete_mock = invalidation_job.user_api_key_cache.async_delete_cache
    delete_mock.assert_not_awaited()
|
||||
|
||||
@ -1728,6 +1728,67 @@ async def test_add_proxy_budget_to_db_only_creates_user_no_keys():
|
||||
assert call_args.kwargs["query_type"] == "update_data"
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_add_proxy_budget_to_db_backfills_budget_reset_at():
    """
    Test that _upsert_proxy_budget_with_reset_at_backfill issues a conditional
    update_many with `WHERE budget_reset_at IS NULL` to backfill the column on
    rows that pre-existed without a reset schedule. Without this, the proxy
    admin row stays at NULL and reset_budget_for_litellm_users never matches
    it (NULL < now() is unknown in SQL), so the global proxy budget never
    resets.

    The ``litellm.budget_duration`` / ``litellm.max_budget`` module settings
    are patched only for the duration of the call, so the values used here do
    not leak into other tests running in the same process.
    """
    from datetime import datetime, timezone
    from unittest.mock import AsyncMock, MagicMock, patch

    import litellm
    from litellm.proxy.proxy_server import ProxyStartupEvent

    litellm_proxy_budget_name = "litellm-proxy-budget"

    mock_prisma = MagicMock()
    mock_prisma.db.litellm_usertable.update_many = AsyncMock(return_value={"count": 1})

    mock_generate_key_helper = AsyncMock(
        return_value={
            "user_id": litellm_proxy_budget_name,
            "max_budget": 100.0,
            "budget_duration": "30d",
            "spend": 0,
            "models": [],
        }
    )

    with (
        # Scoped overrides: restored automatically when the block exits,
        # instead of permanently mutating the litellm module globals.
        patch.object(litellm, "budget_duration", "30d"),
        patch.object(litellm, "max_budget", 100.0),
        patch(
            "litellm.proxy.proxy_server.generate_key_helper_fn",
            mock_generate_key_helper,
        ),
        patch("litellm.proxy.proxy_server.prisma_client", mock_prisma),
    ):
        await ProxyStartupEvent._upsert_proxy_budget_with_reset_at_backfill(
            litellm_proxy_budget_name
        )

    # Upsert ran with the configured budget
    mock_generate_key_helper.assert_called_once()

    # Backfill update_many ran with the conditional WHERE
    mock_prisma.db.litellm_usertable.update_many.assert_called_once()
    backfill_call = mock_prisma.db.litellm_usertable.update_many.call_args
    assert backfill_call.kwargs["where"]["user_id"] == litellm_proxy_budget_name
    assert backfill_call.kwargs["where"]["budget_reset_at"] is None

    # The backfilled value must be a real future datetime — anything else and
    # reset_budget_for_litellm_users would still skip the row.
    backfilled_reset_at = backfill_call.kwargs["data"]["budget_reset_at"]
    assert isinstance(backfilled_reset_at, datetime)
    assert backfilled_reset_at > datetime.now(timezone.utc)
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_custom_ui_sso_sign_in_handler_config_loading():
|
||||
"""
|
||||
|
||||
@ -303,7 +303,7 @@ async def test_chat_completion():
|
||||
api_key=key_gen["key"],
|
||||
api_version="2024-02-15-preview",
|
||||
)
|
||||
with pytest.raises(openai.AuthenticationError) as e:
|
||||
with pytest.raises(openai.PermissionDeniedError) as e:
|
||||
response = await azure_client.chat.completions.create(
|
||||
model="gpt-4",
|
||||
messages=[{"role": "user", "content": "Hello!"}],
|
||||
|
||||
@ -302,14 +302,14 @@ async def test_user_model_access():
|
||||
model="good-model",
|
||||
)
|
||||
|
||||
with pytest.raises(openai.AuthenticationError):
|
||||
with pytest.raises(openai.PermissionDeniedError):
|
||||
await chat_completion(
|
||||
session=session,
|
||||
key=key,
|
||||
model="bedrock/anthropic.claude-3-sonnet-20240229-v1:0",
|
||||
)
|
||||
|
||||
with pytest.raises(openai.AuthenticationError):
|
||||
with pytest.raises(openai.PermissionDeniedError):
|
||||
await chat_completion(
|
||||
session=session,
|
||||
key=key,
|
||||
|
||||
2
uv.lock
generated
2
uv.lock
generated
@ -3405,7 +3405,7 @@ requires-dist = [
|
||||
{ name = "gunicorn", marker = "extra == 'proxy'", specifier = "==23.0.0" },
|
||||
{ name = "httpx", specifier = ">=0.28.0,<1.0" },
|
||||
{ name = "importlib-metadata", specifier = ">=8.0.0,<9.0" },
|
||||
{ name = "jinja2", specifier = ">=3.1.0,<4.0" },
|
||||
{ name = "jinja2", specifier = ">=3.1.6,<4.0" },
|
||||
{ name = "jsonschema", specifier = ">=4.0.0,<5.0" },
|
||||
{ name = "langfuse", marker = "extra == 'proxy-runtime'", specifier = "==2.59.7" },
|
||||
{ name = "litellm-enterprise", marker = "extra == 'proxy'", editable = "enterprise" },
|
||||
|
||||
Loading…
Reference in New Issue
Block a user