Revert "docs: add v1.82.3 release notes and update provider_endpoints_support…" (#23817)

This reverts commit 966124966f.
This commit is contained in:
Krish Dholakia 2026-03-16 22:26:45 -07:00 committed by GitHub
parent 966124966f
commit 245a3d2b26
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 23 additions and 406 deletions

View File

@ -1,374 +0,0 @@
---
title: "v1.82.3 - Nebius AI, gpt-5.4, Gemini 3.x, FLUX Kontext, and 116 New Models"
slug: "v1-82-3"
date: 2026-03-16T00:00:00
authors:
- name: Krrish Dholakia
title: CEO, LiteLLM
url: https://www.linkedin.com/in/krish-d/
image_url: https://pbs.twimg.com/profile_images/1298587542745358340/DZv3Oj-h_400x400.jpg
- name: Ishaan Jaff
title: CTO, LiteLLM
url: https://www.linkedin.com/in/reffajnaahsi/
image_url: https://pbs.twimg.com/profile_images/1613813310264340481/lz54oEiB_400x400.jpg
hide_table_of_contents: false
---
## Deploy this version
import Tabs from '@theme/Tabs';
import TabItem from '@theme/TabItem';
<Tabs>
<TabItem value="docker" label="Docker">
``` showLineNumbers title="docker run litellm"
docker run \
-e STORE_MODEL_IN_DB=True \
-p 4000:4000 \
ghcr.io/berriai/litellm:main-1.82.3-stable
```
</TabItem>
<TabItem value="pip" label="Pip">
``` showLineNumbers title="pip install litellm"
pip install litellm==1.82.3
```
</TabItem>
</Tabs>
## Key Highlights
- **Nebius AI — new provider** — [30 models across DeepSeek, Qwen, Llama, Mistral, NVIDIA, and BAAI available via Nebius AI cloud](../../docs/providers/nebius) - [PR #21542](https://github.com/BerriAI/litellm/pull/21542)
- **OpenAI gpt-5.4 / gpt-5.4-pro — day 0** — Full pricing and routing support for `gpt-5.4` (1M context, $2.50/$15.00) and `gpt-5.4-pro` ($30.00/$180.00) on OpenAI and Azure
- **Gemini 3.x models**`gemini-3-flash-preview`, `gemini-3.1-pro-preview`, `gemini-3.1-flash-image-preview`, and `gemini-embedding-2-preview` added to cost map for Google AI and Vertex AI
- **FLUX Kontext image editing**`flux-kontext-pro` and `flux-kontext-max` added to Black Forest Labs, alongside `flux-pro-1.0-fill` and `flux-pro-1.0-expand` for inpainting and outpainting
- **116 new models, 132 deprecated models cleaned up** — Major model map refresh including Mistral Magistral, Dashscope Qwen3 VL, xAI Grok via Azure AI, ZAI GLM-5, Serper Search; removal of OpenAI GPT-3.5/GPT-4 legacy variants, Gemini 1.5, and Vertex AI PaLM2
- **SageMaker Nova provider** — [New `sagemaker_nova` provider for Amazon Nova models on SageMaker](../../docs/providers/aws_sagemaker) - [PR #21542](https://github.com/BerriAI/litellm/pull/21542)
- **Secret redaction in logs** — API keys, tokens, and credentials automatically scrubbed from all proxy log output. Enabled by default; opt out with `LITELLM_DISABLE_REDACT_SECRETS=true` - [PR #23668](https://github.com/BerriAI/litellm/pull/23668)
- **Streaming stability fix** — Critical fix for `RuntimeError: Cannot send a request, as the client has been closed.` crashes after ~1 hour in production - [PR #22926](https://github.com/BerriAI/litellm/pull/22926)
---
## New Providers and Endpoints
### New Providers (4 new providers)
| Provider | Supported LiteLLM Endpoints | Description |
| -------- | --------------------------- | ----------- |
| [Nebius AI](../../docs/providers/nebius) (`nebius/`) | `/chat/completions`, `/embeddings` | EU-based AI cloud with 30+ open models — DeepSeek, Qwen3, Llama 3.1/3.3, NVIDIA Nemotron, BAAI embeddings |
| [ZAI](../../docs/providers/openai_compatible) (`zai/`) | `/chat/completions` | ZhipuAI GLM-5 models via ZAI cloud |
| [Black Forest Labs](../../docs/providers/black_forest_labs) (`black_forest_labs/`) | `/images/generations`, `/images/edits` | FLUX image generation and editing — Kontext Pro/Max, Pro 1.0 Fill/Expand |
| [Serper](../../docs/providers/serper) (`serper/`) | `/search` | Web search via Serper API |
| [SageMaker Nova](../../docs/providers/aws_sagemaker) (`sagemaker_nova/`) | `/chat/completions` | Amazon Nova models via SageMaker endpoint |
---
## New Models / Updated Models
#### New Model Support (116 new models)
| Provider | Model | Context Window | Input ($/1M tokens) | Output ($/1M tokens) | Features |
| -------- | ----- | -------------- | ------------------- | -------------------- | -------- |
| OpenAI | `gpt-5.4` | 1.05M | $2.50 | $15.00 | chat, vision, tools, reasoning |
| OpenAI | `gpt-5.4-pro` | 1.05M | $30.00 | $180.00 | responses, vision, tools, reasoning |
| OpenAI | `gpt-5.3-chat-latest` | 128K | $1.75 | $14.00 | chat, vision, tools, reasoning |
| Azure OpenAI | `azure/gpt-5.4` | 1.05M | $2.50 | $15.00 | chat, vision, tools, reasoning |
| Azure OpenAI | `azure/gpt-5.4-pro` | 1.05M | $30.00 | $180.00 | responses, vision, tools, reasoning |
| Azure OpenAI | `azure/gpt-5.3-chat` | 128K | $1.75 | $14.00 | chat, vision, tools, reasoning |
| Google Gemini | `gemini/gemini-3-flash-preview` | 1M | $0.50 | $3.00 | chat, vision, tools, reasoning |
| Google Gemini | `gemini/gemini-3.1-pro-preview` | 1M | $2.00 | $12.00 | chat, vision, tools, reasoning |
| Google Gemini | `gemini/gemini-3.1-flash-image-preview` | 65K | $0.25 | $1.50 | image generation, vision |
| Google Gemini | `gemini/gemini-3.1-flash-lite-preview` | - | - | - | chat |
| Google Gemini | `gemini/gemini-3-pro-image-preview` | - | - | - | image generation |
| Google Gemini | `gemini/gemini-embedding-2-preview` | 8K | $0.20 | - | embeddings |
| Google Vertex AI | `vertex_ai/gemini-3-flash-preview` | - | - | - | chat |
| Google Vertex AI | `vertex_ai/gemini-3.1-pro-preview` | - | - | - | chat |
| Google Vertex AI | `vertex_ai/gemini-3.1-flash-lite-preview` | - | - | - | chat |
| Google Vertex AI | `vertex_ai/gemini-embedding-2-preview` | - | $0.20 | - | embeddings |
| Mistral | `mistral/magistral-medium-1-2-2509` | 40K | $2.00 | $5.00 | chat, tools, reasoning |
| Mistral | `mistral/magistral-small-1-2-2509` | 40K | $0.50 | $1.50 | chat, tools, reasoning |
| Mistral | `mistral/mistral-large-2512` | 262K | $0.50 | $1.50 | chat, vision, tools |
| Mistral | `mistral/mistral-medium-3-1-2508` | - | - | - | chat |
| Mistral | `mistral/mistral-small-3-2-2506` | - | - | - | chat |
| Mistral | `mistral/ministral-3-3b-2512` | - | - | - | chat |
| Mistral | `mistral/ministral-3-8b-2512` | - | - | - | chat |
| Mistral | `mistral/ministral-3-14b-2512` | - | - | - | chat |
| Black Forest Labs | `black_forest_labs/flux-kontext-pro` | - | - | - | image editing |
| Black Forest Labs | `black_forest_labs/flux-kontext-max` | - | - | - | image editing |
| Black Forest Labs | `black_forest_labs/flux-pro-1.0-fill` | - | - | - | image editing (inpaint) |
| Black Forest Labs | `black_forest_labs/flux-pro-1.0-expand` | - | - | - | image editing (outpaint) |
| Black Forest Labs | `black_forest_labs/flux-pro-1.1` | - | - | - | image generation |
| Black Forest Labs | `black_forest_labs/flux-pro-1.1-ultra` | - | - | - | image generation |
| Black Forest Labs | `black_forest_labs/flux-dev` | - | - | - | image generation |
| Black Forest Labs | `black_forest_labs/flux-pro` | - | - | - | image generation |
| Azure AI | `azure_ai/grok-4-1-fast-non-reasoning` | 131K | $0.20 | $0.50 | chat, tools |
| Azure AI | `azure_ai/grok-4-1-fast-reasoning` | 131K | $0.20 | $0.50 | chat, tools, reasoning |
| Azure AI | `azure_ai/mistral-document-ai-2512` | - | - | - | OCR |
| Dashscope | `dashscope/qwen3-next-80b-a3b-instruct` | 262K | $0.15 | $1.20 | chat |
| Dashscope | `dashscope/qwen3-next-80b-a3b-thinking` | 262K | $0.15 | $1.20 | chat, reasoning |
| Dashscope | `dashscope/qwen3-vl-235b-a22b-instruct` | 131K | $0.40 | $1.60 | chat, vision |
| Dashscope | `dashscope/qwen3-vl-235b-a22b-thinking` | 131K | $0.40 | $4.00 | chat, vision, reasoning |
| Dashscope | `dashscope/qwen3-vl-32b-instruct` | 131K | $0.16 | $0.64 | chat, vision |
| Dashscope | `dashscope/qwen3-vl-32b-thinking` | 131K | $0.16 | $2.87 | chat, vision, reasoning |
| Dashscope | `dashscope/qwen3-vl-plus` | 260K | - | - | chat, vision |
| Dashscope | `dashscope/qwen3.5-plus` | 992K | - | - | chat |
| Dashscope | `dashscope/qwen3-max-2026-01-23` | 258K | - | - | chat |
| Nebius AI | `nebius/deepseek-ai/DeepSeek-R1` | 128K | $0.80 | $2.40 | chat, reasoning |
| Nebius AI | `nebius/deepseek-ai/DeepSeek-R1-0528` | 164K | $0.80 | $2.40 | chat, reasoning |
| Nebius AI | `nebius/deepseek-ai/DeepSeek-V3` | 128K | $0.50 | $1.50 | chat |
| Nebius AI | `nebius/deepseek-ai/DeepSeek-V3-0324` | 128K | $0.50 | $1.50 | chat |
| Nebius AI | `nebius/deepseek-ai/DeepSeek-R1-Distill-Llama-70B` | 128K | $0.25 | $0.75 | chat |
| Nebius AI | `nebius/Qwen/Qwen3-235B-A22B` | 262K | $0.20 | $0.60 | chat |
| Nebius AI | `nebius/Qwen/Qwen3-32B` | 32K | $0.10 | $0.30 | chat |
| Nebius AI | `nebius/Qwen/Qwen3-30B-A3B` | 32K | $0.10 | $0.30 | chat |
| Nebius AI | `nebius/Qwen/Qwen3-14B` | 32K | $0.08 | $0.24 | chat |
| Nebius AI | `nebius/Qwen/Qwen3-4B` | 32K | $0.08 | $0.24 | chat |
| Nebius AI | `nebius/Qwen/QwQ-32B` | 32K | $0.15 | $0.45 | chat |
| Nebius AI | `nebius/Qwen/Qwen2.5-72B-Instruct` | 128K | $0.13 | $0.40 | chat |
| Nebius AI | `nebius/Qwen/Qwen2.5-32B-Instruct` | 128K | $0.06 | $0.20 | chat |
| Nebius AI | `nebius/Qwen/Qwen2.5-VL-72B-Instruct` | 131K | $0.13 | $0.40 | chat, vision |
| Nebius AI | `nebius/Qwen/Qwen2-VL-72B-Instruct` | 131K | $0.13 | $0.40 | chat, vision |
| Nebius AI | `nebius/Qwen/Qwen2-VL-7B-Instruct` | 131K | $0.02 | $0.06 | chat, vision |
| Nebius AI | `nebius/meta-llama/Meta-Llama-3.1-405B-Instruct` | 128K | $1.00 | $3.00 | chat |
| Nebius AI | `nebius/meta-llama/Meta-Llama-3.1-70B-Instruct` | 128K | $0.13 | $0.40 | chat |
| Nebius AI | `nebius/meta-llama/Meta-Llama-3.1-8B-Instruct` | 128K | $0.02 | $0.06 | chat |
| Nebius AI | `nebius/meta-llama/Llama-3.3-70B-Instruct` | 128K | $0.13 | $0.40 | chat |
| Nebius AI | `nebius/meta-llama/Llama-Guard-3-8B` | 128K | $0.02 | $0.06 | chat |
| Nebius AI | `nebius/nvidia/Llama-3.1-Nemotron-Ultra-253B-v1` | 128K | $0.60 | $1.80 | chat |
| Nebius AI | `nebius/nvidia/Llama-3.3-Nemotron-Super-49B-v1` | 131K | $0.10 | $0.40 | chat |
| Nebius AI | `nebius/NousResearch/Hermes-3-Llama-3.1-405B` | 128K | $1.00 | $3.00 | chat |
| Nebius AI | `nebius/google/gemma-3-27b-it` | 128K | $0.06 | $0.20 | chat |
| Nebius AI | `nebius/mistralai/Mistral-Nemo-Instruct-2407` | 128K | $0.04 | $0.12 | chat |
| Nebius AI | `nebius/Qwen/Qwen2.5-Coder-7B` | 32K | $0.01 | $0.03 | chat |
| Nebius AI | `nebius/BAAI/bge-en-icl` | 32K | $0.01 | - | embeddings |
| Nebius AI | `nebius/BAAI/bge-multilingual-gemma2` | 8K | $0.01 | - | embeddings |
| Nebius AI | `nebius/intfloat/e5-mistral-7b-instruct` | 32K | $0.01 | - | embeddings |
| AWS Bedrock | `mistral.devstral-2-123b` | 256K | $0.40 | $2.00 | chat, tools |
| AWS Bedrock | `zai.glm-4.7-flash` | 200K | $0.07 | $0.40 | chat, tools, reasoning |
| ZAI | `zai/glm-5` | 200K | $1.00 | $3.20 | chat, tools, reasoning |
| ZAI | `zai/glm-5-code` | 200K | $1.20 | $5.00 | chat, tools, reasoning |
| OpenRouter | `openrouter/anthropic/claude-sonnet-4.6` | - | - | - | chat |
| OpenRouter | `openrouter/google/gemini-3.1-pro-preview` | - | - | - | chat |
| OpenRouter | `openrouter/openai/gpt-5.1-codex-max` | - | - | - | chat |
| OpenRouter | `openrouter/qwen/qwen3-coder-plus` | - | - | - | chat |
| OpenRouter | `openrouter/qwen/qwen3.5-*` (5 models) | - | - | - | chat |
| OpenRouter | `openrouter/z-ai/glm-5` | - | - | - | chat |
| Together AI | `together_ai/Qwen/Qwen3.5-397B-A17B` | - | - | - | chat |
| Perplexity | `perplexity/pplx-embed-v1-0.6b` | 32K | $0.00 | - | embeddings |
| Perplexity | `perplexity/pplx-embed-v1-4b` | 32K | $0.03 | - | embeddings |
| Serper | `serper/search` | - | - | - | search |
#### Updated Models
- **[AWS Bedrock](../../docs/providers/bedrock)**
- Add `cache_read_input_token_cost` and `cache_creation_input_token_cost` to Bedrock-hosted Anthropic models (`claude-3-opus`, `claude-3-sonnet`, `claude-3-haiku`, and APAC/EU variants) — prompt caching is now tracked for cost estimation
- Rename `apac.anthropic.claude-sonnet-4-6``au.anthropic.claude-sonnet-4-6` to reflect correct regional identifier
- **[Azure OpenAI](../../docs/providers/azure)**
- Add `supports_none_reasoning_effort` to all `gpt-5.1-chat`, `gpt-5.1-codex`, and `gpt-5.4` variants (global, EU, standard deployments) — allows passing `reasoning_effort: null` to disable reasoning
- **[Azure OpenAI](../../docs/providers/azure)** — Removed deprecated models
- Remove `azure/gpt-35-turbo-0301` (deprecated 2025-02-13)
- Remove `azure/gpt-35-turbo-0613` (deprecated 2025-02-13)
#### Features
- **[OpenAI](../../docs/providers/openai)**
- Day 0 support for `gpt-5.4` and `gpt-5.4-pro` on OpenAI and Azure
- **[Google Gemini](../../docs/providers/gemini)**
- Add Gemini 3.x model cost map entries — `gemini-3-flash-preview`, `gemini-3.1-pro-preview`, `gemini-3.1-flash-lite-preview`, `gemini-3-pro-image-preview`, `gemini-embedding-2-preview`
- Add Gemini 2.0 Flash and Flash Lite to cost map (re-added with updated pricing)
- **[Google Vertex AI](../../docs/providers/vertex)**
- Add `gemini-3-flash-preview`, `gemini-3.1-flash-lite-preview`, `gemini-flash-experimental`, and `gemini-embedding-2-preview` to Vertex AI model cost map
- **[Mistral](../../docs/providers/mistral)**
- Add Magistral reasoning models (`magistral-medium-1-2-2509`, `magistral-small-1-2-2509`)
- Add `mistral-large-2512`, `mistral-medium-3-1-2508`, `mistral-small-3-2-2506`, `ministral-3-*` variants
- **[Dashscope / Qwen](../../docs/providers/dashscope)**
- Add Qwen3 VL multimodal models (`qwen3-vl-235b`, `qwen3-vl-32b` — instruct and thinking variants)
- Add `qwen3-next-80b-a3b` (instruct + thinking), `qwen3.5-plus`, `qwen3-max-2026-01-23`
- **[Black Forest Labs](../../docs/providers/black_forest_labs)**
- Add FLUX Kontext image editing models (`flux-kontext-pro`, `flux-kontext-max`)
- Add FLUX Pro 1.0 Fill (inpainting) and Expand (outpainting)
- Add `flux-pro-1.1`, `flux-pro-1.1-ultra`, `flux-dev`, `flux-pro`
- **[Azure AI](../../docs/providers/azure_ai)**
- Add xAI Grok models via Azure AI Foundry (`grok-4-1-fast-non-reasoning`, `grok-4-1-fast-reasoning`)
- Add Mistral Document AI (`mistral-document-ai-2512`) — OCR mode
- **[AWS Bedrock](../../docs/providers/bedrock)**
- Add `mistral.devstral-2-123b` (256K context, tools)
- Add `zai.glm-4.7-flash` via Bedrock Converse (200K context, tools, reasoning)
- **[SageMaker](../../docs/providers/aws_sagemaker)**
- Add `sagemaker_nova` provider for Amazon Nova models on SageMaker - [PR #21542](https://github.com/BerriAI/litellm/pull/21542)
#### Deprecated / Removed Models
**OpenAI** — Legacy models removed from cost map:
- `gpt-3.5-turbo-0301`, `gpt-3.5-turbo-0613`, `gpt-3.5-turbo-16k-0613`
- `gpt-4-0314`, `gpt-4-32k`, `gpt-4-32k-0314`, `gpt-4-32k-0613`, `gpt-4-1106-vision-preview`, `gpt-4-vision-preview`
- `gpt-4.5-preview`, `gpt-4.5-preview-2025-02-27`
- `gpt-4o-audio-preview-2024-10-01`, `gpt-4o-realtime-preview-2024-10-01`
- `o1-mini`, `o1-mini-2024-09-12`, `o1-preview`, `o1-preview-2024-09-12`
**Google Gemini** — Gemini 1.5 and legacy 2.0 variants removed:
- All `gemini-1.5-*` variants (flash, flash-8b, pro, and dated versions)
- `gemini-2.0-flash-exp`, `gemini-2.0-pro-exp-02-05`, `gemini-2.5-flash-preview-04-17`, `gemini-2.5-flash-preview-05-20`
**Google Vertex AI** — PaLM 2 / legacy models removed:
- All `chat-bison`, `text-bison`, `codechat-bison`, `code-bison`, `code-gecko` variants
- Gemini 1.0 Pro, 1.5 Flash/Pro, 2.0 Flash experimental, and preview variants
**Perplexity** — Legacy Llama-sonar models removed:
- `llama-3.1-sonar-huge-128k-online`, `llama-3.1-sonar-large/small-128k-chat/online`
---
## LLM API Endpoints
#### Features
- **[Responses API](../../docs/response_api)**
- Handle `response.failed`, `response.incomplete`, and `response.cancelled` terminal event types in background streaming — previously only `response.completed` was handled - [PR #23492](https://github.com/BerriAI/litellm/pull/23492)
#### Bug Fixes
- **[Anthropic](../../docs/providers/anthropic)**
- Preserve native tool format (web_search, bash, tool_search, etc.) when guardrails convert tools for the Anthropic Messages API - [PR #23526](https://github.com/BerriAI/litellm/pull/23526)
- **[Moonshot / Kimi](../../docs/providers/openai_compatible)**
- Auto-fill `reasoning_content` for Moonshot Kimi reasoning models - [PR #23580](https://github.com/BerriAI/litellm/pull/23580)
- **[HuggingFace](../../docs/providers/huggingface)**
- Forward `extra_headers` to HuggingFace embedding API - [PR #23525](https://github.com/BerriAI/litellm/pull/23525)
- **General**
- Normalize `content_filtered` finish reason across providers - [PR #23564](https://github.com/BerriAI/litellm/pull/23564)
- Fix custom cost tracking on deployments for `/v1/messages` and `/v1/responses` - [PR #23647](https://github.com/BerriAI/litellm/pull/23647)
- Fix per-request custom pricing when `router_model_id` has no pricing data — now falls back to model name
---
## Management Endpoints / UI
#### Features
- **Virtual Keys**
- Add Organization dropdown to Create/Edit Key form — `organization_id` is now a first-class field in Key Ownership - [PR #23595](https://github.com/BerriAI/litellm/pull/23595)
- Allow setting `organization_id` on `/key/update` — keys can be assigned or moved to a different organization after creation - [PR #23557](https://github.com/BerriAI/litellm/pull/23557)
- **Internal Users**
- Add/Remove Team Membership directly from the Internal Users info page — includes searchable dropdown and role selector; no longer requires navigating to each team - [PR #23638](https://github.com/BerriAI/litellm/pull/23638)
- **Default Team Settings**
- Modernize page to antd (consistent with rest of app) - [PR #23614](https://github.com/BerriAI/litellm/pull/23614)
- Fix: default team params (budget, duration, tpm, rpm, permissions) now correctly applied on `/team/new` - [PR #23614](https://github.com/BerriAI/litellm/pull/23614)
- Fix: settings persist across proxy restarts (`default_team_params` added to `LITELLM_SETTINGS_SAFE_DB_OVERRIDES`) - [PR #23614](https://github.com/BerriAI/litellm/pull/23614)
- Fix: resolved race condition in `_update_litellm_setting` where `get_config()` could overwrite freshly saved values - [PR #23614](https://github.com/BerriAI/litellm/pull/23614)
- **Usage**
- Auto-paginate daily spend data — all entity views (teams, orgs, customers, tags, agents, users) fetch pages progressively with charts updating after each page - [PR #23622](https://github.com/BerriAI/litellm/pull/23622)
- **Models / Cost**
- Azure Model Router cost breakdown in UI — show per-sub-model `additional_costs` from `hidden_params` in `CostBreakdownViewer` - [PR #23550](https://github.com/BerriAI/litellm/pull/23550)
- **User Management**
- New `/user/info/v2` endpoint — scoped, paginated replacement for the existing god endpoint that caused memory and stability issues on large installs - [PR #23437](https://github.com/BerriAI/litellm/pull/23437)
#### Bugs
- Fix Tag list endpoint returning 500 due to invalid Prisma `group_by` kwargs - [PR #23606](https://github.com/BerriAI/litellm/pull/23606)
- Fix Team Admin getting 403 on `/user/filter/ui` when `scope_user_search_to_org` is enabled - [PR #23671](https://github.com/BerriAI/litellm/pull/23671)
- Fix Public Model Hub not showing config-defined models after save - [PR #23501](https://github.com/BerriAI/litellm/pull/23501)
- Fix fallback popup model dropdown z-index issue - [PR #23516](https://github.com/BerriAI/litellm/pull/23516)
- Fix double-counting bug in org/team key limit checks on `/key/update`
---
## AI Integrations
### Logging
- **[Vantage](https://vantage.sh)**
- Add Vantage integration for FOCUS 1.2 CSV export — export LiteLLM proxy spend data as FinOps Open Cost & Usage Specification reports, with time-windowed filenames to prevent overwrites - [PR #23333](https://github.com/BerriAI/litellm/pull/23333)
- **General**
- Fix silent metrics race condition causing metric collision across experiments - [PR #23542](https://github.com/BerriAI/litellm/pull/23542)
### Guardrails
No major guardrail changes in this release.
### Prompt Management
No major prompt management changes in this release.
### Secret Managers
No major secret manager changes in this release.
---
## Performance / Loadbalancing / Reliability improvements
- **Fix streaming crashes after ~1 hour**`LLMClientCache._remove_key()` no longer calls `close()`/`aclose()` on evicted HTTP/SDK clients. In-flight requests were crashing with `RuntimeError: Cannot send a request, as the client has been closed.` after the 1-hour TTL expired. Cleanup now happens only at shutdown via `close_litellm_async_clients()` - [PR #22926](https://github.com/BerriAI/litellm/pull/22926)
- **Fix OOM / Prisma connection loss** on large installs — unbounded managed-object poll was exhausting Prisma connections after ~6070 minutes on instances with 336K+ queued response rows - [PR #23472](https://github.com/BerriAI/litellm/pull/23472)
- **Centralize logging kwarg updates** — root cause fix migrating all logging updates to a single function, eliminating kwarg inconsistencies across logging paths - [PR #23659](https://github.com/BerriAI/litellm/pull/23659)
- **Fix tiktoken cache for non-root offline containers** — tiktoken cache now works correctly in offline environments running as non-root users - [PR #23498](https://github.com/BerriAI/litellm/pull/23498)
- **Add CodSpeed continuous performance benchmarks** — automated performance regression tracking on CI - [PR #23676](https://github.com/BerriAI/litellm/pull/23676)
---
## Security
- **Secret redaction in proxy logs** — Adds a `SecretRedactionFilter` to all LiteLLM loggers that scrubs API keys, tokens, and credentials from log messages, format args, exception tracebacks, and extra fields. Enabled by default; opt out with `LITELLM_DISABLE_REDACT_SECRETS=true` - [PR #23668](https://github.com/BerriAI/litellm/pull/23668), [PR #23667](https://github.com/BerriAI/litellm/pull/23667)
- **Bump PyJWT to `^2.12.0`** — addresses security vulnerability in `^2.10.1` - [PR #23678](https://github.com/BerriAI/litellm/pull/23678)
- **Bump `tar` to 7.5.11 and `tornado` to 6.5.5** — addresses CVEs in transitive dependencies - [PR #23602](https://github.com/BerriAI/litellm/pull/23602)
---
## Database / Proxy Operations
- **Fix Prisma migrate deploy on pre-existing instances** — resolved multiple bugs in migration recovery logic: missing return in the P3018 idempotent error handler and unhandled exceptions in `_roll_back_migration` that caused silent failures even after successful recovery - [PR #23655](https://github.com/BerriAI/litellm/pull/23655)
- **Make DB migration failure exit opt-in** — proxy no longer exits on `prisma migrate deploy` failure by default; enable with `--enforce_prisma_migration_check` - [PR #23675](https://github.com/BerriAI/litellm/pull/23675)
---
## New Contributors
* @ryanh-ai made their first contribution in [PR #21542](https://github.com/BerriAI/litellm/pull/21542)
* @ryan-crabbe made their first contribution in [PR #23668](https://github.com/BerriAI/litellm/pull/23668)
* @Jah-yee made their first contribution in [PR #23525](https://github.com/BerriAI/litellm/pull/23525)
* @gambletan made their first contribution in [PR #23516](https://github.com/BerriAI/litellm/pull/23516)
* @awais786 made their first contribution in [PR #23183](https://github.com/BerriAI/litellm/pull/23183)
* @pradyyadav made their first contribution in [PR #23580](https://github.com/BerriAI/litellm/pull/23580)
* @xianzongxie-stripe made their first contribution in [PR #23492](https://github.com/BerriAI/litellm/pull/23492)
* @Harshit28j made their first contribution in [PR #23333](https://github.com/BerriAI/litellm/pull/23333)
* @codspeed-hq[bot] made their first contribution in [PR #23676](https://github.com/BerriAI/litellm/pull/23676)
---
## Diff Summary
## 03/16/2026
* New Providers: 5
* New Models / Updated Models: 116 new, 132 removed
* LLM API Endpoints: 5
* Management Endpoints / UI: 11
* AI Integrations: 2
* Performance / Reliability: 5
* Security: 3
* Database / Proxy Operations: 2
---
## Full Changelog
[v1.82.0-stable...v1.82.3-stable](https://github.com/BerriAI/litellm/compare/v1.82.0-stable...v1.82.3-stable)

View File

@ -471,7 +471,9 @@
"audio_speech": false,
"moderations": false,
"batches": false,
"rerank": false
"rerank": false,
"a2a": false,
"interactions": false
}
},
"chutes": {
@ -1484,12 +1486,12 @@
}
},
"nebius": {
"display_name": "Nebius AI (`nebius`)",
"display_name": "Nebius AI Studio (`nebius`)",
"url": "https://docs.litellm.ai/docs/providers/nebius",
"endpoints": {
"chat_completions": true,
"messages": false,
"responses": false,
"messages": true,
"responses": true,
"embeddings": true,
"image_generations": false,
"audio_transcriptions": false,
@ -1497,7 +1499,8 @@
"moderations": false,
"batches": false,
"rerank": false,
"a2a": false
"a2a": true,
"interactions": true
}
},
"nlp_cloud": {
@ -2078,20 +2081,9 @@
},
"serper": {
"display_name": "Serper (`serper`)",
"url": "https://docs.litellm.ai/docs/providers/serper",
"url": "https://docs.litellm.ai/docs/search/serper",
"endpoints": {
"chat_completions": false,
"messages": false,
"responses": false,
"embeddings": false,
"image_generations": false,
"audio_transcriptions": false,
"audio_speech": false,
"moderations": false,
"batches": false,
"rerank": false,
"search": true,
"a2a": false
"search": true
}
},
"triton": {
@ -2273,12 +2265,12 @@
}
},
"zai": {
"display_name": "ZAI (`zai`)",
"url": "https://docs.litellm.ai/docs/providers/openai_compatible",
"display_name": "Z.AI (Zhipu AI) (`zai`)",
"url": "https://docs.litellm.ai/docs/providers/zai",
"endpoints": {
"chat_completions": true,
"messages": false,
"responses": false,
"messages": true,
"responses": true,
"embeddings": false,
"image_generations": false,
"audio_transcriptions": false,
@ -2286,7 +2278,8 @@
"moderations": false,
"batches": false,
"rerank": false,
"a2a": false
"a2a": true,
"interactions": true
}
},
"ragflow": {
@ -2558,25 +2551,23 @@
"audio_speech": false,
"moderations": false,
"batches": false,
"rerank": false,
"a2a": false
"rerank": false
}
},
"sagemaker_nova": {
"display_name": "AWS SageMaker Nova (`sagemaker_nova`)",
"url": "https://docs.litellm.ai/docs/providers/aws_sagemaker",
"charity_engine": {
"display_name": "Charity Engine (`charity_engine`)",
"url": "https://docs.litellm.ai/docs/providers/charity_engine",
"endpoints": {
"chat_completions": true,
"messages": false,
"responses": false,
"messages": true,
"responses": true,
"embeddings": false,
"image_generations": false,
"audio_transcriptions": false,
"audio_speech": false,
"moderations": false,
"batches": false,
"rerank": false,
"a2a": false
"rerank": false
}
}
},