* test(ci): record/replay OpenAI image gen so the spend E2E isn't outage-bound

  The dockerized spend test test_key_info_spend_values_image_generation curls the proxy for a gpt-image-1 image, which wildcard-routes to real api.openai.com on every commit; an OpenAI outage then reddens unrelated PRs and each run pays for an image.

  Add an in-repo record/replay reverse proxy (tests/_openai_record_replay_proxy.py) that sits between the proxy and OpenAI. The first run, and the first after the recording lapses, records live; subsequent runs replay from the shared Redis cassette store. The proxy keeps its real separate-process HTTP topology; only the image model's api_base is pointed at the recorder in CI via IMAGE_GEN_RECORDER_BASE_URL, which is unset elsewhere so it falls back to api.openai.com.

  Recordings lapse 24h after write and are never refreshed on read, matching the VCR persister contract, so provider drift is still caught. Replayed responses drop upstream framing/server headers (content-length, transfer-encoding, content-encoding, date, server) so the re-serving layer recomputes them, honoring the Bedrock content-length lesson.

* test(ci): close recorder http client on app shutdown

  Add a Starlette lifespan that closes the self-created httpx.AsyncClient on teardown, and leave caller-injected clients untouched so reuse across create_app calls is not broken. Covers the unclosed-client ResourceWarning raised in review.
240 lines
8.7 KiB
YAML
240 lines
8.7 KiB
YAML
# Model entries served by this config.
#
# Any value written as `os.environ/NAME` tells litellm to read it from the
# environment variable NAME instead of taking it literally. See
# https://docs.litellm.ai/docs/simple_proxy#load-api-keys-from-vault
model_list:
  # The same model_name appears twice below, with different litellm_params.
  - model_name: gpt-5-mini-end-user-test
    litellm_params:
      model: gpt-5-mini
      region_name: "eu"
    model_info:
      id: "1"
  - model_name: gpt-5-mini-end-user-test
    litellm_params:
      model: openai/gpt-5-mini
      api_key: os.environ/OPENAI_API_KEY

  - model_name: gpt-3.5-turbo
    litellm_params:
      model: openai/gpt-4.1-mini
      api_key: os.environ/OPENAI_API_KEY
  - model_name: gpt-3.5-turbo-large
    litellm_params:
      model: "gpt-4.1"
      api_key: os.environ/OPENAI_API_KEY
      rpm: 480
      timeout: 300
      stream_timeout: 60
  - model_name: gpt-4
    litellm_params:
      model: openai/gpt-4.1
      api_key: os.environ/OPENAI_API_KEY
      rpm: 480
      timeout: 300
      stream_timeout: 60

  - model_name: sagemaker-completion-model
    litellm_params:
      model: sagemaker/berri-benchmarking-Llama-2-70b-chat-hf-4
      input_cost_per_second: 0.000420

  - model_name: text-embedding-ada-002
    litellm_params:
      model: openai/text-embedding-3-small
      api_key: os.environ/OPENAI_API_KEY
    model_info:
      mode: embedding
      base_model: text-embedding-3-small

  # dall-e-2 and dall-e-3 were deprecated 2026-05-12; both names below are
  # kept as aliases whose underlying model is now gpt-image-1.
  - model_name: dall-e-2
    litellm_params:
      model: openai/gpt-image-1
  - model_name: openai-dall-e-3
    litellm_params:
      model: gpt-image-1

  # In CI, IMAGE_GEN_RECORDER_BASE_URL points this entry at the record/replay
  # proxy (tests/_openai_record_replay_proxy.py) so the image spend E2E does
  # not depend on OpenAI's uptime on every commit. The variable is unset
  # elsewhere, so api_base resolves to None and falls back to api.openai.com.
  - model_name: gpt-image-1
    litellm_params:
      model: openai/gpt-image-1
      api_key: os.environ/OPENAI_API_KEY
      api_base: os.environ/IMAGE_GEN_RECORDER_BASE_URL

  - model_name: fake-openai-endpoint
    litellm_params:
      model: openai/gpt-5-mini
      api_key: fake-key
      api_base: https://exampleopenaiendpoint-production.up.railway.app/
  - model_name: fake-openai-endpoint-2
    litellm_params:
      model: openai/my-fake-model
      api_key: my-fake-key
      api_base: https://exampleopenaiendpoint-production.up.railway.app/
      stream_timeout: 0.001
      rpm: 1
  - model_name: fake-openai-endpoint-3
    litellm_params:
      model: openai/my-fake-model
      api_key: my-fake-key
      api_base: https://exampleopenaiendpoint-production.up.railway.app/
      stream_timeout: 0.001
      rpm: 1000
  - model_name: fake-openai-endpoint-4
    litellm_params:
      model: openai/my-fake-model
      api_key: my-fake-key
      api_base: https://exampleopenaiendpoint-production.up.railway.app/
      num_retries: 50
  # Second entry named fake-openai-endpoint-3, backed by my-fake-model-2.
  - model_name: fake-openai-endpoint-3
    litellm_params:
      model: openai/my-fake-model-2
      api_key: my-fake-key
      api_base: https://exampleopenaiendpoint-production.up.railway.app/
      stream_timeout: 0.001
      rpm: 1000

  - model_name: bad-model
    litellm_params:
      model: openai/bad-model
      api_key: os.environ/OPENAI_API_KEY
      api_base: https://exampleopenaiendpoint-production.up.railway.app/
      mock_timeout: true
      timeout: 60
      rpm: 1000
    model_info:
      health_check_timeout: 1
  # NOTE(review): good-model also targets openai/bad-model; it differs from
  # bad-model only by omitting mock_timeout/timeout - presumably intentional.
  - model_name: good-model
    litellm_params:
      model: openai/bad-model
      api_key: os.environ/OPENAI_API_KEY
      api_base: https://exampleopenaiendpoint-production.up.railway.app/
      rpm: 1000
    model_info:
      health_check_timeout: 1

  # Catch-all wildcard entry mapped to openai/*.
  - model_name: "*"
    litellm_params:
      model: openai/*
      api_key: os.environ/OPENAI_API_KEY

  - model_name: realtime-v1
    litellm_params:
      model: azure/gpt-realtime-20250828-standard
      api_version: "2025-08-28"
      # Possible values: "GA" / "v1", "beta"
      realtime_protocol: GA
  - model_name: realtime-beta
    litellm_params:
      model: azure/gpt-realtime-20250828-standard
      api_version: "2025-04-01-preview"

  # provider specific wildcard routing
  - model_name: "anthropic/*"
    litellm_params:
      model: "anthropic/*"
      api_key: os.environ/ANTHROPIC_API_KEY
  - model_name: "bedrock/*"
    litellm_params:
      model: "bedrock/*"
  - model_name: "groq/*"
    litellm_params:
      model: "groq/*"
      api_key: os.environ/GROQ_API_KEY

  - model_name: mistral-embed
    litellm_params:
      model: mistral/mistral-embed

  # [PROD TEST] - tests if `/health` automatically infers this to be a text
  # completion model
  - model_name: gpt-instruct
    litellm_params:
      model: text-completion-openai/gpt-3.5-turbo-instruct

  - model_name: fake-openai-endpoint-5
    litellm_params:
      model: openai/my-fake-model
      api_key: my-fake-key
      api_base: https://exampleopenaiendpoint-production.up.railway.app/
      timeout: 1
  # Note the api_base host below ends in ".appxxxx", unlike the other fake
  # endpoints.
  - model_name: badly-configured-openai-endpoint
    litellm_params:
      model: openai/my-fake-model
      api_key: my-fake-key
      api_base: https://exampleopenaiendpoint-production.up.railway.appxxxx/

  - model_name: gemini-2.5-flash
    litellm_params:
      model: gemini/gemini-2.5-flash
      api_key: os.environ/GOOGLE_API_KEY
  - model_name: gpt-5.5
    litellm_params:
      model: gpt-5.5
      api_key: os.environ/OPENAI_API_KEY
|
|
|
|
|
|
# Library-level litellm settings.
litellm_settings:
  # Uncomment this if you want to see verbose logs; not recommended in
  # production.
  # set_verbose: true
  drop_params: true
  success_callback:
    - prometheus
  # max_budget: 100
  # budget_duration: 30d
  num_retries: 5
  request_timeout: 600
  telemetry: false
  # gpt-3.5-turbo lists gpt-3.5-turbo-large as its context-window fallback.
  context_window_fallbacks:
    - "gpt-3.5-turbo":
        - gpt-3.5-turbo-large
  # Per-team callback settings; each team uses its own Langfuse project keys.
  default_team_settings:
    # Project 1
    - team_id: team-1
      success_callback:
        - langfuse
      failure_callback:
        - langfuse
      langfuse_public_key: os.environ/LANGFUSE_PROJECT1_PUBLIC
      langfuse_secret: os.environ/LANGFUSE_PROJECT1_SECRET
    # Project 2
    - team_id: team-2
      success_callback:
        - langfuse
      failure_callback:
        - langfuse
      langfuse_public_key: os.environ/LANGFUSE_PROJECT2_PUBLIC
      langfuse_secret: os.environ/LANGFUSE_PROJECT2_SECRET
      langfuse_host: https://us.cloud.langfuse.com
  # [OPTIONAL] use for caching responses
  # cache: true
  # Include provider-specific params in cache keys
  # enable_caching_on_provider_specific_optional_params: true
  # And for shared health check
  # cache_params:
  #   type: redis
  #   host: localhost
  #   port: 6379
|
|
|
|
# Provider credentials for the /fine_tuning/jobs endpoints.
# One entry per custom_llm_provider; values are read from the environment.
finetune_settings:
  - custom_llm_provider: azure
    api_base: os.environ/AZURE_API_BASE
    api_key: os.environ/AZURE_API_KEY
    api_version: "2023-03-15-preview"
  - custom_llm_provider: openai
    api_key: os.environ/OPENAI_API_KEY
|
|
|
|
# Provider credentials for the /files endpoints.
# One entry per custom_llm_provider; values are read from the environment.
files_settings:
  - custom_llm_provider: azure
    api_base: os.environ/AZURE_API_BASE
    api_key: os.environ/AZURE_API_KEY
    api_version: "2023-03-15-preview"
  - custom_llm_provider: openai
    api_key: os.environ/OPENAI_API_KEY
|
|
|
|
# Router configuration. The Redis connection details are taken from the
# REDIS_HOST / REDIS_PASSWORD / REDIS_PORT environment variables.
router_settings:
  routing_strategy: usage-based-routing-v2
  redis_host: os.environ/REDIS_HOST
  redis_password: os.environ/REDIS_PASSWORD
  redis_port: os.environ/REDIS_PORT
  enable_pre_call_checks: true
  # Alias name -> model group it maps to.
  model_group_alias:
    my-special-fake-model-alias-name: fake-openai-endpoint-3
|
|
|
|
# Proxy-wide settings.
general_settings:
  # [OPTIONAL] Use to enforce auth on proxy.
  # See - https://docs.litellm.ai/docs/proxy/virtual_keys
  master_key: sk-1234
  store_model_in_db: true
  proxy_budget_rescheduler_min_time: 60
  proxy_budget_rescheduler_max_time: 64
  proxy_batch_write_at: 1
  database_connection_pool_limit: 10
  # background_health_checks: true
  # use_shared_health_check: true
  # health_check_interval: 30
  # [OPTIONAL] use for token-based auth to proxy
  # database_url: "postgresql://<user>:<password>@<host>:<port>/<dbname>"

  pass_through_endpoints:
    # path: route you want to add to LiteLLM Proxy Server
    - path: "/v1/rerank"
      # URL this route should forward requests to
      target: "https://api.cohere.com/v1/rerank"
      # headers to forward to this URL
      headers:
        # (Optional) Extra Headers to pass to this endpoint
        content-type: application/json
        accept: application/json
      forward_headers: true
|
|
|
|
# environment_variables:
#   # settings for using redis caching
#   REDIS_HOST: redis-16337.c322.us-east-1-2.ec2.cloud.redislabs.com
#   REDIS_PORT: "16337"
#   REDIS_PASSWORD: