revert proxy config
This commit is contained in:
parent
77d48e739d
commit
0543c59af6
@@ -1,7 +1,231 @@
|
||||
model_list:
|
||||
# Gemini 2.5 Flash Native Audio (Latest - recommended)
|
||||
- model_name: gpt-5.3-codex
|
||||
- model_name: gpt-3.5-turbo-end-user-test
|
||||
litellm_params:
|
||||
model: openai/gpt-5.3-codex
|
||||
model: gpt-3.5-turbo
|
||||
region_name: "eu"
|
||||
model_info:
|
||||
id: "1"
|
||||
- model_name: gpt-3.5-turbo-end-user-test
|
||||
litellm_params:
|
||||
model: openai/gpt-4.1-mini
|
||||
api_key: os.environ/OPENAI_API_KEY # The `os.environ/` prefix tells litellm to read this from the env. See https://docs.litellm.ai/docs/simple_proxy#load-api-keys-from-vault
|
||||
- model_name: gpt-3.5-turbo
|
||||
litellm_params:
|
||||
model: openai/gpt-4.1-mini
|
||||
api_key: os.environ/OPENAI_API_KEY # The `os.environ/` prefix tells litellm to read this from the env. See https://docs.litellm.ai/docs/simple_proxy#load-api-keys-from-vault
|
||||
- model_name: gpt-3.5-turbo-large
|
||||
litellm_params:
|
||||
model: "gpt-3.5-turbo-1106"
|
||||
api_key: os.environ/OPENAI_API_KEY
|
||||
|
||||
rpm: 480
|
||||
timeout: 300
|
||||
stream_timeout: 60
|
||||
- model_name: gpt-4
|
||||
litellm_params:
|
||||
model: openai/gpt-4.1-mini
|
||||
api_key: os.environ/OPENAI_API_KEY # The `os.environ/` prefix tells litellm to read this from the env. See https://docs.litellm.ai/docs/simple_proxy#load-api-keys-from-vault
|
||||
rpm: 480
|
||||
timeout: 300
|
||||
stream_timeout: 60
|
||||
- model_name: sagemaker-completion-model
|
||||
litellm_params:
|
||||
model: sagemaker/berri-benchmarking-Llama-2-70b-chat-hf-4
|
||||
input_cost_per_second: 0.000420
|
||||
- model_name: text-embedding-ada-002
|
||||
litellm_params:
|
||||
model: openai/text-embedding-ada-002
|
||||
api_key: os.environ/OPENAI_API_KEY
|
||||
model_info:
|
||||
mode: embedding
|
||||
base_model: text-embedding-ada-002
|
||||
- model_name: dall-e-2 # some tests still use dall-e-2, which is now deprecated; alias it to dall-e-3
|
||||
litellm_params:
|
||||
model: openai/dall-e-3
|
||||
- model_name: openai-dall-e-3
|
||||
litellm_params:
|
||||
model: dall-e-3
|
||||
- model_name: fake-openai-endpoint
|
||||
litellm_params:
|
||||
model: openai/gpt-3.5-turbo
|
||||
api_key: fake-key
|
||||
api_base: https://exampleopenaiendpoint-production.up.railway.app/
|
||||
- model_name: fake-openai-endpoint-2
|
||||
litellm_params:
|
||||
model: openai/my-fake-model
|
||||
api_key: my-fake-key
|
||||
api_base: https://exampleopenaiendpoint-production.up.railway.app/
|
||||
stream_timeout: 0.001
|
||||
rpm: 1
|
||||
- model_name: fake-openai-endpoint-3
|
||||
litellm_params:
|
||||
model: openai/my-fake-model
|
||||
api_key: my-fake-key
|
||||
api_base: https://exampleopenaiendpoint-production.up.railway.app/
|
||||
stream_timeout: 0.001
|
||||
rpm: 1000
|
||||
- model_name: fake-openai-endpoint-4
|
||||
litellm_params:
|
||||
model: openai/my-fake-model
|
||||
api_key: my-fake-key
|
||||
api_base: https://exampleopenaiendpoint-production.up.railway.app/
|
||||
num_retries: 50
|
||||
- model_name: fake-openai-endpoint-3
|
||||
litellm_params:
|
||||
model: openai/my-fake-model-2
|
||||
api_key: my-fake-key
|
||||
api_base: https://exampleopenaiendpoint-production.up.railway.app/
|
||||
stream_timeout: 0.001
|
||||
rpm: 1000
|
||||
- model_name: bad-model
|
||||
litellm_params:
|
||||
model: openai/bad-model
|
||||
api_key: os.environ/OPENAI_API_KEY
|
||||
api_base: https://exampleopenaiendpoint-production.up.railway.app/
|
||||
mock_timeout: True
|
||||
timeout: 60
|
||||
rpm: 1000
|
||||
model_info:
|
||||
health_check_timeout: 1
|
||||
- model_name: good-model
|
||||
litellm_params:
|
||||
model: openai/bad-model
|
||||
api_key: os.environ/OPENAI_API_KEY
|
||||
api_base: https://exampleopenaiendpoint-production.up.railway.app/
|
||||
rpm: 1000
|
||||
model_info:
|
||||
health_check_timeout: 1
|
||||
- model_name: "*"
|
||||
litellm_params:
|
||||
model: openai/*
|
||||
api_key: os.environ/OPENAI_API_KEY
|
||||
- model_name: realtime-v1
|
||||
litellm_params:
|
||||
model: azure/gpt-realtime-20250828-standard
|
||||
api_version: "2025-08-28"
|
||||
realtime_protocol: GA # Possible values: "GA" / "v1", or "beta"
|
||||
|
||||
- model_name: realtime-beta
|
||||
litellm_params:
|
||||
model: azure/gpt-realtime-20250828-standard
|
||||
api_version: 2025-04-01-preview
|
||||
|
||||
|
||||
# provider specific wildcard routing
|
||||
- model_name: "anthropic/*"
|
||||
litellm_params:
|
||||
model: "anthropic/*"
|
||||
api_key: os.environ/ANTHROPIC_API_KEY
|
||||
- model_name: "bedrock/*"
|
||||
litellm_params:
|
||||
model: "bedrock/*"
|
||||
- model_name: "groq/*"
|
||||
litellm_params:
|
||||
model: "groq/*"
|
||||
api_key: os.environ/GROQ_API_KEY
|
||||
- model_name: mistral-embed
|
||||
litellm_params:
|
||||
model: mistral/mistral-embed
|
||||
- model_name: gpt-instruct # [PROD TEST] - tests if `/health` automatically infers this to be a text completion model
|
||||
litellm_params:
|
||||
model: text-completion-openai/gpt-3.5-turbo-instruct
|
||||
- model_name: fake-openai-endpoint-5
|
||||
litellm_params:
|
||||
model: openai/my-fake-model
|
||||
api_key: my-fake-key
|
||||
api_base: https://exampleopenaiendpoint-production.up.railway.app/
|
||||
timeout: 1
|
||||
- model_name: badly-configured-openai-endpoint
|
||||
litellm_params:
|
||||
model: openai/my-fake-model
|
||||
api_key: my-fake-key
|
||||
api_base: https://exampleopenaiendpoint-production.up.railway.appxxxx/
|
||||
- model_name: gemini-1.5-flash
|
||||
litellm_params:
|
||||
model: gemini/gemini-1.5-flash
|
||||
api_key: os.environ/GOOGLE_API_KEY
|
||||
- model_name: gpt-4o
|
||||
litellm_params:
|
||||
model: gpt-4o
|
||||
api_key: os.environ/OPENAI_API_KEY
|
||||
|
||||
|
||||
litellm_settings:
|
||||
# set_verbose: True # Uncomment this if you want to see verbose logs; not recommended in production
|
||||
drop_params: True
|
||||
success_callback: ["prometheus"]
|
||||
# max_budget: 100
|
||||
# budget_duration: 30d
|
||||
num_retries: 5
|
||||
request_timeout: 600
|
||||
telemetry: False
|
||||
context_window_fallbacks: [{"gpt-3.5-turbo": ["gpt-3.5-turbo-large"]}]
|
||||
default_team_settings:
|
||||
- team_id: team-1
|
||||
success_callback: ["langfuse"]
|
||||
failure_callback: ["langfuse"]
|
||||
langfuse_public_key: os.environ/LANGFUSE_PROJECT1_PUBLIC # Project 1
|
||||
langfuse_secret: os.environ/LANGFUSE_PROJECT1_SECRET # Project 1
|
||||
- team_id: team-2
|
||||
success_callback: ["langfuse"]
|
||||
failure_callback: ["langfuse"]
|
||||
langfuse_public_key: os.environ/LANGFUSE_PROJECT2_PUBLIC # Project 2
|
||||
langfuse_secret: os.environ/LANGFUSE_PROJECT2_SECRET # Project 2
|
||||
langfuse_host: https://us.cloud.langfuse.com
|
||||
# cache: true # [OPTIONAL] use for caching responses
|
||||
# enable_caching_on_provider_specific_optional_params: True # Include provider-specific params in cache keys
|
||||
# cache_params: # also used for the shared health check
|
||||
# type: redis
|
||||
# host: localhost
|
||||
# port: 6379
|
||||
|
||||
# For /fine_tuning/jobs endpoints
|
||||
finetune_settings:
|
||||
- custom_llm_provider: azure
|
||||
api_base: os.environ/AZURE_API_BASE
|
||||
api_key: os.environ/AZURE_API_KEY
|
||||
api_version: "2023-03-15-preview"
|
||||
- custom_llm_provider: openai
|
||||
api_key: os.environ/OPENAI_API_KEY
|
||||
|
||||
# for /files endpoints
|
||||
files_settings:
|
||||
- custom_llm_provider: azure
|
||||
api_base: os.environ/AZURE_API_BASE
|
||||
api_key: os.environ/AZURE_API_KEY
|
||||
api_version: "2023-03-15-preview"
|
||||
- custom_llm_provider: openai
|
||||
api_key: os.environ/OPENAI_API_KEY
|
||||
|
||||
router_settings:
|
||||
routing_strategy: usage-based-routing-v2
|
||||
redis_host: os.environ/REDIS_HOST
|
||||
redis_password: os.environ/REDIS_PASSWORD
|
||||
redis_port: os.environ/REDIS_PORT
|
||||
enable_pre_call_checks: true
|
||||
model_group_alias: {"my-special-fake-model-alias-name": "fake-openai-endpoint-3"}
|
||||
|
||||
general_settings:
|
||||
master_key: sk-1234 # [OPTIONAL] Use to enforce auth on proxy. See https://docs.litellm.ai/docs/proxy/virtual_keys
|
||||
store_model_in_db: True
|
||||
proxy_budget_rescheduler_min_time: 60
|
||||
proxy_budget_rescheduler_max_time: 64
|
||||
proxy_batch_write_at: 1
|
||||
database_connection_pool_limit: 10
|
||||
# background_health_checks: true
|
||||
# use_shared_health_check: true
|
||||
# health_check_interval: 30
|
||||
# database_url: "postgresql://<user>:<password>@<host>:<port>/<dbname>" # [OPTIONAL] use for token-based auth to proxy
|
||||
|
||||
pass_through_endpoints:
|
||||
- path: "/v1/rerank" # route you want to add to LiteLLM Proxy Server
|
||||
target: "https://api.cohere.com/v1/rerank" # URL this route should forward requests to
|
||||
headers: # headers to forward to this URL
|
||||
content-type: application/json # (Optional) Extra Headers to pass to this endpoint
|
||||
accept: application/json
|
||||
forward_headers: True
|
||||
|
||||
# environment_variables:
|
||||
# settings for using redis caching
|
||||
# REDIS_HOST: redis-16337.c322.us-east-1-2.ec2.cloud.redislabs.com
|
||||
# REDIS_PORT: "16337"
|
||||
# REDIS_PASSWORD:
|
||||
Loading…
Reference in New Issue
Block a user