diff --git a/enterprise/dist/litellm_enterprise-0.1.6-py3-none-any.whl b/enterprise/dist/litellm_enterprise-0.1.6-py3-none-any.whl
new file mode 100644
index 0000000000..c212c7e5a3
Binary files /dev/null and b/enterprise/dist/litellm_enterprise-0.1.6-py3-none-any.whl differ
diff --git a/enterprise/dist/litellm_enterprise-0.1.6.tar.gz b/enterprise/dist/litellm_enterprise-0.1.6.tar.gz
new file mode 100644
index 0000000000..698a9da209
Binary files /dev/null and b/enterprise/dist/litellm_enterprise-0.1.6.tar.gz differ
diff --git a/litellm/constants.py b/litellm/constants.py
index 43a8eba3c1..fba57a2bf9 100644
--- a/litellm/constants.py
+++ b/litellm/constants.py
@@ -282,6 +282,46 @@ OPENAI_CHAT_COMPLETION_PARAMS = [
"web_search_options",
]
+DEFAULT_CHAT_COMPLETION_PARAM_VALUES = { # default value per chat-completion param; a passed value that differs from its entry here is treated as "non-default", and keys absent from this table are treated as provider-specific
+ "functions": None,
+ "function_call": None,
+ "temperature": None,
+ "top_p": None,
+ "n": None,
+ "stream": None,
+ "stream_options": None,
+ "stop": None,
+ "max_tokens": None,
+ "max_completion_tokens": None,
+ "modalities": None,
+ "prediction": None,
+ "audio": None,
+ "presence_penalty": None,
+ "frequency_penalty": None,
+ "logit_bias": None,
+ "user": None,
+ "model": None,
+ "custom_llm_provider": "", # the only entry whose default is not None
+ "response_format": None,
+ "seed": None,
+ "tools": None,
+ "tool_choice": None,
+ "max_retries": None,
+ "logprobs": None,
+ "top_logprobs": None,
+ "extra_headers": None,
+ "api_version": None,
+ "parallel_tool_calls": None,
+ "drop_params": None,
+ "allowed_openai_params": None,
+ "additional_drop_params": None,
+ "messages": None,
+ "reasoning_effort": None,
+ "thinking": None,
+ "web_search_options": None,
+}
+
+
openai_compatible_endpoints: List = [
"api.perplexity.ai",
"api.endpoints.anyscale.com/v1",
diff --git a/litellm/main.py b/litellm/main.py
index 44611e203f..7f1ecd737f 100644
--- a/litellm/main.py
+++ b/litellm/main.py
@@ -86,6 +86,7 @@ from litellm.utils import (
ProviderConfigManager,
Usage,
add_openai_metadata,
+ add_provider_specific_params_to_optional_params,
async_mock_completion_streaming_obj,
convert_to_model_response_object,
create_pretrained_tokenizer,
@@ -99,6 +100,7 @@ from litellm.utils import (
get_secret,
get_standard_openai_params,
mock_completion_streaming_obj,
+ pre_process_non_default_params,
read_config_args,
supports_httpx_timeout,
token_counter,
@@ -1140,42 +1142,55 @@ def completion( # type: ignore # noqa: PLR0915
if dynamic_api_key is not None:
api_key = dynamic_api_key
# check if user passed in any of the OpenAI optional params
- optional_params = get_optional_params(
- functions=functions,
- function_call=function_call,
- temperature=temperature,
- top_p=top_p,
- n=n,
- stream=stream,
- stream_options=stream_options,
- stop=stop,
- max_tokens=max_tokens,
- max_completion_tokens=max_completion_tokens,
- modalities=modalities,
- prediction=prediction,
- audio=audio,
- presence_penalty=presence_penalty,
- frequency_penalty=frequency_penalty,
- logit_bias=logit_bias,
- user=user,
+ optional_param_args = {
+ "functions": functions,
+ "function_call": function_call,
+ "temperature": temperature,
+ "top_p": top_p,
+ "n": n,
+ "stream": stream,
+ "stream_options": stream_options,
+ "stop": stop,
+ "max_tokens": max_tokens,
+ "max_completion_tokens": max_completion_tokens,
+ "modalities": modalities,
+ "prediction": prediction,
+ "audio": audio,
+ "presence_penalty": presence_penalty,
+ "frequency_penalty": frequency_penalty,
+ "logit_bias": logit_bias,
+ "user": user,
# params to identify the model
+ "model": model,
+ "custom_llm_provider": custom_llm_provider,
+ "response_format": response_format,
+ "seed": seed,
+ "tools": tools,
+ "tool_choice": tool_choice,
+ "max_retries": max_retries,
+ "logprobs": logprobs,
+ "top_logprobs": top_logprobs,
+ "api_version": api_version,
+ "parallel_tool_calls": parallel_tool_calls,
+ "messages": messages,
+ "reasoning_effort": reasoning_effort,
+ "thinking": thinking,
+ "web_search_options": web_search_options,
+ "allowed_openai_params": kwargs.get("allowed_openai_params"),
+ }
+ optional_params = get_optional_params(
+ **optional_param_args, **non_default_params
+ )
+ processed_non_default_params = pre_process_non_default_params(
model=model,
+ passed_params=optional_param_args,
+ special_params=non_default_params,
custom_llm_provider=custom_llm_provider,
- response_format=response_format,
- seed=seed,
- tools=tools,
- tool_choice=tool_choice,
- max_retries=max_retries,
- logprobs=logprobs,
- top_logprobs=top_logprobs,
- api_version=api_version,
- parallel_tool_calls=parallel_tool_calls,
- messages=messages,
- reasoning_effort=reasoning_effort,
- thinking=thinking,
- web_search_options=web_search_options,
- allowed_openai_params=kwargs.get("allowed_openai_params"),
- **non_default_params,
+ additional_drop_params=kwargs.get("additional_drop_params"),
+ )
+ processed_non_default_params = add_provider_specific_params_to_optional_params(
+ optional_params=processed_non_default_params,
+ passed_params=non_default_params,
)
if litellm.add_function_to_prompt and optional_params.get(
@@ -1241,10 +1256,7 @@ def completion( # type: ignore # noqa: PLR0915
cast(LiteLLMLoggingObj, logging).update_environment_variables(
model=model,
user=user,
- optional_params={
- **standard_openai_params,
- **non_default_params,
- }, # [IMPORTANT] - using standard_openai_params ensures consistent params logged to langfuse for finetuning / eval datasets.
+ optional_params=processed_non_default_params, # [IMPORTANT] - using processed_non_default_params ensures consistent params logged to langfuse for finetuning / eval datasets.
litellm_params=litellm_params,
custom_llm_provider=custom_llm_provider,
)
@@ -4027,9 +4039,7 @@ def embedding( # noqa: PLR0915
client=client,
aembedding=aembedding,
)
- elif (
- custom_llm_provider in litellm._custom_providers
- ):
+ elif custom_llm_provider in litellm._custom_providers:
custom_handler: Optional[CustomLLM] = None
for item in litellm.custom_provider_map:
if item["provider"] == custom_llm_provider:
@@ -4040,7 +4050,11 @@ def embedding( # noqa: PLR0915
model=model, custom_llm_provider=custom_llm_provider
)
- handler_fn = custom_handler.embedding if not aembedding else custom_handler.aembedding
+ handler_fn = (
+ custom_handler.embedding
+ if not aembedding
+ else custom_handler.aembedding
+ )
response = handler_fn(
model=model,
@@ -4049,7 +4063,7 @@ def embedding( # noqa: PLR0915
optional_params=optional_params,
model_response=EmbeddingResponse(),
print_verbose=print_verbose,
- litellm_params=litellm_params
+ litellm_params=litellm_params,
)
else:
raise LiteLLMUnknownProvider(
diff --git a/litellm/proxy/_experimental/out/onboarding.html b/litellm/proxy/_experimental/out/onboarding.html
deleted file mode 100644
index 82cebe3a37..0000000000
--- a/litellm/proxy/_experimental/out/onboarding.html
+++ /dev/null
@@ -1 +0,0 @@
-
LiteLLM Dashboard
\ No newline at end of file
diff --git a/litellm/proxy/_new_secret_config.yaml b/litellm/proxy/_new_secret_config.yaml
index 5fa3e618c2..58b40e8f0c 100644
--- a/litellm/proxy/_new_secret_config.yaml
+++ b/litellm/proxy/_new_secret_config.yaml
@@ -79,7 +79,7 @@ model_list:
api_key: os.environ/OPENAI_API_KEY
litellm_settings:
- store_audit_logs: true
+ callbacks: ["langfuse"]
general_settings:
store_model_in_db: true
diff --git a/litellm/utils.py b/litellm/utils.py
index 65d825c979..f4f3707ebf 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -63,6 +63,7 @@ from litellm.caching._internal_lru_cache import lru_cache_wrapper
from litellm.caching.caching import DualCache
from litellm.caching.caching_handler import CachingHandlerResponse, LLMCachingHandler
from litellm.constants import (
+ DEFAULT_CHAT_COMPLETION_PARAM_VALUES,
DEFAULT_MAX_LRU_CACHE_SIZE,
DEFAULT_TRIM_RATIO,
FUNCTION_DEFINITION_TOKEN_COUNT,
@@ -2671,50 +2672,18 @@ def _remove_unsupported_params(
return non_default_params
-def get_optional_params( # noqa: PLR0915
- # use the openai defaults
- # https://platform.openai.com/docs/api-reference/chat/create
+def pre_process_non_default_params(
+ passed_params: dict,
+ special_params: dict,
+ custom_llm_provider: str,
+ additional_drop_params: Optional[List[str]],
model: str,
- functions=None,
- function_call=None,
- temperature=None,
- top_p=None,
- n=None,
- stream=False,
- stream_options=None,
- stop=None,
- max_tokens=None,
- max_completion_tokens=None,
- modalities=None,
- prediction=None,
- audio=None,
- presence_penalty=None,
- frequency_penalty=None,
- logit_bias=None,
- user=None,
- custom_llm_provider="",
- response_format=None,
- seed=None,
- tools=None,
- tool_choice=None,
- max_retries=None,
- logprobs=None,
- top_logprobs=None,
- extra_headers=None,
- api_version=None,
- parallel_tool_calls=None,
- drop_params=None,
- allowed_openai_params: Optional[List[str]] = None,
- reasoning_effort=None,
- additional_drop_params=None,
- messages: Optional[List[AllMessageValues]] = None,
- thinking: Optional[AnthropicThinkingParam] = None,
- web_search_options: Optional[OpenAIWebSearchOptions] = None,
- **kwargs,
-):
+) -> dict:
+ """
+ Pre-process non-default params to a standardized format
+ """
# retrieve all parameters passed to the function
- passed_params = locals().copy()
- special_params = passed_params.pop("kwargs")
+
for k, v in special_params.items():
if k.startswith("aws_") and (
custom_llm_provider != "bedrock"
@@ -2731,6 +2700,72 @@ def get_optional_params( # noqa: PLR0915
continue
passed_params[k] = v
+ # filter out those parameters that were passed with non-default values
+
+ non_default_params = {
+ k: v
+ for k, v in passed_params.items()
+ if (
+ k != "model"
+ and k != "custom_llm_provider"
+ and k != "api_version"
+ and k != "drop_params"
+ and k != "allowed_openai_params"
+ and k != "additional_drop_params"
+ and k != "messages"
+ and k in DEFAULT_CHAT_COMPLETION_PARAM_VALUES
+ and v != DEFAULT_CHAT_COMPLETION_PARAM_VALUES[k]
+ and _should_drop_param(k=k, additional_drop_params=additional_drop_params)
+ is False
+ )
+ }
+
+ provider_config: Optional[BaseConfig] = None
+ if custom_llm_provider is not None and custom_llm_provider in [
+ provider.value for provider in LlmProviders
+ ]:
+ provider_config = ProviderConfigManager.get_provider_chat_config(
+ model=model, provider=LlmProviders(custom_llm_provider)
+ )
+
+ if "response_format" in non_default_params:
+ if provider_config is not None:
+ non_default_params[
+ "response_format"
+ ] = provider_config.get_json_schema_from_pydantic_object(
+ response_format=non_default_params["response_format"]
+ )
+ else:
+ non_default_params["response_format"] = type_to_response_format_param(
+ response_format=non_default_params["response_format"]
+ )
+
+ if "tools" in non_default_params and isinstance(
+ non_default_params, list
+ ): # fixes https://github.com/BerriAI/litellm/issues/4933 -- NOTE(review): non_default_params is a dict here, so this isinstance(..., list) check is always False and the clean-up below never runs; it presumably meant non_default_params["tools"]. Confirm the intended providers before enabling it.
+ tools = non_default_params["tools"]
+ for (
+ tool
+ ) in (
+ tools
+ ): # clean out 'additionalProperties = False'. Causes vertexai/gemini OpenAI API Schema errors - https://github.com/langchain-ai/langchainjs/issues/5240
+ tool_function = tool.get("function", {})
+ parameters = tool_function.get("parameters", None)
+ if parameters is not None:
+ new_parameters = copy.deepcopy(parameters) # edit a copy so the original parameters object is left untouched
+ if (
+ "additionalProperties" in new_parameters
+ and new_parameters["additionalProperties"] is False
+ ):
+ new_parameters.pop("additionalProperties", None)
+ tool_function["parameters"] = new_parameters # rebinds the key on the tool's own "function" dict
+ return non_default_params
+
+
+def pre_process_optional_params(
+ passed_params: dict, non_default_params: dict, custom_llm_provider: str
+) -> dict:
+ """For .completion(), preprocess optional params"""
optional_params: Dict = {}
common_auth_dict = litellm.common_cloud_provider_auth_params
@@ -2761,65 +2796,6 @@ def get_optional_params( # noqa: PLR0915
non_default_params=passed_params, optional_params=optional_params
)
- default_params = {
- "functions": None,
- "function_call": None,
- "temperature": None,
- "top_p": None,
- "n": None,
- "stream": None,
- "stream_options": None,
- "stop": None,
- "max_tokens": None,
- "max_completion_tokens": None,
- "modalities": None,
- "prediction": None,
- "audio": None,
- "presence_penalty": None,
- "frequency_penalty": None,
- "logit_bias": None,
- "user": None,
- "model": None,
- "custom_llm_provider": "",
- "response_format": None,
- "seed": None,
- "tools": None,
- "tool_choice": None,
- "max_retries": None,
- "logprobs": None,
- "top_logprobs": None,
- "extra_headers": None,
- "api_version": None,
- "parallel_tool_calls": None,
- "drop_params": None,
- "allowed_openai_params": None,
- "additional_drop_params": None,
- "messages": None,
- "reasoning_effort": None,
- "thinking": None,
- "web_search_options": None,
- }
-
- # filter out those parameters that were passed with non-default values
-
- non_default_params = {
- k: v
- for k, v in passed_params.items()
- if (
- k != "model"
- and k != "custom_llm_provider"
- and k != "api_version"
- and k != "drop_params"
- and k != "allowed_openai_params"
- and k != "additional_drop_params"
- and k != "messages"
- and k in default_params
- and v != default_params[k]
- and _should_drop_param(k=k, additional_drop_params=additional_drop_params)
- is False
- )
- }
-
## raise exception if function calling passed in for a provider that doesn't support it
if (
"functions" in non_default_params
@@ -2879,6 +2855,64 @@ def get_optional_params( # noqa: PLR0915
message=f"Function calling is not supported by {custom_llm_provider}.",
)
+ return optional_params
+
+
+def get_optional_params( # noqa: PLR0915
+ # use the openai defaults
+ # https://platform.openai.com/docs/api-reference/chat/create
+ model: str,
+ functions=None,
+ function_call=None,
+ temperature=None,
+ top_p=None,
+ n=None,
+ stream=False,
+ stream_options=None,
+ stop=None,
+ max_tokens=None,
+ max_completion_tokens=None,
+ modalities=None,
+ prediction=None,
+ audio=None,
+ presence_penalty=None,
+ frequency_penalty=None,
+ logit_bias=None,
+ user=None,
+ custom_llm_provider="",
+ response_format=None,
+ seed=None,
+ tools=None,
+ tool_choice=None,
+ max_retries=None,
+ logprobs=None,
+ top_logprobs=None,
+ extra_headers=None,
+ api_version=None,
+ parallel_tool_calls=None,
+ drop_params=None,
+ allowed_openai_params: Optional[List[str]] = None,
+ reasoning_effort=None,
+ additional_drop_params=None,
+ messages: Optional[List[AllMessageValues]] = None,
+ thinking: Optional[AnthropicThinkingParam] = None,
+ web_search_options: Optional[OpenAIWebSearchOptions] = None,
+ **kwargs,
+):
+ passed_params = locals().copy()
+ special_params = passed_params.pop("kwargs")
+ non_default_params = pre_process_non_default_params(
+ passed_params=passed_params,
+ special_params=special_params,
+ custom_llm_provider=custom_llm_provider,
+ additional_drop_params=additional_drop_params,
+ model=model,
+ )
+ optional_params = pre_process_optional_params(
+ passed_params=passed_params,
+ non_default_params=non_default_params,
+ custom_llm_provider=custom_llm_provider,
+ )
provider_config: Optional[BaseConfig] = None
if custom_llm_provider is not None and custom_llm_provider in [
provider.value for provider in LlmProviders
@@ -2887,38 +2921,6 @@ def get_optional_params( # noqa: PLR0915
model=model, provider=LlmProviders(custom_llm_provider)
)
- if "response_format" in non_default_params:
- if provider_config is not None:
- non_default_params[
- "response_format"
- ] = provider_config.get_json_schema_from_pydantic_object(
- response_format=non_default_params["response_format"]
- )
- else:
- non_default_params["response_format"] = type_to_response_format_param(
- response_format=non_default_params["response_format"]
- )
-
- if "tools" in non_default_params and isinstance(
- non_default_params, list
- ): # fixes https://github.com/BerriAI/litellm/issues/4933
- tools = non_default_params["tools"]
- for (
- tool
- ) in (
- tools
- ): # clean out 'additionalProperties = False'. Causes vertexai/gemini OpenAI API Schema errors - https://github.com/langchain-ai/langchainjs/issues/5240
- tool_function = tool.get("function", {})
- parameters = tool_function.get("parameters", None)
- if parameters is not None:
- new_parameters = copy.deepcopy(parameters)
- if (
- "additionalProperties" in new_parameters
- and new_parameters["additionalProperties"] is False
- ):
- new_parameters.pop("additionalProperties", None)
- tool_function["parameters"] = new_parameters
-
def _check_valid_arg(supported_params: List[str]):
"""
Check if the params passed to completion() are supported by the provider
@@ -3608,7 +3610,7 @@ def get_optional_params( # noqa: PLR0915
):
extra_body = passed_params.pop("extra_body", {})
for k in passed_params.keys():
- if k not in default_params.keys():
+ if k not in DEFAULT_CHAT_COMPLETION_PARAM_VALUES.keys():
extra_body[k] = passed_params[k]
optional_params.setdefault("extra_body", {})
optional_params["extra_body"] = {
@@ -3621,9 +3623,9 @@ def get_optional_params( # noqa: PLR0915
)
else:
# if user passed in non-default kwargs for specific providers/models, pass them along
- for k in passed_params.keys():
- if k not in default_params.keys():
- optional_params[k] = passed_params[k]
+ optional_params = add_provider_specific_params_to_optional_params(
+ optional_params=optional_params, passed_params=passed_params
+ )
print_verbose(f"Final returned optional params: {optional_params}")
optional_params = _apply_openai_param_overrides(
optional_params=optional_params,
@@ -3633,6 +3635,18 @@ def get_optional_params( # noqa: PLR0915
return optional_params
+def add_provider_specific_params_to_optional_params(
+    optional_params: dict, passed_params: dict
+) -> dict:
+    """
+    Copy each passed param that is not a known chat-completion param into optional_params (in place) and return it.
+    """
+    for name, value in passed_params.items():
+        if name not in DEFAULT_CHAT_COMPLETION_PARAM_VALUES:
+            optional_params[name] = value
+    return optional_params
+
+
def _apply_openai_param_overrides(
optional_params: dict, non_default_params: dict, allowed_openai_params: list
):
@@ -3649,31 +3663,6 @@ def _apply_openai_param_overrides(
def get_non_default_params(passed_params: dict) -> dict:
- default_params = {
- "functions": None,
- "function_call": None,
- "temperature": None,
- "top_p": None,
- "n": None,
- "stream": None,
- "stream_options": None,
- "stop": None,
- "max_tokens": None,
- "presence_penalty": None,
- "frequency_penalty": None,
- "logit_bias": None,
- "user": None,
- "model": None,
- "custom_llm_provider": "",
- "response_format": None,
- "seed": None,
- "tools": None,
- "tool_choice": None,
- "max_retries": None,
- "logprobs": None,
- "top_logprobs": None,
- "extra_headers": None,
- }
# filter out those parameters that were passed with non-default values
non_default_params = {
k: v
@@ -3681,8 +3670,8 @@ def get_non_default_params(passed_params: dict) -> dict:
if (
k != "model"
and k != "custom_llm_provider"
- and k in default_params
- and v != default_params[k]
+ and k in DEFAULT_CHAT_COMPLETION_PARAM_VALUES
+ and v != DEFAULT_CHAT_COMPLETION_PARAM_VALUES[k]
)
}
@@ -6604,6 +6593,7 @@ class ProviderConfigManager:
elif LlmProviders.OLLAMA == provider or LlmProviders.OLLAMA_CHAT == provider:
# Dynamic model listing for Ollama server
from litellm.llms.ollama.common_utils import OllamaModelInfo
+
return OllamaModelInfo()
elif LlmProviders.VLLM == provider:
from litellm.llms.vllm.common_utils import (
diff --git a/tests/litellm/test_utils.py b/tests/litellm/test_utils.py
index 7a2b6aa010..eccd5b798d 100644
--- a/tests/litellm/test_utils.py
+++ b/tests/litellm/test_utils.py
@@ -701,6 +701,7 @@ def test_supports_computer_use_utility():
Tests the litellm.utils.supports_computer_use utility function.
"""
from litellm.utils import supports_computer_use
+
# Ensure LITELLM_LOCAL_MODEL_COST_MAP is set for consistent test behavior,
# as supports_computer_use relies on get_model_info.
# This also requires litellm.model_cost to be populated.
@@ -708,11 +709,13 @@ def test_supports_computer_use_utility():
original_model_cost = getattr(litellm, "model_cost", None)
os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
- litellm.model_cost = litellm.get_model_cost_map(url="") # Load with local/backup
-
+ litellm.model_cost = litellm.get_model_cost_map(url="") # Load with local/backup
+
try:
# Test a model known to support computer_use from backup JSON
- supports_cu_anthropic = supports_computer_use(model="anthropic/claude-3-7-sonnet-20250219")
+ supports_cu_anthropic = supports_computer_use(
+ model="anthropic/claude-3-7-sonnet-20250219"
+ )
assert supports_cu_anthropic is True
# Test a model known not to have the flag or set to false (defaults to False via get_model_info)
@@ -724,12 +727,13 @@ def test_supports_computer_use_utility():
del os.environ["LITELLM_LOCAL_MODEL_COST_MAP"]
else:
os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = original_env_var
-
+
if original_model_cost is not None:
litellm.model_cost = original_model_cost
elif hasattr(litellm, "model_cost"):
delattr(litellm, "model_cost")
+
def test_get_model_info_shows_supports_computer_use():
"""
Tests if 'supports_computer_use' is correctly retrieved by get_model_info.
@@ -739,13 +743,13 @@ def test_get_model_info_shows_supports_computer_use():
os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
# Ensure litellm.model_cost is loaded, relying on the backup mechanism if primary fails
# as per previous debugging.
- litellm.model_cost = litellm.get_model_cost_map(url="")
-
+ litellm.model_cost = litellm.get_model_cost_map(url="")
+
# This model should have 'supports_computer_use': True in the backup JSON
model_known_to_support_computer_use = "claude-3-7-sonnet-20250219"
info = litellm.get_model_info(model_known_to_support_computer_use)
print(f"Info for {model_known_to_support_computer_use}: {info}")
-
+
# After the fix in utils.py, this should now be present and True
assert info.get("supports_computer_use") is True
@@ -754,4 +758,57 @@ def test_get_model_info_shows_supports_computer_use():
model_known_not_to_support_computer_use = "gpt-3.5-turbo"
info_gpt = litellm.get_model_info(model_known_not_to_support_computer_use)
print(f"Info for {model_known_not_to_support_computer_use}: {info_gpt}")
- assert info_gpt.get("supports_computer_use") is False # Expecting False due to the default in ModelInfoBase
\ No newline at end of file
+ assert (
+ info_gpt.get("supports_computer_use") is False
+ ) # Expecting False due to the default in ModelInfoBase
+
+
+@pytest.mark.parametrize(
+    "model, custom_llm_provider",
+    [
+        ("gpt-3.5-turbo", "openai"),
+        ("anthropic.claude-3-7-sonnet-20250219-v1:0", "bedrock"),
+        ("gemini-1.5-pro", "vertex_ai"),
+    ],
+)
+def test_pre_process_non_default_params(model, custom_llm_provider):
+    """A pydantic response_format must come back as the same json_schema dict for each provider."""
+    from pydantic import BaseModel
+
+    from litellm.utils import pre_process_non_default_params
+
+    class ResponseFormat(BaseModel):
+        x: str
+        y: str
+
+    passed_params = {
+        "model": model,  # keep the fixture in step with the parametrized model
+        "response_format": ResponseFormat,
+    }
+    processed_non_default_params = pre_process_non_default_params(
+        model=model,
+        passed_params=passed_params,
+        special_params={},
+        custom_llm_provider=custom_llm_provider,
+        additional_drop_params=None,
+    )
+    print(processed_non_default_params)
+    assert processed_non_default_params == {
+        "response_format": {
+            "type": "json_schema",
+            "json_schema": {
+                "schema": {
+                    "properties": {
+                        "x": {"title": "X", "type": "string"},
+                        "y": {"title": "Y", "type": "string"},
+                    },
+                    "required": ["x", "y"],
+                    "title": "ResponseFormat",
+                    "type": "object",
+                    "additionalProperties": False,
+                },
+                "name": "ResponseFormat",
+                "strict": True,
+            },
+        }
+    }
diff --git a/tests/llm_translation/test_optional_params.py b/tests/llm_translation/test_optional_params.py
index b29d20ba40..4ce18be8ad 100644
--- a/tests/llm_translation/test_optional_params.py
+++ b/tests/llm_translation/test_optional_params.py
@@ -460,6 +460,26 @@ def test_dynamic_drop_params_e2e():
print(mock_response.call_args.kwargs["data"])
assert "response_format" not in mock_response.call_args.kwargs["data"]
+def test_dynamic_pass_additional_params():
+    """An unknown kwarg (custom_param) must reach the request body; api_key must not."""
+    with patch(
+        "litellm.llms.custom_httpx.http_handler.HTTPHandler.post", new=MagicMock()
+    ) as mock_response:
+        try:
+            litellm.completion(
+                model="command-r",
+                messages=[{"role": "user", "content": "Hey, how's it going?"}],
+                custom_param="test",
+                api_key="my-custom-key",
+            )
+        except Exception as e:  # best-effort: post is a MagicMock, so handling its return value presumably raises
+            print(f"Error occurred: {e}")
+
+        mock_response.assert_called_once()
+        print(mock_response.call_args.kwargs["data"])
+        assert "custom_param" in mock_response.call_args.kwargs["data"]
+        assert "api_key" not in mock_response.call_args.kwargs["data"]
+
@pytest.mark.parametrize(
"model, provider, should_drop",
@@ -559,6 +579,7 @@ def test_dynamic_drop_additional_params_e2e():
additional_drop_params=["response_format"],
)
except Exception as e:
+ print(f"Error occurred: {e}")
pass
mock_response.assert_called_once()
diff --git a/tests/local_testing/test_ollama.py b/tests/local_testing/test_ollama.py
index 09c50315e0..2c4ceb3baf 100644
--- a/tests/local_testing/test_ollama.py
+++ b/tests/local_testing/test_ollama.py
@@ -32,12 +32,14 @@ def test_get_ollama_params():
temperature=0.5,
stream=True,
)
- print("Converted params", converted_params)
- assert converted_params == {
+ expected_params = {
"num_predict": 20,
"stream": True,
"temperature": 0.5,
- }, f"{converted_params} != {'num_predict': 20, 'stream': True, 'temperature': 0.5}"
+ }
+ print("Converted params", converted_params)
+ for key in expected_params.keys():
+ assert expected_params[key] == converted_params[key], f"{converted_params} != {expected_params}"
except Exception as e:
pytest.fail(f"Error occurred: {e}")
diff --git a/tests/logging_callback_tests/langfuse_expected_request_body/completion_with_router.json b/tests/logging_callback_tests/langfuse_expected_request_body/completion_with_router.json
new file mode 100644
index 0000000000..e5059d873a
--- /dev/null
+++ b/tests/logging_callback_tests/langfuse_expected_request_body/completion_with_router.json
@@ -0,0 +1,87 @@
+{
+ "batch": [
+ {
+ "id": "45eb9b25-605c-4c4a-b2b3-8241e079cd31",
+ "type": "trace-create",
+ "body": {
+ "id": "litellm-test-32702f3d-8a1c-4912-a3d6-286e59a9c568",
+ "timestamp": "2025-05-24T17:01:19.408179Z",
+ "name": "litellm-acompletion",
+ "input": {
+ "messages": [
+ {
+ "role": "user",
+ "content": "Hello!"
+ }
+ ]
+ },
+ "tags": []
+ },
+ "timestamp": "2025-05-24T17:01:19.408284Z"
+ },
+ {
+ "id": "9f5e9b7d-0cea-4776-b4b9-5c2e8f4bad3c",
+ "type": "generation-create",
+ "body": {
+ "traceId": "litellm-test-32702f3d-8a1c-4912-a3d6-286e59a9c568",
+ "name": "litellm-acompletion",
+ "startTime": "2025-05-24T10:01:19.142356-07:00",
+ "metadata": {
+ "model_group": "gpt-3.5-turbo",
+ "model_group_size": 1,
+ "deployment": "gpt-3.5-turbo",
+ "model_info": {
+ "id": "0f1cd8f9e6a22e499303d479486395563ea04decade83fe7334dc2f079a857c2",
+ "db_model": false
+ },
+ "api_base": null,
+ "hidden_params": {
+ "model_id": "0f1cd8f9e6a22e499303d479486395563ea04decade83fe7334dc2f079a857c2",
+ "cache_key": null,
+ "api_base": "https://api.openai.com",
+ "response_cost": 3.5e-05,
+ "additional_headers": {},
+ "litellm_overhead_time_ms": null,
+ "batch_models": null,
+ "litellm_model_name": "gpt-3.5-turbo",
+ "usage_object": null
+ },
+ "litellm_response_cost": 3.5e-05,
+ "cache_hit": false,
+ "requester_metadata": {}
+ },
+ "input": {
+ "messages": [
+ {
+ "role": "user",
+ "content": "Hello!"
+ }
+ ]
+ },
+ "level": "DEFAULT",
+ "id": "time-10-01-19-142356_chatcmpl-16b215b7-e51e-47b0-8fe5-9dd6f226fda1",
+ "endTime": "2025-05-24T10:01:19.406531-07:00",
+ "completionStartTime": "2025-05-24T10:01:19.406531-07:00",
+ "model": "gpt-3.5-turbo",
+ "modelParameters": {
+ "stream": false,
+ "max_retries": 0
+ },
+ "usage": {
+ "input": 10,
+ "output": 10,
+ "unit": "TOKENS",
+ "totalCost": 3.5e-05
+ }
+ },
+ "timestamp": "2025-05-24T17:01:19.408586Z"
+ }
+ ],
+ "metadata": {
+ "batch_size": 2,
+ "sdk_integration": "litellm",
+ "sdk_name": "python",
+ "sdk_version": "2.44.1",
+ "public_key": "pk-lf-3bfc4db9-217f-48e9-92e0-142566e3c204"
+ }
+}
diff --git a/tests/logging_callback_tests/test_langfuse_e2e_test.py b/tests/logging_callback_tests/test_langfuse_e2e_test.py
index b46d8764dd..12aa2f38bd 100644
--- a/tests/logging_callback_tests/test_langfuse_e2e_test.py
+++ b/tests/logging_callback_tests/test_langfuse_e2e_test.py
@@ -381,3 +381,42 @@ class TestLangfuseLogging:
await self._verify_langfuse_call(
setup["mock_post"], "completion_with_no_choices.json", setup["trace_id"]
)
+
+ @pytest.mark.asyncio
+ async def test_langfuse_logging_with_router(self, mock_setup):
+ """Run one Router.acompletion call with a mocked response and verify the Langfuse request against completion_with_router.json."""
+ setup = await mock_setup # Await the fixture
+ litellm._turn_on_debug() # NOTE(review): presumably enables litellm debug logging; it is not switched back off in this test
+ router = litellm.Router(
+ model_list=[
+ {
+ "model_name": "gpt-3.5-turbo",
+ "litellm_params": {
+ "model": "gpt-3.5-turbo",
+ "mock_response": "Hello! How can I assist you today?",
+ "api_key": "test_api_key",
+ }
+ }
+ ]
+ )
+ with patch("httpx.Client.post", setup["mock_post"]): # replace httpx.Client.post with the fixture's mock for the duration of the call
+ mock_response = litellm.ModelResponse( # canned response: no choices, 10 prompt + 10 completion tokens, dumped to a plain dict below
+ choices=[],
+ usage=litellm.Usage(
+ prompt_tokens=10,
+ completion_tokens=10,
+ total_tokens=20,
+ ),
+ model="gpt-3.5-turbo",
+ object="chat.completion",
+ created=1723081200,
+ ).model_dump()
+ await router.acompletion(
+ model="gpt-3.5-turbo",
+ messages=[{"role": "user", "content": "Hello!"}],
+ mock_response=mock_response,
+ metadata={"trace_id": setup["trace_id"]},
+ )
+ await self._verify_langfuse_call(
+ setup["mock_post"], "completion_with_router.json", setup["trace_id"]
+ )
\ No newline at end of file