diff --git a/enterprise/dist/litellm_enterprise-0.1.6-py3-none-any.whl b/enterprise/dist/litellm_enterprise-0.1.6-py3-none-any.whl new file mode 100644 index 0000000000..c212c7e5a3 Binary files /dev/null and b/enterprise/dist/litellm_enterprise-0.1.6-py3-none-any.whl differ diff --git a/enterprise/dist/litellm_enterprise-0.1.6.tar.gz b/enterprise/dist/litellm_enterprise-0.1.6.tar.gz new file mode 100644 index 0000000000..698a9da209 Binary files /dev/null and b/enterprise/dist/litellm_enterprise-0.1.6.tar.gz differ diff --git a/litellm/constants.py b/litellm/constants.py index 43a8eba3c1..fba57a2bf9 100644 --- a/litellm/constants.py +++ b/litellm/constants.py @@ -282,6 +282,46 @@ OPENAI_CHAT_COMPLETION_PARAMS = [ "web_search_options", ] +DEFAULT_CHAT_COMPLETION_PARAM_VALUES = { + "functions": None, + "function_call": None, + "temperature": None, + "top_p": None, + "n": None, + "stream": None, + "stream_options": None, + "stop": None, + "max_tokens": None, + "max_completion_tokens": None, + "modalities": None, + "prediction": None, + "audio": None, + "presence_penalty": None, + "frequency_penalty": None, + "logit_bias": None, + "user": None, + "model": None, + "custom_llm_provider": "", + "response_format": None, + "seed": None, + "tools": None, + "tool_choice": None, + "max_retries": None, + "logprobs": None, + "top_logprobs": None, + "extra_headers": None, + "api_version": None, + "parallel_tool_calls": None, + "drop_params": None, + "allowed_openai_params": None, + "additional_drop_params": None, + "messages": None, + "reasoning_effort": None, + "thinking": None, + "web_search_options": None, +} + + openai_compatible_endpoints: List = [ "api.perplexity.ai", "api.endpoints.anyscale.com/v1", diff --git a/litellm/main.py b/litellm/main.py index 44611e203f..7f1ecd737f 100644 --- a/litellm/main.py +++ b/litellm/main.py @@ -86,6 +86,7 @@ from litellm.utils import ( ProviderConfigManager, Usage, add_openai_metadata, + add_provider_specific_params_to_optional_params, async_mock_completion_streaming_obj, convert_to_model_response_object, create_pretrained_tokenizer, @@ -99,6 +100,7 @@ from litellm.utils import ( get_secret, get_standard_openai_params, mock_completion_streaming_obj, + pre_process_non_default_params, read_config_args, supports_httpx_timeout, token_counter, @@ -1140,42 +1142,55 @@ def completion( # type: ignore # noqa: PLR0915 if dynamic_api_key is not None: api_key = dynamic_api_key # check if user passed in any of the OpenAI optional params - optional_params = get_optional_params( - functions=functions, - function_call=function_call, - temperature=temperature, - top_p=top_p, - n=n, - stream=stream, - stream_options=stream_options, - stop=stop, - max_tokens=max_tokens, - max_completion_tokens=max_completion_tokens, - modalities=modalities, - prediction=prediction, - audio=audio, - presence_penalty=presence_penalty, - frequency_penalty=frequency_penalty, - logit_bias=logit_bias, - user=user, + optional_param_args = { + "functions": functions, + "function_call": function_call, + "temperature": temperature, + "top_p": top_p, + "n": n, + "stream": stream, + "stream_options": stream_options, + "stop": stop, + "max_tokens": max_tokens, + "max_completion_tokens": max_completion_tokens, + "modalities": modalities, + "prediction": prediction, + "audio": audio, + "presence_penalty": presence_penalty, + "frequency_penalty": frequency_penalty, + "logit_bias": logit_bias, + "user": user, # params to identify the model + "model": model, + "custom_llm_provider": custom_llm_provider, + "response_format": response_format, + "seed": seed, + "tools": tools, + "tool_choice": tool_choice, + "max_retries": max_retries, + "logprobs": logprobs, + "top_logprobs": top_logprobs, + "api_version": api_version, + "parallel_tool_calls": parallel_tool_calls, + "messages": messages, + "reasoning_effort": reasoning_effort, + "thinking": thinking, + "web_search_options": web_search_options, + "allowed_openai_params": kwargs.get("allowed_openai_params"), + } + optional_params = get_optional_params( + **optional_param_args, **non_default_params + ) + processed_non_default_params = pre_process_non_default_params( model=model, + passed_params=optional_param_args, + special_params=non_default_params, custom_llm_provider=custom_llm_provider, - response_format=response_format, - seed=seed, - tools=tools, - tool_choice=tool_choice, - max_retries=max_retries, - logprobs=logprobs, - top_logprobs=top_logprobs, - api_version=api_version, - parallel_tool_calls=parallel_tool_calls, - messages=messages, - reasoning_effort=reasoning_effort, - thinking=thinking, - web_search_options=web_search_options, - allowed_openai_params=kwargs.get("allowed_openai_params"), - **non_default_params, + additional_drop_params=kwargs.get("additional_drop_params"), + ) + processed_non_default_params = add_provider_specific_params_to_optional_params( + optional_params=processed_non_default_params, + passed_params=non_default_params, ) if litellm.add_function_to_prompt and optional_params.get( @@ -1241,10 +1256,7 @@ def completion( # type: ignore # noqa: PLR0915 cast(LiteLLMLoggingObj, logging).update_environment_variables( model=model, user=user, - optional_params={ - **standard_openai_params, - **non_default_params, - }, # [IMPORTANT] - using standard_openai_params ensures consistent params logged to langfuse for finetuning / eval datasets. + optional_params=processed_non_default_params, # [IMPORTANT] - using processed_non_default_params ensures consistent params logged to langfuse for finetuning / eval datasets. litellm_params=litellm_params, custom_llm_provider=custom_llm_provider, ) @@ -4027,9 +4039,7 @@ def embedding( # noqa: PLR0915 client=client, aembedding=aembedding, ) - elif ( - custom_llm_provider in litellm._custom_providers - ): + elif custom_llm_provider in litellm._custom_providers: custom_handler: Optional[CustomLLM] = None for item in litellm.custom_provider_map: if item["provider"] == custom_llm_provider: @@ -4040,7 +4050,11 @@ def embedding( # noqa: PLR0915 model=model, custom_llm_provider=custom_llm_provider ) - handler_fn = custom_handler.embedding if not aembedding else custom_handler.aembedding + handler_fn = ( + custom_handler.embedding + if not aembedding + else custom_handler.aembedding + ) response = handler_fn( model=model, @@ -4049,7 +4063,7 @@ def embedding( # noqa: PLR0915 optional_params=optional_params, model_response=EmbeddingResponse(), print_verbose=print_verbose, - litellm_params=litellm_params + litellm_params=litellm_params, ) else: raise LiteLLMUnknownProvider( diff --git a/litellm/proxy/_experimental/out/onboarding.html b/litellm/proxy/_experimental/out/onboarding.html deleted file mode 100644 index 82cebe3a37..0000000000 --- a/litellm/proxy/_experimental/out/onboarding.html +++ /dev/null @@ -1 +0,0 @@ -LiteLLM Dashboard \ No newline at end of file diff --git a/litellm/proxy/_new_secret_config.yaml b/litellm/proxy/_new_secret_config.yaml index 5fa3e618c2..58b40e8f0c 100644 --- a/litellm/proxy/_new_secret_config.yaml +++ b/litellm/proxy/_new_secret_config.yaml @@ -79,7 +79,7 @@ model_list: api_key: os.environ/OPENAI_API_KEY litellm_settings: - store_audit_logs: true + callbacks: ["langfuse"] general_settings: store_model_in_db: true diff --git a/litellm/utils.py b/litellm/utils.py index 65d825c979..f4f3707ebf 100644 --- a/litellm/utils.py +++ b/litellm/utils.py @@ -63,6 +63,7 @@ from litellm.caching._internal_lru_cache import lru_cache_wrapper from litellm.caching.caching import DualCache from litellm.caching.caching_handler import CachingHandlerResponse, LLMCachingHandler from litellm.constants import ( + DEFAULT_CHAT_COMPLETION_PARAM_VALUES, DEFAULT_MAX_LRU_CACHE_SIZE, DEFAULT_TRIM_RATIO, FUNCTION_DEFINITION_TOKEN_COUNT, @@ -2671,50 +2672,18 @@ def _remove_unsupported_params( return non_default_params -def get_optional_params( # noqa: PLR0915 - # use the openai defaults - # https://platform.openai.com/docs/api-reference/chat/create +def pre_process_non_default_params( + passed_params: dict, + special_params: dict, + custom_llm_provider: str, + additional_drop_params: Optional[List[str]], model: str, - functions=None, - function_call=None, - temperature=None, - top_p=None, - n=None, - stream=False, - stream_options=None, - stop=None, - max_tokens=None, - max_completion_tokens=None, - modalities=None, - prediction=None, - audio=None, - presence_penalty=None, - frequency_penalty=None, - logit_bias=None, - user=None, - custom_llm_provider="", - response_format=None, - seed=None, - tools=None, - tool_choice=None, - max_retries=None, - logprobs=None, - top_logprobs=None, - extra_headers=None, - api_version=None, - parallel_tool_calls=None, - drop_params=None, - allowed_openai_params: Optional[List[str]] = None, - reasoning_effort=None, - additional_drop_params=None, - messages: Optional[List[AllMessageValues]] = None, - thinking: Optional[AnthropicThinkingParam] = None, - web_search_options: Optional[OpenAIWebSearchOptions] = None, - **kwargs, -): +) -> dict: + """ + Pre-process non-default params to a standardized format + """ # retrieve all parameters passed to the function - passed_params = locals().copy() - special_params = passed_params.pop("kwargs") + for k, v in special_params.items(): if k.startswith("aws_") and ( custom_llm_provider != "bedrock" @@ -2731,6 +2700,72 @@ def get_optional_params( # noqa: PLR0915 continue passed_params[k] = v + # filter out those parameters that were passed with non-default values + + non_default_params = { + k: v + for k, v in passed_params.items() + if ( + k != "model" + and k != "custom_llm_provider" + and k != "api_version" + and k != "drop_params" + and k != "allowed_openai_params" + and k != "additional_drop_params" + and k != "messages" + and k in DEFAULT_CHAT_COMPLETION_PARAM_VALUES + and v != DEFAULT_CHAT_COMPLETION_PARAM_VALUES[k] + and _should_drop_param(k=k, additional_drop_params=additional_drop_params) + is False + ) + } + + provider_config: Optional[BaseConfig] = None + if custom_llm_provider is not None and custom_llm_provider in [ + provider.value for provider in LlmProviders + ]: + provider_config = ProviderConfigManager.get_provider_chat_config( + model=model, provider=LlmProviders(custom_llm_provider) + ) + + if "response_format" in non_default_params: + if provider_config is not None: + non_default_params[ + "response_format" + ] = provider_config.get_json_schema_from_pydantic_object( + response_format=non_default_params["response_format"] + ) + else: + non_default_params["response_format"] = type_to_response_format_param( + response_format=non_default_params["response_format"] + ) + + if "tools" in non_default_params and isinstance( + non_default_params, list + ): # fixes https://github.com/BerriAI/litellm/issues/4933 + tools = non_default_params["tools"] + for ( + tool + ) in ( + tools + ): # clean out 'additionalProperties = False'. Causes vertexai/gemini OpenAI API Schema errors - https://github.com/langchain-ai/langchainjs/issues/5240 + tool_function = tool.get("function", {}) + parameters = tool_function.get("parameters", None) + if parameters is not None: + new_parameters = copy.deepcopy(parameters) + if ( + "additionalProperties" in new_parameters + and new_parameters["additionalProperties"] is False + ): + new_parameters.pop("additionalProperties", None) + tool_function["parameters"] = new_parameters + return non_default_params + + +def pre_process_optional_params( + passed_params: dict, non_default_params: dict, custom_llm_provider: str +) -> dict: + """For .completion(), preprocess optional params""" optional_params: Dict = {} common_auth_dict = litellm.common_cloud_provider_auth_params @@ -2761,65 +2796,6 @@ def get_optional_params( # noqa: PLR0915 non_default_params=passed_params, optional_params=optional_params ) - default_params = { - "functions": None, - "function_call": None, - "temperature": None, - "top_p": None, - "n": None, - "stream": None, - "stream_options": None, - "stop": None, - "max_tokens": None, - "max_completion_tokens": None, - "modalities": None, - "prediction": None, - "audio": None, - "presence_penalty": None, - "frequency_penalty": None, - "logit_bias": None, - "user": None, - "model": None, - "custom_llm_provider": "", - "response_format": None, - "seed": None, - "tools": None, - "tool_choice": None, - "max_retries": None, - "logprobs": None, - "top_logprobs": None, - "extra_headers": None, - "api_version": None, - "parallel_tool_calls": None, - "drop_params": None, - "allowed_openai_params": None, - "additional_drop_params": None, - "messages": None, - "reasoning_effort": None, - "thinking": None, - "web_search_options": None, - } - - # filter out those parameters that were passed with non-default values - - non_default_params = { - k: v - for k, v in passed_params.items() - if ( - k != "model" - and k != "custom_llm_provider" - and k != "api_version" - and k != "drop_params" - and k != "allowed_openai_params" - and k != "additional_drop_params" - and k != "messages" - and k in default_params - and v != default_params[k] - and _should_drop_param(k=k, additional_drop_params=additional_drop_params) - is False - ) - } - ## raise exception if function calling passed in for a provider that doesn't support it if ( "functions" in non_default_params @@ -2879,6 +2855,64 @@ def get_optional_params( # noqa: PLR0915 message=f"Function calling is not supported by {custom_llm_provider}.", ) + return optional_params + + +def get_optional_params( # noqa: PLR0915 + # use the openai defaults + # https://platform.openai.com/docs/api-reference/chat/create + model: str, + functions=None, + function_call=None, + temperature=None, + top_p=None, + n=None, + stream=False, + stream_options=None, + stop=None, + max_tokens=None, + max_completion_tokens=None, + modalities=None, + prediction=None, + audio=None, + presence_penalty=None, + frequency_penalty=None, + logit_bias=None, + user=None, + custom_llm_provider="", + response_format=None, + seed=None, + tools=None, + tool_choice=None, + max_retries=None, + logprobs=None, + top_logprobs=None, + extra_headers=None, + api_version=None, + parallel_tool_calls=None, + drop_params=None, + allowed_openai_params: Optional[List[str]] = None, + reasoning_effort=None, + additional_drop_params=None, + messages: Optional[List[AllMessageValues]] = None, + thinking: Optional[AnthropicThinkingParam] = None, + web_search_options: Optional[OpenAIWebSearchOptions] = None, + **kwargs, +): + passed_params = locals().copy() + special_params = passed_params.pop("kwargs") + non_default_params = pre_process_non_default_params( + passed_params=passed_params, + special_params=special_params, + custom_llm_provider=custom_llm_provider, + additional_drop_params=additional_drop_params, + model=model, + ) + optional_params = pre_process_optional_params( + passed_params=passed_params, + non_default_params=non_default_params, + custom_llm_provider=custom_llm_provider, + ) provider_config: Optional[BaseConfig] = None if custom_llm_provider is not None and custom_llm_provider in [ provider.value for provider in LlmProviders @@ -2887,38 +2921,6 @@ def get_optional_params( # noqa: PLR0915 model=model, provider=LlmProviders(custom_llm_provider) ) - if "response_format" in non_default_params: - if provider_config is not None: - non_default_params[ - "response_format" - ] = provider_config.get_json_schema_from_pydantic_object( - response_format=non_default_params["response_format"] - ) - else: - non_default_params["response_format"] = type_to_response_format_param( - response_format=non_default_params["response_format"] - ) - - if "tools" in non_default_params and isinstance( - non_default_params, list - ): # fixes https://github.com/BerriAI/litellm/issues/4933 - tools = non_default_params["tools"] - for ( - tool - ) in ( - tools - ): # clean out 'additionalProperties = False'. Causes vertexai/gemini OpenAI API Schema errors - https://github.com/langchain-ai/langchainjs/issues/5240 - tool_function = tool.get("function", {}) - parameters = tool_function.get("parameters", None) - if parameters is not None: - new_parameters = copy.deepcopy(parameters) - if ( - "additionalProperties" in new_parameters - and new_parameters["additionalProperties"] is False - ): - new_parameters.pop("additionalProperties", None) - tool_function["parameters"] = new_parameters - def _check_valid_arg(supported_params: List[str]): """ Check if the params passed to completion() are supported by the provider @@ -3608,7 +3610,7 @@ def get_optional_params( # noqa: PLR0915 ): extra_body = passed_params.pop("extra_body", {}) for k in passed_params.keys(): - if k not in default_params.keys(): + if k not in DEFAULT_CHAT_COMPLETION_PARAM_VALUES.keys(): extra_body[k] = passed_params[k] optional_params.setdefault("extra_body", {}) optional_params["extra_body"] = { @@ -3621,9 +3623,9 @@ def get_optional_params( # noqa: PLR0915 ) else: # if user passed in non-default kwargs for specific providers/models, pass them along - for k in passed_params.keys(): - if k not in default_params.keys(): - optional_params[k] = passed_params[k] + optional_params = add_provider_specific_params_to_optional_params( + optional_params=optional_params, passed_params=passed_params + ) print_verbose(f"Final returned optional params: {optional_params}") optional_params = _apply_openai_param_overrides( optional_params=optional_params, @@ -3633,6 +3635,18 @@ def get_optional_params( # noqa: PLR0915 return optional_params +def add_provider_specific_params_to_optional_params( + optional_params: dict, passed_params: dict +) -> dict: + """ + Add provider specific params to optional_params + """ + for k in passed_params.keys(): + if k not in DEFAULT_CHAT_COMPLETION_PARAM_VALUES.keys(): + optional_params[k] = passed_params[k] + return optional_params + + def _apply_openai_param_overrides( optional_params: dict, non_default_params: dict, allowed_openai_params: list ): @@ -3649,31 +3663,6 @@ def _apply_openai_param_overrides( def get_non_default_params(passed_params: dict) -> dict: - default_params = { - "functions": None, - "function_call": None, - "temperature": None, - "top_p": None, - "n": None, - "stream": None, - "stream_options": None, - "stop": None, - "max_tokens": None, - "presence_penalty": None, - "frequency_penalty": None, - "logit_bias": None, - "user": None, - "model": None, - "custom_llm_provider": "", - "response_format": None, - "seed": None, - "tools": None, - "tool_choice": None, - "max_retries": None, - "logprobs": None, - "top_logprobs": None, - "extra_headers": None, - } # filter out those parameters that were passed with non-default values non_default_params = { k: v @@ -3681,8 +3670,8 @@ def get_non_default_params(passed_params: dict) -> dict: if ( k != "model" and k != "custom_llm_provider" - and k in default_params - and v != default_params[k] + and k in DEFAULT_CHAT_COMPLETION_PARAM_VALUES + and v != DEFAULT_CHAT_COMPLETION_PARAM_VALUES[k] ) } @@ -6604,6 +6593,7 @@ class ProviderConfigManager: elif LlmProviders.OLLAMA == provider or LlmProviders.OLLAMA_CHAT == provider: # Dynamic model listing for Ollama server from litellm.llms.ollama.common_utils import OllamaModelInfo + return OllamaModelInfo() elif LlmProviders.VLLM == provider: from litellm.llms.vllm.common_utils import ( diff --git a/tests/litellm/test_utils.py b/tests/litellm/test_utils.py index 7a2b6aa010..eccd5b798d 100644 --- a/tests/litellm/test_utils.py +++ b/tests/litellm/test_utils.py @@ -701,6 +701,7 @@ def test_supports_computer_use_utility(): Tests the litellm.utils.supports_computer_use utility function. """ from litellm.utils import supports_computer_use + # Ensure LITELLM_LOCAL_MODEL_COST_MAP is set for consistent test behavior, # as supports_computer_use relies on get_model_info. # This also requires litellm.model_cost to be populated. @@ -708,11 +709,13 @@ def test_supports_computer_use_utility(): original_model_cost = getattr(litellm, "model_cost", None) os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True" - litellm.model_cost = litellm.get_model_cost_map(url="") # Load with local/backup - + litellm.model_cost = litellm.get_model_cost_map(url="") # Load with local/backup + try: # Test a model known to support computer_use from backup JSON - supports_cu_anthropic = supports_computer_use(model="anthropic/claude-3-7-sonnet-20250219") + supports_cu_anthropic = supports_computer_use( + model="anthropic/claude-3-7-sonnet-20250219" + ) assert supports_cu_anthropic is True # Test a model known not to have the flag or set to false (defaults to False via get_model_info) @@ -724,12 +727,13 @@ def test_supports_computer_use_utility(): del os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] else: os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = original_env_var - + if original_model_cost is not None: litellm.model_cost = original_model_cost elif hasattr(litellm, "model_cost"): delattr(litellm, "model_cost") + def test_get_model_info_shows_supports_computer_use(): """ Tests if 'supports_computer_use' is correctly retrieved by get_model_info. @@ -739,13 +743,13 @@ def test_get_model_info_shows_supports_computer_use(): os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True" # Ensure litellm.model_cost is loaded, relying on the backup mechanism if primary fails # as per previous debugging. - litellm.model_cost = litellm.get_model_cost_map(url="") - + litellm.model_cost = litellm.get_model_cost_map(url="") + # This model should have 'supports_computer_use': True in the backup JSON model_known_to_support_computer_use = "claude-3-7-sonnet-20250219" info = litellm.get_model_info(model_known_to_support_computer_use) print(f"Info for {model_known_to_support_computer_use}: {info}") - + # After the fix in utils.py, this should now be present and True assert info.get("supports_computer_use") is True @@ -754,4 +758,57 @@ def test_get_model_info_shows_supports_computer_use(): model_known_not_to_support_computer_use = "gpt-3.5-turbo" info_gpt = litellm.get_model_info(model_known_not_to_support_computer_use) print(f"Info for {model_known_not_to_support_computer_use}: {info_gpt}") - assert info_gpt.get("supports_computer_use") is False # Expecting False due to the default in ModelInfoBase \ No newline at end of file + assert ( + info_gpt.get("supports_computer_use") is False + ) # Expecting False due to the default in ModelInfoBase + + +@pytest.mark.parametrize( + "model, custom_llm_provider", + [ + ("gpt-3.5-turbo", "openai"), + ("anthropic.claude-3-7-sonnet-20250219-v1:0", "bedrock"), + ("gemini-1.5-pro", "vertex_ai"), + ], +) +def test_pre_process_non_default_params(model, custom_llm_provider): + from pydantic import BaseModel + + from litellm.utils import pre_process_non_default_params + + class ResponseFormat(BaseModel): + x: str + y: str + + passed_params = { + "model": "gpt-3.5-turbo", + "response_format": ResponseFormat, + } + special_params = {} + processed_non_default_params = pre_process_non_default_params( + model=model, + passed_params=passed_params, + special_params=special_params, + custom_llm_provider=custom_llm_provider, + additional_drop_params=None, + ) + print(processed_non_default_params) + assert processed_non_default_params == { + "response_format": { + "type": "json_schema", + "json_schema": { + "schema": { + "properties": { + "x": {"title": "X", "type": "string"}, + "y": {"title": "Y", "type": "string"}, + }, + "required": ["x", "y"], + "title": "ResponseFormat", + "type": "object", + "additionalProperties": False, + }, + "name": "ResponseFormat", + "strict": True, + }, + } + } diff --git a/tests/llm_translation/test_optional_params.py b/tests/llm_translation/test_optional_params.py index b29d20ba40..4ce18be8ad 100644 --- a/tests/llm_translation/test_optional_params.py +++ b/tests/llm_translation/test_optional_params.py @@ -460,6 +460,26 @@ def test_dynamic_drop_params_e2e(): print(mock_response.call_args.kwargs["data"]) assert "response_format" not in mock_response.call_args.kwargs["data"] +def test_dynamic_pass_additional_params(): + with patch( + "litellm.llms.custom_httpx.http_handler.HTTPHandler.post", new=MagicMock() + ) as mock_response: + try: + response = litellm.completion( + model="command-r", + messages=[{"role": "user", "content": "Hey, how's it going?"}], + custom_param="test", + api_key="my-custom-key", + ) + except Exception as e: + print(f"Error occurred: {e}") + pass + + mock_response.assert_called_once() + print(mock_response.call_args.kwargs["data"]) + assert "custom_param" in mock_response.call_args.kwargs["data"] + assert "api_key" not in mock_response.call_args.kwargs["data"] + @pytest.mark.parametrize( "model, provider, should_drop", @@ -559,6 +579,7 @@ def test_dynamic_drop_additional_params_e2e(): additional_drop_params=["response_format"], ) except Exception as e: + print(f"Error occurred: {e}") pass mock_response.assert_called_once() diff --git a/tests/local_testing/test_ollama.py b/tests/local_testing/test_ollama.py index 09c50315e0..2c4ceb3baf 100644 --- a/tests/local_testing/test_ollama.py +++ b/tests/local_testing/test_ollama.py @@ -32,12 +32,14 @@ def test_get_ollama_params(): temperature=0.5, stream=True, ) - print("Converted params", converted_params) - assert converted_params == { + expected_params = { "num_predict": 20, "stream": True, "temperature": 0.5, - }, f"{converted_params} != {'num_predict': 20, 'stream': True, 'temperature': 0.5}" + } + print("Converted params", converted_params) + for key in expected_params.keys(): + assert expected_params[key] == converted_params[key], f"{converted_params} != {expected_params}" except Exception as e: pytest.fail(f"Error occurred: {e}") diff --git a/tests/logging_callback_tests/langfuse_expected_request_body/completion_with_router.json b/tests/logging_callback_tests/langfuse_expected_request_body/completion_with_router.json new file mode 100644 index 0000000000..e5059d873a --- /dev/null +++ b/tests/logging_callback_tests/langfuse_expected_request_body/completion_with_router.json @@ -0,0 +1,87 @@ +{ + "batch": [ + { + "id": "45eb9b25-605c-4c4a-b2b3-8241e079cd31", + "type": "trace-create", + "body": { + "id": "litellm-test-32702f3d-8a1c-4912-a3d6-286e59a9c568", + "timestamp": "2025-05-24T17:01:19.408179Z", + "name": "litellm-acompletion", + "input": { + "messages": [ + { + "role": "user", + "content": "Hello!" + } + ] + }, + "tags": [] + }, + "timestamp": "2025-05-24T17:01:19.408284Z" + }, + { + "id": "9f5e9b7d-0cea-4776-b4b9-5c2e8f4bad3c", + "type": "generation-create", + "body": { + "traceId": "litellm-test-32702f3d-8a1c-4912-a3d6-286e59a9c568", + "name": "litellm-acompletion", + "startTime": "2025-05-24T10:01:19.142356-07:00", + "metadata": { + "model_group": "gpt-3.5-turbo", + "model_group_size": 1, + "deployment": "gpt-3.5-turbo", + "model_info": { + "id": "0f1cd8f9e6a22e499303d479486395563ea04decade83fe7334dc2f079a857c2", + "db_model": false + }, + "api_base": null, + "hidden_params": { + "model_id": "0f1cd8f9e6a22e499303d479486395563ea04decade83fe7334dc2f079a857c2", + "cache_key": null, + "api_base": "https://api.openai.com", + "response_cost": 3.5e-05, + "additional_headers": {}, + "litellm_overhead_time_ms": null, + "batch_models": null, + "litellm_model_name": "gpt-3.5-turbo", + "usage_object": null + }, + "litellm_response_cost": 3.5e-05, + "cache_hit": false, + "requester_metadata": {} + }, + "input": { + "messages": [ + { + "role": "user", + "content": "Hello!" + } + ] + }, + "level": "DEFAULT", + "id": "time-10-01-19-142356_chatcmpl-16b215b7-e51e-47b0-8fe5-9dd6f226fda1", + "endTime": "2025-05-24T10:01:19.406531-07:00", + "completionStartTime": "2025-05-24T10:01:19.406531-07:00", + "model": "gpt-3.5-turbo", + "modelParameters": { + "stream": false, + "max_retries": 0 + }, + "usage": { + "input": 10, + "output": 10, + "unit": "TOKENS", + "totalCost": 3.5e-05 + } + }, + "timestamp": "2025-05-24T17:01:19.408586Z" + } + ], + "metadata": { + "batch_size": 2, + "sdk_integration": "litellm", + "sdk_name": "python", + "sdk_version": "2.44.1", + "public_key": "pk-lf-3bfc4db9-217f-48e9-92e0-142566e3c204" + } +} diff --git a/tests/logging_callback_tests/test_langfuse_e2e_test.py b/tests/logging_callback_tests/test_langfuse_e2e_test.py index b46d8764dd..12aa2f38bd 100644 --- a/tests/logging_callback_tests/test_langfuse_e2e_test.py +++ b/tests/logging_callback_tests/test_langfuse_e2e_test.py @@ -381,3 +381,42 @@ class TestLangfuseLogging: await self._verify_langfuse_call( setup["mock_post"], "completion_with_no_choices.json", setup["trace_id"] ) + + @pytest.mark.asyncio + async def test_langfuse_logging_with_router(self, mock_setup): + """Test Langfuse logging with router""" + setup = await mock_setup # Await the fixture + litellm._turn_on_debug() + router = litellm.Router( + model_list=[ + { + "model_name": "gpt-3.5-turbo", + "litellm_params": { + "model": "gpt-3.5-turbo", + "mock_response": "Hello! How can I assist you today?", + "api_key": "test_api_key", + } + } + ] + ) + with patch("httpx.Client.post", setup["mock_post"]): + mock_response = litellm.ModelResponse( + choices=[], + usage=litellm.Usage( + prompt_tokens=10, + completion_tokens=10, + total_tokens=20, + ), + model="gpt-3.5-turbo", + object="chat.completion", + created=1723081200, + ).model_dump() + await router.acompletion( + model="gpt-3.5-turbo", + messages=[{"role": "user", "content": "Hello!"}], + mock_response=mock_response, + metadata={"trace_id": setup["trace_id"]}, + ) + await self._verify_langfuse_call( + setup["mock_post"], "completion_with_router.json", setup["trace_id"] + ) \ No newline at end of file