diff --git a/litellm/cost_calculator.py b/litellm/cost_calculator.py index 69a14a7aa7..30658cb091 100644 --- a/litellm/cost_calculator.py +++ b/litellm/cost_calculator.py @@ -45,6 +45,9 @@ from litellm.llms.openai.cost_calculation import ( cost_per_second as openai_cost_per_second, ) from litellm.llms.openai.cost_calculation import cost_per_token as openai_cost_per_token +from litellm.llms.perplexity.cost_calculator import ( + cost_per_token as perplexity_cost_per_token, +) from litellm.llms.together_ai.cost_calculator import get_model_params_and_category from litellm.llms.vertex_ai.cost_calculator import ( cost_per_character as google_cost_per_character, @@ -329,6 +332,8 @@ def cost_per_token( # noqa: PLR0915 return gemini_cost_per_token(model=model, usage=usage_block) elif custom_llm_provider == "deepseek": return deepseek_cost_per_token(model=model, usage=usage_block) + elif custom_llm_provider == "perplexity": + return perplexity_cost_per_token(model=model, usage=usage_block) else: model_info = _cached_get_model_info_helper( model=model, custom_llm_provider=custom_llm_provider diff --git a/litellm/llms/perplexity/chat/transformation.py b/litellm/llms/perplexity/chat/transformation.py index a81400870a..955fdff081 100644 --- a/litellm/llms/perplexity/chat/transformation.py +++ b/litellm/llms/perplexity/chat/transformation.py @@ -2,13 +2,17 @@ Translate from OpenAI's `/v1/chat/completions` to Perplexity's `/v1/chat/completions` """ -from typing import Optional, Tuple +from typing import Any, List, Optional, Tuple +import httpx import litellm from litellm._logging import verbose_logger from litellm.secret_managers.main import get_secret_str - -from ...openai.chat.gpt_transformation import OpenAIGPTConfig +from litellm.types.llms.openai import AllMessageValues +from litellm.types.utils import Usage, PromptTokensDetailsWrapper +from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj +from litellm.llms.openai.chat.gpt_transformation import 
OpenAIGPTConfig +from litellm.types.utils import ModelResponse class PerplexityChatConfig(OpenAIGPTConfig): @@ -65,3 +69,95 @@ class PerplexityChatConfig(OpenAIGPTConfig): verbose_logger.debug(f"Error checking if model supports web search: {e}") return base_openai_params + + def transform_response( + self, + model: str, + raw_response: httpx.Response, + model_response: ModelResponse, + logging_obj: LiteLLMLoggingObj, + request_data: dict, + messages: List[AllMessageValues], + optional_params: dict, + litellm_params: dict, + encoding: Any, + api_key: Optional[str] = None, + json_mode: Optional[bool] = None, + ) -> ModelResponse: + # Call the parent transform_response first to handle the standard transformation + model_response = super().transform_response( + model=model, + raw_response=raw_response, + model_response=model_response, + logging_obj=logging_obj, + request_data=request_data, + messages=messages, + optional_params=optional_params, + litellm_params=litellm_params, + encoding=encoding, + api_key=api_key, + json_mode=json_mode, + ) + + # Extract and enhance usage with Perplexity-specific fields + try: + raw_response_json = raw_response.json() + self._enhance_usage_with_perplexity_fields(model_response, raw_response_json) + except Exception as e: + verbose_logger.debug(f"Error extracting Perplexity-specific usage fields: {e}") + + return model_response + + def _enhance_usage_with_perplexity_fields( + self, model_response: ModelResponse, raw_response_json: dict + ) -> None: + """ + Extract citation tokens and search queries from Perplexity API response + and add them to the usage object using standard LiteLLM fields. 
+ """ + if not hasattr(model_response, "usage") or model_response.usage is None: + # Create a usage object if it doesn't exist (when usage was None) + model_response.usage = Usage( # type: ignore[attr-defined] + prompt_tokens=0, + completion_tokens=0, + total_tokens=0 + ) + + usage = model_response.usage # type: ignore[attr-defined] + + # Extract citation tokens count + citations = raw_response_json.get("citations", []) + citation_tokens = 0 + if citations: + # Count total characters in citations as a proxy for citation tokens + # This is an estimation - in practice, you might want to use proper tokenization + total_citation_chars = sum(len(str(citation)) for citation in citations if citation) + # Rough estimation: ~4 characters per token (OpenAI's general rule) + if total_citation_chars > 0: + citation_tokens = max(1, total_citation_chars // 4) + + # Extract search queries count from usage or response metadata + # Perplexity might include this in the usage object or as separate metadata + perplexity_usage = raw_response_json.get("usage", {}) + + # Try to extract search queries from usage field first, then root level + num_search_queries = perplexity_usage.get("num_search_queries") + if num_search_queries is None: + num_search_queries = raw_response_json.get("num_search_queries") + if num_search_queries is None: + num_search_queries = perplexity_usage.get("search_queries") + if num_search_queries is None: + num_search_queries = raw_response_json.get("search_queries") + + # Create or update prompt_tokens_details to include web search requests and citation tokens + if citation_tokens > 0 or (num_search_queries is not None and num_search_queries > 0): + if usage.prompt_tokens_details is None: + usage.prompt_tokens_details = PromptTokensDetailsWrapper() + + # Store citation tokens count for cost calculation + if citation_tokens > 0: + setattr(usage, "citation_tokens", citation_tokens) + + # Store search queries count in the standard web_search_requests field + if 
# Dict-shaped search prices may use either naming scheme found in the model
# cost maps; the "low" tier is used as the default price.
_SEARCH_COST_LOW_TIER_KEYS = ("search_context_size_low", "search_queries_size_low")


def _safe_float_cast(value: Union[str, int, float, None, object], default: float = 0.0) -> float:
    """Return ``float(value)``, or ``default`` when value is None or not convertible."""
    if value is None:
        return default
    try:
        return float(value)  # type: ignore
    except (ValueError, TypeError):
        return default


def _search_cost_per_query(search_cost_value: object) -> float:
    """Resolve the per-query search price from a dict-shaped or scalar cost entry."""
    if not isinstance(search_cost_value, dict):
        return _safe_float_cast(search_cost_value)

    for key in _SEARCH_COST_LOW_TIER_KEYS:
        tier_price = search_cost_value.get(key)
        if tier_price is not None:
            # NOTE(review): dict prices are divided by 1000 exactly as before;
            # the accompanying tests pin `0.005 / 1000`. Confirm against
            # Perplexity's published pricing whether the map value is already
            # a per-query price (in which case this under-bills by 1000x).
            return _safe_float_cast(tier_price) / 1000
    return 0.0


def cost_per_token(model: str, usage: Usage) -> Tuple[float, float]:
    """
    Calculate the total prompt and completion cost of a Perplexity call.

    Despite the name (kept for parity with the other provider calculators),
    the returned values are total costs in USD, not per-token prices.

    Input:
        - model: str, the model name without provider prefix
        - usage: LiteLLM Usage block, containing perplexity-specific usage information

    Cost components:
        - prompt cost     = prompt_tokens * input_cost_per_token
                            + citation_tokens * citation_cost_per_token
        - completion cost = completion_tokens * output_cost_per_token
                            + reasoning_tokens * output_cost_per_reasoning_token
                            + web_search_requests * per-query search price
      Optional components are only added when both the usage count is
      positive and the model info defines the matching price.

    Returns:
        Tuple[float, float] - prompt_cost_in_usd, completion_cost_in_usd
    """
    ## GET MODEL INFO
    model_info = get_model_info(model=model, custom_llm_provider="perplexity")

    ## CALCULATE INPUT COST
    input_cost_per_token = _safe_float_cast(model_info.get("input_cost_per_token"))
    prompt_cost: float = (usage.prompt_tokens or 0) * input_cost_per_token

    ## ADD CITATION TOKENS COST (if present)
    citation_tokens = getattr(usage, "citation_tokens", 0) or 0
    citation_cost_value = model_info.get("citation_cost_per_token")
    if citation_tokens > 0 and citation_cost_value is not None:
        prompt_cost += citation_tokens * _safe_float_cast(citation_cost_value)

    ## CALCULATE OUTPUT COST
    output_cost_per_token = _safe_float_cast(model_info.get("output_cost_per_token"))
    completion_cost: float = (usage.completion_tokens or 0) * output_cost_per_token

    ## ADD REASONING TOKENS COST (if present)
    reasoning_tokens = getattr(usage, "reasoning_tokens", 0) or 0
    # Also check completion_tokens_details if reasoning_tokens is not directly available
    completion_details = getattr(usage, "completion_tokens_details", None)
    if reasoning_tokens == 0 and completion_details:
        reasoning_tokens = getattr(completion_details, "reasoning_tokens", 0) or 0

    reasoning_cost_value = model_info.get("output_cost_per_reasoning_token")
    if reasoning_tokens > 0 and reasoning_cost_value is not None:
        completion_cost += reasoning_tokens * _safe_float_cast(reasoning_cost_value)

    ## ADD SEARCH QUERIES COST (if present)
    num_search_queries = 0
    prompt_details = getattr(usage, "prompt_tokens_details", None)
    if prompt_details:
        num_search_queries = getattr(prompt_details, "web_search_requests", 0) or 0

    # Check both possible keys for search cost (legacy and current)
    search_cost_value = model_info.get("search_queries_cost_per_query") or model_info.get(
        "search_context_cost_per_query"
    )
    if num_search_queries > 0 and search_cost_value is not None:
        # Search cost is billed with the completion, like other providers do.
        completion_cost += num_search_queries * _search_cost_per_query(search_cost_value)

    return prompt_cost, completion_cost
9d72f852de..023fc3f935 100644 --- a/litellm/model_prices_and_context_window_backup.json +++ b/litellm/model_prices_and_context_window_backup.json @@ -13647,13 +13647,14 @@ "input_cost_per_token": 2e-06, "output_cost_per_token": 8e-06, "output_cost_per_reasoning_token": 3e-06, + "citation_cost_per_token": 2e-06, + "search_queries_cost_per_query": { + "search_queries_size_low": 0.005, + "search_queries_size_medium": 0.005, + "search_queries_size_high": 0.005 + }, "litellm_provider": "perplexity", "mode": "chat", - "search_context_cost_per_query": { - "search_context_size_low": 0.005, - "search_context_size_medium": 0.005, - "search_context_size_high": 0.005 - }, "supports_reasoning": true, "supports_web_search": true }, diff --git a/litellm/types/utils.py b/litellm/types/utils.py index fd8ecd9e14..14be87704f 100644 --- a/litellm/types/utils.py +++ b/litellm/types/utils.py @@ -177,7 +177,7 @@ class ModelInfoBase(ProviderSpecificModelInfo, total=False): search_context_cost_per_query: Optional[ SearchContextCostPerQuery ] # Cost for using web search tool - + citation_cost_per_token: Optional[float] # Cost per citation token for Perplexity litellm_provider: Required[str] mode: Required[ Literal[ diff --git a/litellm/utils.py b/litellm/utils.py index e3b14a1676..dacc168045 100644 --- a/litellm/utils.py +++ b/litellm/utils.py @@ -4650,6 +4650,7 @@ def _get_model_info_helper( # noqa: PLR0915 output_cost_per_second=_model_info.get("output_cost_per_second", None), output_cost_per_image=_model_info.get("output_cost_per_image", None), output_vector_size=_model_info.get("output_vector_size", None), + citation_cost_per_token=_model_info.get("citation_cost_per_token", None), litellm_provider=_model_info.get( "litellm_provider", custom_llm_provider ), diff --git a/model_prices_and_context_window.json b/model_prices_and_context_window.json index 9d72f852de..8d4e5e276e 100644 --- a/model_prices_and_context_window.json +++ b/model_prices_and_context_window.json @@ -13647,13 +13647,14 
class TestPerplexityChatTransformation:
    """Checks that Perplexity-specific usage data ends up on ``ModelResponse.usage``."""

    # ------------------------------------------------------------------
    # Shared builders
    # ------------------------------------------------------------------
    @staticmethod
    def _new_response() -> ModelResponse:
        """Return a ModelResponse carrying the 100/50/150 usage every test starts from."""
        response = ModelResponse()
        response.usage = Usage(
            prompt_tokens=100,
            completion_tokens=50,
            total_tokens=150,
        )
        return response

    @staticmethod
    def _payload(usage_extra=None, with_choices=True, **root_fields) -> dict:
        """Build a raw Perplexity response dict; extras go into ``usage`` or the root."""
        usage_block = {
            "prompt_tokens": 100,
            "completion_tokens": 50,
            "total_tokens": 150,
        }
        if usage_extra:
            usage_block.update(usage_extra)

        payload: dict = {}
        if with_choices:
            payload["choices"] = [{"message": {"content": "Test response"}}]
        payload["usage"] = usage_block
        payload.update(root_fields)
        return payload

    @staticmethod
    def _enhance(response, payload, config=None):
        """Run the method under test and hand back the (possibly new) usage object."""
        if config is None:
            config = PerplexityChatConfig()
        config._enhance_usage_with_perplexity_fields(response, payload)
        return response.usage

    @staticmethod
    def _search_requests(usage):
        """Assert prompt_tokens_details is populated and return web_search_requests."""
        assert hasattr(usage, "prompt_tokens_details")
        assert usage.prompt_tokens_details is not None
        assert hasattr(usage.prompt_tokens_details, "web_search_requests")
        return usage.prompt_tokens_details.web_search_requests

    # ------------------------------------------------------------------
    # Tests
    # ------------------------------------------------------------------
    def test_enhance_usage_with_citation_tokens(self):
        """Citations in the response yield a positive integer citation_tokens."""
        payload = self._payload(
            citations=[
                "This is a citation with some text content",
                "Another citation with more text here",
                "Third citation with additional information",
            ]
        )

        usage = self._enhance(self._new_response(), payload)

        assert hasattr(usage, "citation_tokens")
        estimated = getattr(usage, "citation_tokens")
        # Estimated from character count, so only sign and type are pinned
        assert estimated > 0
        assert isinstance(estimated, int)

    def test_enhance_usage_with_search_queries_from_usage(self):
        """num_search_queries inside ``usage`` is copied to web_search_requests."""
        payload = self._payload(usage_extra={"num_search_queries": 3})

        usage = self._enhance(self._new_response(), payload)

        assert self._search_requests(usage) == 3

    def test_enhance_usage_with_search_queries_from_root(self):
        """num_search_queries at the response root is copied to web_search_requests."""
        payload = self._payload(num_search_queries=2)

        usage = self._enhance(self._new_response(), payload)

        assert self._search_requests(usage) == 2

    def test_enhance_usage_with_both_citations_and_search_queries(self):
        """Citations and search queries are both extracted from one response."""
        payload = self._payload(
            usage_extra={"num_search_queries": 2},
            citations=[
                "Citation one with some content",
                "Citation two with more information",
            ],
        )

        usage = self._enhance(self._new_response(), payload)

        assert hasattr(usage, "citation_tokens")
        search_requests = self._search_requests(usage)
        assert getattr(usage, "citation_tokens") > 0
        assert search_requests == 2

    def test_enhance_usage_with_empty_citations(self):
        """An empty citations array leaves citation_tokens unset."""
        payload = self._payload(citations=[])

        usage = self._enhance(self._new_response(), payload)

        assert getattr(usage, "citation_tokens", 0) == 0

    def test_enhance_usage_with_missing_fields(self):
        """A response without citations or search queries adds nothing and does not raise."""
        usage = self._enhance(self._new_response(), self._payload())

        assert getattr(usage, "citation_tokens", 0) == 0

        # prompt_tokens_details might be None or have web_search_requests as 0
        if hasattr(usage, "prompt_tokens_details") and usage.prompt_tokens_details:
            assert getattr(usage.prompt_tokens_details, "web_search_requests", 0) == 0

    def test_citation_token_estimation(self):
        """The character-based estimate stays inside a plausible token range."""
        config = PerplexityChatConfig()

        # citation text -> (minimum tokens, maximum tokens)
        expectations = {
            "Short": (1, 2),
            "This is a longer citation with multiple words": (10, 15),
            "A very long citation with many words and characters that should result in more tokens": (18, 25),
        }

        for citation_text, (min_tokens, max_tokens) in expectations.items():
            payload = self._payload(with_choices=False, citations=[citation_text])

            usage = self._enhance(self._new_response(), payload, config=config)
            citation_tokens = getattr(usage, "citation_tokens")

            assert min_tokens <= citation_tokens <= max_tokens, f"Citation '{citation_text}' resulted in {citation_tokens} tokens, expected {min_tokens}-{max_tokens}"

    def test_multiple_citations_aggregation(self):
        """All citations contribute to a single aggregated estimate."""
        citations = [
            "First citation with some text",
            "Second citation with different content",
            "Third citation with more information",
        ]
        payload = self._payload(with_choices=False, citations=citations)

        usage = self._enhance(self._new_response(), payload)

        # Mirrors the implementation's estimate: total characters // 4
        expected_tokens = sum(map(len, citations)) // 4
        assert getattr(usage, "citation_tokens") == expected_tokens

    def test_search_queries_priority_usage_over_root(self):
        """When both locations carry a count, the one inside ``usage`` wins."""
        payload = self._payload(
            usage_extra={"num_search_queries": 5},  # expected winner
            num_search_queries=3,  # expected to be ignored
        )

        usage = self._enhance(self._new_response(), payload)

        assert self._search_requests(usage) == 5

    def test_no_usage_object_handling(self):
        """A ModelResponse created without usage gets one populated with the Perplexity fields."""
        bare_response = ModelResponse()
        payload = self._payload(
            usage_extra={"num_search_queries": 2},
            citations=["Some citation"],
        )

        # Must not raise even though no usage was set on the response
        usage = self._enhance(bare_response, payload)

        assert usage is not None
        assert hasattr(usage, "citation_tokens")
        search_requests = self._search_requests(usage)
        assert getattr(usage, "citation_tokens") > 0
        assert search_requests == 2

    @pytest.mark.parametrize("search_query_location", ["usage", "root"])
    def test_search_queries_extraction_locations(self, search_query_location):
        """The count is found whether it lives inside ``usage`` or at the root."""
        if search_query_location == "usage":
            payload = self._payload(
                with_choices=False, usage_extra={"num_search_queries": 4}
            )
        else:  # root
            payload = self._payload(with_choices=False, num_search_queries=4)

        usage = self._enhance(self._new_response(), payload)

        assert self._search_requests(usage) == 4
"search_context_cost_per_query": { + "search_context_size_low": 0.005, + "search_context_size_medium": 0.005, + "search_context_size_high": 0.005 + }, + "litellm_provider": "perplexity", + "mode": "chat", + "supports_reasoning": True, + "supports_web_search": True, + } + } + + def test_basic_cost_calculation(self): + """Test basic cost calculation without additional fields.""" + usage = Usage( + prompt_tokens=100, + completion_tokens=50, + total_tokens=150 + ) + + prompt_cost, completion_cost = perplexity_cost_per_token( + model="sonar-deep-research", + usage=usage + ) + + # Expected costs: + # Input: 100 tokens * $2e-6 = $0.0002 + # Output: 50 tokens * $8e-6 = $0.0004 + expected_prompt_cost = 100 * 2e-6 + expected_completion_cost = 50 * 8e-6 + + assert math.isclose(prompt_cost, expected_prompt_cost, rel_tol=1e-6) + assert math.isclose(completion_cost, expected_completion_cost, rel_tol=1e-6) + + def test_citation_tokens_cost_calculation(self): + """Test cost calculation with citation tokens.""" + usage = Usage( + prompt_tokens=100, + completion_tokens=50, + total_tokens=150 + ) + + # Add citation tokens + usage.citation_tokens = 25 + + prompt_cost, completion_cost = perplexity_cost_per_token( + model="sonar-deep-research", + usage=usage + ) + + # Expected costs: + # Input: 100 tokens * $2e-6 = $0.0002 + # Citation: 25 tokens * $2e-6 = $0.00005 + # Total prompt cost: $0.00025 + # Output: 50 tokens * $8e-6 = $0.0004 + expected_prompt_cost = (100 * 2e-6) + (25 * 2e-6) + expected_completion_cost = 50 * 8e-6 + + assert math.isclose(prompt_cost, expected_prompt_cost, rel_tol=1e-6) + assert math.isclose(completion_cost, expected_completion_cost, rel_tol=1e-6) + + def test_search_queries_cost_calculation(self): + """Test cost calculation with search queries.""" + usage = Usage( + prompt_tokens=100, + completion_tokens=50, + total_tokens=150, + prompt_tokens_details=PromptTokensDetailsWrapper(web_search_requests=3) + ) + + prompt_cost, completion_cost = 
perplexity_cost_per_token( + model="sonar-deep-research", + usage=usage + ) + + # Expected costs: + # Input: 100 tokens * $2e-6 = $0.0002 + # Output: 50 tokens * $8e-6 = $0.0004 + # Search: 3 queries * ($0.005 / 1000) = $0.000015 + # Total completion cost: $0.000415 + expected_prompt_cost = 100 * 2e-6 + expected_completion_cost = (50 * 8e-6) + (3 / 1000 * 0.005) + + assert math.isclose(prompt_cost, expected_prompt_cost, rel_tol=1e-6) + assert math.isclose(completion_cost, expected_completion_cost, rel_tol=1e-6) + + def test_reasoning_tokens_from_direct_attribute(self): + """Test reasoning tokens cost calculation from direct attribute.""" + usage = Usage( + prompt_tokens=100, + completion_tokens=50, + total_tokens=150 + ) + + # Set reasoning tokens directly + usage.reasoning_tokens = 20 + + prompt_cost, completion_cost = perplexity_cost_per_token( + model="sonar-deep-research", + usage=usage + ) + + # Expected costs: + # Input: 100 tokens * $2e-6 = $0.0002 + # Output: 50 tokens * $8e-6 = $0.0004 + # Reasoning: 20 tokens * $3e-6 = $0.00006 + # Total completion cost: $0.00046 + expected_prompt_cost = 100 * 2e-6 + expected_completion_cost = (50 * 8e-6) + (20 * 3e-6) + + assert math.isclose(prompt_cost, expected_prompt_cost, rel_tol=1e-6) + assert math.isclose(completion_cost, expected_completion_cost, rel_tol=1e-6) + + def test_reasoning_tokens_from_completion_tokens_details(self): + """Test reasoning tokens cost calculation from completion_tokens_details.""" + usage = Usage( + prompt_tokens=100, + completion_tokens=50, + total_tokens=150, + reasoning_tokens=20 # This should be stored in completion_tokens_details + ) + + prompt_cost, completion_cost = perplexity_cost_per_token( + model="sonar-deep-research", + usage=usage + ) + + # Expected costs: + # Input: 100 tokens * $2e-6 = $0.0002 + # Output: 50 tokens * $8e-6 = $0.0004 + # Reasoning: 20 tokens * $3e-6 = $0.00006 + # Total completion cost: $0.00046 + expected_prompt_cost = 100 * 2e-6 + expected_completion_cost = 
(50 * 8e-6) + (20 * 3e-6) + + assert math.isclose(prompt_cost, expected_prompt_cost, rel_tol=1e-6) + assert math.isclose(completion_cost, expected_completion_cost, rel_tol=1e-6) + + def test_comprehensive_cost_calculation(self): + """Test cost calculation with all fields combined.""" + usage = Usage( + prompt_tokens=100, + completion_tokens=50, + total_tokens=150, + reasoning_tokens=15, + prompt_tokens_details=PromptTokensDetailsWrapper(web_search_requests=2) + ) + + # Add custom fields + usage.citation_tokens = 30 + + prompt_cost, completion_cost = perplexity_cost_per_token( + model="sonar-deep-research", + usage=usage + ) + + # Expected costs: + # Input: 100 tokens * $2e-6 = $0.0002 + # Citation: 30 tokens * $2e-6 = $0.00006 + # Total prompt cost: $0.00026 + # Output: 50 tokens * $8e-6 = $0.0004 + # Reasoning: 15 tokens * $3e-6 = $0.000045 + # Search: 2 queries * ($0.005 / 1000) = $0.00001 + # Total completion cost: $0.000455 + expected_prompt_cost = (100 * 2e-6) + (30 * 2e-6) + expected_completion_cost = (50 * 8e-6) + (15 * 3e-6) + (2 / 1000 * 0.005) + + assert math.isclose(prompt_cost, expected_prompt_cost, rel_tol=1e-6) + assert math.isclose(completion_cost, expected_completion_cost, rel_tol=1e-6) + + def test_zero_values_handling(self): + """Test that zero or missing values are handled correctly.""" + usage = Usage( + prompt_tokens=100, + completion_tokens=50, + total_tokens=150, + prompt_tokens_details=PromptTokensDetailsWrapper(web_search_requests=0) + ) + + # These should not raise errors and should not affect cost + usage.citation_tokens = 0 + + prompt_cost, completion_cost = perplexity_cost_per_token( + model="sonar-deep-research", + usage=usage + ) + + # Should be same as basic calculation + expected_prompt_cost = 100 * 2e-6 + expected_completion_cost = 50 * 8e-6 + + assert math.isclose(prompt_cost, expected_prompt_cost, rel_tol=1e-6) + assert math.isclose(completion_cost, expected_completion_cost, rel_tol=1e-6) + + def 
test_missing_model_info_fields(self):
+        """Test behavior when model info is missing some fields."""
+        usage = Usage(
+            prompt_tokens=100,
+            completion_tokens=50,
+            total_tokens=150,
+            prompt_tokens_details=PromptTokensDetailsWrapper(web_search_requests=2)
+        )
+
+        usage.citation_tokens = 25
+
+        # Mock get_model_info to return incomplete model info
+        with patch('litellm.llms.perplexity.cost_calculator.get_model_info') as mock_get_model_info:
+            mock_get_model_info.return_value = {
+                "input_cost_per_token": 2e-6,
+                "output_cost_per_token": 8e-6,
+                # Missing citation_cost_per_token and search_context_cost_per_query
+            }
+
+            prompt_cost, completion_cost = perplexity_cost_per_token(
+                model="sonar-deep-research",
+                usage=usage
+            )
+
+            # Should only calculate basic costs when fields are missing
+            expected_prompt_cost = 100 * 2e-6
+            expected_completion_cost = 50 * 8e-6
+
+            assert math.isclose(prompt_cost, expected_prompt_cost, rel_tol=1e-6)
+            assert math.isclose(completion_cost, expected_completion_cost, rel_tol=1e-6)
+
+    def test_integration_with_main_cost_calculator(self):
+        """Test integration with the main LiteLLM cost calculator."""
+        usage = Usage(
+            prompt_tokens=100,
+            completion_tokens=50,
+            total_tokens=150,
+            reasoning_tokens=10,
+            prompt_tokens_details=PromptTokensDetailsWrapper(web_search_requests=1)
+        )
+
+        usage.citation_tokens = 20
+
+        # Test main cost calculator
+        prompt_cost, completion_cost_val = cost_per_token(
+            model="sonar-deep-research",
+            custom_llm_provider="perplexity",
+            usage_object=usage
+        )
+
+        # Should match direct call to perplexity cost calculator
+        expected_prompt, expected_completion = perplexity_cost_per_token(
+            model="sonar-deep-research",
+            usage=usage
+        )
+
+        assert math.isclose(prompt_cost, expected_prompt, rel_tol=1e-6)
+        assert math.isclose(completion_cost_val, expected_completion, rel_tol=1e-6)
+
+    def test_integration_with_completion_cost_function(self):
+        """Test integration with the completion_cost function."""
+        from litellm import ModelResponse
+
+        # 
Create a mock ModelResponse + usage = Usage( + prompt_tokens=100, + completion_tokens=50, + total_tokens=150, + reasoning_tokens=10, + prompt_tokens_details=PromptTokensDetailsWrapper(web_search_requests=1) + ) + usage.citation_tokens = 15 + + response = ModelResponse() + response.usage = usage + response.model = "sonar-deep-research" + + # Test completion_cost function + total_cost = completion_cost(completion_response=response, custom_llm_provider="perplexity") + + # Calculate expected total cost + expected_prompt_cost = (100 * 2e-6) + (15 * 2e-6) # Input + citation + expected_completion_cost = (50 * 8e-6) + (10 * 3e-6) + (1 / 1000 * 0.005) # Output + reasoning + search + expected_total = expected_prompt_cost + expected_completion_cost + + assert math.isclose(total_cost, expected_total, rel_tol=1e-6) + + def test_model_info_access(self): + """Test that model info correctly returns the new cost fields.""" + model_info = get_model_info(model="sonar-deep-research", custom_llm_provider="perplexity") + + # Check that the new fields are accessible + assert "citation_cost_per_token" in model_info + assert model_info["citation_cost_per_token"] == 2e-6 + assert model_info["search_context_cost_per_query"] == { + "search_context_size_low": 0.005, + "search_context_size_medium": 0.005, + "search_context_size_high": 0.005 + } + + @pytest.mark.parametrize("citation_tokens", [0, 10, 25, 100]) + @pytest.mark.parametrize("search_queries", [0, 1, 5, 10]) + @pytest.mark.parametrize("reasoning_tokens", [0, 15, 30]) + def test_cost_calculation_combinations(self, citation_tokens, search_queries, reasoning_tokens): + """Test various combinations of citation tokens, search queries, and reasoning tokens.""" + usage = Usage( + prompt_tokens=100, + completion_tokens=50, + total_tokens=150, + reasoning_tokens=reasoning_tokens, + prompt_tokens_details=PromptTokensDetailsWrapper(web_search_requests=search_queries) + ) + + usage.citation_tokens = citation_tokens + + prompt_cost, 
completion_cost = perplexity_cost_per_token( + model="sonar-deep-research", + usage=usage + ) + + # Calculate expected costs + expected_prompt_cost = (100 * 2e-6) + (citation_tokens * 2e-6) + expected_completion_cost = (50 * 8e-6) + (reasoning_tokens * 3e-6) + (search_queries / 1000 * 0.005) + + assert math.isclose(prompt_cost, expected_prompt_cost, rel_tol=1e-6) + assert math.isclose(completion_cost, expected_completion_cost, rel_tol=1e-6) + + # Ensure costs are non-negative + assert prompt_cost >= 0 + assert completion_cost >= 0 \ No newline at end of file diff --git a/tests/test_litellm/llms/perplexity/test_perplexity_integration.py b/tests/test_litellm/llms/perplexity/test_perplexity_integration.py new file mode 100644 index 0000000000..ae72b8a962 --- /dev/null +++ b/tests/test_litellm/llms/perplexity/test_perplexity_integration.py @@ -0,0 +1,319 @@ +""" +Integration tests for Perplexity cost calculation and transformation. + +Tests the end-to-end functionality of Perplexity cost calculation +including integration with the main LiteLLM cost calculator. 
+""" + +import json +import math +import os +import sys +from unittest.mock import Mock, patch + +import pytest + +# Add the project root to Python path +sys.path.insert(0, os.path.abspath("../../../..")) + +import litellm +from litellm import ModelResponse +from litellm.cost_calculator import completion_cost, cost_per_token +from litellm.llms.perplexity.chat.transformation import PerplexityChatConfig +from litellm.types.utils import Usage, PromptTokensDetailsWrapper +from litellm.utils import get_model_info + + +class TestPerplexityIntegration: + """Integration test suite for Perplexity functionality.""" + + @pytest.fixture(autouse=True) + def setup_model_cost_map(self): + """Set up the model cost map for testing.""" + # Ensure we use local model cost map for consistent testing + os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True" + + # Load the model cost map + try: + with open("model_prices_and_context_window.json", "r") as f: + model_cost_map = json.load(f) + litellm.model_cost = model_cost_map + except FileNotFoundError: + # Fallback to ensure we have the Perplexity model configuration + litellm.model_cost = { + "perplexity/sonar-deep-research": { + "max_tokens": 128000, + "max_input_tokens": 128000, + "input_cost_per_token": 2e-06, + "output_cost_per_token": 8e-06, + "output_cost_per_reasoning_token": 3e-06, + "citation_cost_per_token": 2e-06, + "search_queries_cost_per_query": { + "search_queries_size_low": 0.005, + "search_queries_size_medium": 0.005, + "search_queries_size_high": 0.005 + }, + "litellm_provider": "perplexity", + "mode": "chat", + "supports_reasoning": True, + "supports_web_search": True, + } + } + + def test_end_to_end_cost_calculation_with_transformation(self): + """Test end-to-end cost calculation with response transformation.""" + # Create a Perplexity API response that includes citations and search queries + config = PerplexityChatConfig() + + # Create a ModelResponse with basic usage (before transformation) + model_response = 
ModelResponse() + model_response.model = "sonar-deep-research" + model_response.usage = Usage( + prompt_tokens=100, + completion_tokens=50, + total_tokens=150, + reasoning_tokens=10 + ) + + # Simulate raw response from Perplexity API + raw_response_dict = { + "choices": [{"message": {"content": "Test response with citations"}}], + "usage": { + "prompt_tokens": 100, + "completion_tokens": 50, + "total_tokens": 150, + "num_search_queries": 2 + }, + "citations": [ + "This is the first citation with important information about the topic", + "Another citation providing additional context for the response" + ] + } + + # Apply transformation to extract Perplexity-specific fields + config._enhance_usage_with_perplexity_fields(model_response, raw_response_dict) + + # Now calculate the cost with the enhanced usage + total_cost = completion_cost(completion_response=model_response, custom_llm_provider="perplexity") + + # Calculate expected cost + citation_chars = sum(len(citation) for citation in raw_response_dict["citations"]) + citation_tokens = citation_chars // 4 + + expected_prompt_cost = (100 * 2e-6) + (citation_tokens * 2e-6) # Input + citation + expected_completion_cost = (50 * 8e-6) + (10 * 3e-6) + (2 / 1000 * 0.005) # Output + reasoning + search + expected_total = expected_prompt_cost + expected_completion_cost + + assert math.isclose(total_cost, expected_total, rel_tol=1e-6) + + def test_cost_calculation_without_custom_fields(self): + """Test that cost calculation works normally when custom fields are absent.""" + # Create a standard response without Perplexity-specific fields + model_response = ModelResponse() + model_response.model = "sonar-deep-research" + model_response.usage = Usage( + prompt_tokens=100, + completion_tokens=50, + total_tokens=150 + ) + + # Calculate cost without custom fields + total_cost = completion_cost(completion_response=model_response, custom_llm_provider="perplexity") + + # Should only include basic input/output costs + expected_cost = 
(100 * 2e-6) + (50 * 8e-6) + + assert math.isclose(total_cost, expected_cost, rel_tol=1e-6) + + def test_main_cost_calculator_integration(self): + """Test integration with the main LiteLLM cost calculator.""" + # Create usage with all Perplexity fields + usage = Usage( + prompt_tokens=200, + completion_tokens=100, + total_tokens=300, + reasoning_tokens=25, + prompt_tokens_details=PromptTokensDetailsWrapper(web_search_requests=3) + ) + usage.citation_tokens = 40 + + # Test main cost calculator + prompt_cost, completion_cost_val = cost_per_token( + model="sonar-deep-research", + custom_llm_provider="perplexity", + usage_object=usage + ) + + # Calculate expected costs + expected_prompt_cost = (200 * 2e-6) + (40 * 2e-6) # Input + citation + expected_completion_cost = (100 * 8e-6) + (25 * 3e-6) + (3 / 1000 * 0.005) # Output + reasoning + search + + assert math.isclose(prompt_cost, expected_prompt_cost, rel_tol=1e-6) + assert math.isclose(completion_cost_val, expected_completion_cost, rel_tol=1e-6) + + def test_model_info_includes_custom_fields(self): + """Test that get_model_info returns the custom Perplexity cost fields.""" + model_info = get_model_info(model="sonar-deep-research", custom_llm_provider="perplexity") + + # Verify custom fields are included + required_fields = [ + "citation_cost_per_token", + "search_context_cost_per_query", + "input_cost_per_token", + "output_cost_per_token", + "output_cost_per_reasoning_token" + ] + + for field in required_fields: + assert field in model_info, f"Missing field: {field}" + assert model_info[field] is not None, f"Null value for field: {field}" + + def test_various_citation_sizes(self): + """Test cost calculation with various citation sizes.""" + config = PerplexityChatConfig() + + test_cases = [ + # (citations, expected_approximate_tokens) + (["Short"], 1), + (["This is a medium-length citation with some content"], 12), + (["Very short", "Another citation", "Third one with more text content"], 15), + ([""], 0), # Empty 
citation + ] + + for citations, expected_approx_tokens in test_cases: + model_response = ModelResponse() + model_response.model = "sonar-deep-research" + model_response.usage = Usage( + prompt_tokens=100, + completion_tokens=50, + total_tokens=150 + ) + + raw_response_dict = { + "usage": {"prompt_tokens": 100, "completion_tokens": 50, "total_tokens": 150}, + "citations": citations + } + + config._enhance_usage_with_perplexity_fields(model_response, raw_response_dict) + + citation_tokens = getattr(model_response.usage, "citation_tokens", 0) + + # Allow for reasonable variance in token estimation + if expected_approx_tokens == 0: + assert citation_tokens == 0 + else: + assert abs(citation_tokens - expected_approx_tokens) <= 5 + + def test_cost_calculation_with_zero_values(self): + """Test cost calculation handles zero values for custom fields correctly.""" + usage = Usage( + prompt_tokens=100, + completion_tokens=50, + total_tokens=150 + ) + + # Set custom fields to zero + usage.citation_tokens = 0 + usage.prompt_tokens_details = PromptTokensDetailsWrapper(web_search_requests=0) + + # Should not add any extra cost + prompt_cost, completion_cost_val = cost_per_token( + model="sonar-deep-research", + custom_llm_provider="perplexity", + usage_object=usage + ) + + expected_prompt_cost = 100 * 2e-6 + expected_completion_cost = 50 * 8e-6 + + assert math.isclose(prompt_cost, expected_prompt_cost, rel_tol=1e-6) + assert math.isclose(completion_cost_val, expected_completion_cost, rel_tol=1e-6) + + def test_high_volume_cost_calculation(self): + """Test cost calculation with high token and query counts.""" + usage = Usage( + prompt_tokens=50000, + completion_tokens=25000, + total_tokens=75000, + reasoning_tokens=10000 + ) + + usage.citation_tokens = 5000 + usage.prompt_tokens_details = PromptTokensDetailsWrapper(web_search_requests=100) + + total_cost = completion_cost( + completion_response=ModelResponse(usage=usage, model="sonar-deep-research"), + 
custom_llm_provider="perplexity" + ) + + # Calculate expected cost + expected_prompt_cost = (50000 * 2e-6) + (5000 * 2e-6) # $0.11 + expected_completion_cost = (25000 * 8e-6) + (10000 * 3e-6) + (100 / 1000 * 0.005) # $0.23 + expected_total = expected_prompt_cost + expected_completion_cost # $0.34 + + assert math.isclose(total_cost, expected_total, rel_tol=1e-6) + assert total_cost > 0.3 # Sanity check for high-volume scenario + + def test_transformation_preserves_existing_usage_fields(self): + """Test that transformation doesn't overwrite existing standard usage fields.""" + config = PerplexityChatConfig() + + model_response = ModelResponse() + model_response.usage = Usage( + prompt_tokens=100, + completion_tokens=50, + total_tokens=150, + reasoning_tokens=20 + ) + + # Store original values + original_prompt_tokens = model_response.usage.prompt_tokens + original_completion_tokens = model_response.usage.completion_tokens + original_total_tokens = model_response.usage.total_tokens + + raw_response_dict = { + "usage": { + "prompt_tokens": 999, # Different from original + "completion_tokens": 999, # Different from original + "total_tokens": 999, # Different from original + "num_search_queries": 3 + }, + "citations": ["Some citation"] + } + + config._enhance_usage_with_perplexity_fields(model_response, raw_response_dict) + + # Original usage fields should be preserved + assert model_response.usage.prompt_tokens == original_prompt_tokens + assert model_response.usage.completion_tokens == original_completion_tokens + assert model_response.usage.total_tokens == original_total_tokens + + # But custom fields should be added + assert hasattr(model_response.usage, "prompt_tokens_details") + assert hasattr(model_response.usage, "citation_tokens") + assert model_response.usage.prompt_tokens_details.web_search_requests == 3 + + @pytest.mark.parametrize("provider_name", ["perplexity", "PERPLEXITY", "Perplexity"]) + def test_case_insensitive_provider_matching(self, provider_name): 
+ """Test that cost calculation works with different case variations of provider name.""" + usage = Usage( + prompt_tokens=100, + completion_tokens=50, + total_tokens=150 + ) + usage.citation_tokens = 10 + usage.prompt_tokens_details = PromptTokensDetailsWrapper(web_search_requests=1) + + # Should work regardless of case + prompt_cost, completion_cost_val = cost_per_token( + model="sonar-deep-research", + custom_llm_provider=provider_name.lower(), # Normalize to lowercase + usage_object=usage + ) + + # Should calculate costs correctly + expected_prompt_cost = (100 * 2e-6) + (10 * 2e-6) + expected_completion_cost = (50 * 8e-6) + (1 / 1000 * 0.005) + + assert math.isclose(prompt_cost, expected_prompt_cost, rel_tol=1e-6) + assert math.isclose(completion_cost_val, expected_completion_cost, rel_tol=1e-6) \ No newline at end of file diff --git a/tests/test_litellm/test_utils.py b/tests/test_litellm/test_utils.py index 6dfbb8b7e5..8f043584c0 100644 --- a/tests/test_litellm/test_utils.py +++ b/tests/test_litellm/test_utils.py @@ -466,6 +466,7 @@ def test_aaamodel_prices_and_context_window_json_is_valid(): }, "additionalProperties": False, }, + "citation_cost_per_token": {"type": "number"}, "supported_modalities": { "type": "array", "items": {