diff --git a/litellm/cost_calculator.py b/litellm/cost_calculator.py
index 69a14a7aa7..30658cb091 100644
--- a/litellm/cost_calculator.py
+++ b/litellm/cost_calculator.py
@@ -45,6 +45,9 @@ from litellm.llms.openai.cost_calculation import (
     cost_per_second as openai_cost_per_second,
 )
 from litellm.llms.openai.cost_calculation import cost_per_token as openai_cost_per_token
+from litellm.llms.perplexity.cost_calculator import (
+    cost_per_token as perplexity_cost_per_token,
+)
 from litellm.llms.together_ai.cost_calculator import get_model_params_and_category
 from litellm.llms.vertex_ai.cost_calculator import (
     cost_per_character as google_cost_per_character,
@@ -329,6 +332,8 @@ def cost_per_token(  # noqa: PLR0915
         return gemini_cost_per_token(model=model, usage=usage_block)
     elif custom_llm_provider == "deepseek":
         return deepseek_cost_per_token(model=model, usage=usage_block)
+    elif custom_llm_provider == "perplexity":
+        return perplexity_cost_per_token(model=model, usage=usage_block)
     else:
         model_info = _cached_get_model_info_helper(
             model=model, custom_llm_provider=custom_llm_provider
diff --git a/litellm/llms/perplexity/chat/transformation.py b/litellm/llms/perplexity/chat/transformation.py
index a81400870a..955fdff081 100644
--- a/litellm/llms/perplexity/chat/transformation.py
+++ b/litellm/llms/perplexity/chat/transformation.py
@@ -2,13 +2,17 @@
 Translate from OpenAI's `/v1/chat/completions` to Perplexity's `/v1/chat/completions`
 """
 
-from typing import Optional, Tuple
+from typing import Any, List, Optional, Tuple
 
+import httpx
 import litellm
 from litellm._logging import verbose_logger
 from litellm.secret_managers.main import get_secret_str
-
-from ...openai.chat.gpt_transformation import OpenAIGPTConfig
+from litellm.types.llms.openai import AllMessageValues
+from litellm.types.utils import Usage, PromptTokensDetailsWrapper
+from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj
+from litellm.llms.openai.chat.gpt_transformation import OpenAIGPTConfig
+from litellm.types.utils import ModelResponse
 
 
 class PerplexityChatConfig(OpenAIGPTConfig):
@@ -65,3 +69,95 @@ class PerplexityChatConfig(OpenAIGPTConfig):
             verbose_logger.debug(f"Error checking if model supports web search: {e}")
         
         return base_openai_params
+
+    def transform_response(
+        self,
+        model: str,
+        raw_response: httpx.Response,
+        model_response: ModelResponse,
+        logging_obj: LiteLLMLoggingObj,
+        request_data: dict,
+        messages: List[AllMessageValues],
+        optional_params: dict,
+        litellm_params: dict,
+        encoding: Any,
+        api_key: Optional[str] = None,
+        json_mode: Optional[bool] = None,
+    ) -> ModelResponse:
+        """
+        Apply the parent transformation, then attach Perplexity usage extras.
+
+        The parent class builds the response; afterwards the raw JSON body is
+        passed to `_enhance_usage_with_perplexity_fields`. Any exception raised
+        while doing so is logged at debug level and otherwise ignored, so the
+        parent's response is still returned.
+        """
+        # Gather the pass-through arguments once so the delegation stays compact.
+        parent_kwargs = dict(
+            model=model, raw_response=raw_response, model_response=model_response,
+            logging_obj=logging_obj, request_data=request_data, messages=messages,
+            optional_params=optional_params, litellm_params=litellm_params,
+            encoding=encoding, api_key=api_key, json_mode=json_mode,
+        )
+        transformed = super().transform_response(**parent_kwargs)
+
+        try:
+            self._enhance_usage_with_perplexity_fields(transformed, raw_response.json())
+        except Exception as e:
+            verbose_logger.debug(f"Error extracting Perplexity-specific usage fields: {e}")
+
+        return transformed
+
+    def _enhance_usage_with_perplexity_fields(
+        self, model_response: ModelResponse, raw_response_json: dict
+    ) -> None:
+        """
+        Copy Perplexity citation and search-query usage onto `model_response.usage`.
+
+        `usage.citation_tokens` is the positive `citation_tokens` from the payload's
+        `usage` block, else ~1 token per 4 characters of the `citations` entries.
+        `prompt_tokens_details.web_search_requests` is the first non-null of
+        `usage.num_search_queries`, root `num_search_queries`, `usage.search_queries`,
+        root `search_queries` (a list counts by its length). A zeroed `Usage` is created
+        if absent; null or non-numeric fields are ignored instead of raising.
+        """
+        if getattr(model_response, "usage", None) is None:
+            model_response.usage = Usage(  # type: ignore[attr-defined]
+                prompt_tokens=0, completion_tokens=0, total_tokens=0
+            )
+        usage = model_response.usage  # type: ignore[attr-defined]
+
+        def _as_count(value: Any) -> int:
+            """Coerce a payload value to a non-negative int; 0 when unusable."""
+            if isinstance(value, (list, tuple)):
+                return len(value)
+            if isinstance(value, bool) or not isinstance(value, (int, float)):
+                return 0
+            return max(0, int(value))
+
+        # The payload may carry `"usage": null`; treat anything non-dict as empty.
+        raw_usage = raw_response_json.get("usage")
+        perplexity_usage = raw_usage if isinstance(raw_usage, dict) else {}
+        # Prefer the reported count; the ~4 chars/token estimate is only a fallback.
+        citation_tokens = _as_count(perplexity_usage.get("citation_tokens"))
+        if citation_tokens == 0:
+            citations = raw_response_json.get("citations") or []
+            total_citation_chars = sum(len(str(c)) for c in citations if c)
+            if total_citation_chars > 0:
+                citation_tokens = max(1, total_citation_chars // 4)
+
+        candidates = (
+            perplexity_usage.get("num_search_queries"),
+            raw_response_json.get("num_search_queries"),
+            perplexity_usage.get("search_queries"),
+            raw_response_json.get("search_queries"),
+        )
+        num_search_queries = _as_count(next((c for c in candidates if c is not None), None))
+
+        if citation_tokens > 0 or num_search_queries > 0:
+            if usage.prompt_tokens_details is None:
+                usage.prompt_tokens_details = PromptTokensDetailsWrapper()
+            if citation_tokens > 0:
+                setattr(usage, "citation_tokens", citation_tokens)
+            if num_search_queries > 0:
+                usage.prompt_tokens_details.web_search_requests = num_search_queries
diff --git a/litellm/llms/perplexity/cost_calculator.py b/litellm/llms/perplexity/cost_calculator.py
new file mode 100644
index 0000000000..c8fd2a682a
--- /dev/null
+++ b/litellm/llms/perplexity/cost_calculator.py
@@ -0,0 +1,79 @@
+"""
+Helper util for handling perplexity-specific cost calculation
+- e.g.: citation tokens, search queries
+"""
+
+from typing import Tuple, Union
+
+from litellm.types.utils import Usage
+from litellm.utils import get_model_info
+
+
+def cost_per_token(model: str, usage: Usage) -> Tuple[float, float]:
+    """
+    Calculate the prompt and completion cost in USD for a Perplexity response.
+
+    Prompt cost covers prompt tokens plus citation tokens; completion cost covers
+    completion tokens, reasoning tokens and web search requests. Prices come from
+    `get_model_info`; a component whose price is unset contributes nothing.
+
+    Input:
+        - model: str, the model name without provider prefix
+        - usage: LiteLLM Usage block, containing perplexity-specific usage information
+
+    Returns:
+        Tuple[float, float] - prompt_cost_in_usd, completion_cost_in_usd
+    """
+    model_info = get_model_info(model=model, custom_llm_provider="perplexity")
+
+    def _safe_float_cast(value: Union[str, int, float, None, object], default: float = 0.0) -> float:
+        """Cast `value` to float; return `default` for None or unparsable input."""
+        if value is None:
+            return default
+        try:
+            return float(value)  # type: ignore
+        except (ValueError, TypeError):
+            return default
+
+    ## CALCULATE INPUT COST
+    input_cost_per_token = _safe_float_cast(model_info.get("input_cost_per_token"))
+    prompt_cost: float = (usage.prompt_tokens or 0) * input_cost_per_token
+
+    ## ADD CITATION TOKENS COST (if present)
+    citation_tokens = getattr(usage, "citation_tokens", 0) or 0
+    citation_cost_value = model_info.get("citation_cost_per_token")
+    if citation_tokens > 0 and citation_cost_value is not None:
+        prompt_cost += citation_tokens * _safe_float_cast(citation_cost_value)
+
+    ## CALCULATE OUTPUT COST
+    output_cost_per_token = _safe_float_cast(model_info.get("output_cost_per_token"))
+    completion_cost: float = (usage.completion_tokens or 0) * output_cost_per_token
+
+    ## ADD REASONING TOKENS COST (if present)
+    reasoning_tokens = getattr(usage, "reasoning_tokens", 0) or 0
+    completion_details = getattr(usage, "completion_tokens_details", None)
+    if reasoning_tokens == 0 and completion_details:
+        reasoning_tokens = getattr(completion_details, "reasoning_tokens", 0) or 0
+    reasoning_cost_value = model_info.get("output_cost_per_reasoning_token")
+    if reasoning_tokens > 0 and reasoning_cost_value is not None:
+        completion_cost += reasoning_tokens * _safe_float_cast(reasoning_cost_value)
+
+    ## ADD SEARCH QUERIES COST (if present)
+    prompt_details = getattr(usage, "prompt_tokens_details", None)
+    num_search_queries = (getattr(prompt_details, "web_search_requests", 0) or 0) if prompt_details else 0
+    # Check both possible keys for search cost (legacy and current)
+    search_cost_value = model_info.get("search_queries_cost_per_query") or model_info.get("search_context_cost_per_query")
+    if num_search_queries > 0 and search_cost_value is not None:
+        if isinstance(search_cost_value, dict):
+            # Accept both spellings of the "low" tier key so that a miss on one
+            # does not silently price every search at zero.
+            low_tier_price = search_cost_value.get("search_context_size_low")
+            if low_tier_price is None:
+                low_tier_price = search_cost_value.get("search_queries_size_low")
+            # NOTE(review): /1000 kept because tests reportedly expect 0.005 / 1000; confirm the unit.
+            search_cost_per_query = _safe_float_cast(low_tier_price) / 1000
+        else:
+            search_cost_per_query = _safe_float_cast(search_cost_value)
+        completion_cost += num_search_queries * search_cost_per_query
+
+    return prompt_cost, completion_cost
\ No newline at end of file
diff --git a/litellm/model_prices_and_context_window_backup.json b/litellm/model_prices_and_context_window_backup.json
index 9d72f852de..023fc3f935 100644
--- a/litellm/model_prices_and_context_window_backup.json
+++ b/litellm/model_prices_and_context_window_backup.json
@@ -13647,13 +13647,14 @@
         "input_cost_per_token": 2e-06,
         "output_cost_per_token": 8e-06,
         "output_cost_per_reasoning_token": 3e-06,
+        "citation_cost_per_token": 2e-06,
+        "search_context_cost_per_query": {
+            "search_context_size_low": 0.005,
+            "search_context_size_medium": 0.005,
+            "search_context_size_high": 0.005
+        },
         "litellm_provider": "perplexity",
         "mode": "chat",
-        "search_context_cost_per_query": {
-            "search_context_size_low": 0.005,
-            "search_context_size_medium": 0.005,
-            "search_context_size_high": 0.005
-        },
         "supports_reasoning": true,
         "supports_web_search": true
     },
diff --git a/litellm/types/utils.py b/litellm/types/utils.py
index fd8ecd9e14..14be87704f 100644
--- a/litellm/types/utils.py
+++ b/litellm/types/utils.py
@@ -177,7 +177,7 @@ class ModelInfoBase(ProviderSpecificModelInfo, total=False):
     search_context_cost_per_query: Optional[
         SearchContextCostPerQuery
     ]  # Cost for using web search tool
-
+    citation_cost_per_token: Optional[float]  # Cost per citation token for Perplexity
     litellm_provider: Required[str]
     mode: Required[
         Literal[
diff --git a/litellm/utils.py b/litellm/utils.py
index e3b14a1676..dacc168045 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -4650,6 +4650,7 @@ def _get_model_info_helper(  # noqa: PLR0915
                 output_cost_per_second=_model_info.get("output_cost_per_second", None),
                 output_cost_per_image=_model_info.get("output_cost_per_image", None),
                 output_vector_size=_model_info.get("output_vector_size", None),
+                citation_cost_per_token=_model_info.get("citation_cost_per_token", None),
                 litellm_provider=_model_info.get(
                     "litellm_provider", custom_llm_provider
                 ),
diff --git a/model_prices_and_context_window.json b/model_prices_and_context_window.json
index 9d72f852de..8d4e5e276e 100644
--- a/model_prices_and_context_window.json
+++ b/model_prices_and_context_window.json
@@ -13647,13 +13647,14 @@
         "input_cost_per_token": 2e-06,
         "output_cost_per_token": 8e-06,
         "output_cost_per_reasoning_token": 3e-06,
-        "litellm_provider": "perplexity",
-        "mode": "chat",
+        "citation_cost_per_token": 2e-06,
         "search_context_cost_per_query": {
             "search_context_size_low": 0.005,
             "search_context_size_medium": 0.005,
             "search_context_size_high": 0.005
         },
+        "litellm_provider": "perplexity",
+        "mode": "chat",
         "supports_reasoning": true,
         "supports_web_search": true
     },
diff --git a/tests/test_litellm/llms/perplexity/chat/test_perplexity_chat_transformation.py b/tests/test_litellm/llms/perplexity/chat/test_perplexity_chat_transformation.py
new file mode 100644
index 0000000000..6f64f46b4a
--- /dev/null
+++ b/tests/test_litellm/llms/perplexity/chat/test_perplexity_chat_transformation.py
@@ -0,0 +1,409 @@
+"""
+Test file for Perplexity chat transformation functionality.
+
+Tests the response transformation to extract citation tokens and search queries
+from Perplexity API responses.
+"""
+
+import os
+import sys
+from unittest.mock import Mock
+
+import pytest
+
+# Add the project root to Python path
+sys.path.insert(0, os.path.abspath("../../../../.."))
+
+from litellm import ModelResponse
+from litellm.llms.perplexity.chat.transformation import PerplexityChatConfig
+from litellm.types.utils import Usage
+
+
+class TestPerplexityChatTransformation:
+    """Checks for PerplexityChatConfig._enhance_usage_with_perplexity_fields."""
+
+    def test_enhance_usage_with_citation_tokens(self):
+        """Citations in the payload yield a positive integer citation_tokens."""
+        cfg = PerplexityChatConfig()
+
+        # Payload carrying three citation strings and plain token usage
+        payload = {
+            "choices": [{"message": {"content": "Test response"}}],
+            "usage": {
+                "prompt_tokens": 100,
+                "completion_tokens": 50,
+                "total_tokens": 150,
+            },
+            "citations": [
+                "This is a citation with some text content",
+                "Another citation with more text here",
+                "Third citation with additional information",
+            ],
+        }
+
+        # Response that starts out with ordinary usage numbers only
+        response = ModelResponse()
+        response.usage = Usage(
+            prompt_tokens=100,
+            completion_tokens=50,
+            total_tokens=150,
+        )
+
+        # Run the enrichment step under test
+        cfg._enhance_usage_with_perplexity_fields(response, payload)
+
+        # The attribute must now exist on the usage object
+        assert hasattr(response.usage, "citation_tokens")
+        tokens = response.usage.citation_tokens
+
+        # It is an estimate from character count, so only sign and type are pinned
+        assert isinstance(tokens, int)
+        assert tokens > 0
+
+    def test_enhance_usage_with_search_queries_from_usage(self):
+        """num_search_queries inside the usage block becomes web_search_requests."""
+        cfg = PerplexityChatConfig()
+
+        # Search-query count lives inside the payload's usage block
+        payload = {
+            "choices": [{"message": {"content": "Test response"}}],
+            "usage": {
+                "prompt_tokens": 100,
+                "completion_tokens": 50,
+                "total_tokens": 150,
+                "num_search_queries": 3,
+            },
+        }
+
+        # Response with ordinary usage numbers
+        response = ModelResponse()
+        response.usage = Usage(
+            prompt_tokens=100,
+            completion_tokens=50,
+            total_tokens=150,
+        )
+
+        # Run the enrichment step under test
+        cfg._enhance_usage_with_perplexity_fields(response, payload)
+
+        # prompt_tokens_details must exist and expose the counter
+        assert hasattr(response.usage, "prompt_tokens_details")
+        assert response.usage.prompt_tokens_details is not None
+        assert hasattr(response.usage.prompt_tokens_details, "web_search_requests")
+
+        searches = response.usage.prompt_tokens_details.web_search_requests
+        assert searches == 3
+
+    def test_enhance_usage_with_search_queries_from_root(self):
+        """num_search_queries at the payload root becomes web_search_requests."""
+        cfg = PerplexityChatConfig()
+
+        # Search-query count sits next to (not inside) the usage block
+        payload = {
+            "choices": [{"message": {"content": "Test response"}}],
+            "usage": {
+                "prompt_tokens": 100,
+                "completion_tokens": 50,
+                "total_tokens": 150,
+            },
+            "num_search_queries": 2,
+        }
+
+        # Response with ordinary usage numbers
+        response = ModelResponse()
+        response.usage = Usage(
+            prompt_tokens=100,
+            completion_tokens=50,
+            total_tokens=150,
+        )
+
+        # Run the enrichment step under test
+        cfg._enhance_usage_with_perplexity_fields(response, payload)
+
+        # prompt_tokens_details must exist and expose the counter
+        assert hasattr(response.usage, "prompt_tokens_details")
+        assert response.usage.prompt_tokens_details is not None
+        assert hasattr(response.usage.prompt_tokens_details, "web_search_requests")
+
+        searches = response.usage.prompt_tokens_details.web_search_requests
+        assert searches == 2
+
+    def test_enhance_usage_with_both_citations_and_search_queries(self):
+        """Citations and a search-query count are both recorded in one pass."""
+        cfg = PerplexityChatConfig()
+
+        # Payload with a usage-level query count and two citations
+        payload = {
+            "choices": [{"message": {"content": "Test response"}}],
+            "usage": {
+                "prompt_tokens": 100,
+                "completion_tokens": 50,
+                "total_tokens": 150,
+                "num_search_queries": 2,
+            },
+            "citations": [
+                "Citation one with some content",
+                "Citation two with more information",
+            ],
+        }
+
+        # Response with ordinary usage numbers
+        response = ModelResponse()
+        response.usage = Usage(
+            prompt_tokens=100,
+            completion_tokens=50,
+            total_tokens=150,
+        )
+
+        # Run the enrichment step under test
+        cfg._enhance_usage_with_perplexity_fields(response, payload)
+
+        # Both pieces of information must be attached
+        assert hasattr(response.usage, "citation_tokens")
+        assert hasattr(response.usage, "prompt_tokens_details")
+        assert response.usage.prompt_tokens_details is not None
+        assert hasattr(response.usage.prompt_tokens_details, "web_search_requests")
+
+        tokens = response.usage.citation_tokens
+        searches = response.usage.prompt_tokens_details.web_search_requests
+
+        assert tokens > 0
+        assert searches == 2
+
+    def test_enhance_usage_with_empty_citations(self):
+        """An empty citations list leaves citation_tokens unset (read as 0)."""
+        cfg = PerplexityChatConfig()
+
+        # Payload whose citations key is present but empty
+        payload = {
+            "choices": [{"message": {"content": "Test response"}}],
+            "usage": {
+                "prompt_tokens": 100,
+                "completion_tokens": 50,
+                "total_tokens": 150,
+            },
+            "citations": [],
+        }
+
+        # Response with ordinary usage numbers
+        response = ModelResponse()
+        response.usage = Usage(
+            prompt_tokens=100,
+            completion_tokens=50,
+            total_tokens=150,
+        )
+
+        # Run the enrichment step under test
+        cfg._enhance_usage_with_perplexity_fields(response, payload)
+
+        # Nothing to count, so the attribute falls back to the default
+        tokens = getattr(response.usage, "citation_tokens", 0)
+        assert tokens == 0
+
+    def test_enhance_usage_with_missing_fields(self):
+        """A payload with neither citations nor query counts adds nothing."""
+        cfg = PerplexityChatConfig()
+
+        # Payload limited to choices and plain token usage
+        payload = {
+            "choices": [{"message": {"content": "Test response"}}],
+            "usage": {
+                "prompt_tokens": 100,
+                "completion_tokens": 50,
+                "total_tokens": 150,
+            },
+        }
+
+        # Response with ordinary usage numbers
+        response = ModelResponse()
+        response.usage = Usage(
+            prompt_tokens=100,
+            completion_tokens=50,
+            total_tokens=150,
+        )
+
+        # The call itself must complete without raising
+        cfg._enhance_usage_with_perplexity_fields(response, payload)
+
+        # No citation attribute should have been introduced
+        tokens = getattr(response.usage, "citation_tokens", 0)
+        assert tokens == 0
+
+        details = getattr(response.usage, "prompt_tokens_details", None)  # may be None
+        if details:
+            searches = getattr(details, "web_search_requests", 0)
+            assert searches == 0
+
+    def test_citation_token_estimation(self):
+        """The estimate for a single citation stays inside a plausible range."""
+        cfg = PerplexityChatConfig()
+
+        # One citation string per case, with the accepted token range
+        cases = [
+            # (citation_text, expected_min_tokens, expected_max_tokens)
+            ("Short", 1, 2),
+            ("This is a longer citation with multiple words", 10, 15),
+            ("A very long citation with many words and characters that should result in more tokens", 18, 25),
+        ]
+
+        for citation_text, min_tokens, max_tokens in cases:
+            payload = {
+                "usage": {"prompt_tokens": 100, "completion_tokens": 50, "total_tokens": 150},
+                "citations": [citation_text],
+            }
+
+            response = ModelResponse()
+            response.usage = Usage(
+                prompt_tokens=100,
+                completion_tokens=50,
+                total_tokens=150,
+            )
+
+            cfg._enhance_usage_with_perplexity_fields(response, payload)
+
+            citation_tokens = response.usage.citation_tokens
+
+            # The estimate has to land between the two bounds, inclusive
+            assert min_tokens <= citation_tokens <= max_tokens, f"Citation '{citation_text}' resulted in {citation_tokens} tokens, expected {min_tokens}-{max_tokens}"
+
+    def test_multiple_citations_aggregation(self):
+        """Characters of all citations are summed before dividing by four."""
+        cfg = PerplexityChatConfig()
+
+        payload = {
+            "usage": {"prompt_tokens": 100, "completion_tokens": 50, "total_tokens": 150},
+            "citations": [
+                "First citation with some text",
+                "Second citation with different content",
+                "Third citation with more information",
+            ],
+        }
+
+        response = ModelResponse()
+        response.usage = Usage(
+            prompt_tokens=100,
+            completion_tokens=50,
+            total_tokens=150,
+        )
+
+        cfg._enhance_usage_with_perplexity_fields(response, payload)
+
+        tokens = response.usage.citation_tokens
+
+        # Recompute the estimate here: total characters, integer-divided by 4
+        total_chars = sum(map(len, payload["citations"]))
+        expected = total_chars // 4
+
+        assert tokens == expected
+
+    def test_search_queries_priority_usage_over_root(self):
+        """When both locations carry a count, the usage-level one is used."""
+        cfg = PerplexityChatConfig()
+
+        # Conflicting counts: 5 inside usage, 3 at the root
+        payload = {
+            "choices": [{"message": {"content": "Test response"}}],
+            "usage": {
+                "prompt_tokens": 100,
+                "completion_tokens": 50,
+                "total_tokens": 150,
+                "num_search_queries": 5,  # expected to win
+            },
+            "num_search_queries": 3,  # expected to be ignored
+        }
+
+        # Response with ordinary usage numbers
+        response = ModelResponse()
+        response.usage = Usage(
+            prompt_tokens=100,
+            completion_tokens=50,
+            total_tokens=150,
+        )
+
+        # Run the enrichment step under test
+        cfg._enhance_usage_with_perplexity_fields(response, payload)
+
+        # The recorded value must be the usage-level one
+        assert hasattr(response.usage, "prompt_tokens_details")
+        assert response.usage.prompt_tokens_details is not None
+        searches = response.usage.prompt_tokens_details.web_search_requests
+
+        assert searches == 5  # usage-level value, not the root-level 3
+
+    def test_no_usage_object_handling(self):
+        """A ModelResponse that was never given usage still gets the fields."""
+        cfg = PerplexityChatConfig()
+
+        # Payload with a usage-level query count and one citation
+        payload = {
+            "choices": [{"message": {"content": "Test response"}}],
+            "usage": {
+                "prompt_tokens": 100,
+                "completion_tokens": 50,
+                "total_tokens": 150,
+                "num_search_queries": 2,
+            },
+            "citations": ["Some citation"],
+        }
+
+        # Fresh response; no usage is assigned by this test
+        response = ModelResponse()
+
+        # The call must complete without raising
+        cfg._enhance_usage_with_perplexity_fields(response, payload)
+
+        # Usage has to be present afterwards, carrying the Perplexity fields
+        assert response.usage is not None
+        assert hasattr(response.usage, "citation_tokens")
+        assert hasattr(response.usage, "prompt_tokens_details")
+        assert response.usage.prompt_tokens_details is not None
+        assert hasattr(response.usage.prompt_tokens_details, "web_search_requests")
+
+        tokens = response.usage.citation_tokens
+        searches = response.usage.prompt_tokens_details.web_search_requests
+
+        assert tokens > 0
+        assert searches == 2
+
+    @pytest.mark.parametrize("search_query_location", ["usage", "root"])
+    def test_search_queries_extraction_locations(self, search_query_location):
+        """The same count is picked up from either supported location."""
+        cfg = PerplexityChatConfig()
+
+        # Place the count according to the parametrized location
+        if search_query_location != "usage":
+            payload = {
+                "usage": {
+                    "prompt_tokens": 100,
+                    "completion_tokens": 50,
+                    "total_tokens": 150,
+                },
+                "num_search_queries": 4,
+            }
+        else:  # count nested inside usage
+            payload = {
+                "usage": {
+                    "prompt_tokens": 100,
+                    "completion_tokens": 50,
+                    "total_tokens": 150,
+                    "num_search_queries": 4,
+                },
+            }
+
+        # Response with ordinary usage numbers
+        response = ModelResponse()
+        response.usage = Usage(
+            prompt_tokens=100,
+            completion_tokens=50,
+            total_tokens=150,
+        )
+
+        # Run the enrichment step under test
+        cfg._enhance_usage_with_perplexity_fields(response, payload)
+
+        # Either placement must produce the same recorded count
+        assert hasattr(response.usage, "prompt_tokens_details")
+        assert response.usage.prompt_tokens_details is not None
+        searches = response.usage.prompt_tokens_details.web_search_requests
+
+        assert searches == 4
\ No newline at end of file
diff --git a/tests/test_litellm/llms/perplexity/test_perplexity_cost_calculator.py b/tests/test_litellm/llms/perplexity/test_perplexity_cost_calculator.py
new file mode 100644
index 0000000000..f9a5210007
--- /dev/null
+++ b/tests/test_litellm/llms/perplexity/test_perplexity_cost_calculator.py
@@ -0,0 +1,373 @@
+"""
+Test file for Perplexity cost calculator functionality.
+
+Tests the cost calculation for Perplexity models including citation tokens, 
+search queries, and reasoning tokens.
+"""
+
+import json
+import math
+import os
+import sys
+from unittest.mock import Mock, patch
+
+import pytest
+
+# Add the project root to Python path
+sys.path.insert(0, os.path.abspath("../../../.."))
+
+import litellm
+from litellm.cost_calculator import completion_cost, cost_per_token
+from litellm.llms.perplexity.cost_calculator import cost_per_token as perplexity_cost_per_token
+from litellm.types.utils import Usage, PromptTokensDetailsWrapper
+from litellm.utils import get_model_info
+
+
+class TestPerplexityCostCalculator:
+    """Test suite for Perplexity cost calculation functionality."""
+
+    @pytest.fixture(autouse=True)
+    def setup_model_cost_map(self, monkeypatch):
+        """Install the model cost map for one test, restoring globals afterwards."""
+        # monkeypatch undoes both the env var and litellm.model_cost on teardown,
+        # so this fixture cannot leak state into other test modules.
+        monkeypatch.setenv("LITELLM_LOCAL_MODEL_COST_MAP", "True")
+
+        try:
+            with open("model_prices_and_context_window.json", "r") as f:
+                model_cost_map = json.load(f)
+        except FileNotFoundError:
+            # Fallback when the relative path does not resolve: minimal Perplexity entry.
+            model_cost_map = {
+                "perplexity/sonar-deep-research": {
+                    "max_tokens": 128000,
+                    "max_input_tokens": 128000,
+                    "input_cost_per_token": 2e-06,
+                    "output_cost_per_token": 8e-06,
+                    "output_cost_per_reasoning_token": 3e-06,
+                    "citation_cost_per_token": 2e-06,
+                    "search_context_cost_per_query": {
+                        "search_context_size_low": 0.005,
+                        "search_context_size_medium": 0.005,
+                        "search_context_size_high": 0.005
+                    },
+                    "litellm_provider": "perplexity",
+                    "mode": "chat",
+                    "supports_reasoning": True,
+                    "supports_web_search": True,
+                }
+            }
+        monkeypatch.setattr(litellm, "model_cost", model_cost_map)
+
+    def test_basic_cost_calculation(self):
+        """
+        Price a request that carries only prompt and completion tokens.
+
+        No citation, search or reasoning usage is attached.
+        """
+        usage = Usage(prompt_tokens=100, completion_tokens=50, total_tokens=150)
+
+        # Expected breakdown:
+        #   prompt:     100 tokens * $2e-6 = $0.0002
+        #   completion:  50 tokens * $8e-6 = $0.0004
+        want_prompt = 100 * 2e-6
+        want_completion = 50 * 8e-6
+
+        got_prompt, got_completion = perplexity_cost_per_token(
+            model="sonar-deep-research",
+            usage=usage,
+        )
+
+        assert math.isclose(got_prompt, want_prompt, rel_tol=1e-6)
+        assert math.isclose(got_completion, want_completion, rel_tol=1e-6)
+
+    def test_citation_tokens_cost_calculation(self):
+        """
+        Citation tokens must be added to the prompt-side cost.
+
+        The completion-side cost is expected to stay at the plain output price.
+        """
+        usage = Usage(prompt_tokens=100, completion_tokens=50, total_tokens=150)
+
+        # Attach the Perplexity-specific citation token count.
+        usage.citation_tokens = 25
+
+        # Expected breakdown:
+        #   input:      100 tokens * $2e-6 = $0.0002
+        #   citations:   25 tokens * $2e-6 = $0.00005
+        #   prompt total                   = $0.00025
+        #   completion:  50 tokens * $8e-6 = $0.0004
+        want_prompt = (100 * 2e-6) + (25 * 2e-6)
+        want_completion = 50 * 8e-6
+
+        got_prompt, got_completion = perplexity_cost_per_token(
+            model="sonar-deep-research",
+            usage=usage,
+        )
+
+        assert math.isclose(got_prompt, want_prompt, rel_tol=1e-6)
+        assert math.isclose(got_completion, want_completion, rel_tol=1e-6)
+
+    def test_search_queries_cost_calculation(self):
+        """Web search requests must be added to the completion-side cost."""
+        search_details = PromptTokensDetailsWrapper(web_search_requests=3)
+        usage = Usage(
+            prompt_tokens=100,
+            completion_tokens=50,
+            total_tokens=150,
+            prompt_tokens_details=search_details,
+        )
+
+        # Expected breakdown:
+        #   prompt:     100 tokens * $2e-6 = $0.0002
+        #   output:      50 tokens * $8e-6 = $0.0004
+        #   search:       3 queries * ($0.005 / 1000) = $0.000015
+        #   completion total               = $0.000415
+        want_prompt = 100 * 2e-6
+        want_completion = (50 * 8e-6) + (3 / 1000 * 0.005)
+
+        got_prompt, got_completion = perplexity_cost_per_token(
+            model="sonar-deep-research", usage=usage
+        )
+
+        assert math.isclose(got_prompt, want_prompt, rel_tol=1e-6)
+        assert math.isclose(got_completion, want_completion, rel_tol=1e-6)
+
+    def test_reasoning_tokens_from_direct_attribute(self):
+        """
+        Reasoning tokens assigned as a plain attribute must be priced.
+
+        They are expected on the completion side, on top of the output price.
+        """
+        usage = Usage(prompt_tokens=100, completion_tokens=50, total_tokens=150)
+
+        # Assign the count after construction instead of via the constructor.
+        usage.reasoning_tokens = 20
+
+        # Expected breakdown:
+        #   prompt:     100 tokens * $2e-6 = $0.0002
+        #   output:      50 tokens * $8e-6 = $0.0004
+        #   reasoning:   20 tokens * $3e-6 = $0.00006
+        #   completion total               = $0.00046
+        want_prompt = 100 * 2e-6
+        want_completion = (50 * 8e-6) + (20 * 3e-6)
+
+        got_prompt, got_completion = perplexity_cost_per_token(
+            model="sonar-deep-research",
+            usage=usage,
+        )
+
+        assert math.isclose(got_prompt, want_prompt, rel_tol=1e-6)
+        assert math.isclose(got_completion, want_completion, rel_tol=1e-6)
+
+    def test_reasoning_tokens_from_completion_tokens_details(self):
+        """
+        Reasoning tokens passed to the `Usage` constructor must be priced.
+        """
+        # Per the original author's note the constructor should store the count
+        # in completion_tokens_details; that storage is not asserted here.
+        token_kwargs = dict(prompt_tokens=100, completion_tokens=50, total_tokens=150)
+        usage = Usage(reasoning_tokens=20, **token_kwargs)
+
+        # Expected breakdown:
+        #   prompt:     100 tokens * $2e-6 = $0.0002
+        #   output:      50 tokens * $8e-6 = $0.0004
+        #   reasoning:   20 tokens * $3e-6 = $0.00006
+        #   completion total               = $0.00046
+        want_prompt = 100 * 2e-6
+        want_completion = (50 * 8e-6) + (20 * 3e-6)
+
+        got_prompt, got_completion = perplexity_cost_per_token(
+            model="sonar-deep-research",
+            usage=usage,
+        )
+
+        assert math.isclose(got_prompt, want_prompt, rel_tol=1e-6)
+        assert math.isclose(got_completion, want_completion, rel_tol=1e-6)
+
+    def test_comprehensive_cost_calculation(self):
+        """Price a request that uses citations, reasoning and web search together."""
+        search_details = PromptTokensDetailsWrapper(web_search_requests=2)
+        usage = Usage(
+            prompt_tokens=100,
+            completion_tokens=50,
+            total_tokens=150,
+            reasoning_tokens=15,
+            prompt_tokens_details=search_details,
+        )
+        # Citation tokens are a custom field, set after construction.
+        usage.citation_tokens = 30
+
+        # Expected breakdown:
+        #   input:      100 tokens * $2e-6 = $0.0002
+        #   citations:   30 tokens * $2e-6 = $0.00006
+        #   prompt total                   = $0.00026
+        #   output:      50 tokens * $8e-6 = $0.0004
+        #   reasoning:   15 tokens * $3e-6 = $0.000045
+        #   search:       2 queries * ($0.005 / 1000) = $0.00001
+        #   completion total               = $0.000455
+        want_prompt = (100 * 2e-6) + (30 * 2e-6)
+        want_completion = (50 * 8e-6) + (15 * 3e-6) + (2 / 1000 * 0.005)
+
+        got_prompt, got_completion = perplexity_cost_per_token(
+            model="sonar-deep-research",
+            usage=usage,
+        )
+
+        assert math.isclose(got_prompt, want_prompt, rel_tol=1e-6)
+        assert math.isclose(got_completion, want_completion, rel_tol=1e-6)
+
+    def test_zero_values_handling(self):
+        """Zero-valued extras must neither raise nor change the computed cost."""
+        no_searches = PromptTokensDetailsWrapper(web_search_requests=0)
+        usage = Usage(
+            prompt_tokens=100,
+            completion_tokens=50,
+            total_tokens=150,
+            prompt_tokens_details=no_searches,
+        )
+        # An explicit zero for the custom citation field as well.
+        usage.citation_tokens = 0
+
+        # The result must match the plain input/output pricing.
+        want_prompt = 100 * 2e-6
+        want_completion = 50 * 8e-6
+
+        got_prompt, got_completion = perplexity_cost_per_token(
+            model="sonar-deep-research",
+            usage=usage,
+        )
+
+        assert math.isclose(got_prompt, want_prompt, rel_tol=1e-6)
+        assert math.isclose(got_completion, want_completion, rel_tol=1e-6)
+
+    def test_missing_model_info_fields(self):
+        """Extras are ignored when the model info carries no price for them."""
+        search_details = PromptTokensDetailsWrapper(web_search_requests=2)
+        usage = Usage(
+            prompt_tokens=100,
+            completion_tokens=50,
+            total_tokens=150,
+            prompt_tokens_details=search_details,
+        )
+        usage.citation_tokens = 25
+
+        # Pricing entry with only the base rates: neither citation_cost_per_token
+        # nor search_context_cost_per_query is present.
+        sparse_model_info = {
+            "input_cost_per_token": 2e-6,
+            "output_cost_per_token": 8e-6,
+        }
+
+        # Replace the lookup used by the Perplexity calculator with the sparse entry.
+        with patch(
+            "litellm.llms.perplexity.cost_calculator.get_model_info",
+            return_value=sparse_model_info,
+        ):
+            got_prompt, got_completion = perplexity_cost_per_token(
+                model="sonar-deep-research", usage=usage
+            )
+
+        # Extras without a configured price contribute nothing.
+        assert math.isclose(got_prompt, 100 * 2e-6, rel_tol=1e-6)
+        assert math.isclose(got_completion, 50 * 8e-6, rel_tol=1e-6)
+
+    def test_integration_with_main_cost_calculator(self):
+        """The generic `cost_per_token` must agree with the Perplexity calculator."""
+        search_details = PromptTokensDetailsWrapper(web_search_requests=1)
+        usage = Usage(
+            prompt_tokens=100,
+            completion_tokens=50,
+            total_tokens=150,
+            reasoning_tokens=10,
+            prompt_tokens_details=search_details,
+        )
+        usage.citation_tokens = 20
+
+        # Request routed through the generic entry point.
+        routed_prompt, routed_completion = cost_per_token(
+            model="sonar-deep-research",
+            custom_llm_provider="perplexity",
+            usage_object=usage,
+        )
+
+        # Reference values straight from the Perplexity calculator.
+        direct_prompt, direct_completion = perplexity_cost_per_token(
+            model="sonar-deep-research",
+            usage=usage,
+        )
+
+        assert math.isclose(routed_prompt, direct_prompt, rel_tol=1e-6)
+        assert math.isclose(routed_completion, direct_completion, rel_tol=1e-6)
+
+    def test_integration_with_completion_cost_function(self):
+        """`completion_cost` must include every Perplexity extra in the total."""
+        from litellm import ModelResponse
+
+        search_details = PromptTokensDetailsWrapper(web_search_requests=1)
+        usage = Usage(
+            prompt_tokens=100,
+            completion_tokens=50,
+            total_tokens=150,
+            reasoning_tokens=10,
+            prompt_tokens_details=search_details,
+        )
+        usage.citation_tokens = 15
+
+        # Minimal response object: only usage and model are populated.
+        response = ModelResponse()
+        response.usage = usage
+        response.model = "sonar-deep-research"
+
+        total_cost = completion_cost(
+            completion_response=response, custom_llm_provider="perplexity"
+        )
+
+        want_prompt = (100 * 2e-6) + (15 * 2e-6)  # input + citation
+        want_completion = (50 * 8e-6) + (10 * 3e-6) + (1 / 1000 * 0.005)  # output + reasoning + search
+
+        assert math.isclose(total_cost, want_prompt + want_completion, rel_tol=1e-6)
+
+    def test_model_info_access(self):
+        """`get_model_info` must expose the citation and search pricing fields."""
+        flat_search_price = {
+            "search_context_size_low": 0.005,
+            "search_context_size_medium": 0.005,
+            "search_context_size_high": 0.005,
+        }
+        info = get_model_info(model="sonar-deep-research", custom_llm_provider="perplexity")
+
+        assert "citation_cost_per_token" in info
+        assert info["citation_cost_per_token"] == 2e-6
+        assert info["search_context_cost_per_query"] == flat_search_price
+
+    @pytest.mark.parametrize("citation_tokens", [0, 10, 25, 100])
+    @pytest.mark.parametrize("search_queries", [0, 1, 5, 10])
+    @pytest.mark.parametrize("reasoning_tokens", [0, 15, 30])
+    def test_cost_calculation_combinations(self, citation_tokens, search_queries, reasoning_tokens):
+        """Price every combination of citation, search and reasoning usage."""
+        search_details = PromptTokensDetailsWrapper(web_search_requests=search_queries)
+        usage = Usage(
+            prompt_tokens=100,
+            completion_tokens=50,
+            total_tokens=150,
+            reasoning_tokens=reasoning_tokens,
+            prompt_tokens_details=search_details,
+        )
+        usage.citation_tokens = citation_tokens
+
+        # Each extra is priced independently and added to its side of the bill.
+        want_prompt = (100 * 2e-6) + (citation_tokens * 2e-6)
+        want_completion = (50 * 8e-6) + (reasoning_tokens * 3e-6) + (search_queries / 1000 * 0.005)
+
+        got_prompt, got_completion = perplexity_cost_per_token(
+            model="sonar-deep-research",
+            usage=usage,
+        )
+
+        assert math.isclose(got_prompt, want_prompt, rel_tol=1e-6)
+        assert math.isclose(got_completion, want_completion, rel_tol=1e-6)
+
+        # Costs can never be negative.
+        assert got_prompt >= 0
+        assert got_completion >= 0
\ No newline at end of file
diff --git a/tests/test_litellm/llms/perplexity/test_perplexity_integration.py b/tests/test_litellm/llms/perplexity/test_perplexity_integration.py
new file mode 100644
index 0000000000..ae72b8a962
--- /dev/null
+++ b/tests/test_litellm/llms/perplexity/test_perplexity_integration.py
@@ -0,0 +1,319 @@
+"""
+Integration tests for Perplexity cost calculation and transformation.
+
+Tests the end-to-end functionality of Perplexity cost calculation 
+including integration with the main LiteLLM cost calculator.
+"""
+
+import json
+import math
+import os
+import sys
+from unittest.mock import Mock, patch
+
+import pytest
+
+# Add the project root to Python path
+sys.path.insert(0, os.path.abspath("../../../.."))
+
+import litellm
+from litellm import ModelResponse
+from litellm.cost_calculator import completion_cost, cost_per_token
+from litellm.llms.perplexity.chat.transformation import PerplexityChatConfig
+from litellm.types.utils import Usage, PromptTokensDetailsWrapper
+from litellm.utils import get_model_info
+
+
+class TestPerplexityIntegration:
+    """Integration test suite for Perplexity functionality."""
+
+    @pytest.fixture(autouse=True)
+    def setup_model_cost_map(self, monkeypatch):
+        """Install the model cost map for one test, restoring globals afterwards."""
+        # monkeypatch undoes both the env var and litellm.model_cost on teardown,
+        # so this fixture cannot leak state into other test modules.
+        monkeypatch.setenv("LITELLM_LOCAL_MODEL_COST_MAP", "True")
+
+        try:
+            with open("model_prices_and_context_window.json", "r") as f:
+                model_cost_map = json.load(f)
+        except FileNotFoundError:
+            # Fallback entry; key names match what the tests in this file look up.
+            model_cost_map = {
+                "perplexity/sonar-deep-research": {
+                    "max_tokens": 128000,
+                    "max_input_tokens": 128000,
+                    "input_cost_per_token": 2e-06,
+                    "output_cost_per_token": 8e-06,
+                    "output_cost_per_reasoning_token": 3e-06,
+                    "citation_cost_per_token": 2e-06,
+                    "search_context_cost_per_query": {
+                        "search_context_size_low": 0.005,
+                        "search_context_size_medium": 0.005,
+                        "search_context_size_high": 0.005
+                    },
+                    "litellm_provider": "perplexity",
+                    "mode": "chat",
+                    "supports_reasoning": True,
+                    "supports_web_search": True,
+                }
+            }
+        monkeypatch.setattr(litellm, "model_cost", model_cost_map)
+
+    def test_end_to_end_cost_calculation_with_transformation(self):
+        """Enrich a response from a raw Perplexity payload, then price it."""
+        citations = [
+            "This is the first citation with important information about the topic",
+            "Another citation providing additional context for the response",
+        ]
+        # Raw Perplexity payload carrying citations and a search-query count.
+        raw_payload = {
+            "choices": [{"message": {"content": "Test response with citations"}}],
+            "usage": {
+                "prompt_tokens": 100,
+                "completion_tokens": 50,
+                "total_tokens": 150,
+                "num_search_queries": 2,
+            },
+            "citations": citations,
+        }
+
+        # Response as it exists before Perplexity-specific enrichment.
+        response = ModelResponse()
+        response.model = "sonar-deep-research"
+        response.usage = Usage(
+            prompt_tokens=100,
+            completion_tokens=50,
+            total_tokens=150,
+            reasoning_tokens=10,
+        )
+
+        # Step 1: copy the Perplexity-only usage fields onto the response.
+        PerplexityChatConfig()._enhance_usage_with_perplexity_fields(
+            response, raw_payload
+        )
+
+        # Step 2: price the enriched response.
+        total_cost = completion_cost(
+            completion_response=response, custom_llm_provider="perplexity"
+        )
+
+        # This test assumes citation tokens are estimated as characters // 4.
+        citation_tokens = sum(len(text) for text in citations) // 4
+
+        want_prompt = (100 * 2e-6) + (citation_tokens * 2e-6)  # input + citation
+        want_completion = (50 * 8e-6) + (10 * 3e-6) + (2 / 1000 * 0.005)  # output + reasoning + search
+
+        assert math.isclose(total_cost, want_prompt + want_completion, rel_tol=1e-6)
+
+    def test_cost_calculation_without_custom_fields(self):
+        """
+        Price a response that carries no Perplexity-specific usage fields.
+        """
+        # Plain response: no citation, search or reasoning usage attached.
+        response = ModelResponse()
+        response.model = "sonar-deep-research"
+        response.usage = Usage(prompt_tokens=100, completion_tokens=50, total_tokens=150)
+
+        total_cost = completion_cost(
+            completion_response=response, custom_llm_provider="perplexity"
+        )
+
+        # Only the base rates apply:
+        #   100 input tokens * $2e-6 + 50 output tokens * $8e-6
+        base_cost = (100 * 2e-6) + (50 * 8e-6)
+
+        assert math.isclose(total_cost, base_cost, rel_tol=1e-6)
+
+    def test_main_cost_calculator_integration(self):
+        """The generic `cost_per_token` must price every Perplexity extra."""
+        # Usage that exercises every Perplexity-specific extra at once.
+        search_details = PromptTokensDetailsWrapper(web_search_requests=3)
+        usage = Usage(
+            prompt_tokens=200,
+            completion_tokens=100,
+            total_tokens=300,
+            reasoning_tokens=25,
+            prompt_tokens_details=search_details,
+        )
+        usage.citation_tokens = 40
+
+        want_prompt = (200 * 2e-6) + (40 * 2e-6)  # input + citation
+        want_completion = (100 * 8e-6) + (25 * 3e-6) + (3 / 1000 * 0.005)  # output + reasoning + search
+
+        # Route the request through the provider-dispatching entry point.
+        got_prompt, got_completion = cost_per_token(
+            model="sonar-deep-research",
+            custom_llm_provider="perplexity",
+            usage_object=usage,
+        )
+
+        assert math.isclose(got_prompt, want_prompt, rel_tol=1e-6)
+        assert math.isclose(got_completion, want_completion, rel_tol=1e-6)
+
+    def test_model_info_includes_custom_fields(self):
+        """`get_model_info` must return all pricing fields used for Perplexity."""
+        required_fields = (
+            "citation_cost_per_token",
+            "search_context_cost_per_query",
+            "input_cost_per_token",
+            "output_cost_per_token",
+            "output_cost_per_reasoning_token",
+        )
+
+        info = get_model_info(model="sonar-deep-research", custom_llm_provider="perplexity")
+
+        # Every pricing field must be both present and populated.
+        for field in required_fields:
+            assert field in info, f"Missing field: {field}"
+            assert info[field] is not None, f"Null value for field: {field}"
+
+    def test_various_citation_sizes(self):
+        """
+        Check the citation token estimate for citations of different lengths.
+
+        Each scenario runs the enrichment step on a fresh response object.
+        """
+        config = PerplexityChatConfig()
+        base_counts = {"prompt_tokens": 100, "completion_tokens": 50, "total_tokens": 150}
+
+        # (citations, approximate token count expected for them)
+        scenarios = [
+            (["Short"], 1),
+            (["This is a medium-length citation with some content"], 12),
+            (["Very short", "Another citation", "Third one with more text content"], 15),
+            ([""], 0),  # Empty citation
+        ]
+
+        for citations, approx_tokens in scenarios:
+            response = ModelResponse()
+            response.model = "sonar-deep-research"
+            response.usage = Usage(**base_counts)
+
+            raw_payload = {
+                "usage": dict(base_counts),
+                "citations": citations,
+            }
+            config._enhance_usage_with_perplexity_fields(response, raw_payload)
+
+            observed = getattr(response.usage, "citation_tokens", 0)
+            # An empty citation must yield zero tokens; otherwise the estimate
+            # only has to land within 5 tokens of the expectation.
+            if approx_tokens == 0:
+                assert observed == 0
+                continue
+            assert abs(observed - approx_tokens) <= 5
+
+    def test_cost_calculation_with_zero_values(self):
+        """
+        Zero-valued Perplexity extras must not add anything to the cost.
+
+        The request goes through the generic `cost_per_token` entry point.
+        """
+        usage = Usage(prompt_tokens=100, completion_tokens=50, total_tokens=150)
+
+        # Explicit zeros for both Perplexity extras, set after construction.
+        usage.citation_tokens = 0
+        usage.prompt_tokens_details = PromptTokensDetailsWrapper(web_search_requests=0)
+
+        got_prompt, got_completion = cost_per_token(
+            model="sonar-deep-research",
+            custom_llm_provider="perplexity",
+            usage_object=usage,
+        )
+
+        # Nothing beyond the base input/output prices may be charged.
+        want_prompt = 100 * 2e-6
+        want_completion = 50 * 8e-6
+
+        assert math.isclose(got_prompt, want_prompt, rel_tol=1e-6)
+        assert math.isclose(got_completion, want_completion, rel_tol=1e-6)
+
+    def test_high_volume_cost_calculation(self):
+        """Price a request with large token counts and many search queries."""
+        # Large token counts; citations and 100 web searches are added below.
+        usage = Usage(
+            prompt_tokens=50000,
+            completion_tokens=25000,
+            total_tokens=75000,
+            reasoning_tokens=10000,
+        )
+        usage.citation_tokens = 5000
+        usage.prompt_tokens_details = PromptTokensDetailsWrapper(web_search_requests=100)
+
+        response = ModelResponse(usage=usage, model="sonar-deep-research")
+        total_cost = completion_cost(
+            completion_response=response, custom_llm_provider="perplexity"
+        )
+
+        want_prompt = (50000 * 2e-6) + (5000 * 2e-6)  # $0.11
+        want_completion = (25000 * 8e-6) + (10000 * 3e-6) + (100 / 1000 * 0.005)  # $0.23
+        want_total = want_prompt + want_completion  # $0.34
+
+        assert math.isclose(total_cost, want_total, rel_tol=1e-6)
+        # Sanity check for the high-volume scenario.
+        assert total_cost > 0.3
+
+    def test_transformation_preserves_existing_usage_fields(self):
+        """Enrichment must add the extras without touching the standard counters."""
+        config = PerplexityChatConfig()
+        # Raw payload whose token counts deliberately disagree with the response.
+        raw_payload = {
+            "usage": {
+                "prompt_tokens": 999,
+                "completion_tokens": 999,
+                "total_tokens": 999,
+                "num_search_queries": 3,
+            },
+            "citations": ["Some citation"],
+        }
+
+        response = ModelResponse()
+        response.usage = Usage(
+            prompt_tokens=100,
+            completion_tokens=50,
+            total_tokens=150,
+            reasoning_tokens=20,
+        )
+
+        # Snapshot the standard counters before enrichment.
+        before = (
+            response.usage.prompt_tokens,
+            response.usage.completion_tokens,
+            response.usage.total_tokens,
+        )
+
+        config._enhance_usage_with_perplexity_fields(response, raw_payload)
+        usage = response.usage
+        # The standard counters keep their original values...
+        assert (usage.prompt_tokens, usage.completion_tokens, usage.total_tokens) == before
+
+        # ...while the Perplexity extras have been attached.
+        assert hasattr(usage, "prompt_tokens_details")
+        assert hasattr(usage, "citation_tokens")
+        assert usage.prompt_tokens_details.web_search_requests == 3
+
+    @pytest.mark.parametrize("provider_name", ["perplexity", "PERPLEXITY", "Perplexity"])
+    def test_case_insensitive_provider_matching(self, provider_name):
+        """
+        Price a request after normalising the provider name to lowercase.
+
+        The test lowercases `provider_name` itself before calling
+        `cost_per_token`, so it does not show that mixed-case names are accepted.
+        """
+        usage = Usage(prompt_tokens=100, completion_tokens=50, total_tokens=150)
+        usage.citation_tokens = 10
+        usage.prompt_tokens_details = PromptTokensDetailsWrapper(web_search_requests=1)
+
+        want_prompt = (100 * 2e-6) + (10 * 2e-6)
+        want_completion = (50 * 8e-6) + (1 / 1000 * 0.005)
+
+        # Normalisation happens here, in the test.
+        got_prompt, got_completion = cost_per_token(
+            model="sonar-deep-research",
+            custom_llm_provider=provider_name.lower(),
+            usage_object=usage,
+        )
+
+        assert math.isclose(got_prompt, want_prompt, rel_tol=1e-6)
+        assert math.isclose(got_completion, want_completion, rel_tol=1e-6)
\ No newline at end of file
diff --git a/tests/test_litellm/test_utils.py b/tests/test_litellm/test_utils.py
index 6dfbb8b7e5..8f043584c0 100644
--- a/tests/test_litellm/test_utils.py
+++ b/tests/test_litellm/test_utils.py
@@ -466,6 +466,7 @@ def test_aaamodel_prices_and_context_window_json_is_valid():
                     },
                     "additionalProperties": False,
                 },
+                "citation_cost_per_token": {"type": "number"},
                 "supported_modalities": {
                     "type": "array",
                     "items": {