[Feat] Allow using Veo Video Generation through LiteLLM Pass through routes (#14228)

* fix: add follow_redirects=True, * test_pass_through_with_httpbin_redirect * cook book veo video * docs Veo Video Generation with Google AI Studio * add veo-3.0-generate-preview cost tracking details * track vertex_video_models
2025-09-03 18:25:43 -07:00 · 2025-09-03 18:25:43 -07:00 · 23ae7170d1
commit 23ae7170d1
parent be7c762882
9 changed files with 711 additions and 0 deletions
--- a/cookbook/veo_video_generation.py
+++ b/cookbook/veo_video_generation.py
@ -0,0 +1,311 @@
+#!/usr/bin/env python3
+"""
+Complete example for Veo video generation through LiteLLM proxy.
+
+This script demonstrates how to:
+1. Generate videos using Google's Veo model
+2. Poll for completion status
+3. Download the generated video file
+
+Requirements:
+- LiteLLM proxy running with Google AI Studio pass-through configured
+- Google AI Studio API key with Veo access
+"""
+
+import json
+import os
+import time
+import requests
+from typing import Optional
+
+
+class VeoVideoGenerator:
+    """Complete Veo video generation client using LiteLLM proxy."""
+    
+    def __init__(
+        self,
+        base_url: str = "http://localhost:4000/gemini/v1beta",
+        api_key: str = "sk-1234",
+    ):
+        """
+        Set up the client with the proxy endpoint and credentials.
+
+        Args:
+            base_url: LiteLLM proxy URL including the Gemini pass-through prefix
+            api_key: LiteLLM proxy key, sent as the x-goog-api-key header
+        """
+        self.base_url, self.api_key = base_url, api_key
+        # Every request made by this client reuses the same header set.
+        self.headers = {"x-goog-api-key": api_key, "Content-Type": "application/json"}
+    
+    def generate_video(self, prompt: str) -> Optional[str]:
+        """
+        Initiate video generation with Veo.
+
+        Args:
+            prompt: Text description of the video to generate
+
+        Returns:
+            Operation name if successful, None otherwise (request failures
+            are printed, not raised)
+        """
+        print(f"🎬 Generating video with prompt: '{prompt}'")
+
+        url = f"{self.base_url}/models/veo-3.0-generate-preview:predictLongRunning"
+        payload = {"instances": [{"prompt": prompt}]}
+
+        try:
+            # A timeout keeps an unresponsive proxy from hanging the script.
+            response = requests.post(
+                url, headers=self.headers, json=payload, timeout=60
+            )
+            response.raise_for_status()
+            data = response.json()
+        except requests.RequestException as e:
+            print(f"❌ Failed to start video generation: {e}")
+            if e.response is not None:
+                try:
+                    error_data = e.response.json()
+                    print(f"Error details: {json.dumps(error_data, indent=2)}")
+                except ValueError:
+                    # Error body was not JSON; show it verbatim instead.
+                    print(f"Error response: {e.response.text}")
+            return None
+
+        # The started operation is identified by the "name" field of the reply.
+        operation_name = data.get("name")
+        if not operation_name:
+            print("❌ No operation name returned")
+            print(f"Response: {json.dumps(data, indent=2)}")
+            return None
+
+        print(f"✅ Video generation started: {operation_name}")
+        return operation_name
+    
+    def wait_for_completion(self, operation_name: str, max_wait_time: int = 600) -> Optional[str]:
+        """
+        Poll operation status until video generation is complete.
+
+        Args:
+            operation_name: Name of the operation to monitor
+            max_wait_time: Maximum time to wait in seconds (default: 10 minutes)
+
+        Returns:
+            Video URI if successful, None on error, malformed response or timeout
+        """
+        print("⏳ Waiting for video generation to complete...")
+
+        operation_url = f"{self.base_url}/{operation_name}"
+        start_time = time.time()
+        poll_interval = 10  # Start with 10 seconds
+
+        while time.time() - start_time < max_wait_time:
+            try:
+                print(f"🔍 Polling status... ({int(time.time() - start_time)}s elapsed)")
+
+                # Per-request timeout: a stalled connection must not block the loop forever
+                response = requests.get(operation_url, headers=self.headers, timeout=60)
+                response.raise_for_status()
+
+                data = response.json()
+
+                # Check for errors
+                if "error" in data:
+                    print("❌ Error in video generation:")
+                    print(json.dumps(data["error"], indent=2))
+                    return None
+
+                # Check if operation is complete
+                if data.get("done", False):
+                    print("🎉 Video generation complete!")
+
+                    try:
+                        # Extract video URI from nested response
+                        video_uri = data["response"]["generateVideoResponse"]["generatedSamples"][0]["video"]["uri"]
+                        print(f"📹 Video URI: {video_uri}")
+                        return video_uri
+                    except (KeyError, IndexError, TypeError) as e:
+                        # Also covers an empty generatedSamples list or a non-dict level
+                        print(f"❌ Could not extract video URI: {e}")
+                        print("Full response:")
+                        print(json.dumps(data, indent=2))
+                        return None
+
+                # Wait before next poll, with exponential backoff
+                time.sleep(poll_interval)
+                poll_interval = min(poll_interval * 1.2, 30)  # Cap at 30 seconds
+
+            except requests.RequestException as e:
+                print(f"❌ Error polling operation status: {e}")
+                time.sleep(poll_interval)
+
+        print(f"⏰ Timeout after {max_wait_time} seconds")
+        return None
+    
+    def download_video(self, video_uri: str, output_filename: str = "generated_video.mp4") -> bool:
+        """
+        Download the generated video file.
+
+        Args:
+            video_uri: URI of the video to download (from Google's response)
+            output_filename: Local filename to save the video
+
+        Returns:
+            True if download successful, False otherwise
+        """
+        print("⬇️  Downloading video...")
+        print(f"Original URI: {video_uri}")
+
+        # Convert Google URI to LiteLLM proxy URI
+        # Example: files/abc123 -> /gemini/v1beta/files/abc123:download?alt=media
+        google_prefix = "https://generativelanguage.googleapis.com/v1beta/"
+        if video_uri.startswith(google_prefix):
+            # Absolute Google URL: keep only the path so it is routed via the proxy
+            download_path = video_uri[len(google_prefix):]
+        elif video_uri.startswith("files/"):
+            download_path = f"{video_uri}:download?alt=media"
+        else:
+            download_path = video_uri
+
+        litellm_download_url = f"{self.base_url}/{download_path}"
+        print(f"Download URL: {litellm_download_url}")
+
+        mb = 1024 * 1024
+        try:
+            # Stream the body; `with` releases the connection even on failure
+            with requests.get(
+                litellm_download_url, headers=self.headers, stream=True,
+                allow_redirects=True, timeout=60,  # Handle redirects automatically
+            ) as response:
+                response.raise_for_status()
+                with open(output_filename, 'wb') as f:
+                    downloaded_size = 0
+                    next_report = mb
+                    for chunk in response.iter_content(chunk_size=8192):
+                        if not chunk:
+                            continue
+                        f.write(chunk)
+                        downloaded_size += len(chunk)
+                        # Chunks may be short: report on crossing a MB boundary, not exact multiples
+                        if downloaded_size >= next_report:
+                            print(f"📦 Downloaded {downloaded_size / mb:.1f} MB...")
+                            next_report = (downloaded_size // mb + 1) * mb
+        except requests.RequestException as e:
+            print(f"❌ Download failed: {e}")
+            if e.response is not None:
+                print(f"Status code: {e.response.status_code}")
+                print(f"Response headers: {dict(e.response.headers)}")
+            return False
+
+        # Verify file was created and has content
+        if not os.path.exists(output_filename):
+            print("❌ File was not created")
+            return False
+        file_size = os.path.getsize(output_filename)
+        if file_size == 0:
+            print("❌ Downloaded file is empty")
+            os.remove(output_filename)
+            return False
+        print("✅ Video downloaded successfully!")
+        print(f"📁 Saved as: {output_filename}")
+        print(f"📏 File size: {file_size / (1024*1024):.2f} MB")
+        return True
+    
+    def generate_and_download(self, prompt: str, output_filename: Optional[str] = None) -> bool:
+        """
+        Complete workflow: generate video and download it.
+
+        Args:
+            prompt: Text description for video generation
+            output_filename: Output filename (auto-generated if None)
+
+        Returns:
+            True if successful, False otherwise
+            (a banner reporting the outcome is printed either way)
+        """
+        # Auto-generate filename if not provided
+        if output_filename is None:
+            timestamp = int(time.time())
+            safe_prompt = "".join(c for c in prompt[:30] if c.isalnum() or c in (' ', '-', '_')).rstrip()
+            output_filename = f"veo_video_{safe_prompt.replace(' ', '_')}_{timestamp}.mp4"
+
+        banner = "=" * 60
+        print(banner)
+        print("🎬 VEO VIDEO GENERATION WORKFLOW")
+        print(banner)
+
+        # Each step returns a falsy value on failure; nesting lets every failure
+        # path fall through to the FAILED banner instead of returning silently.
+        success = False
+
+        # Step 1: Generate video
+        operation_name = self.generate_video(prompt)
+        if operation_name:
+            # Step 2: Wait for completion
+            video_uri = self.wait_for_completion(operation_name)
+            if video_uri:
+                # Step 3: Download video
+                success = self.download_video(video_uri, output_filename)
+
+        print(banner)
+        if success:
+            print("🎉 SUCCESS! Video generation complete!")
+            print(f"📁 Video saved as: {output_filename}")
+        else:
+            print("❌ FAILED! Video generation or download failed")
+        print(banner)
+
+        return success
+
+
+def main():
+    """
+    Run the end-to-end Veo example against a LiteLLM proxy.
+
+    Settings are read from these environment variables:
+    - LITELLM_BASE_URL: Your LiteLLM proxy URL (default: http://localhost:4000/gemini/v1beta)
+    - LITELLM_API_KEY: Your LiteLLM API key (default: sk-1234)
+    """
+
+    # Environment overrides, falling back to local-development defaults
+    api_key = os.getenv("LITELLM_API_KEY", "sk-1234")
+    base_url = os.getenv("LITELLM_BASE_URL", "http://localhost:4000/gemini/v1beta")
+
+    print("🚀 Starting Veo Video Generation Example")
+    print(f"📡 Using LiteLLM proxy at: {base_url}")
+
+    generator = VeoVideoGenerator(base_url=base_url, api_key=api_key)
+
+    # Sample prompts; only the first is used, change the index to try another
+    example_prompts = [
+        "A cat playing with a ball of yarn in a sunny garden",
+        "Ocean waves crashing against rocky cliffs at sunset",
+        "A bustling city street with people walking and cars passing by",
+        "A peaceful forest with sunlight filtering through the trees"
+    ]
+
+    prompt = example_prompts[0]
+    print(f"🎬 Using prompt: '{prompt}'")
+
+    # Full workflow: start generation, poll, download
+    if generator.generate_and_download(prompt):
+        print("\n✅ Example completed successfully!")
+        print("💡 Try modifying the prompt in the script for different videos!")
+        return
+
+    print("\n❌ Example failed!")
+    print("🔧 Check your LiteLLM proxy configuration and Google AI Studio API key")
+
+    # Troubleshooting tips
+    troubleshooting = (
+        "\n🔍 Troubleshooting:",
+        "1. Ensure LiteLLM proxy is running with Google AI Studio pass-through",
+        "2. Verify your Google AI Studio API key has Veo access",
+        "3. Check that your prompt meets Veo's content guidelines",
+        "4. Review the LiteLLM proxy logs for detailed error information",
+    )
+    for line in troubleshooting:
+        print(line)
+
+if __name__ == "__main__":
+    main()
--- a/docs/my-website/docs/pass_through/google_ai_studio.md
+++ b/docs/my-website/docs/pass_through/google_ai_studio.md
@ -230,6 +230,13 @@ curl -X POST "https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5
 ```


+## **Example 4: Video Generation with Veo**
+
+Generate videos using Google's Veo model through LiteLLM pass-through routes.
+
+[**→ Complete Veo Video Generation Guide**](../proxy/veo_video_generation.md)
+
+
 ## Advanced 

 Pre-requisites
--- a/docs/my-website/docs/proxy/veo_video_generation.md
+++ b/docs/my-website/docs/proxy/veo_video_generation.md
@ -0,0 +1,163 @@
+import Tabs from '@theme/Tabs';
+import TabItem from '@theme/TabItem';
+
+# Veo Video Generation with Google AI Studio
+
+Generate videos using Google's Veo model through LiteLLM's pass-through endpoints.
+
+## Quick Start
+
+LiteLLM allows you to use Google AI Studio's Veo video generation API through pass-through routes with zero configuration.
+
+### 1. Add Google AI Studio API Key to your environment 
+
+```bash
+export GEMINI_API_KEY="your_google_ai_studio_api_key"
+```
+
+### 2. Start LiteLLM Proxy 
+
+```bash
+litellm
+
+# RUNNING on http://0.0.0.0:4000
+```
+
+### 3. Generate Video
+
+<Tabs>
+<TabItem value="python" label="Python">
+
+```python
+import requests
+import time
+import json
+
+# Configuration
+BASE_URL = "http://localhost:4000/gemini/v1beta"
+API_KEY = "anything"  # Use "anything" as the key
+
+headers = {
+    "x-goog-api-key": API_KEY,
+    "Content-Type": "application/json"
+}
+
+# Step 1: Initiate video generation
+def generate_video(prompt):
+    """Start a Veo generation job and return its operation name."""
+    endpoint = f"{BASE_URL}/models/veo-3.0-generate-preview:predictLongRunning"
+    body = {
+        "instances": [
+            {"prompt": prompt},
+        ],
+    }
+
+    resp = requests.post(endpoint, headers=headers, json=body)
+    resp.raise_for_status()
+
+    return resp.json().get("name")  # Operation name
+
+# Step 2: Poll for completion
+def wait_for_completion(operation_name):
+    """Poll the operation until done; return the video URI or raise on error."""
+    operation_url = f"{BASE_URL}/{operation_name}"
+    while True:
+        response = requests.get(operation_url, headers=headers)
+        response.raise_for_status()
+        data = response.json()
+
+        # Stop on an API-reported error instead of polling forever
+        if "error" in data:
+            raise RuntimeError(f"Video generation failed: {data['error']}")
+        if data.get("done", False):
+            return data["response"]["generateVideoResponse"]["generatedSamples"][0]["video"]["uri"]
+
+        time.sleep(10)  # Wait 10 seconds before next poll
+
+# Step 3: Download video
+def download_video(video_uri, filename="generated_video.mp4"):
+    """Fetch the video through the proxy and save it to `filename`."""
+    # Point the Google-hosted file URL at the LiteLLM proxy instead
+    google_base = "https://generativelanguage.googleapis.com/v1beta"
+    proxied_url = video_uri.replace(google_base, BASE_URL)
+
+    resp = requests.get(proxied_url, headers=headers, stream=True)
+    resp.raise_for_status()
+
+    # Write the body in 8192-byte pieces, skipping empty chunks
+    with open(filename, 'wb') as out:
+        for piece in resp.iter_content(chunk_size=8192):
+            if piece:
+                out.write(piece)
+
+    return filename
+
+# Complete workflow
+prompt = "A cat playing with a ball of yarn in a sunny garden"
+
+print("Generating video...")
+operation_name = generate_video(prompt)
+
+print("Waiting for completion...")
+video_uri = wait_for_completion(operation_name)
+
+print("Downloading video...")
+filename = download_video(video_uri)
+
+print(f"Video saved as: {filename}")
+```
+
+</TabItem>
+
+<TabItem value="curl" label="Curl">
+
+```bash
+# Step 1: Initiate video generation
+curl -X POST "http://localhost:4000/gemini/v1beta/models/veo-3.0-generate-preview:predictLongRunning" \
+  -H "x-goog-api-key: anything" \
+  -H "Content-Type: application/json" \
+  -d '{
+    "instances": [{
+      "prompt": "A cat playing with a ball of yarn in a sunny garden"
+    }]
+  }'
+
+# Response will include operation name:
+# {"name": "operations/generate_12345"}
+
+# Step 2: Poll for completion
+curl -X GET "http://localhost:4000/gemini/v1beta/operations/generate_12345" \
+  -H "x-goog-api-key: anything"
+
+# Step 3: Download video (when done=true)
+curl -X GET "http://localhost:4000/gemini/v1beta/files/VIDEO_ID:download?alt=media" \
+  -H "x-goog-api-key: anything" \
+  --output generated_video.mp4
+```
+
+</TabItem>
+</Tabs>
+
+## Complete Example
+
+For a full working example with error handling and logging, see our [Veo Video Generation Cookbook](https://github.com/BerriAI/litellm/blob/main/cookbook/veo_video_generation.py).
+
+## How It Works
+
+1. **Video Generation Request**: Send a prompt to Veo's `predictLongRunning` endpoint
+2. **Operation Polling**: Monitor the long-running operation until completion
+3. **File Download**: Download the generated video through LiteLLM's pass-through with automatic redirect handling
+
+LiteLLM handles:
+- ✅ Authentication with Google AI Studio
+- ✅ Request routing and proxying
+- ✅ Automatic redirect handling for file downloads
+
+## Configuration Options
+
+### Environment Variables
+
+```bash
+export GEMINI_API_KEY="your_google_ai_studio_api_key"
+```
+
--- a/litellm/__init__.py
+++ b/litellm/__init__.py
@ -450,6 +450,7 @@ vertex_vision_models: Set = set()
 vertex_chat_models: Set = set()
 vertex_code_chat_models: Set = set()
 vertex_ai_image_models: Set = set()
+vertex_ai_video_models: Set = set()
 vertex_text_models: Set = set()
 vertex_code_text_models: Set = set()
 vertex_embedding_models: Set = set()
@ -605,6 +606,9 @@ def add_known_models():
        elif value.get("litellm_provider") == "vertex_ai-image-models":
            key = key.replace("vertex_ai/", "")
            vertex_ai_image_models.add(key)
+        elif value.get("litellm_provider") == "vertex_ai-video-models":
+            key = key.replace("vertex_ai/", "")
+            vertex_ai_video_models.add(key)
        elif value.get("litellm_provider") == "vertex_ai-openai_models":
            key = key.replace("vertex_ai/", "")
            vertex_openai_models.add(key)
--- a/litellm/litellm_core_utils/get_llm_provider_logic.py
+++ b/litellm/litellm_core_utils/get_llm_provider_logic.py
@ -320,6 +320,7 @@ def get_llm_provider(  # noqa: PLR0915
            or model in litellm.vertex_embedding_models
            or model in litellm.vertex_vision_models
            or model in litellm.vertex_ai_image_models
+            or model in litellm.vertex_ai_video_models
        ):
            custom_llm_provider = "vertex_ai"
        ## ai21
--- a/litellm/llms/custom_httpx/http_handler.py
+++ b/litellm/llms/custom_httpx/http_handler.py
@ -212,6 +212,7 @@ class AsyncHTTPHandler:
            verify=ssl_config,
            cert=cert,
            headers=headers,
+            follow_redirects=True,
        )

    async def close(self):
@ -687,6 +688,7 @@ class HTTPHandler:
                verify=ssl_config,
                cert=cert,
                headers=headers,
+                follow_redirects=True,
            )
        else:
            self.client = client
--- a/litellm/model_prices_and_context_window_backup.json
+++ b/litellm/model_prices_and_context_window_backup.json
@ -9484,6 +9484,48 @@
        "source": "https://aistudio.google.com",
        "supports_tool_choice": true
    },
+    "gemini/veo-3.0-generate-preview": {
+        "max_tokens": 1024,
+        "max_input_tokens": 1024,
+        "output_cost_per_second": 0.75,
+        "litellm_provider": "gemini",
+        "mode": "video_generation",
+        "supported_modalities": [
+            "text"
+        ],
+        "supported_output_modalities": [
+            "video"
+        ],
+        "source": "https://ai.google.dev/gemini-api/docs/video"
+    },
+    "gemini/veo-3.0-fast-generate-preview": {
+        "max_tokens": 1024,
+        "max_input_tokens": 1024,
+        "output_cost_per_second": 0.40,
+        "litellm_provider": "gemini",
+        "mode": "video_generation",
+        "supported_modalities": [
+            "text"
+        ],
+        "supported_output_modalities": [
+            "video"
+        ],
+        "source": "https://ai.google.dev/gemini-api/docs/video"
+    },
+    "gemini/veo-2.0-generate-001": {
+        "max_tokens": 1024,
+        "max_input_tokens": 1024,
+        "output_cost_per_second": 0.35,
+        "litellm_provider": "gemini",
+        "mode": "video_generation",
+        "supported_modalities": [
+            "text"
+        ],
+        "supported_output_modalities": [
+            "video"
+        ],
+        "source": "https://ai.google.dev/gemini-api/docs/video"
+    },
    "vertex_ai/claude-opus-4-1": {
        "max_tokens": 4096,
        "max_input_tokens": 200000,
@ -10301,6 +10343,48 @@
        "mode": "image_generation",
        "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing"
    },
+    "vertex_ai/veo-3.0-generate-preview": {
+        "max_tokens": 1024,
+        "max_input_tokens": 1024,
+        "output_cost_per_second": 0.75,
+        "litellm_provider": "vertex_ai-video-models",
+        "mode": "video_generation",
+        "supported_modalities": [
+            "text"
+        ],
+        "supported_output_modalities": [
+            "video"
+        ],
+        "source": "https://ai.google.dev/gemini-api/docs/video"
+    },
+    "vertex_ai/veo-3.0-fast-generate-preview": {
+        "max_tokens": 1024,
+        "max_input_tokens": 1024,
+        "output_cost_per_second": 0.40,
+        "litellm_provider": "vertex_ai-video-models",
+        "mode": "video_generation",
+        "supported_modalities": [
+            "text"
+        ],
+        "supported_output_modalities": [
+            "video"
+        ],
+        "source": "https://ai.google.dev/gemini-api/docs/video"
+    },
+    "vertex_ai/veo-2.0-generate-001": {
+        "max_tokens": 1024,
+        "max_input_tokens": 1024,
+        "output_cost_per_second": 0.35,
+        "litellm_provider": "vertex_ai-video-models",
+        "mode": "video_generation",
+        "supported_modalities": [
+            "text"
+        ],
+        "supported_output_modalities": [
+            "video"
+        ],
+        "source": "https://ai.google.dev/gemini-api/docs/video"
+    },
    "text-embedding-004": {
        "max_tokens": 2048,
        "max_input_tokens": 2048,
--- a/model_prices_and_context_window.json
+++ b/model_prices_and_context_window.json
@ -9484,6 +9484,48 @@
        "source": "https://aistudio.google.com",
        "supports_tool_choice": true
    },
+    "gemini/veo-3.0-generate-preview": {
+        "max_tokens": 1024,
+        "max_input_tokens": 1024,
+        "output_cost_per_second": 0.75,
+        "litellm_provider": "gemini",
+        "mode": "video_generation",
+        "supported_modalities": [
+            "text"
+        ],
+        "supported_output_modalities": [
+            "video"
+        ],
+        "source": "https://ai.google.dev/gemini-api/docs/video"
+    },
+    "gemini/veo-3.0-fast-generate-preview": {
+        "max_tokens": 1024,
+        "max_input_tokens": 1024,
+        "output_cost_per_second": 0.40,
+        "litellm_provider": "gemini",
+        "mode": "video_generation",
+        "supported_modalities": [
+            "text"
+        ],
+        "supported_output_modalities": [
+            "video"
+        ],
+        "source": "https://ai.google.dev/gemini-api/docs/video"
+    },
+    "gemini/veo-2.0-generate-001": {
+        "max_tokens": 1024,
+        "max_input_tokens": 1024,
+        "output_cost_per_second": 0.35,
+        "litellm_provider": "gemini",
+        "mode": "video_generation",
+        "supported_modalities": [
+            "text"
+        ],
+        "supported_output_modalities": [
+            "video"
+        ],
+        "source": "https://ai.google.dev/gemini-api/docs/video"
+    },
    "vertex_ai/claude-opus-4-1": {
        "max_tokens": 4096,
        "max_input_tokens": 200000,
@ -10301,6 +10343,48 @@
        "mode": "image_generation",
        "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing"
    },
+    "vertex_ai/veo-3.0-generate-preview": {
+        "max_tokens": 1024,
+        "max_input_tokens": 1024,
+        "output_cost_per_second": 0.75,
+        "litellm_provider": "vertex_ai-video-models",
+        "mode": "video_generation",
+        "supported_modalities": [
+            "text"
+        ],
+        "supported_output_modalities": [
+            "video"
+        ],
+        "source": "https://ai.google.dev/gemini-api/docs/video"
+    },
+    "vertex_ai/veo-3.0-fast-generate-preview": {
+        "max_tokens": 1024,
+        "max_input_tokens": 1024,
+        "output_cost_per_second": 0.40,
+        "litellm_provider": "vertex_ai-video-models",
+        "mode": "video_generation",
+        "supported_modalities": [
+            "text"
+        ],
+        "supported_output_modalities": [
+            "video"
+        ],
+        "source": "https://ai.google.dev/gemini-api/docs/video"
+    },
+    "vertex_ai/veo-2.0-generate-001": {
+        "max_tokens": 1024,
+        "max_input_tokens": 1024,
+        "output_cost_per_second": 0.35,
+        "litellm_provider": "vertex_ai-video-models",
+        "mode": "video_generation",
+        "supported_modalities": [
+            "text"
+        ],
+        "supported_output_modalities": [
+            "video"
+        ],
+        "source": "https://ai.google.dev/gemini-api/docs/video"
+    },
    "text-embedding-004": {
        "max_tokens": 2048,
        "max_input_tokens": 2048,
--- a/tests/test_litellm/proxy/pass_through_endpoints/test_pass_through_endpoints.py
+++ b/tests/test_litellm/proxy/pass_through_endpoints/test_pass_through_endpoints.py
@ -1245,3 +1245,58 @@ async def test_delete_pass_through_endpoint_empty_list():
        # Verify the exception
        assert exc_info.value.status_code == 400
        assert "no pass-through endpoints setup" in str(exc_info.value.detail).lower()
+
+
+
+@pytest.mark.asyncio
+async def test_pass_through_with_httpbin_redirect():
+    """
+    Integration test using httpbin.org redirect endpoint to test real redirect handling.
+    This tests the actual redirect handling capability end-to-end using the full pass_through_request function.
+    Skips only when the request itself fails; assertion failures are real failures.
+    """
+    from unittest.mock import MagicMock
+
+    from fastapi import Request
+    from starlette.datastructures import Headers, QueryParams
+
+    from litellm.proxy.pass_through_endpoints.pass_through_endpoints import (
+        pass_through_request,
+    )
+
+    # Create mock request
+    mock_request = MagicMock(spec=Request)
+    mock_request.method = "GET"
+    mock_request.headers = Headers({})
+    mock_request.query_params = QueryParams("")
+
+    # Mock the body method to return empty bytes for GET request
+    async def mock_body():
+        return b""
+    mock_request.body = mock_body
+
+    # Mock user API key dict
+    mock_user_api_key_dict = MagicMock()
+
+    try:
+        # Test with httpbin.org redirect endpoint
+        # This will redirect to httpbin.org/get
+        response = await pass_through_request(
+            request=mock_request,
+            target="https://httpbin.org/redirect/1",
+            custom_headers={},
+            user_api_key_dict=mock_user_api_key_dict
+        )
+    except Exception as e:
+        # If httpbin.org is not accessible, skip the test
+        pytest.skip(f"Could not reach httpbin.org for integration test: {e}")
+
+    # Assertions stay outside the try block: `except Exception` would otherwise
+    # catch AssertionError and turn a genuine failure into a skip.
+    # Should get the final response (200) from /get endpoint, not the redirect (302)
+    assert response.status_code == 200
+
+    # httpbin.org/get returns JSON with info about the request
+    response_content = response.body.decode('utf-8')
+    assert '"url": "https://httpbin.org/get"' in response_content
+    print("GOT A Response from HTTPBIN=", response_content)