[Feat] Allow using Veo Video Generation through LiteLLM Pass through routes (#14228)

* fix: add follow_redirects=True,

* test_pass_through_with_httpbin_redirect

* cook book veo video

* docs Veo Video Generation with Google AI Studio

* add veo-3.0-generate-preview cost tracking details

* track vertex_video_models
This commit is contained in:
Ishaan Jaff 2025-09-03 18:25:43 -07:00 committed by GitHub
parent be7c762882
commit 23ae7170d1
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
9 changed files with 711 additions and 0 deletions

View File

@ -0,0 +1,311 @@
#!/usr/bin/env python3
"""
Complete example for Veo video generation through LiteLLM proxy.
This script demonstrates how to:
1. Generate videos using Google's Veo model
2. Poll for completion status
3. Download the generated video file
Requirements:
- LiteLLM proxy running with Google AI Studio pass-through configured
- Google AI Studio API key with Veo access
"""
import json
import os
import time
import requests
from typing import Optional
class VeoVideoGenerator:
    """Complete Veo video generation client using LiteLLM proxy.

    Wraps the three calls of the workflow: start a long-running generation,
    poll the operation until it is done, and download the resulting file.
    Every request carries the LiteLLM key in the ``x-goog-api-key`` header.
    Failures are reported with ``print`` and signalled by ``None``/``False``
    return values rather than by exceptions.
    """

    # Absolute file URIs returned by Google start with this prefix; it is
    # swapped for the proxy base URL so the download also goes through LiteLLM.
    _GOOGLE_API_PREFIX = "https://generativelanguage.googleapis.com/v1beta/"
    _MEGABYTE = 1024 * 1024

    def __init__(self, base_url: str = "http://localhost:4000/gemini/v1beta",
                 api_key: str = "sk-1234"):
        """
        Initialize the Veo video generator.

        Args:
            base_url: Base URL for the LiteLLM proxy with Gemini pass-through
            api_key: API key for LiteLLM proxy authentication
        """
        self.base_url = base_url
        self.api_key = api_key
        self.headers = {
            "x-goog-api-key": api_key,
            "Content-Type": "application/json"
        }

    def generate_video(self, prompt: str) -> Optional[str]:
        """
        Initiate video generation with Veo.

        Args:
            prompt: Text description of the video to generate

        Returns:
            Operation name if successful, None otherwise
        """
        print(f"🎬 Generating video with prompt: '{prompt}'")
        url = f"{self.base_url}/models/veo-3.0-generate-preview:predictLongRunning"
        payload = {
            "instances": [{
                "prompt": prompt
            }]
        }
        try:
            response = requests.post(url, headers=self.headers, json=payload)
            response.raise_for_status()
            data = response.json()
        except requests.RequestException as e:
            print(f"❌ Failed to start video generation: {e}")
            error_response = getattr(e, "response", None)
            if error_response is not None:
                try:
                    error_data = error_response.json()
                    print(f"Error details: {json.dumps(error_data, indent=2)}")
                except ValueError:
                    # Body is not JSON; fall back to the raw text.
                    print(f"Error response: {error_response.text}")
            return None

        operation_name = data.get("name")
        if operation_name:
            print(f"✅ Video generation started: {operation_name}")
            return operation_name
        print("❌ No operation name returned")
        print(f"Response: {json.dumps(data, indent=2)}")
        return None

    def wait_for_completion(self, operation_name: str, max_wait_time: int = 600) -> Optional[str]:
        """
        Poll operation status until video generation is complete.

        Args:
            operation_name: Name of the operation to monitor
            max_wait_time: Maximum time to wait in seconds (default: 10 minutes)

        Returns:
            Video URI if successful, None on error, malformed response or timeout
        """
        print("⏳ Waiting for video generation to complete...")
        operation_url = f"{self.base_url}/{operation_name}"
        start_time = time.time()
        poll_interval = 10  # Start with 10 seconds
        while time.time() - start_time < max_wait_time:
            try:
                print(f"🔍 Polling status... ({int(time.time() - start_time)}s elapsed)")
                response = requests.get(operation_url, headers=self.headers)
                response.raise_for_status()
                data = response.json()
            except requests.RequestException as e:
                # Transient polling failures are retried until the deadline.
                print(f"❌ Error polling operation status: {e}")
                time.sleep(poll_interval)
                continue

            # Check for errors
            if "error" in data:
                print("❌ Error in video generation:")
                print(json.dumps(data["error"], indent=2))
                return None

            # Check if operation is complete
            if data.get("done", False):
                print("🎉 Video generation complete!")
                try:
                    # Extract video URI from nested response
                    video_uri = data["response"]["generateVideoResponse"]["generatedSamples"][0]["video"]["uri"]
                except (KeyError, IndexError, TypeError) as e:
                    # Also covers an empty sample list or an unexpected shape,
                    # which would otherwise escape as an unhandled exception.
                    print(f"❌ Could not extract video URI: {e}")
                    print("Full response:")
                    print(json.dumps(data, indent=2))
                    return None
                print(f"📹 Video URI: {video_uri}")
                return video_uri

            # Wait before next poll, with exponential backoff
            time.sleep(poll_interval)
            poll_interval = min(poll_interval * 1.2, 30)  # Cap at 30 seconds

        print(f"⏰ Timeout after {max_wait_time} seconds")
        return None

    def _build_download_url(self, video_uri: str) -> str:
        """Translate a video URI from the operation response into a proxy URL."""
        base = self.base_url.rstrip("/")
        if video_uri.startswith(self._GOOGLE_API_PREFIX):
            # Absolute Google URI: keep the path, replace host/version with the proxy.
            return f"{base}/{video_uri[len(self._GOOGLE_API_PREFIX):]}"
        if video_uri.startswith("files/"):
            # Example: files/abc123 -> <base>/files/abc123:download?alt=media
            return f"{base}/{video_uri}:download?alt=media"
        return f"{base}/{video_uri}"

    def download_video(self, video_uri: str, output_filename: str = "generated_video.mp4") -> bool:
        """
        Download the generated video file.

        Args:
            video_uri: URI of the video to download (from Google's response)
            output_filename: Local filename to save the video

        Returns:
            True if download successful, False otherwise
        """
        print(f"⬇️ Downloading video...")
        print(f"Original URI: {video_uri}")
        litellm_download_url = self._build_download_url(video_uri)
        print(f"Download URL: {litellm_download_url}")
        try:
            # Download with streaming and redirect handling; the context
            # manager releases the connection even if writing fails.
            with requests.get(
                litellm_download_url,
                headers=self.headers,
                stream=True,
                allow_redirects=True  # Handle redirects automatically
            ) as response:
                response.raise_for_status()
                # Save video file
                with open(output_filename, 'wb') as f:
                    downloaded_size = 0
                    next_report = self._MEGABYTE
                    for chunk in response.iter_content(chunk_size=8192):
                        if not chunk:
                            continue
                        f.write(chunk)
                        downloaded_size += len(chunk)
                        # Progress indicator for large files: report each time a
                        # MB boundary is crossed, whatever the chunk sizes are.
                        if downloaded_size >= next_report:
                            print(f"📦 Downloaded {downloaded_size / (1024*1024):.1f} MB...")
                            while next_report <= downloaded_size:
                                next_report += self._MEGABYTE
        except requests.RequestException as e:
            print(f"❌ Download failed: {e}")
            error_response = getattr(e, "response", None)
            if error_response is not None:
                print(f"Status code: {error_response.status_code}")
                print(f"Response headers: {dict(error_response.headers)}")
            return False

        # Verify file was created and has content
        if not os.path.exists(output_filename):
            print("❌ File was not created")
            return False
        file_size = os.path.getsize(output_filename)
        if file_size > 0:
            print(f"✅ Video downloaded successfully!")
            print(f"📁 Saved as: {output_filename}")
            print(f"📏 File size: {file_size / (1024*1024):.2f} MB")
            return True
        print("❌ Downloaded file is empty")
        os.remove(output_filename)
        return False

    def generate_and_download(self, prompt: str, output_filename: Optional[str] = None) -> bool:
        """
        Complete workflow: generate video and download it.

        Args:
            prompt: Text description for video generation
            output_filename: Output filename (auto-generated if None)

        Returns:
            True if successful, False otherwise
        """
        # Auto-generate filename if not provided
        if output_filename is None:
            timestamp = int(time.time())
            # Keep only filename-safe characters from the first 30 of the prompt.
            safe_prompt = "".join(c for c in prompt[:30] if c.isalnum() or c in (' ', '-', '_')).rstrip()
            output_filename = f"veo_video_{safe_prompt.replace(' ', '_')}_{timestamp}.mp4"
        print("=" * 60)
        print("🎬 VEO VIDEO GENERATION WORKFLOW")
        print("=" * 60)
        # Step 1: Generate video
        operation_name = self.generate_video(prompt)
        if not operation_name:
            return False
        # Step 2: Wait for completion
        video_uri = self.wait_for_completion(operation_name)
        if not video_uri:
            return False
        # Step 3: Download video
        success = self.download_video(video_uri, output_filename)
        if success:
            print("=" * 60)
            print("🎉 SUCCESS! Video generation complete!")
            print(f"📁 Video saved as: {output_filename}")
            print("=" * 60)
        else:
            print("=" * 60)
            print("❌ FAILED! Video generation or download failed")
            print("=" * 60)
        return success
def main():
"""
Example usage of the VeoVideoGenerator: read settings from the environment,
generate one video from a sample prompt, download it, and print the outcome.
Configure these environment variables:
- LITELLM_BASE_URL: Your LiteLLM proxy URL (default: http://localhost:4000/gemini/v1beta)
- LITELLM_API_KEY: Your LiteLLM API key (default: sk-1234)
"""
# Configuration from environment or defaults
base_url = os.getenv("LITELLM_BASE_URL", "http://localhost:4000/gemini/v1beta")
api_key = os.getenv("LITELLM_API_KEY", "sk-1234")
print("🚀 Starting Veo Video Generation Example")
print(f"📡 Using LiteLLM proxy at: {base_url}")
# Initialize generator
generator = VeoVideoGenerator(base_url=base_url, api_key=api_key)
# Example prompts - try different ones!
example_prompts = [
"A cat playing with a ball of yarn in a sunny garden",
"Ocean waves crashing against rocky cliffs at sunset",
"A bustling city street with people walking and cars passing by",
"A peaceful forest with sunlight filtering through the trees"
]
# Only the first prompt is used; the others are alternatives to pick by
# editing the index below (nothing is read from the user here).
prompt = example_prompts[0]
print(f"🎬 Using prompt: '{prompt}'")
# Generate and download video (output filename is auto-generated)
success = generator.generate_and_download(prompt)
if success:
print("\n✅ Example completed successfully!")
print("💡 Try modifying the prompt in the script for different videos!")
else:
print("\n❌ Example failed!")
print("🔧 Check your LiteLLM proxy configuration and Google AI Studio API key")
# Troubleshooting tips
# NOTE(review): indentation is lost in this view, so it is unclear whether the
# tips below print only on failure or on every run - confirm against the file.
print("\n🔍 Troubleshooting:")
print("1. Ensure LiteLLM proxy is running with Google AI Studio pass-through")
print("2. Verify your Google AI Studio API key has Veo access")
print("3. Check that your prompt meets Veo's content guidelines")
print("4. Review the LiteLLM proxy logs for detailed error information")
# Run the example only when executed as a script, not when imported.
if __name__ == "__main__":
main()

View File

@ -230,6 +230,13 @@ curl -X POST "https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5
```
## **Example 4: Video Generation with Veo**
Generate videos using Google's Veo model through LiteLLM pass-through routes.
[**→ Complete Veo Video Generation Guide**](../proxy/veo_video_generation.md)
## Advanced
Pre-requisites

View File

@ -0,0 +1,163 @@
import Tabs from '@theme/Tabs';
import TabItem from '@theme/TabItem';
# Veo Video Generation with Google AI Studio
Generate videos using Google's Veo model through LiteLLM's pass-through endpoints.
## Quick Start
LiteLLM allows you to use Google AI Studio's Veo video generation API through pass-through routes with zero configuration.
### 1. Add Google AI Studio API Key to your environment
```bash
export GEMINI_API_KEY="your_google_ai_studio_api_key"
```
### 2. Start LiteLLM Proxy
```bash
litellm
# RUNNING on http://0.0.0.0:4000
```
### 3. Generate Video
<Tabs>
<TabItem value="python" label="Python">
```python
import requests
import time
import json
# Configuration: proxy endpoint and the headers sent with every request
BASE_URL = "http://localhost:4000/gemini/v1beta"
API_KEY = "anything"  # Use "anything" as the key
headers = {"x-goog-api-key": API_KEY, "Content-Type": "application/json"}
# Step 1: Initiate video generation
def generate_video(prompt):
    """Start a Veo generation job for ``prompt`` and return its operation name.

    Raises an HTTP error from ``raise_for_status`` on a non-success response;
    returns None when the response body has no "name" field.
    """
    endpoint = f"{BASE_URL}/models/veo-3.0-generate-preview:predictLongRunning"
    body = {"instances": [{"prompt": prompt}]}
    reply = requests.post(endpoint, headers=headers, json=body)
    reply.raise_for_status()
    return reply.json().get("name")  # Operation name
# Step 2: Poll for completion
def wait_for_completion(operation_name, max_wait_time=600):
    """Poll the operation until it is done and return the generated video URI.

    Args:
        operation_name: Operation name returned by generate_video().
        max_wait_time: Maximum number of seconds to keep polling.

    Raises:
        RuntimeError: The operation response contains an "error" field.
        TimeoutError: The operation is not done within ``max_wait_time``.
    """
    operation_url = f"{BASE_URL}/{operation_name}"
    deadline = time.time() + max_wait_time

    while time.time() < deadline:
        response = requests.get(operation_url, headers=headers)
        response.raise_for_status()
        data = response.json()

        # A failed operation carries "error" instead of "response"; surface it
        # directly rather than failing later with a KeyError.
        if "error" in data:
            raise RuntimeError(f"Video generation failed: {json.dumps(data['error'])}")

        if data.get("done", False):
            # Extract video URI
            video_uri = data["response"]["generateVideoResponse"]["generatedSamples"][0]["video"]["uri"]
            return video_uri

        time.sleep(10)  # Wait 10 seconds before next poll

    raise TimeoutError(f"Video generation did not finish within {max_wait_time} seconds")
# Step 3: Download video
def download_video(video_uri, filename="generated_video.mp4"):
    """Stream the video at ``video_uri`` through the proxy into ``filename``.

    Returns the filename that was written.
    """
    # Replace Google URL with LiteLLM proxy URL
    google_prefix = "https://generativelanguage.googleapis.com/v1beta"
    proxied_url = video_uri.replace(google_prefix, BASE_URL)

    reply = requests.get(proxied_url, headers=headers, stream=True)
    reply.raise_for_status()

    with open(filename, 'wb') as out:
        for piece in reply.iter_content(chunk_size=8192):
            if not piece:
                continue
            out.write(piece)
    return filename
# Complete workflow: generate -> poll -> download
prompt = "A cat playing with a ball of yarn in a sunny garden"

# 1. Kick off generation and keep the operation name for polling
print("Generating video...")
operation_name = generate_video(prompt)

# 2. Block until the operation reports the video URI
print("Waiting for completion...")
video_uri = wait_for_completion(operation_name)

# 3. Fetch the file through the proxy
print("Downloading video...")
filename = download_video(video_uri)
print(f"Video saved as: {filename}")
```
</TabItem>
<TabItem value="curl" label="Curl">
```bash
# Step 1: Initiate video generation
curl -X POST "http://localhost:4000/gemini/v1beta/models/veo-3.0-generate-preview:predictLongRunning" \
-H "x-goog-api-key: anything" \
-H "Content-Type: application/json" \
-d '{
"instances": [{
"prompt": "A cat playing with a ball of yarn in a sunny garden"
}]
}'
# Response will include operation name:
# {"name": "operations/generate_12345"}
# Step 2: Poll for completion
# Use the operation name from Step 1 as the path; repeat this request until
# the JSON response contains "done": true.
curl -X GET "http://localhost:4000/gemini/v1beta/operations/generate_12345" \
-H "x-goog-api-key: anything"
# Step 3: Download video (when done=true)
# Replace VIDEO_ID with the file id from the video URI in the completed
# operation response; the file is written to generated_video.mp4.
curl -X GET "http://localhost:4000/gemini/v1beta/files/VIDEO_ID:download?alt=media" \
-H "x-goog-api-key: anything" \
--output generated_video.mp4
```
</TabItem>
</Tabs>
## Complete Example
For a full working example with error handling and logging, see our [Veo Video Generation Cookbook](https://github.com/BerriAI/litellm/blob/main/cookbook/veo_video_generation.py).
## How It Works
1. **Video Generation Request**: Send a prompt to Veo's `predictLongRunning` endpoint
2. **Operation Polling**: Monitor the long-running operation until completion
3. **File Download**: Download the generated video through LiteLLM's pass-through with automatic redirect handling
LiteLLM handles:
- ✅ Authentication with Google AI Studio
- ✅ Request routing and proxying
- ✅ Automatic redirect handling for file downloads
## Configuration Options
### Environment Variables
```bash
export GEMINI_API_KEY="your_google_ai_studio_api_key"
```

View File

@ -450,6 +450,7 @@ vertex_vision_models: Set = set()
vertex_chat_models: Set = set()
vertex_code_chat_models: Set = set()
vertex_ai_image_models: Set = set()
# Vertex AI video-generation model names. Filled in by add_known_models() from
# entries whose litellm_provider is "vertex_ai-video-models", with the
# "vertex_ai/" prefix removed from the key.
vertex_ai_video_models: Set = set()
vertex_text_models: Set = set()
vertex_code_text_models: Set = set()
vertex_embedding_models: Set = set()
@ -605,6 +606,9 @@ def add_known_models():
elif value.get("litellm_provider") == "vertex_ai-image-models":
key = key.replace("vertex_ai/", "")
vertex_ai_image_models.add(key)
elif value.get("litellm_provider") == "vertex_ai-video-models":
key = key.replace("vertex_ai/", "")
vertex_ai_video_models.add(key)
elif value.get("litellm_provider") == "vertex_ai-openai_models":
key = key.replace("vertex_ai/", "")
vertex_openai_models.add(key)

View File

@ -320,6 +320,7 @@ def get_llm_provider( # noqa: PLR0915
or model in litellm.vertex_embedding_models
or model in litellm.vertex_vision_models
or model in litellm.vertex_ai_image_models
or model in litellm.vertex_ai_video_models
):
custom_llm_provider = "vertex_ai"
## ai21

View File

@ -212,6 +212,7 @@ class AsyncHTTPHandler:
verify=ssl_config,
cert=cert,
headers=headers,
follow_redirects=True,
)
async def close(self):
@ -687,6 +688,7 @@ class HTTPHandler:
verify=ssl_config,
cert=cert,
headers=headers,
follow_redirects=True,
)
else:
self.client = client

View File

@ -9484,6 +9484,48 @@
"source": "https://aistudio.google.com",
"supports_tool_choice": true
},
"gemini/veo-3.0-generate-preview": {
"max_tokens": 1024,
"max_input_tokens": 1024,
"output_cost_per_second": 0.75,
"litellm_provider": "gemini",
"mode": "video_generation",
"supported_modalities": [
"text"
],
"supported_output_modalities": [
"video"
],
"source": "https://ai.google.dev/gemini-api/docs/video"
},
"gemini/veo-3.0-fast-generate-preview": {
"max_tokens": 1024,
"max_input_tokens": 1024,
"output_cost_per_second": 0.40,
"litellm_provider": "gemini",
"mode": "video_generation",
"supported_modalities": [
"text"
],
"supported_output_modalities": [
"video"
],
"source": "https://ai.google.dev/gemini-api/docs/video"
},
"gemini/veo-2.0-generate-001": {
"max_tokens": 1024,
"max_input_tokens": 1024,
"output_cost_per_second": 0.35,
"litellm_provider": "gemini",
"mode": "video_generation",
"supported_modalities": [
"text"
],
"supported_output_modalities": [
"video"
],
"source": "https://ai.google.dev/gemini-api/docs/video"
},
"vertex_ai/claude-opus-4-1": {
"max_tokens": 4096,
"max_input_tokens": 200000,
@ -10301,6 +10343,48 @@
"mode": "image_generation",
"source": "https://cloud.google.com/vertex-ai/generative-ai/pricing"
},
"vertex_ai/veo-3.0-generate-preview": {
"max_tokens": 1024,
"max_input_tokens": 1024,
"output_cost_per_second": 0.75,
"litellm_provider": "vertex_ai-video-models",
"mode": "video_generation",
"supported_modalities": [
"text"
],
"supported_output_modalities": [
"video"
],
"source": "https://ai.google.dev/gemini-api/docs/video"
},
"vertex_ai/veo-3.0-fast-generate-preview": {
"max_tokens": 1024,
"max_input_tokens": 1024,
"output_cost_per_second": 0.40,
"litellm_provider": "vertex_ai-video-models",
"mode": "video_generation",
"supported_modalities": [
"text"
],
"supported_output_modalities": [
"video"
],
"source": "https://ai.google.dev/gemini-api/docs/video"
},
"vertex_ai/veo-2.0-generate-001": {
"max_tokens": 1024,
"max_input_tokens": 1024,
"output_cost_per_second": 0.35,
"litellm_provider": "vertex_ai-video-models",
"mode": "video_generation",
"supported_modalities": [
"text"
],
"supported_output_modalities": [
"video"
],
"source": "https://ai.google.dev/gemini-api/docs/video"
},
"text-embedding-004": {
"max_tokens": 2048,
"max_input_tokens": 2048,

View File

@ -9484,6 +9484,48 @@
"source": "https://aistudio.google.com",
"supports_tool_choice": true
},
"gemini/veo-3.0-generate-preview": {
"max_tokens": 1024,
"max_input_tokens": 1024,
"output_cost_per_second": 0.75,
"litellm_provider": "gemini",
"mode": "video_generation",
"supported_modalities": [
"text"
],
"supported_output_modalities": [
"video"
],
"source": "https://ai.google.dev/gemini-api/docs/video"
},
"gemini/veo-3.0-fast-generate-preview": {
"max_tokens": 1024,
"max_input_tokens": 1024,
"output_cost_per_second": 0.40,
"litellm_provider": "gemini",
"mode": "video_generation",
"supported_modalities": [
"text"
],
"supported_output_modalities": [
"video"
],
"source": "https://ai.google.dev/gemini-api/docs/video"
},
"gemini/veo-2.0-generate-001": {
"max_tokens": 1024,
"max_input_tokens": 1024,
"output_cost_per_second": 0.35,
"litellm_provider": "gemini",
"mode": "video_generation",
"supported_modalities": [
"text"
],
"supported_output_modalities": [
"video"
],
"source": "https://ai.google.dev/gemini-api/docs/video"
},
"vertex_ai/claude-opus-4-1": {
"max_tokens": 4096,
"max_input_tokens": 200000,
@ -10301,6 +10343,48 @@
"mode": "image_generation",
"source": "https://cloud.google.com/vertex-ai/generative-ai/pricing"
},
"vertex_ai/veo-3.0-generate-preview": {
"max_tokens": 1024,
"max_input_tokens": 1024,
"output_cost_per_second": 0.75,
"litellm_provider": "vertex_ai-video-models",
"mode": "video_generation",
"supported_modalities": [
"text"
],
"supported_output_modalities": [
"video"
],
"source": "https://ai.google.dev/gemini-api/docs/video"
},
"vertex_ai/veo-3.0-fast-generate-preview": {
"max_tokens": 1024,
"max_input_tokens": 1024,
"output_cost_per_second": 0.40,
"litellm_provider": "vertex_ai-video-models",
"mode": "video_generation",
"supported_modalities": [
"text"
],
"supported_output_modalities": [
"video"
],
"source": "https://ai.google.dev/gemini-api/docs/video"
},
"vertex_ai/veo-2.0-generate-001": {
"max_tokens": 1024,
"max_input_tokens": 1024,
"output_cost_per_second": 0.35,
"litellm_provider": "vertex_ai-video-models",
"mode": "video_generation",
"supported_modalities": [
"text"
],
"supported_output_modalities": [
"video"
],
"source": "https://ai.google.dev/gemini-api/docs/video"
},
"text-embedding-004": {
"max_tokens": 2048,
"max_input_tokens": 2048,

View File

@ -1245,3 +1245,58 @@ async def test_delete_pass_through_endpoint_empty_list():
# Verify the exception
assert exc_info.value.status_code == 400
assert "no pass-through endpoints setup" in str(exc_info.value.detail).lower()
@pytest.mark.asyncio
async def test_pass_through_with_httpbin_redirect():
    """
    Integration test using httpbin.org redirect endpoint to test real redirect handling.

    This tests the actual redirect handling capability end-to-end using the full
    pass_through_request function. The test is skipped only when httpbin.org
    cannot be reached; the assertions run outside the try block so that a
    redirect regression fails the test instead of being reported as a skip.
    """
    from unittest.mock import MagicMock

    from fastapi import Request
    from starlette.datastructures import Headers, QueryParams

    from litellm.proxy.pass_through_endpoints.pass_through_endpoints import (
        pass_through_request,
    )

    # Create mock request
    mock_request = MagicMock(spec=Request)
    mock_request.method = "GET"
    mock_request.headers = Headers({})
    mock_request.query_params = QueryParams("")

    # Mock the body method to return empty bytes for GET request
    async def mock_body():
        return b""

    mock_request.body = mock_body

    # Mock user API key dict
    mock_user_api_key_dict = MagicMock()

    try:
        # Test with httpbin.org redirect endpoint
        # This will redirect to httpbin.org/get
        response = await pass_through_request(
            request=mock_request,
            target="https://httpbin.org/redirect/1",
            custom_headers={},
            user_api_key_dict=mock_user_api_key_dict
        )
    except Exception as e:
        # If httpbin.org is not accessible, skip the test
        pytest.skip(f"Could not reach httpbin.org for integration test: {e}")

    # An upstream 5xx means httpbin itself is unhealthy, not that redirects broke.
    if response.status_code >= 500:
        pytest.skip(f"httpbin.org returned {response.status_code} for integration test")

    # Should get the final response (200) from /get endpoint, not the redirect (302)
    assert response.status_code == 200

    # The response should be from the /get endpoint
    response_content = response.body.decode('utf-8')

    # httpbin.org/get returns JSON with info about the request
    assert '"url": "https://httpbin.org/get"' in response_content
    print("GOT A Response from HTTPBIN=", response_content)