Add Vertex and Gemini Videos API with Cost Tracking + UI support (#16323)

* Use video id for videos api * remove mock code * Potential fix for code scanning alert no. 3630: Clear-text logging of sensitive information Co-authored-by: Copilot Autofix powered by AI <62310815+github-advanced-security[bot]@users.noreply.github.com> * remove print statements * Update video prefix for 'video_' * Add veo with openai videos unified specs * Add videos testing to UI * remove mock code * Remove not need ui changes: * Fix mypy errors related to gemini * fix test_transform_video_create_request * Add vertex ai veo config * Add vertex ai veo config * Add cost tracking for gemini and add optional param passing * fix bugs related to vertex ai veo * Add Gemini Veo Video Generation in Openai Videos Unified Spec (#16229) * Add veo with openai videos unified specs * Add videos testing to UI * remove mock code * Remove not need ui changes: * Fix mypy errors related to gemini * fix test_transform_video_create_request * Add contant video duration for gemini and vertex * Fix litellm_mapped_tests tests * fix azure videos issue * Added doc for videos vertex ai * fix seconds param error * fix lint errors * test_transform_video_create_response_cost_tracking_no_duration --------- Co-authored-by: Copilot Autofix powered by AI <62310815+github-advanced-security[bot]@users.noreply.github.com> Co-authored-by: Ishaan Jaffer <ishaanjaffer0324@gmail.com>
2025-11-09 05:33:51 +05:30 · 2025-11-09 05:33:51 +05:30 · e037d9315d
commit e037d9315d
parent 454ffcd9c7
41 changed files with 3758 additions and 207 deletions
--- a/docs/my-website/docs/providers/azure/videos.md
+++ b/docs/my-website/docs/providers/azure/videos.md
@ -25,7 +25,6 @@ LiteLLM supports Azure OpenAI's video generation models including Sora with full
 import os 
 os.environ["AZURE_OPENAI_API_KEY"] = "your-azure-api-key"
 os.environ["AZURE_OPENAI_API_BASE"] = "https://your-resource.openai.azure.com/"
-os.environ["AZURE_OPENAI_API_VERSION"] = "2024-02-15-preview"
 ```

 ### Basic Usage
@ -37,7 +36,6 @@ import time

 os.environ["AZURE_OPENAI_API_KEY"] = "your-azure-api-key"
 os.environ["AZURE_OPENAI_API_BASE"] = "https://your-resource.openai.azure.com/"
-os.environ["AZURE_OPENAI_API_VERSION"] = "2024-02-15-preview"

 # Generate video
 response = video_generation(
@ -53,8 +51,7 @@ print(f"Initial Status: {response.status}")
 # Check status until video is ready
 while True:
    status_response = video_status(
-        video_id=response.id,
-        custom_llm_provider="azure"
+        video_id=response.id
    )
    
    print(f"Current Status: {status_response.status}")
@ -69,8 +66,7 @@ while True:

 # Download video content when ready
 video_bytes = video_content(
-    video_id=response.id,
-    custom_llm_provider="azure"
+    video_id=response.id
 )

 # Save to file
@ -87,7 +83,6 @@ Here's how to call Azure video generation models with the LiteLLM Proxy Server
 ```bash
 export AZURE_OPENAI_API_KEY="your-azure-api-key"
 export AZURE_OPENAI_API_BASE="https://your-resource.openai.azure.com/"
-export AZURE_OPENAI_API_VERSION="2024-02-15-preview"
 ```

 ### 2. Start the proxy 
@ -102,7 +97,6 @@ model_list:
      model: azure/sora-2
      api_key: os.environ/AZURE_OPENAI_API_KEY
      api_base: os.environ/AZURE_OPENAI_API_BASE
-      api_version: "2024-02-15-preview"
 ```

 </TabItem>
@ -211,8 +205,7 @@ general_settings:
 ```python
 # Download video content
 video_bytes = video_content(
-    video_id="video_1234567890",
-    model="azure/sora-2"
+    video_id="video_1234567890"
 )

 # Save to file
@ -243,8 +236,7 @@ def generate_and_download_video(prompt):
    
    # Step 3: Download video
    video_bytes = litellm.video_content(
-        video_id=video_id,
-        custom_llm_provider="azure"
+        video_id=video_id
    )
    
    # Step 4: Save to file
@ -264,9 +256,9 @@ video_file = generate_and_download_video(
 ```python
 # Video editing with reference image
 response = litellm.video_remix(
+    video_id="video_456",
    prompt="Make the cat jump higher",
    input_reference=open("path/to/image.jpg", "rb"),  # Reference image as file object
-    custom_llm_provider="azure"
    seconds="8"
 )

--- a/docs/my-website/docs/providers/gemini.md
+++ b/docs/my-website/docs/providers/gemini.md
@ -10,7 +10,7 @@ import TabItem from '@theme/TabItem';
 | Provider Route on LiteLLM | `gemini/` |
 | Provider Doc | [Google AI Studio ↗](https://aistudio.google.com/) |
 | API Endpoint for Provider | https://generativelanguage.googleapis.com |
-| Supported OpenAI Endpoints | `/chat/completions`, [`/embeddings`](../embedding/supported_embedding#gemini-ai-embedding-models), `/completions` |
+| Supported OpenAI Endpoints | `/chat/completions`, [`/embeddings`](../embedding/supported_embedding#gemini-ai-embedding-models), `/completions`, [`/videos`](./gemini/videos.md) |
 | Pass-through Endpoint | [Supported](../pass_through/google_ai_studio.md) |

 <br />
--- a/docs/my-website/docs/providers/gemini/videos.md
+++ b/docs/my-website/docs/providers/gemini/videos.md
@ -0,0 +1,409 @@
+import Tabs from '@theme/Tabs';
+import TabItem from '@theme/TabItem';
+
+# Gemini Video Generation (Veo)
+
+LiteLLM supports Google's Veo video generation models through a unified API interface.
+
+| Property | Details |
+|-------|-------|
+| Description | Google's Veo AI video generation models |
+| Provider Route on LiteLLM | `gemini/` |
+| Supported Models | `veo-3.0-generate-preview`, `veo-3.1-generate-preview` |
+| Cost Tracking | ✅ Duration-based pricing |
+| Logging Support | ✅ Full request/response logging |
+| Proxy Server Support | ✅ Full proxy integration with virtual keys |
+| Spend Management | ✅ Budget tracking and rate limiting |
+| Link to Provider Doc | [Google Veo Documentation ↗](https://ai.google.dev/gemini-api/docs/video) |
+
+## Quick Start
+
+### Required API Keys
+
+```python
+import os 
+os.environ["GEMINI_API_KEY"] = "your-google-api-key"
+# OR
+os.environ["GOOGLE_API_KEY"] = "your-google-api-key"
+```
+
+### Basic Usage
+
+```python
+from litellm import video_generation, video_status, video_content
+import os
+import time
+
+os.environ["GEMINI_API_KEY"] = "your-google-api-key"
+
+# Step 1: Generate video
+response = video_generation(
+    model="gemini/veo-3.0-generate-preview",
+    prompt="A cat playing with a ball of yarn in a sunny garden"
+)
+
+print(f"Video ID: {response.id}")
+print(f"Initial Status: {response.status}")  # "processing"
+
+# Step 2: Poll for completion
+while True:
+    status_response = video_status(
+        video_id=response.id
+    )
+    
+    print(f"Current Status: {status_response.status}")
+    
+    if status_response.status == "completed":
+        break
+    elif status_response.status == "failed":
+        print("Video generation failed")
+        break
+    
+    time.sleep(10)  # Wait 10 seconds before checking again
+
+# Step 3: Download video content
+video_bytes = video_content(
+    video_id=response.id
+)
+
+# Save to file
+with open("generated_video.mp4", "wb") as f:
+    f.write(video_bytes)
+
+print("Video downloaded successfully!")
+```
+
+## Supported Models
+
+| Model Name | Description | Max Duration | Status |
+|------------|-------------|--------------|--------|
+| veo-3.0-generate-preview | Veo 3.0 video generation | 8 seconds | Preview |
+| veo-3.1-generate-preview | Veo 3.1 video generation | 8 seconds | Preview |
+
+## Video Generation Parameters
+
+LiteLLM automatically maps OpenAI-style parameters to Veo's format:
+
+| OpenAI Parameter | Veo Parameter | Description | Example |
+|------------------|---------------|-------------|---------|
+| `prompt` | `prompt` | Text description of the video | "A cat playing" |
+| `size` | `aspectRatio` | Video dimensions → aspect ratio | "1280x720" → "16:9" |
+| `seconds` | `durationSeconds` | Duration in seconds | "8" → 8 |
+| `input_reference` | `image` | Reference image to animate | File object or path |
+| `model` | `model` | Model to use | "gemini/veo-3.0-generate-preview" |
+
+### Size to Aspect Ratio Mapping
+
+LiteLLM automatically converts size dimensions to Veo's aspect ratio format:
+- `"1280x720"`, `"1920x1080"` → `"16:9"` (landscape)
+- `"720x1280"`, `"1080x1920"` → `"9:16"` (portrait)
+
+### Supported Veo Parameters
+
+Based on Veo's API:
+- **prompt** (required): Text description with optional audio cues
+- **aspectRatio**: `"16:9"` (default) or `"9:16"`
+- **resolution**: `"720p"` (default) or `"1080p"` (Veo 3.1 only, 16:9 aspect ratio only)
+- **durationSeconds**: Video length (max 8 seconds for most models)
+- **image**: Reference image for animation
+- **negativePrompt**: What to exclude from the video (Veo 3.1)
+- **referenceImages**: Style and content references (Veo 3.1 only)
+
+## Complete Workflow Example
+
+```python
+import litellm
+import time
+
+def generate_and_download_veo_video(
+    prompt: str, 
+    output_file: str = "video.mp4",
+    size: str = "1280x720",
+    seconds: str = "8"
+):
+    """
+    Complete workflow for Veo video generation.
+    
+    Args:
+        prompt: Text description of the video
+        output_file: Where to save the video
+        size: Video dimensions (e.g., "1280x720" for 16:9)
+        seconds: Duration in seconds
+        
+    Returns:
+        bool: True if successful
+    """
+    print(f"🎬 Generating video: {prompt}")
+    
+    # Step 1: Initiate generation
+    response = litellm.video_generation(
+        model="gemini/veo-3.0-generate-preview",
+        prompt=prompt,
+        size=size,      # Maps to aspectRatio
+        seconds=seconds  # Maps to durationSeconds
+    )
+    
+    video_id = response.id
+    print(f"✓ Video generation started (ID: {video_id})")
+    
+    # Step 2: Wait for completion
+    max_wait_time = 600  # 10 minutes
+    start_time = time.time()
+    
+    while time.time() - start_time < max_wait_time:
+        status_response = litellm.video_status(video_id=video_id)
+        
+        if status_response.status == "completed":
+            print("✓ Video generation completed!")
+            break
+        elif status_response.status == "failed":
+            print("✗ Video generation failed")
+            return False
+        
+        print(f"⏳ Status: {status_response.status}")
+        time.sleep(10)
+    else:
+        print("✗ Timeout waiting for video generation")
+        return False
+    
+    # Step 3: Download video
+    print("⬇️  Downloading video...")
+    video_bytes = litellm.video_content(video_id=video_id)
+    
+    with open(output_file, "wb") as f:
+        f.write(video_bytes)
+    
+    print(f"✓ Video saved to {output_file}")
+    return True
+
+# Use it
+generate_and_download_veo_video(
+    prompt="A serene lake at sunset with mountains in the background",
+    output_file="sunset_lake.mp4"
+)
+```
+
+## Async Usage
+
+```python
+from litellm import avideo_generation, avideo_status, avideo_content
+import asyncio
+
+async def async_video_workflow():
+    # Generate video
+    response = await avideo_generation(
+        model="gemini/veo-3.0-generate-preview",
+        prompt="A cat playing with a ball of yarn"
+    )
+    
+    # Poll for completion
+    while True:
+        status = await avideo_status(video_id=response.id)
+        if status.status == "completed":
+            break
+        await asyncio.sleep(10)
+    
+    # Download content
+    video_bytes = await avideo_content(video_id=response.id)
+    
+    with open("video.mp4", "wb") as f:
+        f.write(video_bytes)
+
+# Run it
+asyncio.run(async_video_workflow())
+```
+
+## LiteLLM Proxy Usage
+
+### Configuration
+
+Add Veo models to your `config.yaml`:
+
+```yaml
+model_list:
+  - model_name: veo-3
+    litellm_params:
+      model: gemini/veo-3.0-generate-preview
+      api_key: os.environ/GEMINI_API_KEY
+```
+
+Start the proxy:
+
+```bash
+litellm --config config.yaml
+# Server running on http://0.0.0.0:4000
+```
+
+### Making Requests
+
+<Tabs>
+<TabItem value="curl" label="Curl">
+
+```bash
+# Step 1: Generate video
+curl --location 'http://0.0.0.0:4000/v1/videos' \
+--header 'Content-Type: application/json' \
+--header 'Authorization: Bearer sk-1234' \
+--data '{
+    "model": "veo-3",
+    "prompt": "A cat playing with a ball of yarn in a sunny garden"
+}'
+
+# Response: {"id": "gemini::operations/generate_12345::...", "status": "processing", ...}
+
+# Step 2: Check status
+curl --location 'http://localhost:4000/v1/videos/{video_id}' \
+--header 'x-litellm-api-key: sk-1234'
+
+# Step 3: Download video (when status is "completed")
+curl --location 'http://localhost:4000/v1/videos/{video_id}/content' \
+--header 'x-litellm-api-key: sk-1234' \
+--output video.mp4
+```
+
+</TabItem>
+<TabItem value="python" label="Python SDK">
+
+```python
+import litellm
+
+litellm.api_base = "http://0.0.0.0:4000"
+litellm.api_key = "sk-1234"
+
+# Generate video
+response = litellm.video_generation(
+    model="veo-3",
+    prompt="A cat playing with a ball of yarn in a sunny garden"
+)
+
+# Check status
+import time
+while True:
+    status = litellm.video_status(video_id=response.id)
+    if status.status == "completed":
+        break
+    time.sleep(10)
+
+# Download video
+video_bytes = litellm.video_content(video_id=response.id)
+with open("video.mp4", "wb") as f:
+    f.write(video_bytes)
+```
+
+</TabItem>
+</Tabs>
+
+## Cost Tracking
+
+LiteLLM automatically tracks costs for Veo video generation:
+
+```python
+response = litellm.video_generation(
+    model="gemini/veo-3.0-generate-preview",
+    prompt="A beautiful sunset"
+)
+
+# Cost is calculated based on video duration
+# Veo pricing: ~$0.10 per second (estimated)
+# Default video duration: 8 seconds (DEFAULT_GOOGLE_VIDEO_DURATION_SECONDS)
+# Estimated cost: ~$0.80
+```
+
+## Differences from OpenAI Video API
+
+| Feature | OpenAI (Sora) | Gemini (Veo) |
+|---------|---------------|--------------|
+| Reference Images | ✅ Supported | ✅ Supported (`input_reference` → `image`) |
+| Size Control | ✅ Supported | ✅ Supported (`size` → `aspectRatio`; `16:9` or `9:16`) |
+| Duration Control | ✅ Supported | ✅ Supported (`seconds` → `durationSeconds`) |
+| Video Remix/Edit | ✅ Supported | ❌ Not supported |
+| Video List | ✅ Supported | ❌ Not supported |
+| Prompt-based Generation | ✅ Supported | ✅ Supported |
+| Async Operations | ✅ Supported | ✅ Supported |
+
+## Error Handling
+
+```python
+import time
+
+from litellm import video_generation, video_status, video_content
+from litellm.exceptions import APIError, Timeout
+
+try:
+    response = video_generation(
+        model="gemini/veo-3.0-generate-preview",
+        prompt="A beautiful landscape"
+    )
+    
+    # Poll with timeout
+    max_attempts = 60  # 10 minutes (60 * 10s)
+    for attempt in range(max_attempts):
+        status = video_status(video_id=response.id)
+        
+        if status.status == "completed":
+            video_bytes = video_content(video_id=response.id)
+            with open("video.mp4", "wb") as f:
+                f.write(video_bytes)
+            break
+        elif status.status == "failed":
+            raise APIError("Video generation failed")
+        
+        time.sleep(10)
+    else:
+        raise Timeout("Video generation timed out")
+        
+except APIError as e:
+    print(f"API Error: {e}")
+except Timeout as e:
+    print(f"Timeout: {e}")
+except Exception as e:
+    print(f"Unexpected error: {e}")
+```
+
+## Best Practices
+
+1. **Always poll for completion**: Veo video generation is asynchronous and can take several minutes
+2. **Set reasonable timeouts**: Allow at least 5-10 minutes for video generation
+3. **Handle failures gracefully**: Check for `failed` status and implement retry logic
+4. **Use descriptive prompts**: More detailed prompts generally produce better results
+5. **Store video IDs**: Save the operation ID/video ID to resume polling if your application restarts
+
+## Troubleshooting
+
+### Video generation times out
+
+```python
+# Increase polling timeout
+max_wait_time = 900  # 15 minutes instead of 10
+```
+
+### Video not found when downloading
+
+```python
+# Make sure video is completed before downloading
+status = video_status(video_id=video_id)
+if status.status != "completed":
+    print("Video not ready yet!")
+```
+
+### API key errors
+
+```python
+# Verify your API key is set
+import os
+print("GEMINI_API_KEY is set:", bool(os.environ.get("GEMINI_API_KEY")))  # never print the key itself
+
+# Or pass it explicitly
+response = video_generation(
+    model="gemini/veo-3.0-generate-preview",
+    prompt="...",
+    api_key="your-api-key-here"
+)
+```
+
+## See Also
+
+- [OpenAI Video Generation](../openai/videos.md)
+- [Azure Video Generation](../azure/videos.md)
+- [Vertex AI Video Generation](../vertex_ai/videos.md)
+- [Video Generation API Reference](/docs/videos)
+- [Veo Pass-through Endpoints](/docs/pass_through/google_ai_studio#example-4-video-generation-with-veo)
+
--- a/docs/my-website/docs/providers/openai/videos.md
+++ b/docs/my-website/docs/providers/openai/videos.md
@ -36,7 +36,6 @@ print(f"Status: {response.status}")
 # Download video content when ready
 video_bytes = video_content(
    video_id=response.id,
-    model="sora-2"
 )

 # Save to file
@ -171,8 +170,7 @@ curl http://localhost:4000/v1/videos \
 ```python
 # Download video content
 video_bytes = video_content(
-    video_id="video_1234567890",
-    custom_llm_provider="openai"  # Or use model="sora-2"
+    video_id="video_1234567890"
 )

 # Save to file
@ -203,8 +201,7 @@ def generate_and_download_video(prompt):
    
    # Step 3: Download video
    video_bytes = litellm.video_content(
-        video_id=video_id,
-        custom_llm_provider="openai"  
+        video_id=video_id
    )
    
    # Step 4: Save to file
@ -241,8 +238,7 @@ from litellm.exceptions import BadRequestError, AuthenticationError

 try:
    response = video_generation(
-        prompt="A cat playing with a ball of yarn",
-        model="sora-2"
+        prompt="A cat playing with a ball of yarn"
    )
 except AuthenticationError as e:
    print(f"Authentication failed: {e}")
--- a/docs/my-website/docs/providers/vertex_ai/videos.md
+++ b/docs/my-website/docs/providers/vertex_ai/videos.md
@ -0,0 +1,268 @@
+import Tabs from '@theme/Tabs';
+import TabItem from '@theme/TabItem';
+
+# Vertex AI Video Generation (Veo)
+
+LiteLLM supports Vertex AI's Veo video generation models using the unified OpenAI video API surface.
+
+| Property | Details |
+|-------|-------|
+| Description | Google Cloud Vertex AI Veo video generation models |
+| Provider Route on LiteLLM | `vertex_ai/` |
+| Supported Models | `veo-2.0-generate-001`, `veo-3.0-generate-preview`, `veo-3.0-fast-generate-preview`, `veo-3.1-generate-preview`, `veo-3.1-fast-generate-preview` |
+| Cost Tracking | ✅ Duration-based pricing |
+| Logging Support | ✅ Full request/response logging |
+| Proxy Server Support | ✅ Full proxy integration with virtual keys |
+| Spend Management | ✅ Budget tracking and rate limiting |
+| Link to Provider Doc | [Vertex AI Veo Documentation ↗](https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/veo-video-generation) |
+
+## Quick Start
+
+### Required Environment Setup
+
+```python
+import json
+import os
+
+os.environ["VERTEXAI_PROJECT"] = "your-gcp-project-id"
+os.environ["VERTEXAI_LOCATION"] = "us-central1"
+
+# Option 1: Point to a service account file
+os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "/path/to/service_account.json"
+
+# Option 2: Store the service account JSON directly
+with open("/path/to/service_account.json", "r", encoding="utf-8") as f:
+    os.environ["VERTEXAI_CREDENTIALS"] = f.read()
+```
+
+### Basic Usage
+
+```python
+from litellm import video_generation, video_status, video_content
+import json
+import os
+import time
+
+with open("/path/to/service_account.json", "r", encoding="utf-8") as f:
+    vertex_credentials = f.read()
+
+response = video_generation(
+    model="vertex_ai/veo-3.0-generate-preview",
+    prompt="A cat playing with a ball of yarn in a sunny garden",
+    vertex_project="your-gcp-project-id",
+    vertex_location="us-central1",
+    vertex_credentials=vertex_credentials,
+    seconds="8",
+    size="1280x720",
+)
+
+print(f"Video ID: {response.id}")
+print(f"Initial Status: {response.status}")
+
+# Poll for completion
+while True:
+    status = video_status(
+        video_id=response.id,
+        vertex_project="your-gcp-project-id",
+        vertex_location="us-central1",
+        vertex_credentials=vertex_credentials,
+    )
+
+    print(f"Current Status: {status.status}")
+
+    if status.status == "completed":
+        break
+    if status.status == "failed":
+        raise RuntimeError("Video generation failed")
+
+    time.sleep(10)
+
+# Download the rendered video
+video_bytes = video_content(
+    video_id=response.id,
+    vertex_project="your-gcp-project-id",
+    vertex_location="us-central1",
+    vertex_credentials=vertex_credentials,
+)
+
+with open("generated_video.mp4", "wb") as f:
+    f.write(video_bytes)
+```
+
+## Supported Models
+
+| Model Name | Description | Max Duration | Status |
+|------------|-------------|--------------|--------|
+| veo-2.0-generate-001 | Veo 2.0 video generation | 5 seconds | GA |
+| veo-3.0-generate-preview | Veo 3.0 high quality | 8 seconds | Preview |
+| veo-3.0-fast-generate-preview | Veo 3.0 fast generation | 8 seconds | Preview |
+| veo-3.1-generate-preview | Veo 3.1 high quality | 10 seconds | Preview |
+| veo-3.1-fast-generate-preview | Veo 3.1 fast | 10 seconds | Preview |
+
+## Video Generation Parameters
+
+LiteLLM converts OpenAI-style parameters to Veo's API shape automatically:
+
+| OpenAI Parameter | Vertex AI Parameter | Description | Example |
+|------------------|---------------------|-------------|---------|
+| `prompt` | `instances[].prompt` | Text description of the video | "A cat playing" |
+| `size` | `parameters.aspectRatio` | Converted to `16:9` or `9:16` | "1280x720" → `16:9` |
+| `seconds` | `parameters.durationSeconds` | Clip length in seconds | "8" → `8` |
+| `input_reference` | `instances[].image` | Reference image for animation | `open("image.jpg", "rb")` |
+| Provider-specific params | `extra_body` | Forwarded to Vertex API | `{"negativePrompt": "blurry"}` |
+
+### Size to Aspect Ratio Mapping
+
+- `1280x720`, `1920x1080` → `16:9`
+- `720x1280`, `1080x1920` → `9:16`
+- Unknown sizes default to `16:9`
+
+## Async Usage
+
+```python
+from litellm import avideo_generation, avideo_status, avideo_content
+import asyncio
+import json
+
+with open("/path/to/service_account.json", "r", encoding="utf-8") as f:
+    vertex_credentials = f.read()
+
+
+async def workflow():
+    response = await avideo_generation(
+        model="vertex_ai/veo-3.1-generate-preview",
+        prompt="Slow motion water droplets splashing into a pool",
+        seconds="10",
+        vertex_project="your-gcp-project-id",
+        vertex_location="us-central1",
+        vertex_credentials=vertex_credentials,
+    )
+
+    while True:
+        status = await avideo_status(
+            video_id=response.id,
+            vertex_project="your-gcp-project-id",
+            vertex_location="us-central1",
+            vertex_credentials=vertex_credentials,
+        )
+
+        if status.status == "completed":
+            break
+        if status.status == "failed":
+            raise RuntimeError("Video generation failed")
+
+        await asyncio.sleep(10)
+
+    video_bytes = await avideo_content(
+        video_id=response.id,
+        vertex_project="your-gcp-project-id",
+        vertex_location="us-central1",
+        vertex_credentials=vertex_credentials,
+    )
+
+    with open("veo_water.mp4", "wb") as f:
+        f.write(video_bytes)
+
+asyncio.run(workflow())
+```
+
+## LiteLLM Proxy Usage
+
+Add Veo models to your `config.yaml`:
+
+```yaml
+model_list:
+  - model_name: veo-3
+    litellm_params:
+      model: vertex_ai/veo-3.0-generate-preview
+      vertex_project: os.environ/VERTEXAI_PROJECT
+      vertex_location: os.environ/VERTEXAI_LOCATION
+      vertex_credentials: os.environ/VERTEXAI_CREDENTIALS
+```
+
+Start the proxy and make requests:
+
+<Tabs>
+<TabItem value="curl" label="Curl">
+
+```bash
+# Step 1: Generate video
+curl --location 'http://0.0.0.0:4000/v1/videos' \
+--header 'Content-Type: application/json' \
+--header 'Authorization: Bearer sk-1234' \
+--data '{
+  "model": "veo-3",
+  "prompt": "Aerial shot over a futuristic city at sunrise",
+  "seconds": "8"
+}'
+
+# Step 2: Poll status
+curl --location 'http://localhost:4000/v1/videos/{video_id}' \
+--header 'x-litellm-api-key: sk-1234'
+
+# Step 3: Download video
+curl --location 'http://localhost:4000/v1/videos/{video_id}/content' \
+--header 'x-litellm-api-key: sk-1234' \
+--output video.mp4
+```
+
+</TabItem>
+<TabItem value="python" label="Python SDK">
+
+```python
+import litellm
+
+litellm.api_base = "http://0.0.0.0:4000"
+litellm.api_key = "sk-1234"
+
+response = litellm.video_generation(
+    model="veo-3",
+    prompt="Aerial shot over a futuristic city at sunrise",
+)
+
+import time
+
+status = litellm.video_status(video_id=response.id)
+while status.status not in ["completed", "failed"]:
+    time.sleep(10)  # Veo operations are long-running; wait between polls
+    status = litellm.video_status(video_id=response.id)
+
+if status.status == "completed":
+    content = litellm.video_content(video_id=response.id)
+    with open("veo_city.mp4", "wb") as f:
+        f.write(content)
+```
+
+</TabItem>
+</Tabs>
+
+## Cost Tracking
+
+LiteLLM records the duration returned by Veo so you can apply duration-based pricing.
+
+```python
+with open("/path/to/service_account.json", "r", encoding="utf-8") as f:
+    vertex_credentials = f.read()
+
+response = video_generation(
+    model="vertex_ai/veo-2.0-generate-001",
+    prompt="Flowers blooming in fast forward",
+    seconds="5",
+    vertex_project="your-gcp-project-id",
+    vertex_location="us-central1",
+    vertex_credentials=vertex_credentials,
+)
+
+print(response.usage)  # {"duration_seconds": 5.0}
+```
+
+## Troubleshooting
+
+- **`vertex_project is required`**: set `VERTEXAI_PROJECT` env var or pass `vertex_project` in the request.
+- **`Permission denied`**: ensure the service account has the `Vertex AI User` role and the correct region enabled.
+- **Video stuck in `processing`**: Veo operations are long-running. Continue polling every 10–15 seconds up to ~10 minutes.
+
+## See Also
+
+- [OpenAI Video Generation](../openai/videos.md)
+- [Azure Video Generation](../azure/videos.md)
+- [Gemini Video Generation](../gemini/videos.md)
+- [Video Generation API Reference](/docs/videos)
+
--- a/docs/my-website/docs/proxy/config_settings.md
+++ b/docs/my-website/docs/proxy/config_settings.md
@ -519,6 +519,7 @@ router_settings:
 | DEFAULT_SLACK_ALERTING_THRESHOLD | Default threshold for Slack alerting. Default is 300
 | DEFAULT_SOFT_BUDGET | Default soft budget for LiteLLM proxy keys. Default is 50.0
 | DEFAULT_TRIM_RATIO | Default ratio of tokens to trim from prompt end. Default is 0.75
+| DEFAULT_GOOGLE_VIDEO_DURATION_SECONDS | Default video duration, in seconds, for Google (Gemini and Vertex AI) video generation. Default is 8
 | DIRECT_URL | Direct URL for service endpoint
 | DISABLE_ADMIN_UI | Toggle to disable the admin UI
 | DISABLE_AIOHTTP_TRANSPORT | Flag to disable aiohttp transport. When this is set to True, litellm will use httpx instead of aiohttp. **Default is False**
--- a/docs/my-website/docs/videos.md
+++ b/docs/my-website/docs/videos.md
@ -9,7 +9,7 @@ Fallbacks | ✅ (Between supported models) |
 | Guardrails Support | ✅ Content moderation and safety checks |
 | Proxy Server Support | ✅ Full proxy integration with virtual keys |
 | Spend Management | ✅ Budget tracking and rate limiting |
-| Supported Providers | `openai`, `azure` |
+| Supported Providers | `openai`, `azure`, `gemini`, `vertex_ai` |

 :::tip

@ -41,8 +41,7 @@ print(f"Initial Status: {response.status}")
 # Check status until video is ready
 while True:
    status_response = video_status(
-        video_id=response.id,
-        custom_llm_provider="openai"
+        video_id=response.id
    )
    
    print(f"Current Status: {status_response.status}")
@ -57,8 +56,7 @@ while True:

 # Download video content when ready
 video_bytes = video_content(
-    video_id=response.id,
-    custom_llm_provider="openai"
+    video_id=response.id
 )

 # Save to file
@ -88,8 +86,7 @@ async def test_async_video():
    # Check status until video is ready
    while True:
        status_response = await avideo_status(
-            video_id=response.id,
-            custom_llm_provider="openai"
+            video_id=response.id
        )
        
        print(f"Current Status: {status_response.status}")
@ -104,8 +101,7 @@ async def test_async_video():
    
    # Download video content when ready
    video_bytes = await avideo_content(
-        video_id=response.id,
-        custom_llm_provider="openai"
+        video_id=response.id
    )
    
    # Save to file
@ -120,21 +116,27 @@ asyncio.run(test_async_video())
 ```python
 from litellm import video_status

-# Check the status of a video generation
 status_response = video_status(
-    video_id="video_1234567890",
-    custom_llm_provider="openai"
+    video_id="video_1234567890"
 )

 print(f"Video Status: {status_response.status}")
 print(f"Created At: {status_response.created_at}")
 print(f"Model: {status_response.model}")
+```

-# Possible status values:
-# - "queued": Video is in the queue
-# - "processing": Video is being generated
-# - "completed": Video is ready for download
-# - "failed": Video generation failed
+### List Videos
+
+For listing videos, you need to specify the provider since there's no video_id to decode from:
+
+```python
+from litellm import video_list
+
+# List videos from OpenAI
+videos = video_list(custom_llm_provider="openai")
+
+for video in videos:
+    print(f"Video ID: {video['id']}")
 ```

 ### Video Generation with Reference Image
@ -207,7 +209,7 @@ print(f"Video ID: {response.id}")

 LiteLLM provides OpenAI API compatible video endpoints for complete video generation workflow:

- `/videos/generations` - Generate new videos
+- `/videos` - Generate new videos
 - `/videos/remix` - Edit existing videos with reference images  
 - `/videos/status` - Check video generation status
 - `/videos/retrieval` - Download completed videos
@ -227,7 +229,6 @@ model_list:
      model: azure/sora-2
      api_key: os.environ/AZURE_OPENAI_API_KEY
      api_base: os.environ/AZURE_OPENAI_API_BASE
-      api_version: "2024-02-15-preview"
 ```

 Start litellm
@ -253,31 +254,14 @@ curl --location 'http://localhost:4000/v1/videos' \
 Test video status request

 ```bash
-# Using custom-llm-provider header
-curl --location 'http://localhost:4000/v1/videos/video_id' \
--header 'Accept: application/json' \
--header 'x-litellm-api-key: sk-1234' \
--header 'custom-llm-provider: azure'
-
-# Or using query parameter
-curl --location 'http://localhost:4000/v1/videos/video_id?custom_llm_provider=azure' \
--header 'Accept: application/json' \
+curl --location 'http://localhost:4000/v1/videos/{video_id}' \
 --header 'x-litellm-api-key: sk-1234'
 ```

 Test video retrieval request

 ```bash
-# Using custom-llm-provider header
-curl --location 'http://localhost:4000/v1/videos/video_id/content' \
--header 'Accept: application/json' \
--header 'x-litellm-api-key: sk-1234' \
--header 'custom-llm-provider: openai' \
--output video.mp4
-
-# Or using query parameter
-curl --location 'http://localhost:4000/v1/videos/video_id/content?custom_llm_provider=openai' \
--header 'Accept: application/json' \
+curl --location 'http://localhost:4000/v1/videos/{video_id}/content' \
 --header 'x-litellm-api-key: sk-1234' \
 --output video.mp4
 ```
@ -285,27 +269,27 @@ curl --location 'http://localhost:4000/v1/videos/video_id/content?custom_llm_pro
 Test video remix request

 ```bash
-# Using custom_llm_provider in request body
-curl --location --request POST 'http://localhost:4000/v1/videos/video_id/remix' \
--header 'Accept: application/json' \
+curl --location --request POST 'http://localhost:4000/v1/videos/{video_id}/remix' \
 --header 'Content-Type: application/json' \
 --header 'x-litellm-api-key: sk-1234' \
--data '{
-    "prompt": "New remix instructions",
-    "custom_llm_provider": "azure"
-}'
-
-# Or using custom-llm-provider header
-curl --location --request POST 'http://localhost:4000/v1/videos/video_id/remix' \
--header 'Accept: application/json' \
--header 'Content-Type: application/json' \
--header 'x-litellm-api-key: sk-1234' \
--header 'custom-llm-provider: azure' \
 --data '{
    "prompt": "New remix instructions"
 }'
 ```

+Test video list request (requires custom_llm_provider)
+
+```bash
+# Note: video_list requires custom_llm_provider because there is no video_id to decode the provider from
+curl --location 'http://localhost:4000/v1/videos?custom_llm_provider=openai' \
+--header 'x-litellm-api-key: sk-1234'
+
+# Or using header
+curl --location 'http://localhost:4000/v1/videos' \
+--header 'x-litellm-api-key: sk-1234' \
+--header 'custom-llm-provider: azure'
+```
+
 Test Azure video generation request

 ```bash
@ -618,4 +602,6 @@ The response follows OpenAI's video generation format with the following structu
 | Provider    | Link to Usage      |
 |-------------|--------------------|
 | OpenAI      |   [Usage](providers/openai/videos)  |
-| Azure       |   [Usage](providers/azure/videos)   |
+| Azure       |   [Usage](providers/azure/videos)   |
+| Gemini       |   [Usage](providers/gemini/videos)   |
+| Vertex AI   |   [Usage](providers/vertex_ai/videos) |
--- a/docs/my-website/sidebars.js
+++ b/docs/my-website/sidebars.js
@ -478,6 +478,7 @@ const sidebars = {
          label: "Vertex AI",
          items: [
            "providers/vertex",
+            "providers/vertex_ai/videos",
            "providers/vertex_partner",
            "providers/vertex_self_deployed",
            "providers/vertex_image",
@ -490,6 +491,7 @@ const sidebars = {
          label: "Google AI Studio",
          items: [
            "providers/gemini",
+            "providers/gemini/videos",
            "providers/google_ai_studio/files",
            "providers/google_ai_studio/image_gen",
            "providers/google_ai_studio/realtime",
--- a/litellm/constants.py
+++ b/litellm/constants.py
@ -280,6 +280,8 @@ ANTHROPIC_WEB_SEARCH_TOOL_MAX_USES = {
 DEFAULT_IMAGE_ENDPOINT_MODEL = "dall-e-2"
 DEFAULT_VIDEO_ENDPOINT_MODEL = "sora-2"

+DEFAULT_GOOGLE_VIDEO_DURATION_SECONDS = int(os.getenv("DEFAULT_GOOGLE_VIDEO_DURATION_SECONDS", 8))
+
 ### DATAFORSEO CONSTANTS ###
 DEFAULT_DATAFORSEO_LOCATION_CODE = int(
    os.getenv("DEFAULT_DATAFORSEO_LOCATION_CODE", 2250)
--- a/litellm/litellm_core_utils/health_check_helpers.py
+++ b/litellm/litellm_core_utils/health_check_helpers.py
@ -97,6 +97,7 @@ class HealthCheckHelpers:
            "audio_speech",
            "audio_transcription",
            "image_generation",
+            "video_generation",
            "rerank",
            "realtime",
            "batch",
@ -159,6 +160,10 @@ class HealthCheckHelpers:
                **_filter_model_params(model_params=model_params),
                prompt=prompt,
            ),
+            "video_generation": lambda: litellm.avideo_generation(
+                **_filter_model_params(model_params=model_params),
+                prompt=prompt or "test video generation",
+            ),
            "rerank": lambda: litellm.arerank(
                **_filter_model_params(model_params=model_params),
                query=prompt or "",
--- a/litellm/llms/base_llm/videos/transformation.py
+++ b/litellm/llms/base_llm/videos/transformation.py
@ -92,10 +92,11 @@ class BaseVideoConfig(ABC):
        self,
        model: str,
        prompt: str,
+        api_base: str,
        video_create_optional_request_params: Dict,
        litellm_params: GenericLiteLLMParams,
        headers: dict,
-    ) -> Tuple[Dict, RequestFiles]:
+    ) -> Tuple[Dict, RequestFiles, str]:
        pass

    @abstractmethod
@ -104,6 +105,8 @@ class BaseVideoConfig(ABC):
        model: str,
        raw_response: httpx.Response,
        logging_obj: LiteLLMLoggingObj,
+        custom_llm_provider: Optional[str] = None,
+        request_data: Optional[Dict] = None,
    ) -> VideoObject:
        pass

@ -154,6 +157,7 @@ class BaseVideoConfig(ABC):
        self,
        raw_response: httpx.Response,
        logging_obj: LiteLLMLoggingObj,
+        custom_llm_provider: Optional[str] = None,
    ) -> VideoObject:
        pass

@ -181,6 +185,7 @@ class BaseVideoConfig(ABC):
        self,
        raw_response: httpx.Response,
        logging_obj: LiteLLMLoggingObj,
+        custom_llm_provider: Optional[str] = None,
    ) -> Dict[str,str]:
        pass

@ -229,6 +234,7 @@ class BaseVideoConfig(ABC):
        self,
        raw_response: httpx.Response,
        logging_obj: LiteLLMLoggingObj,
+        custom_llm_provider: Optional[str] = None,
    ) -> VideoObject:
        pass

--- a/litellm/llms/custom_httpx/llm_http_handler.py
+++ b/litellm/llms/custom_httpx/llm_http_handler.py
@ -4099,7 +4099,7 @@ class BaseLLMHTTPHandler:
            or {},
            model=model,
        )
-
+        
        if extra_headers:
            headers.update(extra_headers)

@ -4109,12 +4109,13 @@ class BaseLLMHTTPHandler:
            litellm_params=dict(litellm_params),
        )

-        data, files = video_generation_provider_config.transform_video_create_request(
+        data, files, api_base = video_generation_provider_config.transform_video_create_request(
            model=model,
            prompt=prompt,
            video_create_optional_request_params=video_generation_optional_request_params,
            litellm_params=litellm_params,
            headers=headers,
+            api_base=api_base,
        )

        ## LOGGING
@ -4140,8 +4141,8 @@ class BaseLLMHTTPHandler:
                    timeout=timeout,
                )

-            # --- END MOCK VIDEO RESPONSE ---
            else:
+                # Use JSON content type for POST requests without files
                response = sync_httpx_client.post(
                    url=api_base,
                    headers=headers,
@ -4159,6 +4160,8 @@ class BaseLLMHTTPHandler:
            model=model,
            raw_response=response,
            logging_obj=logging_obj,
+            custom_llm_provider=custom_llm_provider,
+            request_data=data,
        )

    async def async_video_generation_handler(
@ -4206,9 +4209,10 @@ class BaseLLMHTTPHandler:
            litellm_params=dict(litellm_params),
        )

-        data, files = video_generation_provider_config.transform_video_create_request(
+        data, files, api_base = video_generation_provider_config.transform_video_create_request(
            model=model,
            prompt=prompt,
+            api_base=api_base,
            video_create_optional_request_params=video_generation_optional_request_params,
            litellm_params=litellm_params,
            headers=headers,
@ -4226,7 +4230,7 @@ class BaseLLMHTTPHandler:
        )

        try:
-            # Use JSON when no files, otherwise use form data with files
+            #Use JSON when no files, otherwise use form data with files
            if files is None or len(files) == 0:
                response = await async_httpx_client.post(
                    url=api_base,
@ -4253,6 +4257,8 @@ class BaseLLMHTTPHandler:
            model=model,
            raw_response=response,
            logging_obj=logging_obj,
+            custom_llm_provider=custom_llm_provider,
+            request_data=data,
        )

    ###### VIDEO CONTENT HANDLER ######
@ -4308,7 +4314,7 @@ class BaseLLMHTTPHandler:
        )

        # Transform the request using the provider config
-        url, params = video_content_provider_config.transform_video_content_request(
+        url, data = video_content_provider_config.transform_video_content_request(
            video_id=video_id,
            api_base=api_base,
            litellm_params=litellm_params,
@ -4316,12 +4322,21 @@ class BaseLLMHTTPHandler:
        )

        try:
-            # Make the GET request to download content
-            response = sync_httpx_client.get(
-                url=url,
-                headers=headers,
-                params=params,
-            )
+            # Use POST if the transformed request carries a JSON body (e.g., Vertex AI fetchPredictOperation)
+            # Otherwise use GET (e.g., OpenAI video content download)
+            if data:
+                response = sync_httpx_client.post(
+                    url=url,
+                    headers=headers,
+                    json=data,
+                )
+            else:
+                # Otherwise it's a GET request with query params
+                response = sync_httpx_client.get(
+                    url=url,
+                    headers=headers,
+                    params=data,
+                )

            # Transform the response using the provider config
            return video_content_provider_config.transform_video_content_response(
@ -4374,7 +4389,7 @@ class BaseLLMHTTPHandler:
        )

        # Transform the request using the provider config
-        url, params = video_content_provider_config.transform_video_content_request(
+        url, data = video_content_provider_config.transform_video_content_request(
            video_id=video_id,
            api_base=api_base,
            litellm_params=litellm_params,
@ -4382,12 +4397,21 @@ class BaseLLMHTTPHandler:
        )

        try:
-            # Make the GET request to download content
-            response = await async_httpx_client.get(
-                url=url,
-                headers=headers,
-                params=params,
-            )
+            # Use POST if the transformed request carries a JSON body (e.g., Vertex AI fetchPredictOperation)
+            # Otherwise use GET (e.g., OpenAI video content download)
+            if data:
+                response = await async_httpx_client.post(
+                    url=url,
+                    headers=headers,
+                    json=data,
+                )
+            else:
+                # Otherwise it's a GET request with query params
+                response = await async_httpx_client.get(
+                    url=url,
+                    headers=headers,
+                    params=data,
+                )

            # Transform the response using the provider config
            return video_content_provider_config.transform_video_content_response(
@ -4492,6 +4516,7 @@ class BaseLLMHTTPHandler:
            return video_remix_provider_config.transform_video_remix_response(
                raw_response=response,
                logging_obj=logging_obj,
+                custom_llm_provider=custom_llm_provider,
            )

        except Exception as e:
@ -4573,6 +4598,7 @@ class BaseLLMHTTPHandler:
            return video_remix_provider_config.transform_video_remix_response(
                raw_response=response,
                logging_obj=logging_obj,
+                custom_llm_provider=custom_llm_provider,
            )

        except Exception as e:
@ -4708,6 +4734,7 @@ class BaseLLMHTTPHandler:
            return video_list_provider_config.transform_video_list_response(
                raw_response=response,
                logging_obj=logging_obj,
+                custom_llm_provider=custom_llm_provider,
            )

        except Exception as e:
@ -4863,17 +4890,29 @@ class BaseLLMHTTPHandler:
                "api_base": url,
                "headers": headers,
                "video_id": video_id,
+                "data": data,
            },
        )

        try:
-            response = sync_httpx_client.get(
-                url=url,
-                headers=headers,
-            )
+            # Use POST if data is provided (e.g., Vertex AI fetchPredictOperation)
+            # Otherwise use GET (e.g., OpenAI video status)
+            if data:
+                response = sync_httpx_client.post(
+                    url=url,
+                    headers=headers,
+                    json=data,
+                )
+            else:
+                response = sync_httpx_client.get(
+                    url=url,
+                    headers=headers,
+                )
+
            return video_status_provider_config.transform_video_status_retrieve_response(
                raw_response=response,
                logging_obj=logging_obj,
+                custom_llm_provider=custom_llm_provider,
            )

        except Exception as e:
@ -4937,17 +4976,28 @@ class BaseLLMHTTPHandler:
                "api_base": url,
                "headers": headers,
                "video_id": video_id,
+                "data": data,
            },
        )

        try:
-            response = await async_httpx_client.get(
-                url=url,
-                headers=headers,
-            )
+            # Use POST if data is provided (e.g., Vertex AI fetchPredictOperation)
+            # Otherwise use GET (e.g., OpenAI video status)
+            if data:
+                response = await async_httpx_client.post(
+                    url=url,
+                    headers=headers,
+                    json=data,
+                )
+            else:
+                response = await async_httpx_client.get(
+                    url=url,
+                    headers=headers,
+                )
            return video_status_provider_config.transform_video_status_retrieve_response(
                raw_response=response,
                logging_obj=logging_obj,
+                custom_llm_provider=custom_llm_provider,
            )

        except Exception as e:
--- a/litellm/llms/gemini/videos/init.py
+++ b/litellm/llms/gemini/videos/init.py
@ -0,0 +1,5 @@
+# Gemini Video Generation Support
+from .transformation import GeminiVideoConfig
+
+__all__ = ["GeminiVideoConfig"]
+
--- a/litellm/llms/gemini/videos/transformation.py
+++ b/litellm/llms/gemini/videos/transformation.py
@ -0,0 +1,523 @@
+from typing import TYPE_CHECKING, Any, Dict, Optional, Tuple, Union
+import base64
+
+import httpx
+from httpx._types import RequestFiles
+
+from litellm.types.videos.main import VideoCreateOptionalRequestParams, VideoObject
+from litellm.types.router import GenericLiteLLMParams
+from litellm.secret_managers.main import get_secret_str
+from litellm.types.videos.utils import (
+    encode_video_id_with_provider,
+    extract_original_video_id,
+)
+from litellm.images.utils import ImageEditRequestUtils
+import litellm
+from litellm.types.llms.gemini import GeminiLongRunningOperationResponse, GeminiVideoGenerationInstance, GeminiVideoGenerationParameters, GeminiVideoGenerationRequest
+from litellm.constants import DEFAULT_GOOGLE_VIDEO_DURATION_SECONDS
+if TYPE_CHECKING:
+    from litellm.litellm_core_utils.litellm_logging import Logging as _LiteLLMLoggingObj
+    from ...base_llm.videos.transformation import BaseVideoConfig as _BaseVideoConfig
+    from ...base_llm.chat.transformation import BaseLLMException as _BaseLLMException
+
+    LiteLLMLoggingObj = _LiteLLMLoggingObj
+    BaseVideoConfig = _BaseVideoConfig
+    BaseLLMException = _BaseLLMException
+else:
+    LiteLLMLoggingObj = Any
+    BaseVideoConfig = Any
+    BaseLLMException = Any
+
+
+def _convert_image_to_gemini_format(image_file) -> Dict[str, str]:
+    """
+    Build the Gemini image payload (base64 data plus MIME type) for an image.
+
+    The MIME type is resolved through
+    ``ImageEditRequestUtils.get_image_content_type``; the stream is then
+    rewound (when it exposes ``seek``) and read in full.
+
+    Args:
+        image_file: File-like object opened in binary mode (e.g., open("path", "rb"))
+
+    Returns:
+        Dict with bytesBase64Encoded and mimeType
+    """
+    content_type = ImageEditRequestUtils.get_image_content_type(image_file)
+
+    # Rewind to the start before reading the bytes that get encoded.
+    if hasattr(image_file, "seek"):
+        image_file.seek(0)
+    encoded_image = base64.b64encode(image_file.read()).decode("utf-8")
+
+    return {"bytesBase64Encoded": encoded_image, "mimeType": content_type}
+
+
+class GeminiVideoConfig(BaseVideoConfig):
+    """
+    Configuration class for Gemini (Veo) video generation.
+    
+    Veo uses a long-running operation model:
+    1. POST to :predictLongRunning returns operation name
+    2. Poll operation until done=true
+    3. Extract video URI from response
+    4. Download video using file API
+    """
+
+    def __init__(self):
+        """Initialise the config by delegating to the base class constructor."""
+        super().__init__()
+
+    def get_supported_openai_params(self, model: str) -> list:
+        """
+        Return the OpenAI-style parameter names accepted for Veo video generation.
+
+        Veo supports minimal parameters compared to OpenAI. The same list is
+        returned for every ``model``.
+        """
+        supported_params = ["model", "prompt", "input_reference", "seconds", "size"]
+        return supported_params
+
+    def map_openai_params(
+        self,
+        video_create_optional_params: VideoCreateOptionalRequestParams,
+        model: str,
+        drop_params: bool,
+    ) -> Dict[str, Any]:
+        """
+        Translate OpenAI-style video parameters into their Veo equivalents.
+
+        Mappings:
+        - input_reference → image
+        - size → aspectRatio (e.g., "1280x720" → "16:9"); skipped when size is None
+        - seconds → durationSeconds; string values are converted to int, and the
+          parameter is omitted when the value is None or cannot be converted
+          (no default duration is injected here)
+
+        Every remaining key is copied through unchanged so that Gemini-specific
+        parameters reach the request. ``drop_params`` is not consulted.
+        """
+        veo_params: Dict[str, Any] = {}
+
+        # OpenAI params translated below. "model" and "prompt" are handled
+        # separately, so they are left out of this set.
+        translated_keys = {
+            name
+            for name in self.get_supported_openai_params(model)
+            if name not in {"model", "prompt"}
+        }
+
+        if "input_reference" in video_create_optional_params:
+            veo_params["image"] = video_create_optional_params["input_reference"]
+
+        size = video_create_optional_params.get("size")
+        if size is not None:
+            aspect_ratio = self._convert_size_to_aspect_ratio(size)
+            if aspect_ratio:
+                veo_params["aspectRatio"] = aspect_ratio
+
+        seconds = video_create_optional_params.get("seconds")
+        if seconds is not None:
+            try:
+                veo_params["durationSeconds"] = (
+                    int(seconds) if isinstance(seconds, str) else seconds
+                )
+            except (ValueError, TypeError):
+                # Unparseable duration: leave durationSeconds out of the request.
+                pass
+
+        # Forward everything else (Gemini-specific params) without overwriting
+        # a key already produced by the mappings above.
+        for key, value in video_create_optional_params.items():
+            if key in translated_keys or key in veo_params:
+                continue
+            veo_params[key] = value
+
+        return veo_params
+    
+    def _convert_size_to_aspect_ratio(self, size: str) -> Optional[str]:
+        """
+        Translate an OpenAI ``size`` string into a Veo ``aspectRatio`` value.
+
+        https://cloud.google.com/vertex-ai/generative-ai/docs/image/generate-videos
+
+        Returns None for an empty size, "9:16" for the portrait sizes
+        720x1280 and 1080x1920, and "16:9" for every other value (including
+        sizes that are not recognised).
+        """
+        if not size:
+            return None
+
+        portrait_sizes = frozenset(("720x1280", "1080x1920"))
+        return "9:16" if size in portrait_sizes else "16:9"
+
+
+    def validate_environment(
+        self,
+        headers: dict,
+        model: str,
+        api_key: Optional[str] = None,
+    ) -> dict:
+        """
+        Resolve the Gemini API key and attach the authentication headers.
+
+        The key is taken from the first non-empty source, in order: the
+        ``api_key`` argument, ``litellm.api_key``, the ``GOOGLE_API_KEY``
+        secret, then the ``GEMINI_API_KEY`` secret.
+
+        Returns:
+            The same ``headers`` dict, updated in place with ``x-goog-api-key``
+            and a JSON ``Content-Type``.
+
+        Raises:
+            ValueError: If no API key can be resolved.
+        """
+        resolved_key = api_key or litellm.api_key
+        if not resolved_key:
+            resolved_key = get_secret_str("GOOGLE_API_KEY")
+        if not resolved_key:
+            resolved_key = get_secret_str("GEMINI_API_KEY")
+
+        if not resolved_key:
+            raise ValueError(
+                "GEMINI_API_KEY or GOOGLE_API_KEY is required for Veo video generation. "
+                "Set it via environment variable or pass it as api_key parameter."
+            )
+
+        headers["x-goog-api-key"] = resolved_key
+        headers["Content-Type"] = "application/json"
+        return headers
+
+    def get_complete_url(
+        self,
+        model: str,
+        api_base: Optional[str],
+        litellm_params: dict,
+    ) -> str:
+        """
+        Build the Veo endpoint URL.
+
+        When ``api_base`` is None it falls back to the ``GEMINI_API_BASE``
+        secret and then to https://generativelanguage.googleapis.com.
+
+        For video creation (non-empty model): returns
+        ``{api_base}/v1beta/models/{model}:predictLongRunning`` with any
+        "gemini/" removed from the model name.
+        For status/delete (empty model): returns the base URL only.
+        """
+        base_url = api_base
+        if base_url is None:
+            base_url = get_secret_str("GEMINI_API_BASE") or "https://generativelanguage.googleapis.com"
+        base_url = base_url.rstrip("/")
+
+        if not model:
+            return base_url
+
+        veo_model = model.replace("gemini/", "")
+        return f"{base_url}/v1beta/models/{veo_model}:predictLongRunning"
+
+    def transform_video_create_request(
+        self,
+        model: str,
+        prompt: str,
+        api_base: str,
+        video_create_optional_request_params: Dict,
+        litellm_params: GenericLiteLLMParams,
+        headers: dict,
+    ) -> Tuple[Dict, RequestFiles, str]:
+        """
+        Build the JSON body for a Veo video creation call.
+
+        Veo expects:
+        {
+            "instances": [
+                {
+                    "prompt": "A cat playing with a ball of yarn"
+                }
+            ],
+            "parameters": {
+                "aspectRatio": "16:9",
+                "durationSeconds": 8,
+                "resolution": "720p"
+            }
+        }
+
+        A non-None ``image`` parameter is replaced by the base64/MIME dict
+        produced by ``_convert_image_to_gemini_format``.
+
+        Returns:
+            Tuple of (request body with None values dropped, empty files list,
+            ``api_base`` unchanged).
+        """
+        veo_instance = GeminiVideoGenerationInstance(prompt=prompt)
+
+        # Work on a copy so the caller's params dict is not modified.
+        veo_parameters = video_create_optional_request_params.copy()
+        reference_image = veo_parameters.get("image")
+        if reference_image is not None:
+            veo_parameters["image"] = _convert_image_to_gemini_format(reference_image)
+
+        request_body = GeminiVideoGenerationRequest(
+            instances=[veo_instance],
+            parameters=GeminiVideoGenerationParameters(**veo_parameters),
+        )
+
+        return request_body.model_dump(exclude_none=True), [], api_base
+
+    def transform_video_create_response(
+        self,
+        model: str,
+        raw_response: httpx.Response,
+        logging_obj: LiteLLMLoggingObj,
+        custom_llm_provider: Optional[str] = None,
+        request_data: Optional[Dict] = None,
+    ) -> VideoObject:
+        """
+        Convert Veo's long-running-operation response into a VideoObject.
+
+        Veo returns:
+        {
+            "name": "operations/generate_1234567890",
+            "metadata": {...},
+            "done": false,
+            "error": {...}
+        }
+
+        The returned VideoObject has:
+        - id: the operation name (used for polling), encoded with the provider
+          and model when ``custom_llm_provider`` is given
+        - status: "processing"
+        - usage: ``duration_seconds`` for cost calculation, taken from the
+          request's ``durationSeconds`` or DEFAULT_GOOGLE_VIDEO_DURATION_SECONDS;
+          left empty when ``request_data`` is not supplied
+
+        Raises:
+            ValueError: If the response cannot be parsed or has no operation name.
+        """
+        payload = raw_response.json()
+
+        # Parse through the Pydantic model for type safety.
+        try:
+            operation = GeminiLongRunningOperationResponse(**payload)
+        except Exception as e:
+            raise ValueError(f"Failed to parse operation response: {e}")
+
+        operation_name = operation.name
+        if not operation_name:
+            raise ValueError(f"No operation name in Veo response: {payload}")
+
+        video_id = (
+            encode_video_id_with_provider(operation_name, custom_llm_provider, model)
+            if custom_llm_provider
+            else operation_name
+        )
+
+        video = VideoObject(
+            id=video_id,
+            object="video",
+            status="processing",
+            model=model,
+        )
+
+        usage: Dict[str, Any] = {}
+        if request_data:
+            requested_duration = request_data.get("parameters", {}).get("durationSeconds")
+            duration = requested_duration or DEFAULT_GOOGLE_VIDEO_DURATION_SECONDS
+            if duration is not None:
+                try:
+                    usage["duration_seconds"] = float(duration)
+                except (ValueError, TypeError):
+                    # Non-numeric duration: report no duration in usage.
+                    pass
+
+        video.usage = usage
+        return video
+
+    def transform_video_status_retrieve_request(
+        self,
+        video_id: str,
+        api_base: str,
+        litellm_params: GenericLiteLLMParams,
+        headers: dict,
+    ) -> Tuple[str, Dict]:
+        """
+        Build the polling request for a Veo operation.
+
+        Veo polls operations at:
+        GET https://generativelanguage.googleapis.com/v1beta/{operation_name}
+
+        Returns:
+            Tuple of (operation URL, empty dict).
+        """
+        operation_name = extract_original_video_id(video_id)
+        base_url = api_base.rstrip("/")
+        return f"{base_url}/v1beta/{operation_name}", {}
+
+    def transform_video_status_retrieve_response(
+        self,
+        raw_response: httpx.Response,
+        logging_obj: LiteLLMLoggingObj,
+        custom_llm_provider: Optional[str] = None,
+    ) -> VideoObject:
+        """
+        Convert a Veo operation status response into a VideoObject.
+
+        Veo returns:
+        {
+            "name": "operations/generate_1234567890",
+            "done": false  # or true when complete
+        }
+
+        When done=true:
+        {
+            "name": "operations/generate_1234567890",
+            "done": true,
+            "response": {
+                "generateVideoResponse": {
+                    "generatedSamples": [
+                        {
+                            "video": {
+                                "uri": "files/abc123..."
+                            }
+                        }
+                    ]
+                }
+            }
+        }
+
+        The status is "completed" when ``done`` is truthy and "processing"
+        otherwise. The id is the operation name, encoded with the provider when
+        ``custom_llm_provider`` is given.
+        """
+        # Parse through the Pydantic model for type safety.
+        operation = GeminiLongRunningOperationResponse(**raw_response.json())
+        operation_name = operation.name
+
+        video_id = (
+            encode_video_id_with_provider(operation_name, custom_llm_provider, None)
+            if custom_llm_provider
+            else operation_name
+        )
+        status = "completed" if operation.done else "processing"
+
+        return VideoObject(id=video_id, object="video", status=status)
+
+    def transform_video_content_request(
+        self,
+        video_id: str,
+        api_base: str,
+        litellm_params: GenericLiteLLMParams,
+        headers: dict,
+    ) -> Tuple[str, Dict]:
+        """
+        Resolve the download URL for a finished Veo video.
+
+        Steps:
+        1. Fetch the operation status with a blocking GET through
+           ``litellm.module_level_client`` (the call is not awaited, so it
+           blocks the caller, including the async content handler).
+        2. Return the URI of the first generated sample as the download URL.
+
+        Returns:
+            Tuple of (download URL, empty dict).
+
+        Raises:
+            ValueError: If the operation is not done yet, has no response data,
+                or contains no generated samples.
+            Exception: Whatever ``raise_for_status()`` raises when the status
+                request fails.
+        """
+        operation_name = extract_original_video_id(video_id)
+
+        status_url = f"{api_base.rstrip('/')}/v1beta/{operation_name}"
+        client = litellm.module_level_client
+        status_response = client.get(url=status_url, headers=headers)
+        status_response.raise_for_status()
+        response_data = status_response.json()
+
+        operation_response = GeminiLongRunningOperationResponse(**response_data)
+
+        if not operation_response.done:
+            raise ValueError(
+                "Video generation is not complete yet. "
+                "Please check status with video_status() before downloading."
+            )
+
+        if not operation_response.response:
+            raise ValueError("No response data in completed operation")
+
+        generated_samples = operation_response.response.generateVideoResponse.generatedSamples
+        # A completed operation may carry no samples; fail with a descriptive
+        # error instead of a bare IndexError/TypeError on the [0] lookup.
+        if not generated_samples:
+            raise ValueError("No generated video samples in completed operation")
+        download_url = generated_samples[0].video.uri
+
+        params: Dict[str, Any] = {}
+
+        return download_url, params
+
+    def transform_video_content_response(
+        self,
+        raw_response: httpx.Response,
+        logging_obj: LiteLLMLoggingObj,
+    ) -> bytes:
+        """
+        Transform the Veo video content download response.
+
+        Returns:
+            The raw response body (``raw_response.content``), i.e. the video
+            bytes. ``logging_obj`` is not used.
+        """
+        return raw_response.content
+
+    def transform_video_remix_request(
+        self,
+        video_id: str,
+        prompt: str,
+        api_base: str,
+        litellm_params: GenericLiteLLMParams,
+        headers: dict,
+        extra_body: Optional[Dict[str, Any]] = None,
+    ) -> Tuple[str, Dict]:
+        """
+        Video remix is not supported by the Veo API.
+
+        Raises:
+            NotImplementedError: Always; no request is built.
+        """
+        raise NotImplementedError(
+            "Video remix is not supported by Google Veo. "
+            "Please use video_generation() to create new videos."
+        )
+
+    def transform_video_remix_response(
+        self,
+        raw_response: httpx.Response,
+        logging_obj: LiteLLMLoggingObj,
+        custom_llm_provider: Optional[str] = None,
+    ) -> VideoObject:
+        """
+        Video remix is not supported by the Veo API.
+
+        Raises:
+            NotImplementedError: Always; the response is never inspected.
+        """
+        raise NotImplementedError("Video remix is not supported by Google Veo.")
+
+    def transform_video_list_request(
+        self,
+        api_base: str,
+        litellm_params: GenericLiteLLMParams,
+        headers: dict,
+        after: Optional[str] = None,
+        limit: Optional[int] = None,
+        order: Optional[str] = None,
+        extra_query: Optional[Dict[str, Any]] = None,
+    ) -> Tuple[str, Dict]:
+        """
+        Video list is not supported by the Veo API.
+
+        Raises:
+            NotImplementedError: Always; no request is built.
+        """
+        raise NotImplementedError(
+            "Video list is not supported by Google Veo. "
+            "Use the operations endpoint directly if you need to list operations."
+        )
+
+    def transform_video_list_response(
+        self,
+        raw_response: httpx.Response,
+        logging_obj: LiteLLMLoggingObj,
+        custom_llm_provider: Optional[str] = None,
+    ) -> Dict[str, str]:
+        """
+        Video list is not supported by the Veo API.
+
+        Raises:
+            NotImplementedError: Always; the response is never inspected.
+        """
+        raise NotImplementedError("Video list is not supported by Google Veo.")
+
+    def transform_video_delete_request(
+        self,
+        video_id: str,
+        api_base: str,
+        litellm_params: GenericLiteLLMParams,
+        headers: dict,
+    ) -> Tuple[str, Dict]:
+        """
+        Video delete is not supported by the Veo API.
+
+        Raises:
+            NotImplementedError: Always; no request is built.
+        """
+        raise NotImplementedError(
+            "Video delete is not supported by Google Veo. "
+            "Videos are automatically cleaned up by Google."
+        )
+
+    def transform_video_delete_response(
+        self,
+        raw_response: httpx.Response,
+        logging_obj: LiteLLMLoggingObj,
+    ) -> VideoObject:
+        """
+        Video delete is not supported by the Veo API.
+
+        Raises:
+            NotImplementedError: Always; the response is never inspected.
+        """
+        raise NotImplementedError("Video delete is not supported by Google Veo.")
+
+    def get_error_class(
+        self, error_message: str, status_code: int, headers: Union[dict, httpx.Headers]
+    ) -> BaseLLMException:
+        """
+        Build the Gemini-specific exception for a failed request.
+
+        Returns:
+            A ``GeminiError`` carrying the given status code, message and headers.
+        """
+        # Imported at call time rather than at module level
+        # (presumably to avoid a circular import; TODO confirm).
+        from ..common_utils import GeminiError
+
+        return GeminiError(
+            status_code=status_code,
+            message=error_message,
+            headers=headers,
+        )
+
--- a/litellm/llms/openai/videos/transformation.py
+++ b/litellm/llms/openai/videos/transformation.py
@ -9,6 +9,7 @@ from litellm.types.llms.openai import CreateVideoRequest
 from litellm.types.router import GenericLiteLLMParams
 from litellm.secret_managers.main import get_secret_str
 from litellm.types.videos.main import VideoObject
+from litellm.types.videos.utils import encode_video_id_with_provider, extract_original_video_id
 import litellm
 from litellm.llms.openai.image_edit.transformation import ImageEditRequestUtils
 if TYPE_CHECKING:
@ -94,17 +95,18 @@ class OpenAIVideoConfig(BaseVideoConfig):
        self,
        model: str,
        prompt: str,
+        api_base: str,
        video_create_optional_request_params: Dict,
        litellm_params: GenericLiteLLMParams,
        headers: dict,
-    ) -> Tuple[Dict, RequestFiles]:
+    ) -> Tuple[Dict, RequestFiles, str]:
        """
        Transform the video creation request for OpenAI API.
        """
        # Remove model and extra_headers from optional params as they're handled separately
        video_create_optional_request_params = {
            k: v for k, v in video_create_optional_request_params.items()
-            if k not in ["model", "extra_headers"]
+            if k not in ["model", "extra_headers", "prompt"]
        }
        
        # Create the request data
@ -129,26 +131,24 @@ class OpenAIVideoConfig(BaseVideoConfig):
                image=_input_reference,
                field_name="input_reference",
            )
-        # Convert to dict for JSON serialization
-        return data_without_files, files_list
+        return data_without_files, files_list, api_base

    def transform_video_create_response(
        self,
        model: str,
        raw_response: httpx.Response,
        logging_obj: LiteLLMLoggingObj,
+        custom_llm_provider: Optional[str] = None,
+        request_data: Optional[Dict] = None,
    ) -> VideoObject:
-        """
-        Transform the OpenAI video creation response.
-        """
+        """Transform the OpenAI video creation response."""
        response_data = raw_response.json()
-        
-        # Transform the response data
    
        video_obj = VideoObject(**response_data)  # type: ignore[arg-type]
        
-        # Create usage object with duration information for cost calculation
-        # Video generation API doesn't provide usage, so we create one with duration
+        if custom_llm_provider and video_obj.id:
+            video_obj.id = encode_video_id_with_provider(video_obj.id, custom_llm_provider, model)
+        
        usage_data = {}
        if video_obj:
            if hasattr(video_obj, 'seconds') and video_obj.seconds:
@ -156,9 +156,7 @@ class OpenAIVideoConfig(BaseVideoConfig):
                    usage_data["duration_seconds"] = float(video_obj.seconds)
                except (ValueError, TypeError):
                    pass
-        # Create the response
        video_obj.usage = usage_data
-
        
        return video_obj

@ -175,11 +173,13 @@ class OpenAIVideoConfig(BaseVideoConfig):
        OpenAI API expects the following request:
        - GET /v1/videos/{video_id}/content
        """
+        original_video_id = extract_original_video_id(video_id)
+        
        # Construct the URL for video content download
-        url = f"{api_base.rstrip('/')}/{video_id}/content"
+        url = f"{api_base.rstrip('/')}/{original_video_id}/content"
        
        # Add video_id as query parameter
-        params = {"video_id": video_id}
+        params = {"video_id": original_video_id}
        
        return url, params

@ -198,8 +198,10 @@ class OpenAIVideoConfig(BaseVideoConfig):
        OpenAI API expects the following request:
        - POST /v1/videos/{video_id}/remix
        """
+        original_video_id = extract_original_video_id(video_id)
+        
        # Construct the URL for video remix
-        url = f"{api_base.rstrip('/')}/{video_id}/remix"
+        url = f"{api_base.rstrip('/')}/{original_video_id}/remix"
        
        # Prepare the request data
        data = {"prompt": prompt}
@ -215,17 +217,14 @@ class OpenAIVideoConfig(BaseVideoConfig):
        raw_response: httpx.Response,
        logging_obj: LiteLLMLoggingObj,
    ) -> bytes:
-        """
-        Transform the OpenAI video content download response.
-        Returns raw video content as bytes.
-        """
-        # For video content download, return the raw content as bytes
+        """Transform the OpenAI video content download response."""
        return raw_response.content

    def transform_video_remix_response(
        self,
        raw_response: httpx.Response,
        logging_obj: LiteLLMLoggingObj,
+        custom_llm_provider: Optional[str] = None,
    ) -> VideoObject:
        """
        Transform the OpenAI video remix response.
@ -235,6 +234,9 @@ class OpenAIVideoConfig(BaseVideoConfig):
        # Transform the response data
        video_obj = VideoObject(**response_data)  # type: ignore[arg-type]
        
+        if custom_llm_provider and video_obj.id:
+            video_obj.id = encode_video_id_with_provider(video_obj.id, custom_llm_provider, None)
+        
        # Create usage object with duration information for cost calculation
        # Video remix API doesn't provide usage, so we create one with duration
        usage_data = {}
@ -287,8 +289,20 @@ class OpenAIVideoConfig(BaseVideoConfig):
        self,
        raw_response: httpx.Response,
        logging_obj: LiteLLMLoggingObj,
+        custom_llm_provider: Optional[str] = None,
    ) -> Dict[str,str]:
-        return raw_response.json()
+        response_data = raw_response.json()
+        
+        if custom_llm_provider and "data" in response_data:
+            for video_obj in response_data.get("data", []):
+                if isinstance(video_obj, dict) and "id" in video_obj:
+                    video_obj["id"] = encode_video_id_with_provider(
+                        video_obj["id"], 
+                        custom_llm_provider, 
+                        video_obj.get("model")
+                    )
+        
+        return response_data

    def transform_video_delete_request(
        self,
@ -303,8 +317,10 @@ class OpenAIVideoConfig(BaseVideoConfig):
        OpenAI API expects the following request:
        - DELETE /v1/videos/{video_id}
        """
+        original_video_id = extract_original_video_id(video_id)
+        
        # Construct the URL for video delete
-        url = f"{api_base.rstrip('/')}/{video_id}"
+        url = f"{api_base.rstrip('/')}/{original_video_id}"
        
        # No data needed for DELETE request
        data: Dict[str, Any] = {}
@ -336,8 +352,11 @@ class OpenAIVideoConfig(BaseVideoConfig):
        """
        Transform the OpenAI video retrieve request.
        """
+        # Extract the original video_id (remove provider encoding if present)
+        original_video_id = extract_original_video_id(video_id)
+        
        # For video retrieve, we just need to construct the URL
-        url = f"{api_base.rstrip('/')}/{video_id}"
+        url = f"{api_base.rstrip('/')}/{original_video_id}"
        
        # No additional data needed for GET request
        data: Dict[str, Any] = {}
@ -348,6 +367,7 @@ class OpenAIVideoConfig(BaseVideoConfig):
        self,
        raw_response: httpx.Response,
        logging_obj: LiteLLMLoggingObj,
+        custom_llm_provider: Optional[str] = None,
    ) -> VideoObject:
        """
        Transform the OpenAI video retrieve response.
@ -355,6 +375,9 @@ class OpenAIVideoConfig(BaseVideoConfig):
        response_data = raw_response.json()
        # Transform the response data
        video_obj = VideoObject(**response_data)  # type: ignore[arg-type]
+        
+        if custom_llm_provider and video_obj.id:
+            video_obj.id = encode_video_id_with_provider(video_obj.id, custom_llm_provider, None)

        return video_obj

--- a/litellm/llms/vertex_ai/videos/init.py
+++ b/litellm/llms/vertex_ai/videos/init.py
@ -0,0 +1,10 @@
+"""
+Vertex AI Video Generation Module
+
+This module provides support for Vertex AI's Veo video generation API.
+"""
+
+from .transformation import VertexAIVideoConfig
+
+__all__ = ["VertexAIVideoConfig"]
+
--- a/litellm/llms/vertex_ai/videos/transformation.py
+++ b/litellm/llms/vertex_ai/videos/transformation.py
@ -0,0 +1,597 @@
+"""
+Vertex AI Video Generation Transformation
+
+Handles transformation of requests/responses for Vertex AI's Veo video generation API.
+Based on: https://docs.cloud.google.com/vertex-ai/generative-ai/docs/model-reference/veo-video-generation
+"""
+
+import base64
+import time
+from typing import TYPE_CHECKING, Any, Dict, Optional, Tuple, Union
+
+import httpx
+from httpx._types import RequestFiles
+
+from litellm.llms.base_llm.videos.transformation import BaseVideoConfig
+from litellm.llms.vertex_ai.common_utils import (
+    _convert_vertex_datetime_to_openai_datetime,
+)
+from litellm.llms.vertex_ai.vertex_llm_base import VertexBase
+from litellm.types.router import GenericLiteLLMParams
+from litellm.types.videos.main import VideoCreateOptionalRequestParams, VideoObject
+from litellm.types.videos.utils import (
+    encode_video_id_with_provider,
+    extract_original_video_id,
+)
+from litellm.images.utils import ImageEditRequestUtils
+from litellm.constants import DEFAULT_GOOGLE_VIDEO_DURATION_SECONDS
+
+if TYPE_CHECKING:
+    from litellm.litellm_core_utils.litellm_logging import Logging as _LiteLLMLoggingObj
+    from litellm.llms.base_llm.chat.transformation import (
+        BaseLLMException as _BaseLLMException,
+    )
+
+    LiteLLMLoggingObj = _LiteLLMLoggingObj
+    BaseLLMException = _BaseLLMException
+else:
+    LiteLLMLoggingObj = Any
+    BaseLLMException = Any
+
+
+def _convert_image_to_vertex_format(image_file) -> Dict[str, str]:
+    """
+    Package an image as the base64 + MIME-type mapping used in a Veo instance.
+
+    Args:
+        image_file: Object exposing ``read()``; if it also exposes ``seek`` it is
+            rewound to offset 0 before being read.
+
+    Returns:
+        Dict with two keys: ``bytesBase64Encoded`` (the content, base64 encoded
+        and decoded to a UTF-8 string) and ``mimeType``.
+    """
+    # The content type is determined before the stream is rewound and consumed.
+    content_type = ImageEditRequestUtils.get_image_content_type(image_file)
+
+    if hasattr(image_file, "seek"):
+        image_file.seek(0)
+    encoded_payload = base64.b64encode(image_file.read()).decode("utf-8")
+
+    return {"bytesBase64Encoded": encoded_payload, "mimeType": content_type}
+
+
+class VertexAIVideoConfig(BaseVideoConfig, VertexBase):
+    """
+    Configuration class for Vertex AI (Veo) video generation.
+
+    Veo uses a long-running operation model:
+    1. POST to :predictLongRunning returns operation name
+    2. Poll operation using :fetchPredictOperation until done=true
+    3. Extract video data (base64) from response
+    """
+
+    def __init__(self):
+        """Initialise both base classes explicitly, BaseVideoConfig first, then VertexBase."""
+        BaseVideoConfig.__init__(self)
+        VertexBase.__init__(self)
+
+    @staticmethod
+    def extract_model_from_operation_name(operation_name: str) -> Optional[str]:
+        """
+        Extract the model name from a Vertex AI operation name.
+
+        The name is read as alternating ``collection/id`` segments, and the id
+        paired with the ``models`` collection is returned. Looking the collection
+        up by name (instead of trusting a fixed position) means a string that
+        does not follow the expected layout yields ``None`` rather than an
+        unrelated segment.
+
+        Args:
+            operation_name: Operation name in format:
+                projects/PROJECT/locations/LOCATION/publishers/google/models/MODEL/operations/OPERATION_ID
+
+        Returns:
+            Model name (e.g., "veo-2.0-generate-001"), or None when the name has
+            no ``models/<id>`` pair or the id is empty.
+        """
+        segments = operation_name.split("/")
+        # Even positions hold collection names, odd positions hold their ids.
+        for collection, resource_id in zip(segments[0::2], segments[1::2]):
+            if collection == "models":
+                return resource_id or None
+        return None
+
+    def get_supported_openai_params(self, model: str) -> list:
+        """
+        List the OpenAI-style video parameters this config accepts.
+
+        The ``model`` argument is not consulted; the same list is returned for
+        every model.
+        """
+        supported = ("model", "prompt", "input_reference", "seconds", "size")
+        return list(supported)
+
+    def map_openai_params(
+        self,
+        video_create_optional_params: VideoCreateOptionalRequestParams,
+        model: str,
+        drop_params: bool,
+    ) -> Dict[str, Any]:
+        """
+        Map OpenAI-style parameters to Veo format.
+
+        Mappings:
+        - input_reference → image (converted later, in transform_video_create_request)
+        - size → aspectRatio (e.g., "1280x720" → "16:9")
+        - seconds → durationSeconds (string values are converted with int())
+
+        Keys that are absent or explicitly ``None`` are omitted from the result.
+        No default duration is injected here: when ``seconds`` is missing or is
+        a string that int() cannot parse, ``durationSeconds`` is simply left out.
+
+        Args:
+            video_create_optional_params: OpenAI-style optional parameters.
+            model: Model name (not used by the mapping).
+            drop_params: Not used by the mapping.
+
+        Returns:
+            Dict containing any of ``image``, ``aspectRatio``, ``durationSeconds``.
+        """
+        mapped_params: Dict[str, Any] = {}
+
+        # A None reference must not be forwarded: it would otherwise survive as
+        # a stray "image" entry in the request parameters.
+        reference_image = video_create_optional_params.get("input_reference")
+        if reference_image is not None:
+            mapped_params["image"] = reference_image
+
+        size = video_create_optional_params.get("size")
+        if size is not None:
+            aspect_ratio = self._convert_size_to_aspect_ratio(size)
+            if aspect_ratio:
+                mapped_params["aspectRatio"] = aspect_ratio
+
+        seconds = video_create_optional_params.get("seconds")
+        if seconds is not None:
+            try:
+                mapped_params["durationSeconds"] = (
+                    int(seconds) if isinstance(seconds, str) else seconds
+                )
+            except (ValueError, TypeError):
+                # Unparseable duration: leave durationSeconds unset.
+                pass
+
+        return mapped_params
+
+    def _convert_size_to_aspect_ratio(self, size: str) -> Optional[str]:
+        """
+        Convert an OpenAI ``WIDTHxHEIGHT`` size string to a Veo aspectRatio.
+
+        Only two ratios are produced: "9:16" (portrait) and "16:9" (landscape).
+        The orientation is derived from the dimensions themselves, so portrait
+        sizes such as "1024x1792" map to "9:16" rather than falling through to
+        the landscape default.
+
+        Args:
+            size: Size string such as "1280x720" or "720x1280".
+
+        Returns:
+            None for an empty/falsy size; "9:16" when height exceeds width;
+            "16:9" otherwise, including when the string cannot be parsed.
+        """
+        if not size:
+            return None
+
+        default_ratio = "16:9"
+        if not isinstance(size, str):
+            return default_ratio
+
+        width_text, _, height_text = size.lower().partition("x")
+        try:
+            width, height = int(width_text), int(height_text)
+        except ValueError:
+            # Not in WIDTHxHEIGHT form: keep the historical landscape fallback.
+            return default_ratio
+
+        return "9:16" if height > width else default_ratio
+
+    def validate_environment(
+        self,
+        headers: Dict,
+        model: str,
+        api_key: Optional[str] = None,
+        api_base: Optional[str] = None,
+        litellm_params: Optional[dict] = None,
+        **kwargs,
+    ) -> Dict:
+        """
+        Build the request headers for Vertex AI video generation.
+
+        An access token is obtained via ``get_access_token`` from the project and
+        credentials found in ``litellm_params`` and sent as a Bearer token.
+
+        Args:
+            headers: Caller-supplied headers; these win over the generated
+                ``Authorization`` / ``Content-Type`` entries on key collisions.
+            model: Not used here.
+            api_key: Not used here.
+            api_base: Not used here.
+            litellm_params: Source of the Vertex project and credentials; treated
+                as an empty dict when None.
+
+        Returns:
+            A new dict with the generated entries followed by the caller's.
+        """
+        params = litellm_params or {}
+
+        # The safe_get_* helpers are used so that the params dict is only read.
+        project = VertexBase.safe_get_vertex_ai_project(litellm_params=params)
+        credentials = VertexBase.safe_get_vertex_ai_credentials(litellm_params=params)
+
+        # Only the token is needed; the resolved project id is discarded.
+        access_token, _ = self.get_access_token(
+            credentials=credentials,
+            project_id=project,
+        )
+
+        merged_headers = {
+            "Authorization": f"Bearer {access_token}",
+            "Content-Type": "application/json",
+        }
+        merged_headers.update(headers)
+        return merged_headers
+
+    def get_complete_url(
+        self,
+        model: str,
+        api_base: Optional[str],
+        litellm_params: dict,
+    ) -> str:
+        """
+        Build the model resource URL for Veo video generation.
+
+        The result has the form
+        BASE/v1/projects/PROJECT/locations/LOCATION/publishers/google/models/MODEL
+        and carries no method suffix; the suffix (e.g. ``:predictLongRunning``)
+        is appended by the request transformation.
+
+        Args:
+            model: Model name; every "vertex_ai/" occurrence is removed.
+            api_base: Optional base URL; trailing slashes are stripped. When not
+                given, https://LOCATION-aiplatform.googleapis.com is used.
+            litellm_params: Source of the Vertex project and location.
+
+        Raises:
+            ValueError: If no Vertex project can be resolved.
+        """
+        vertex_project = VertexBase.safe_get_vertex_ai_project(litellm_params)
+        # us-central1 is the fallback region when none is configured.
+        vertex_location = (
+            VertexBase.safe_get_vertex_ai_location(litellm_params) or "us-central1"
+        )
+
+        if not vertex_project:
+            raise ValueError(
+                "vertex_project is required for Vertex AI video generation. "
+                "Set it via environment variable VERTEXAI_PROJECT or pass as parameter."
+            )
+
+        model_name = model.replace("vertex_ai/", "")
+        base_url = (
+            api_base.rstrip("/")
+            if api_base
+            else f"https://{vertex_location}-aiplatform.googleapis.com"
+        )
+        resource_path = (
+            f"v1/projects/{vertex_project}/locations/{vertex_location}"
+            f"/publishers/google/models/{model_name}"
+        )
+        return f"{base_url}/{resource_path}"
+
+    def transform_video_create_request(
+        self,
+        model: str,
+        prompt: str,
+        api_base: str,
+        video_create_optional_request_params: Dict,
+        litellm_params: GenericLiteLLMParams,
+        headers: dict,
+    ) -> Tuple[Dict, RequestFiles, str]:
+        """
+        Transform the video creation request for Veo API.
+
+        Veo expects:
+        {
+            "instances": [
+                {
+                    "prompt": "A cat playing with a ball of yarn",
+                    "image": {
+                        "bytesBase64Encoded": "...",
+                        "mimeType": "image/jpeg"
+                    }
+                }
+            ],
+            "parameters": {
+                "aspectRatio": "16:9",
+                "durationSeconds": 8
+            }
+        }
+
+        Args:
+            model: Model name (not used when building the body).
+            prompt: Text prompt placed in the single instance.
+            api_base: Model resource URL; ":predictLongRunning" is appended.
+            video_create_optional_request_params: Mapped optional params. The
+                dict is copied, never mutated. An "instances" dict is merged
+                into the instance; "image" is converted into the instance;
+                everything else is sent as "parameters".
+            litellm_params: Not used here.
+            headers: Not used here.
+
+        Returns:
+            (request body, empty files list, request URL).
+        """
+        instance_dict: Dict[str, Any] = {"prompt": prompt}
+        params_copy = video_create_optional_request_params.copy()
+
+        # "image" is instance data. It is always removed from the remaining
+        # params, including when it is None or when the caller also supplies
+        # "instances", so that it can never end up inside "parameters".
+        image = params_copy.pop("image", None)
+
+        # A caller-supplied instance dict is merged over the prompt-only default.
+        if isinstance(params_copy.get("instances"), dict):
+            instance_dict.update(params_copy.pop("instances"))
+
+        # An image already present in the caller's instance takes precedence.
+        if image is not None and "image" not in instance_dict:
+            instance_dict["image"] = _convert_image_to_vertex_format(image)
+
+        request_data: Dict[str, Any] = {"instances": [instance_dict]}
+
+        # "parameters" is omitted entirely when nothing is left to send.
+        if params_copy:
+            request_data["parameters"] = params_copy
+
+        url = f"{api_base}:predictLongRunning"
+
+        # No multipart files: the whole payload travels as JSON.
+        return request_data, [], url
+
+    def transform_video_create_response(
+        self,
+        model: str,
+        raw_response: httpx.Response,
+        logging_obj: LiteLLMLoggingObj,
+        custom_llm_provider: Optional[str] = None,
+        request_data: Optional[Dict] = None,
+    ) -> VideoObject:
+        """
+        Turn the Veo create response into a VideoObject.
+
+        The response body is expected to carry the operation name:
+        {
+            "name": "projects/PROJECT_ID/locations/LOCATION/publishers/google/models/MODEL/operations/OPERATION_ID"
+        }
+
+        The returned object has:
+        - id: the operation name, encoded with provider and model when
+          ``custom_llm_provider`` is given
+        - status: "processing"
+        - usage: ``duration_seconds`` taken from the request's
+          ``parameters.durationSeconds``, falling back to
+          DEFAULT_GOOGLE_VIDEO_DURATION_SECONDS; left empty when
+          ``request_data`` is missing or empty
+
+        Raises:
+            ValueError: If the response has no "name".
+        """
+        payload = raw_response.json()
+
+        operation_name = payload.get("name")
+        if not operation_name:
+            raise ValueError(f"No operation name in Veo response: {payload}")
+
+        video_id = (
+            encode_video_id_with_provider(operation_name, custom_llm_provider, model)
+            if custom_llm_provider
+            else operation_name
+        )
+
+        video_obj = VideoObject(
+            id=video_id,
+            object="video",
+            status="processing",
+            model=model,
+        )
+
+        usage_data = {}
+        if request_data:
+            requested = request_data.get("parameters", {}).get("durationSeconds")
+            duration = requested or DEFAULT_GOOGLE_VIDEO_DURATION_SECONDS
+            if duration is not None:
+                try:
+                    usage_data["duration_seconds"] = float(duration)
+                except (ValueError, TypeError):
+                    # A non-numeric duration leaves the usage entry unset.
+                    pass
+
+        video_obj.usage = usage_data
+        return video_obj
+
+    def transform_video_status_retrieve_request(
+        self,
+        video_id: str,
+        api_base: str,
+        litellm_params: GenericLiteLLMParams,
+        headers: dict,
+    ) -> Tuple[str, Dict]:
+        """
+        Build the status-polling request for a Veo operation.
+
+        The operation name is recovered from ``video_id`` and the model is read
+        from that name. The URL is ``api_base`` (trailing slashes stripped)
+        followed by "/MODEL:fetchPredictOperation".
+
+        Returns:
+            (url, body), where body is {"operationName": <operation name>}.
+
+        Raises:
+            ValueError: If no model can be extracted from the operation name.
+        """
+        operation_name = extract_original_video_id(video_id)
+        model = self.extract_model_from_operation_name(operation_name)
+
+        if model:
+            endpoint = f"{api_base.rstrip('/')}/{model}:fetchPredictOperation"
+            # The operation to look up is identified in the request body.
+            return endpoint, {"operationName": operation_name}
+
+        raise ValueError(
+            f"Invalid operation name format: {operation_name}. "
+            "Expected format: projects/PROJECT/locations/LOCATION/publishers/google/models/MODEL/operations/OPERATION_ID"
+        )
+
+    def transform_video_status_retrieve_response(
+        self,
+        raw_response: httpx.Response,
+        logging_obj: LiteLLMLoggingObj,
+        custom_llm_provider: Optional[str] = None,
+    ) -> VideoObject:
+        """
+        Turn a Veo operation status response into a VideoObject.
+
+        While running, the body looks like:
+        {
+            "name": "projects/.../operations/OPERATION_ID",
+            "done": false
+        }
+
+        Once finished:
+        {
+            "name": "projects/.../operations/OPERATION_ID",
+            "done": true,
+            "response": {
+                "@type": "type.googleapis.com/cloud.ai.large_models.vision.GenerateVideoResponse",
+                "raiMediaFilteredCount": 0,
+                "videos": [
+                    {
+                        "bytesBase64Encoded": "...",
+                        "mimeType": "video/mp4"
+                    }
+                ]
+            }
+        }
+
+        Status mapping: "failed" when an "error" entry is present, otherwise
+        "completed" when "done" is true, otherwise "processing".
+        ``created_at`` comes from ``metadata.createTime``; the current time is
+        used when that field is missing or cannot be converted.
+        """
+        payload = raw_response.json()
+
+        operation_name = payload.get("name", "")
+        error_data = payload.get("error")
+        model = self.extract_model_from_operation_name(operation_name)
+
+        video_id = (
+            encode_video_id_with_provider(operation_name, custom_llm_provider, model)
+            if custom_llm_provider
+            else operation_name
+        )
+
+        create_time_str = payload.get("metadata", {}).get("createTime")
+        try:
+            created_at = (
+                _convert_vertex_datetime_to_openai_datetime(create_time_str)
+                if create_time_str
+                else int(time.time())
+            )
+        except Exception:
+            # Any conversion failure falls back to "now".
+            created_at = int(time.time())
+
+        # An error takes priority over the done flag.
+        if error_data:
+            status = "failed"
+        else:
+            status = "completed" if payload.get("done", False) else "processing"
+
+        return VideoObject(
+            id=video_id,
+            object="video",
+            status=status,
+            model=model,
+            created_at=created_at,
+            error=error_data,
+        )
+
+    def transform_video_content_request(
+        self,
+        video_id: str,
+        api_base: str,
+        litellm_params: GenericLiteLLMParams,
+        headers: dict,
+    ) -> Tuple[str, Dict]:
+        """
+        Build the content-download request for a Veo video.
+
+        This is the same request as status retrieval: the arguments are handed
+        unchanged to ``transform_video_status_retrieve_request`` and its
+        (url, body) result is returned. The video bytes are then extracted from
+        that operation response by ``transform_video_content_response``.
+        """
+        return self.transform_video_status_retrieve_request(
+            video_id=video_id,
+            api_base=api_base,
+            litellm_params=litellm_params,
+            headers=headers,
+        )
+
+    def transform_video_content_response(
+        self,
+        raw_response: httpx.Response,
+        logging_obj: LiteLLMLoggingObj,
+    ) -> bytes:
+        """
+        Transform the Veo video content download response.
+
+        Takes the first entry of ``response.videos`` in the operation payload
+        and decodes its ``bytesBase64Encoded`` field to raw bytes.
+
+        Args:
+            raw_response: Operation response whose JSON body is inspected.
+            logging_obj: Not used here.
+
+        Returns:
+            The decoded video bytes.
+
+        Raises:
+            ValueError: If the operation is not done, finished with an error,
+                contains no video, has no base64 payload, or the payload cannot
+                be decoded.
+        """
+        response_data = raw_response.json()
+
+        if not response_data.get("done", False):
+            raise ValueError(
+                "Video generation is not complete yet. "
+                "Please check status with video_status() before downloading."
+            )
+
+        # A finished operation may have failed; report its error rather than
+        # the less helpful "no video data" message.
+        operation_error = response_data.get("error")
+        if operation_error:
+            raise ValueError(f"Video generation failed: {operation_error}")
+
+        videos = (response_data.get("response") or {}).get("videos") or []
+        if not videos:
+            raise ValueError("No video data found in completed operation")
+
+        # Only the first video is returned.
+        base64_encoded = videos[0].get("bytesBase64Encoded")
+        if not base64_encoded:
+            raise ValueError("No base64 encoded video data found")
+
+        try:
+            return base64.b64decode(base64_encoded)
+        except (ValueError, TypeError) as e:
+            # binascii.Error (invalid base64) is a ValueError subclass.
+            raise ValueError(f"Failed to extract video data: {e}") from e
+
+    def transform_video_remix_request(
+        self,
+        video_id: str,
+        prompt: str,
+        api_base: str,
+        litellm_params: GenericLiteLLMParams,
+        headers: dict,
+        extra_body: Optional[Dict[str, Any]] = None,
+    ) -> Tuple[str, Dict]:
+        """
+        Reject remix requests; this config has no remix operation.
+
+        Raises:
+            NotImplementedError: Always. None of the arguments are inspected.
+        """
+        unsupported_message = (
+            "Video remix is not supported by Vertex AI Veo. "
+            "Please use video_generation() to create new videos."
+        )
+        raise NotImplementedError(unsupported_message)
+
+    def transform_video_remix_response(
+        self,
+        raw_response: httpx.Response,
+        logging_obj: LiteLLMLoggingObj,
+        custom_llm_provider: Optional[str] = None,
+    ) -> VideoObject:
+        """
+        Reject remix-response handling; this config has no remix operation.
+
+        Raises:
+            NotImplementedError: Always. None of the arguments are inspected.
+        """
+        unsupported_message = "Video remix is not supported by Vertex AI Veo."
+        raise NotImplementedError(unsupported_message)
+
+    def transform_video_list_request(
+        self,
+        api_base: str,
+        litellm_params: GenericLiteLLMParams,
+        headers: dict,
+        after: Optional[str] = None,
+        limit: Optional[int] = None,
+        order: Optional[str] = None,
+        extra_query: Optional[Dict[str, Any]] = None,
+    ) -> Tuple[str, Dict]:
+        """
+        Reject list requests; this config has no list operation.
+
+        Raises:
+            NotImplementedError: Always. None of the arguments are inspected.
+        """
+        unsupported_message = (
+            "Video list is not supported by Vertex AI Veo. "
+            "Use the operations endpoint directly if you need to list operations."
+        )
+        raise NotImplementedError(unsupported_message)
+
+    def transform_video_list_response(
+        self,
+        raw_response: httpx.Response,
+        logging_obj: LiteLLMLoggingObj,
+        custom_llm_provider: Optional[str] = None,
+    ) -> Dict[str, str]:
+        """
+        Reject list-response handling; this config has no list operation.
+
+        Raises:
+            NotImplementedError: Always. None of the arguments are inspected.
+        """
+        unsupported_message = "Video list is not supported by Vertex AI Veo."
+        raise NotImplementedError(unsupported_message)
+
+    def transform_video_delete_request(
+        self,
+        video_id: str,
+        api_base: str,
+        litellm_params: GenericLiteLLMParams,
+        headers: dict,
+    ) -> Tuple[str, Dict]:
+        """
+        Reject delete requests; this config has no delete operation.
+
+        Raises:
+            NotImplementedError: Always. None of the arguments are inspected.
+        """
+        unsupported_message = (
+            "Video delete is not supported by Vertex AI Veo. "
+            "Videos are automatically cleaned up by Google."
+        )
+        raise NotImplementedError(unsupported_message)
+
+    def transform_video_delete_response(
+        self,
+        raw_response: httpx.Response,
+        logging_obj: LiteLLMLoggingObj,
+    ) -> VideoObject:
+        """
+        Reject delete-response handling; this config has no delete operation.
+
+        Raises:
+            NotImplementedError: Always. None of the arguments are inspected.
+        """
+        unsupported_message = "Video delete is not supported by Vertex AI Veo."
+        raise NotImplementedError(unsupported_message)
+
+    def get_error_class(
+        self, error_message: str, status_code: int, headers: Union[dict, httpx.Headers]
+    ) -> BaseLLMException:
+        """
+        Build the provider exception for a failed request.
+
+        Args:
+            error_message: Text to store as the exception message.
+            status_code: HTTP status code of the failed call.
+            headers: Response headers to attach to the exception.
+
+        Returns:
+            A ``VertexAIError`` carrying the three values above.
+        """
+        # Imported here rather than at module top, as in the original code.
+        from litellm.llms.vertex_ai.common_utils import VertexAIError
+
+        vertex_error = VertexAIError(
+            message=error_message,
+            status_code=status_code,
+            headers=headers,
+        )
+        return vertex_error
+
--- a/litellm/main.py
+++ b/litellm/main.py
@ -6001,6 +6001,7 @@ async def ahealth_check(
            "audio_speech",
            "audio_transcription",
            "image_generation",
+            "video_generation",
            "batch",
            "rerank",
            "realtime",
--- a/litellm/model_prices_and_context_window_backup.json
+++ b/litellm/model_prices_and_context_window_backup.json
@ -12668,6 +12668,34 @@
            "video"
        ]
    },
+    "gemini/veo-3.1-fast-generate-preview": {
+        "litellm_provider": "gemini",
+        "max_input_tokens": 1024,
+        "max_tokens": 1024,
+        "mode": "video_generation",
+        "output_cost_per_second": 0.15,
+        "source": "https://ai.google.dev/gemini-api/docs/video",
+        "supported_modalities": [
+            "text"
+        ],
+        "supported_output_modalities": [
+            "video"
+        ]
+    },
+    "gemini/veo-3.1-generate-preview": {
+        "litellm_provider": "gemini",
+        "max_input_tokens": 1024,
+        "max_tokens": 1024,
+        "mode": "video_generation",
+        "output_cost_per_second": 0.40,
+        "source": "https://ai.google.dev/gemini-api/docs/video",
+        "supported_modalities": [
+            "text"
+        ],
+        "supported_output_modalities": [
+            "video"
+        ]
+    },
    "google_pse/search": {
        "input_cost_per_query": 0.005,
        "litellm_provider": "google_pse",
@ -23374,6 +23402,34 @@
            "video"
        ]
    },
+    "vertex_ai/veo-3.1-generate-preview": {
+        "litellm_provider": "vertex_ai-video-models",
+        "max_input_tokens": 1024,
+        "max_tokens": 1024,
+        "mode": "video_generation",
+        "output_cost_per_second": 0.4,
+        "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/veo",
+        "supported_modalities": [
+            "text"
+        ],
+        "supported_output_modalities": [
+            "video"
+        ]
+    },
+    "vertex_ai/veo-3.1-fast-generate-preview": {
+        "litellm_provider": "vertex_ai-video-models",
+        "max_input_tokens": 1024,
+        "max_tokens": 1024,
+        "mode": "video_generation",
+        "output_cost_per_second": 0.15,
+        "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/veo",
+        "supported_modalities": [
+            "text"
+        ],
+        "supported_output_modalities": [
+            "video"
+        ]
+    },
    "voyage/rerank-2": {
        "input_cost_per_query": 5e-08,
        "input_cost_per_token": 5e-08,
--- a/litellm/proxy/_types.py
+++ b/litellm/proxy/_types.py
@ -235,6 +235,7 @@ class LiteLLMRoutes(enum.Enum):
        "completion",
        "embeddings",
        "image_generation",
+        "video_generation",
        "audio_transcriptions",
        "moderations",
        "model_list",  # OpenAI /v1/models route
--- a/litellm/proxy/health_endpoints/_health_endpoints.py
+++ b/litellm/proxy/health_endpoints/_health_endpoints.py
@ -947,6 +947,7 @@ async def test_model_connection(
            "audio_speech",
            "audio_transcription",
            "image_generation",
+            "video_generation",
            "batch",
            "rerank",
            "realtime",
--- a/litellm/proxy/video_endpoints/endpoints.py
+++ b/litellm/proxy/video_endpoints/endpoints.py
@ -15,6 +15,7 @@ from litellm.proxy.common_utils.openai_endpoint_utils import (
    get_custom_llm_provider_from_request_headers,
    get_custom_llm_provider_from_request_query,
 )
+from litellm.types.videos.utils import decode_video_id_with_provider

 router = APIRouter()

@ -237,13 +238,15 @@ async def video_status(
    # Create data with video_id
    data: Dict[str, Any] = {"video_id": video_id}

-    # Extract custom_llm_provider from headers, query params, or body
+    decoded = decode_video_id_with_provider(video_id)
+    provider_from_id = decoded.get("custom_llm_provider")
+
    custom_llm_provider = (
        get_custom_llm_provider_from_request_headers(request=request)
        or get_custom_llm_provider_from_request_query(request=request)
        or await get_custom_llm_provider_from_request_body(request=request)
+        or provider_from_id
        or "openai"
-
    )
    if custom_llm_provider:
        data["custom_llm_provider"] = custom_llm_provider
@ -304,7 +307,7 @@ async def video_content(
    
    Example:
    ```bash
-    curl -X GET "http://localhost:4000/v1/videos/video_123/content" \
+    curl -X GET "http://localhost:4000/v1/videos/{video_id}/content" \
        -H "Authorization: Bearer sk-1234" \
        --output video.mp4
    ```
@ -326,11 +329,14 @@ async def video_content(
    # Create data with video_id
    data: Dict[str, Any] = {"video_id": video_id}

-    # Extract custom_llm_provider from headers, query params, or body
+    decoded = decode_video_id_with_provider(video_id)
+    provider_from_id = decoded.get("custom_llm_provider")
+    
    custom_llm_provider = (
        get_custom_llm_provider_from_request_headers(request=request)
        or get_custom_llm_provider_from_request_query(request=request)
        or await get_custom_llm_provider_from_request_body(request=request)
+        or provider_from_id
    )
    if custom_llm_provider:
        data["custom_llm_provider"] = custom_llm_provider
@ -428,11 +434,14 @@ async def video_remix(
    data = orjson.loads(body)
    data["video_id"] = video_id

-    # Extract custom_llm_provider from headers, query params, or body
+    decoded = decode_video_id_with_provider(video_id)
+    provider_from_id = decoded.get("custom_llm_provider")
+
    custom_llm_provider = (
        get_custom_llm_provider_from_request_headers(request=request)
        or get_custom_llm_provider_from_request_query(request=request)
        or data.get("custom_llm_provider")
+        or provider_from_id
    )
    if custom_llm_provider:
        data["custom_llm_provider"] = custom_llm_provider
--- a/litellm/types/llms/gemini.py
+++ b/litellm/types/llms/gemini.py
@ -221,3 +221,125 @@ class GeminiImageGenerationPrediction(TypedDict):
 class GeminiImageGenerationResponse(TypedDict):
    """Complete response body from Gemini image generation API"""
    predictions: List[GeminiImageGenerationPrediction]
+
+# Video Generation Types
+class GeminiVideoGenerationInstance(TypedDict):
+    """
+    Instance data for Gemini video generation request.
+
+    Used as the element type of `GeminiVideoGenerationRequest.instances`.
+    """
+    # The only key declared on an instance.
+    prompt: str
+
+
+class GeminiVideoGenerationParameters(BaseModel):
+    """
+    Parameters for Gemini video generation request.
+
+    Every field is optional and defaults to None. Field names are camelCase
+    (presumably mirroring the Gemini REST payload keys - confirm against the
+    API reference).
+
+    See: Veo 3/3.1 parameter guide.
+    """
+    aspectRatio: Optional[str] = None
+    """Aspect ratio for generated video (e.g., '16:9', '9:16')."""
+
+    durationSeconds: Optional[int] = None
+    """
+    Length of the generated video in seconds (e.g., 4, 5, 6, 8).
+    Must be 8 when using extension/interpolation or referenceImages.
+    """
+
+    resolution: Optional[str] = None
+    """
+    Video resolution (e.g., '720p', '1080p').
+    '1080p' only supports 8s duration; extension only supports '720p'.
+    """
+
+    negativePrompt: Optional[str] = None
+    """Text describing what not to include in the video."""
+
+    image: Optional[Any] = None
+    """
+    An initial image to animate (Image object).
+    """
+
+    lastFrame: Optional[Any] = None
+    """
+    The final image for interpolation video to transition.
+    Should be used with the 'image' parameter.
+    """
+
+    referenceImages: Optional[list] = None
+    """
+    Up to three images to be used as style/content references.
+    Only supported in Veo 3.1 (list of VideoGenerationReferenceImage objects).
+    """
+
+    video: Optional[Any] = None
+    """
+    Video to be used for video extension (Video object).
+    Only supported in Veo 3.1 & Veo 3 Fast.
+    """
+
+    personGeneration: Optional[str] = None
+    """
+    Controls the generation of people.
+    Text-to-video & Extension: "allow_all" only
+    Image-to-video, Interpolation, & Reference images (Veo 3.x): "allow_adult" only
+    See documentation for region restrictions & more.
+    """
+
+
+class GeminiVideoGenerationRequest(BaseModel):
+    """
+    Complete request body for Gemini video generation.
+
+    `instances` is required; `parameters` is optional and defaults to None.
+    """
+    instances: List[GeminiVideoGenerationInstance]
+    parameters: Optional[GeminiVideoGenerationParameters] = None
+
+
+# Video Generation Operation Response Types
+class GeminiVideoUri(BaseModel):
+    """
+    Video URI in the generated sample.
+
+    Used as the type of `GeminiGeneratedVideoSample.video`.
+    """
+    uri: str
+    """File URI of the generated video (e.g., 'files/abc123...')"""
+
+
+class GeminiGeneratedVideoSample(BaseModel):
+    """
+    Individual generated video sample.
+
+    Used as the element type of `GeminiGenerateVideoResponse.generatedSamples`.
+    """
+    video: GeminiVideoUri
+    """Video object containing the URI"""
+
+
+class GeminiGenerateVideoResponse(BaseModel):
+    """
+    Generate video response containing the samples.
+
+    Used as the type of `GeminiOperationResponse.generateVideoResponse`.
+    """
+    generatedSamples: List[GeminiGeneratedVideoSample]
+    """List of generated video samples"""
+
+
+class GeminiOperationResponse(BaseModel):
+    """
+    Response object in the operation when done.
+
+    Used as the type of `GeminiLongRunningOperationResponse.response`.
+    """
+    generateVideoResponse: GeminiGenerateVideoResponse
+    """Video generation response"""
+
+
+class GeminiOperationMetadata(BaseModel):
+    """
+    Metadata for the operation.
+
+    Both fields are optional and default to None.
+    """
+    createTime: Optional[str] = None
+    """Creation timestamp"""
+    model: Optional[str] = None
+    """Model used for generation"""
+
+
+class GeminiLongRunningOperationResponse(BaseModel):
+    """
+    Complete response for a long-running operation.
+
+    Used when polling operation status and extracting results.
+
+    `name` is the only required field. `done` defaults to False, and
+    `metadata`, `response` and `error` default to None.
+    """
+    name: str
+    """Operation name (e.g., 'operations/generate_1234567890')"""
+    
+    done: bool = False
+    """Whether the operation is complete"""
+    
+    metadata: Optional[GeminiOperationMetadata] = None
+    """Operation metadata"""
+    
+    response: Optional[GeminiOperationResponse] = None
+    """Response object when operation is complete"""
+    
+    error: Optional[Dict[str, Any]] = None
+    """Error details if operation failed"""
--- a/litellm/types/llms/vertex_ai.py
+++ b/litellm/types/llms/vertex_ai.py
@ -638,6 +638,52 @@ class VertexBatchPredictionResponse(TypedDict, total=False):
    modelVersionId: str


+class VertexVideoImage(TypedDict, total=False):
+    """
+    Image input for video generation.
+
+    Declared with total=False, so type checkers treat both keys as optional.
+    """
+
+    bytesBase64Encoded: str
+    mimeType: str
+
+
+class VertexVideoGenerationInstance(TypedDict, total=False):
+    """
+    Instance object for Vertex AI video generation request.
+
+    The class is total=False, so keys are optional unless marked Required.
+    """
+
+    # Explicitly Required: must be present even though the class is total=False.
+    prompt: Required[str]
+    # Optional key (not marked Required).
+    image: VertexVideoImage
+
+
+class VertexVideoGenerationParameters(TypedDict, total=False):
+    """
+    Parameters for Vertex AI video generation.
+
+    Declared with total=False, so type checkers treat both keys as optional.
+    """
+
+    # Only these two literal values type-check.
+    aspectRatio: Literal["9:16", "16:9"]
+    durationSeconds: int
+
+
+class VertexVideoGenerationRequest(TypedDict, total=False):
+    """
+    Complete request body for Vertex AI video generation.
+
+    Declared with total=False so that the `Required[...]` marker on
+    `instances` is meaningful: `instances` must always be present, while
+    `parameters` may be omitted. This matches how
+    `VertexVideoGenerationInstance` declares its keys.
+    """
+
+    instances: Required[List[VertexVideoGenerationInstance]]
+    # Optional key (not marked Required).
+    parameters: VertexVideoGenerationParameters
+
+
+class VertexVideoOutput(TypedDict, total=False):
+    """
+    Video output in response.
+
+    Declared with total=False, so type checkers treat every key as optional.
+    """
+
+    bytesBase64Encoded: str
+    mimeType: str
+    gcsUri: str
+
+
+class VertexVideoGenerationResponse(TypedDict, total=False):
+    """
+    Response body for Vertex AI video generation.
+
+    Declared with total=False, so type checkers treat every key as optional.
+    `response`, `metadata` and `error` are untyped dicts (Dict[str, Any]).
+    """
+
+    name: str
+    done: bool
+    response: Dict[str, Any]
+    metadata: Dict[str, Any]
+    error: Dict[str, Any]
+
+
 VERTEX_CREDENTIALS_TYPES = Union[str, Dict[str, str]]


--- a/litellm/types/utils.py
+++ b/litellm/types/utils.py
@ -2788,6 +2788,10 @@ class SpecialEnums(Enum):

    LITELLM_MANAGED_GENERIC_RESPONSE_COMPLETE_STR = "litellm_proxy;model_id:{};generic_response_id:{}"  # generic implementation of 'managed batches' - used for finetuning and any future work.

+    LITELLM_MANAGED_VIDEO_COMPLETE_STR = (
+        "litellm:custom_llm_provider:{};model_id:{};video_id:{}"
+    )
+

 class ServiceTier(Enum):
    """Enum for service tier types used in cost calculations."""
--- a/litellm/types/videos/main.py
+++ b/litellm/types/videos/main.py
@ -10,7 +10,7 @@ class VideoObject(BaseModel):
    id: str
    object: Literal["video"]
    status: str
-    created_at: int
+    created_at: Optional[int] = None
    completed_at: Optional[int] = None
    expires_at: Optional[int] = None
    error: Optional[Dict[str, Any]] = None
@ -87,3 +87,10 @@ class VideoCreateRequestParams(VideoCreateOptionalRequestParams, total=False):
    Params here: https://platform.openai.com/docs/api-reference/videos/create
    """
    prompt: str
+
+class DecodedVideoId(TypedDict, total=False):
+    """
+    Structure representing a decoded video ID.
+
+    Declared with total=False, so type checkers treat every key as optional.
+    """
+
+    # May be None as well as absent (annotated Optional).
+    custom_llm_provider: Optional[str]
+    # May be None as well as absent (annotated Optional).
+    model_id: Optional[str]
+    video_id: str
--- a/litellm/types/videos/utils.py
+++ b/litellm/types/videos/utils.py
@ -0,0 +1,100 @@
+"""
+Utility functions for video ID encoding/decoding with provider information.
+
+Follows the pattern used in responses/utils.py for consistency.
+Format: video_{base64_encoded_string}
+"""
+import base64
+from typing import Tuple, Optional
+from litellm.types.utils import SpecialEnums
+from litellm.types.videos.main import DecodedVideoId    
+from litellm._logging import verbose_logger
+
+
+
+VIDEO_ID_PREFIX = "video_"
+
+
+def encode_video_id_with_provider(
+    video_id: str, 
+    provider: str,
+    model_id: Optional[str] = None
+) -> str:
+    """
+    Pack provider and model_id into a video_id as a prefixed base64 token.
+
+    Args:
+        video_id: The video id to wrap.
+        provider: Provider name to embed alongside the id.
+        model_id: Optional model id to embed; a missing value is stored as "".
+
+    Returns:
+        `VIDEO_ID_PREFIX` followed by the base64 encoding of the assembled
+        string. `video_id` is returned unchanged when either `provider` or
+        `video_id` is falsy, or when `video_id` already starts with
+        `VIDEO_ID_PREFIX`.
+    """
+    # Nothing to embed, or the id already carries the managed prefix.
+    if not provider or not video_id or video_id.startswith(VIDEO_ID_PREFIX):
+        return video_id
+
+    template = str(SpecialEnums.LITELLM_MANAGED_VIDEO_COMPLETE_STR.value)
+    raw_id = template.format(provider, model_id or "", video_id)
+    token: str = base64.b64encode(raw_id.encode("utf-8")).decode("utf-8")
+
+    return VIDEO_ID_PREFIX + token
+
+
+def decode_video_id_with_provider(encoded_video_id: str) -> DecodedVideoId:
+    """
+    Decode provider and model_id from an encoded video_id.
+
+    Args:
+        encoded_video_id: A video id, either one produced by
+            `encode_video_id_with_provider` or any other string.
+
+    Returns:
+        A DecodedVideoId. When the input is a well-formed encoded id, the
+        embedded provider, model_id and video_id are returned. In every other
+        case (empty input, missing prefix, invalid base64, unexpected layout)
+        `custom_llm_provider` and `model_id` are None and `video_id` is the
+        input unchanged. This function does not raise on malformed ids.
+    """
+    # Single fallback value: the input is passed through with no provider info.
+    passthrough = DecodedVideoId(
+        custom_llm_provider=None,
+        model_id=None,
+        video_id=encoded_video_id,
+    )
+
+    if not encoded_video_id or not encoded_video_id.startswith(VIDEO_ID_PREFIX):
+        return passthrough
+
+    provider_label = "litellm:custom_llm_provider:"
+    model_label = "model_id:"
+    video_label = "video_id:"
+
+    try:
+        # Strip only the leading prefix, never later occurrences.
+        payload = encoded_video_id[len(VIDEO_ID_PREFIX):]
+        # validate=True rejects non-base64 characters instead of silently
+        # discarding them, so unrelated ids fall back rather than mis-decode.
+        decoded_id = base64.b64decode(
+            payload.encode("utf-8"), validate=True
+        ).decode("utf-8")
+
+        # maxsplit=2 keeps any ";" inside the provider's own video id intact.
+        parts = decoded_id.split(";", 2)
+        if len(parts) != 3:
+            return passthrough
+
+        provider_part, model_part, video_part = parts
+        # Only accept the exact layout written by the encoder.
+        if not (
+            provider_part.startswith(provider_label)
+            and model_part.startswith(model_label)
+            and video_part.startswith(video_label)
+        ):
+            return passthrough
+
+        return DecodedVideoId(
+            custom_llm_provider=provider_part[len(provider_label):],
+            model_id=model_part[len(model_label):],
+            video_id=video_part[len(video_label):],
+        )
+    except Exception as e:
+        # Best-effort: ids that merely share the prefix are expected to land
+        # here (bad padding, non-UTF-8 bytes), so log and pass through.
+        verbose_logger.debug(f"Error decoding video_id '{encoded_video_id}': {e}")
+        return passthrough
+
+
+def extract_original_video_id(encoded_video_id: str) -> str:
+    """
+    Return the `video_id` field of the decoded id.
+
+    Falls back to `encoded_video_id` itself when the decoded result has no
+    `video_id` key.
+    """
+    return decode_video_id_with_provider(encoded_video_id).get(
+        "video_id", encoded_video_id
+    )
--- a/litellm/utils.py
+++ b/litellm/utils.py
@ -7650,6 +7650,16 @@ class ProviderConfigManager:
            from litellm.llms.azure.videos.transformation import AzureVideoConfig

            return AzureVideoConfig()
+        elif LlmProviders.GEMINI == provider:
+            from litellm.llms.gemini.videos.transformation import GeminiVideoConfig
+
+            return GeminiVideoConfig()
+        elif LlmProviders.VERTEX_AI == provider:
+            from litellm.llms.vertex_ai.videos.transformation import (
+                VertexAIVideoConfig,
+            )
+
+            return VertexAIVideoConfig()
        return None

    @staticmethod
--- a/litellm/videos/main.py
+++ b/litellm/videos/main.py
@ -19,6 +19,7 @@ from litellm.types.router import GenericLiteLLMParams
 from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj
 from litellm.llms.base_llm.videos.transformation import BaseVideoConfig
 from litellm.llms.custom_httpx.llm_http_handler import BaseLLMHTTPHandler
+from litellm.types.videos.utils import decode_video_id_with_provider

 #################### Initialize provider clients ####################
 llm_http_handler: BaseLLMHTTPHandler = BaseLLMHTTPHandler()
@ -303,13 +304,10 @@ def video_content(
        ```python
        import litellm

-        # Download video content
        video_bytes = litellm.video_content(
-            video_id="video_123",
-            custom_llm_provider="openai"
+            video_id="video_123"
        )

-        # Save to file
        with open("video.mp4", "wb") as f:
            f.write(video_bytes)
        ```
@ -320,9 +318,10 @@ def video_content(
        litellm_call_id: Optional[str] = kwargs.get("litellm_call_id", None)
        _is_async = kwargs.pop("async_call", False) is True

-        # Ensure custom_llm_provider is not None - default to openai if not provided
+        # Try to decode provider from video_id if not explicitly provided
        if custom_llm_provider is None:
-            custom_llm_provider = "openai"
+            decoded = decode_video_id_with_provider(video_id)
+            custom_llm_provider = decoded.get("custom_llm_provider") or "openai"

        # get llm provider logic
        litellm_params = GenericLiteLLMParams(**kwargs)
@ -594,9 +593,10 @@ def video_remix(  # noqa: PLR0915
            response = VideoObject(**mock_response)
            return response

-        # Ensure custom_llm_provider is not None - default to openai if not provided
+        # Try to decode provider from video_id if not explicitly provided
        if custom_llm_provider is None:
-            custom_llm_provider = "openai"
+            decoded = decode_video_id_with_provider(video_id)
+            custom_llm_provider = decoded.get("custom_llm_provider") or "openai"

        # get llm provider logic
        litellm_params = GenericLiteLLMParams(**kwargs)
@ -907,7 +907,7 @@ async def avideo_status(

    Returns:
    - `response` (VideoObject): The response returned by the `video_status` function.
-"""
+    """
    local_vars = locals()
    try:
        loop = asyncio.get_event_loop()
@ -1015,8 +1015,7 @@ def video_status(  # noqa: PLR0915

        # Get video status
        video_status = litellm.video_status(
-            video_id="video_123",
-            custom_llm_provider="openai"
+            video_id="video_123"
        )

        print(f"Video status: {video_status.status}")
@ -1038,9 +1037,10 @@ def video_status(  # noqa: PLR0915
            response = VideoObject(**mock_response)
            return response

-        # Ensure custom_llm_provider is not None - default to openai if not provided
+        # Try to decode provider from video_id if not explicitly provided
        if custom_llm_provider is None:
-            custom_llm_provider = "openai"
+            decoded = decode_video_id_with_provider(video_id)
+            custom_llm_provider = decoded.get("custom_llm_provider") or "openai"

        # get llm provider logic
        litellm_params = GenericLiteLLMParams(**kwargs)
--- a/litellm/videos/utils.py
+++ b/litellm/videos/utils.py
@ -1,8 +1,9 @@
-from typing import Any, Dict, cast, get_type_hints
+from typing import Any, Dict, cast

 import litellm
 from litellm.llms.base_llm.videos.transformation import BaseVideoConfig
 from litellm.types.videos.main import VideoCreateOptionalRequestParams
+from litellm.utils import filter_out_litellm_params


 class VideoGenerationRequestUtils:
@ -25,25 +26,6 @@ class VideoGenerationRequestUtils:
        Returns:
            A dictionary of supported parameters for the video generation API
        """
-        # Get supported parameters for the model
-        supported_params = video_generation_provider_config.get_supported_openai_params(model)
-
-        # Check for unsupported parameters
-        unsupported_params = [
-            param
-            for param in video_generation_optional_params
-            if param not in supported_params
-        ]
-
-        if unsupported_params:
-            raise litellm.UnsupportedParamsError(
-                model=model,
-                message=(
-                    f"The following parameters are not supported for model {model}: "
-                    f"{', '.join(unsupported_params)}"
-                ),
-            )
-
        # Map parameters to provider-specific format
        mapped_params = video_generation_provider_config.map_openai_params(
            video_create_optional_params=video_generation_optional_params,
@ -51,6 +33,15 @@ class VideoGenerationRequestUtils:
            drop_params=litellm.drop_params,
        )

+        # Merge extra_body params if present (for provider-specific parameters)
+        if "extra_body" in video_generation_optional_params:
+            extra_body = video_generation_optional_params["extra_body"]
+            if extra_body and isinstance(extra_body, dict):
+                # extra_body params override mapped params
+                mapped_params.update(extra_body)
+            # Remove extra_body from mapped_params since it's not sent to the API
+            mapped_params.pop("extra_body", None)
+
        return mapped_params

    @staticmethod
@ -66,9 +57,44 @@ class VideoGenerationRequestUtils:
        Returns:
            VideoCreateOptionalRequestParams instance with only the valid parameters
        """
-        valid_keys = get_type_hints(VideoCreateOptionalRequestParams).keys()
-        filtered_params = {
-            k: v for k, v in params.items() if k in valid_keys and v is not None
+        params = dict(params or {})
+
+        raw_kwargs = params.get("kwargs", {})
+        if not isinstance(raw_kwargs, dict):
+            raw_kwargs = {}
+
+        kwargs_extra_body = raw_kwargs.pop("extra_body", None)
+        top_level_extra_body = params.get("extra_body")
+
+        base_params_raw = {
+            key: value
+            for key, value in params.items()
+            if key not in {"kwargs", "extra_body", "prompt", "model"} and value is not None
+        }
+        base_params = filter_out_litellm_params(kwargs=base_params_raw)
+
+        cleaned_kwargs = filter_out_litellm_params(
+            kwargs={k: v for k, v in raw_kwargs.items() if v is not None}
+        )
+
+        optional_params: Dict[str, Any] = {
+            **base_params,
+            **cleaned_kwargs,
        }

-        return cast(VideoCreateOptionalRequestParams, filtered_params)
+        merged_extra_body: Dict[str, Any] = {}
+        for extra_body_candidate in (top_level_extra_body, kwargs_extra_body):
+            if isinstance(extra_body_candidate, dict):
+                for key, value in extra_body_candidate.items():
+                    if value is not None:
+                        merged_extra_body[key] = value
+
+        if merged_extra_body:
+            merged_extra_body = filter_out_litellm_params(kwargs=merged_extra_body)
+            if merged_extra_body:
+                optional_params["extra_body"] = merged_extra_body
+                optional_params.update(merged_extra_body)
+
+        optional_params.pop("timeout", None)
+
+        return cast(VideoCreateOptionalRequestParams, optional_params)
--- a/model_prices_and_context_window.json
+++ b/model_prices_and_context_window.json
@ -12668,6 +12668,34 @@
            "video"
        ]
    },
+    "gemini/veo-3.1-fast-generate-preview": {
+        "litellm_provider": "gemini",
+        "max_input_tokens": 1024,
+        "max_tokens": 1024,
+        "mode": "video_generation",
+        "output_cost_per_second": 0.15,
+        "source": "https://ai.google.dev/gemini-api/docs/video",
+        "supported_modalities": [
+            "text"
+        ],
+        "supported_output_modalities": [
+            "video"
+        ]
+    },
+    "gemini/veo-3.1-generate-preview": {
+        "litellm_provider": "gemini",
+        "max_input_tokens": 1024,
+        "max_tokens": 1024,
+        "mode": "video_generation",
+        "output_cost_per_second": 0.40,
+        "source": "https://ai.google.dev/gemini-api/docs/video",
+        "supported_modalities": [
+            "text"
+        ],
+        "supported_output_modalities": [
+            "video"
+        ]
+    },
    "google_pse/search": {
        "input_cost_per_query": 0.005,
        "litellm_provider": "google_pse",
@ -23374,6 +23402,34 @@
            "video"
        ]
    },
+    "vertex_ai/veo-3.1-generate-preview": {
+        "litellm_provider": "vertex_ai-video-models",
+        "max_input_tokens": 1024,
+        "max_tokens": 1024,
+        "mode": "video_generation",
+        "output_cost_per_second": 0.4,
+        "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/veo",
+        "supported_modalities": [
+            "text"
+        ],
+        "supported_output_modalities": [
+            "video"
+        ]
+    },
+    "vertex_ai/veo-3.1-fast-generate-preview": {
+        "litellm_provider": "vertex_ai-video-models",
+        "max_input_tokens": 1024,
+        "max_tokens": 1024,
+        "mode": "video_generation",
+        "output_cost_per_second": 0.15,
+        "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/veo",
+        "supported_modalities": [
+            "text"
+        ],
+        "supported_output_modalities": [
+            "video"
+        ]
+    },
    "voyage/rerank-2": {
        "input_cost_per_query": 5e-08,
        "input_cost_per_token": 5e-08,
--- a/package-lock.json
+++ b/package-lock.json
@ -8172,4 +8172,4 @@
      }
    }
  }
-}
+}
--- a/tests/test_litellm/llms/azure/videos/test_azure_video_transformation.py
+++ b/tests/test_litellm/llms/azure/videos/test_azure_video_transformation.py
@ -129,10 +129,12 @@ class TestAzureVideoConfig:
        )
        
        headers = {"Authorization": f"Bearer {self.api_key}"}
+        api_base = f"{self.api_base}/openai/v1/videos"
        
-        data, files = self.config.transform_video_create_request(
+        data, files, url = self.config.transform_video_create_request(
            model=self.model,
            prompt="A cinematic shot of a city at night",
+            api_base=api_base,
            video_create_optional_request_params=video_params,
            litellm_params=litellm_params,
            headers=headers
@ -142,6 +144,8 @@ class TestAzureVideoConfig:
        assert data["seconds"] == 8
        assert data["size"] == "720x1280"
        assert data["model"] == self.model
+        # URL should be returned as-is for Azure
+        assert url == api_base

    def test_transform_video_create_response(self):
        """Test video creation response transformation."""
@ -275,13 +279,15 @@ class TestAzureVideoConfig:
        )
        
        headers = {"Authorization": f"Bearer {self.api_key}"}
+        api_base = f"{self.api_base}/openai/v1/videos"
        
        # Mock file existence
        with patch('os.path.exists', return_value=True):
            with patch('builtins.open', mock_open(read_data=b"fake image data")):
-                data, files = self.config.transform_video_create_request(
+                data, files, url = self.config.transform_video_create_request(
                    model=self.model,
                    prompt="A video with reference image",
+                    api_base=api_base,
                    video_create_optional_request_params=video_params,
                    litellm_params=litellm_params,
                    headers=headers
@ -291,6 +297,7 @@ class TestAzureVideoConfig:
        assert data["seconds"] == 10
        assert len(files) == 1
        assert files[0][0] == "input_reference"
+        assert url == api_base

    def test_error_handling_in_response_transformation(self):
        """Test error handling in response transformation methods."""
--- a/tests/test_litellm/llms/gemini/videos/init.py
+++ b/tests/test_litellm/llms/gemini/videos/init.py
@ -0,0 +1,2 @@
+# Gemini Video Generation Tests
+
--- a/tests/test_litellm/llms/gemini/videos/test_gemini_video_transformation.py
+++ b/tests/test_litellm/llms/gemini/videos/test_gemini_video_transformation.py
@ -0,0 +1,680 @@
+"""
+Tests for Gemini (Veo) video generation transformation.
+"""
+import json
+import os
+from unittest.mock import MagicMock, Mock, patch
+
+import httpx
+import pytest
+
+from litellm.llms.gemini.videos.transformation import GeminiVideoConfig
+from litellm.llms.openai.cost_calculation import video_generation_cost
+from litellm.types.router import GenericLiteLLMParams
+from litellm.types.videos.main import VideoObject
+
+
+class TestGeminiVideoConfig:
+    """Test GeminiVideoConfig transformation class."""
+
+    def setup_method(self):
+        """Build the objects shared by the tests: a Gemini video config and a mock logging object."""
+        self.mock_logging_obj = Mock()
+        self.config = GeminiVideoConfig()
+
+    def test_get_supported_openai_params(self):
+        """Check that each expected OpenAI-style parameter name is reported as supported."""
+        supported = self.config.get_supported_openai_params("veo-3.0-generate-preview")
+
+        for expected_name in ("model", "prompt", "input_reference", "seconds", "size"):
+            assert expected_name in supported
+
+    def test_validate_environment_with_api_key(self):
+        """An explicit API key must yield the x-goog-api-key header and a JSON Content-Type."""
+        result = self.config.validate_environment(
+            headers={},
+            model="veo-3.0-generate-preview",
+            api_key="test-api-key-123",
+        )
+
+        expected_headers = {
+            "x-goog-api-key": "test-api-key-123",
+            "Content-Type": "application/json",
+        }
+        # Presence is asserted before the value so a missing header fails with a clear message.
+        for header_name, header_value in expected_headers.items():
+            assert header_name in result
+            assert result[header_name] == header_value
+
+    @patch.dict('os.environ', {}, clear=True)
+    def test_validate_environment_missing_api_key(self):
+        """Expect a ValueError naming the required env vars when api_key is None.
+
+        The decorator empties os.environ for the duration of the test, so no
+        variable set on the developer's machine can be read during the call.
+        """
+        headers = {}
+        
+        with pytest.raises(ValueError, match="GEMINI_API_KEY or GOOGLE_API_KEY is required"):
+            self.config.validate_environment(
+                headers=headers,
+                model="veo-3.0-generate-preview",
+                api_key=None
+            )
+
+    def test_get_complete_url(self):
+        """The full URL is the given API base plus the v1beta model path with the provider prefix dropped."""
+        expected_url = "https://generativelanguage.googleapis.com/v1beta/models/veo-3.0-generate-preview:predictLongRunning"
+
+        built_url = self.config.get_complete_url(
+            model="gemini/veo-3.0-generate-preview",
+            api_base="https://generativelanguage.googleapis.com",
+            litellm_params={},
+        )
+
+        assert built_url == expected_url
+
+    def test_get_complete_url_default_api_base(self):
+        """Check the URL built when api_base is None (a default base has to be used)."""
+        url = self.config.get_complete_url(
+            model="gemini/veo-3.0-generate-preview",
+            api_base=None,
+            litellm_params={}
+        )
+        
+        # Only the host prefix and the model/action suffix are pinned here;
+        # the path segment between them is deliberately not asserted.
+        assert url.startswith("https://generativelanguage.googleapis.com")
+        assert "veo-3.0-generate-preview:predictLongRunning" in url
+
+    def test_transform_video_create_request(self):
+        """Check the create-request body, files and URL when no optional parameters are given."""
+        prompt = "A cat playing with a ball of yarn"
+        api_base = "https://generativelanguage.googleapis.com/v1beta/models/veo-3.0-generate-preview:predictLongRunning"
+        
+        data, files, url = self.config.transform_video_create_request(
+            model="veo-3.0-generate-preview",
+            prompt=prompt,
+            api_base=api_base,
+            video_create_optional_request_params={},
+            litellm_params=GenericLiteLLMParams(),
+            headers={}
+        )
+        
+        # Check Veo format: the prompt is wrapped in a single-element "instances" list
+        assert "instances" in data
+        assert len(data["instances"]) == 1
+        assert data["instances"][0]["prompt"] == prompt
+        
+        # Check no files are uploaded
+        assert files == []
+        
+        # URL should be returned as-is for Gemini
+        assert url == api_base
+    
+    def test_transform_video_create_request_with_params(self):
+        """Optional Veo settings must appear in a top-level "parameters" object, apart from the prompt instance."""
+        prompt = "A cat playing with a ball of yarn"
+        api_base = "https://generativelanguage.googleapis.com/v1beta/models/veo-3.0-generate-preview:predictLongRunning"
+        veo_params = {
+            "aspectRatio": "16:9",
+            "durationSeconds": 8,
+            "resolution": "1080p",
+        }
+
+        # The files list and URL are also returned, but this test only inspects the body.
+        body, _files, _url = self.config.transform_video_create_request(
+            model="veo-3.0-generate-preview",
+            prompt=prompt,
+            api_base=api_base,
+            video_create_optional_request_params=veo_params,
+            litellm_params=GenericLiteLLMParams(),
+            headers={},
+        )
+
+        # The prompt stays inside the first instance.
+        first_instance = body["instances"][0]
+        assert first_instance["prompt"] == prompt
+
+        # Every optional setting is carried over unchanged under "parameters".
+        assert "parameters" in body
+        parameters = body["parameters"]
+        assert parameters["aspectRatio"] == "16:9"
+        assert parameters["durationSeconds"] == 8
+        assert parameters["resolution"] == "1080p"
+    
+    def test_map_openai_params(self):
+        """Check that size, seconds and input_reference map to aspectRatio, durationSeconds and image."""
+        openai_params = {
+            "size": "1280x720",
+            "seconds": "8",  # given as a string; expected back as the int 8
+            "input_reference": "test_image.jpg"
+        }
+        
+        mapped = self.config.map_openai_params(
+            video_create_optional_params=openai_params,
+            model="veo-3.0-generate-preview",
+            drop_params=False
+        )
+        
+        # Check mappings (prompt is not mapped, it's passed separately)
+        assert mapped["aspectRatio"] == "16:9"  # 1280x720 is landscape
+        assert mapped["durationSeconds"] == 8
+        assert mapped["image"] == "test_image.jpg"
+
+    def test_map_openai_params_default_duration(self):
+        """Test that durationSeconds is omitted when "seconds" is not provided.
+
+        Despite the test name, the assertions below require that the mapping
+        step does NOT inject a default duration.
+        """
+        openai_params = {
+            "size": "1280x720",
+        }
+        
+        mapped = self.config.map_openai_params(
+            video_create_optional_params=openai_params,
+            model="veo-3.0-generate-preview",
+            drop_params=False
+        )
+        
+        assert mapped["aspectRatio"] == "16:9"
+        assert "durationSeconds" not in mapped
+
+    def test_map_openai_params_with_gemini_specific_params(self):
+        """Test that Gemini-specific params pass through unchanged alongside the mapped OpenAI params."""
+        # "size" and "seconds" are OpenAI-style keys; the remaining keys already
+        # use Gemini names and are expected to come back untouched.
+        params_with_gemini_specific = {
+            "size": "1280x720",
+            "seconds": "8",
+            "video": {"bytesBase64Encoded": "abc123", "mimeType": "video/mp4"},
+            "negativePrompt": "no people",
+            "referenceImages": [{"bytesBase64Encoded": "xyz789"}],
+            "personGeneration": "allow"
+        }
+        
+        mapped = self.config.map_openai_params(
+            video_create_optional_params=params_with_gemini_specific,
+            model="veo-3.1-generate-preview",
+            drop_params=False
+        )
+        
+        # Check OpenAI params are mapped
+        assert mapped["aspectRatio"] == "16:9"
+        assert mapped["durationSeconds"] == 8
+        
+        # Check Gemini-specific params are passed through
+        assert "video" in mapped
+        assert mapped["video"]["bytesBase64Encoded"] == "abc123"
+        assert mapped["negativePrompt"] == "no people"
+        assert mapped["referenceImages"] == [{"bytesBase64Encoded": "xyz789"}]
+        assert mapped["personGeneration"] == "allow"
+
+    def test_map_openai_params_with_extra_body(self):
+        """Test that extra_body params are merged and extra_body is removed.
+
+        This goes through VideoGenerationRequestUtils rather than calling
+        map_openai_params on the config directly.
+        """
+        from litellm.videos.utils import VideoGenerationRequestUtils
+        
+        params_with_extra_body = {
+            "seconds": "4",
+            "extra_body": {
+                "negativePrompt": "no people",
+                "personGeneration": "allow",
+                "resolution": "1080p"
+            }
+        }
+        
+        mapped = VideoGenerationRequestUtils.get_optional_params_video_generation(
+            model="veo-3.0-generate-preview",
+            video_generation_provider_config=self.config,
+            video_generation_optional_params=params_with_extra_body
+        )
+        
+        # Check OpenAI params are mapped
+        assert mapped["durationSeconds"] == 4
+        
+        # Check extra_body params are merged into the top level
+        assert mapped["negativePrompt"] == "no people"
+        assert mapped["personGeneration"] == "allow"
+        assert mapped["resolution"] == "1080p"
+        
+        # Check extra_body itself is removed
+        assert "extra_body" not in mapped
+    
+    def test_convert_size_to_aspect_ratio(self):
+        """Check the aspect ratio produced for landscape, portrait, invalid and empty size strings."""
+        expectations = [
+            ("1280x720", "16:9"),   # landscape
+            ("1920x1080", "16:9"),  # landscape
+            ("720x1280", "9:16"),   # portrait
+            ("1080x1920", "9:16"),  # portrait
+            ("invalid", "16:9"),    # unparseable input defaults to 16:9
+        ]
+        for size, aspect_ratio in expectations:
+            assert self.config._convert_size_to_aspect_ratio(size) == aspect_ratio
+
+        # An empty string means no size was specified, so None is returned.
+        assert self.config._convert_size_to_aspect_ratio("") is None
+
+    def test_transform_video_create_response(self):
+        """Check that a create response carrying an operation name becomes a processing VideoObject."""
+        # Mock response: only the operation name and a creation timestamp are supplied
+        mock_response = Mock(spec=httpx.Response)
+        mock_response.json.return_value = {
+            "name": "operations/generate_1234567890",
+            "metadata": {
+                "createTime": "2024-11-04T10:00:00.123456Z"
+            }
+        }
+        
+        result = self.config.transform_video_create_response(
+            model="veo-3.0-generate-preview",
+            raw_response=mock_response,
+            logging_obj=self.mock_logging_obj,
+            custom_llm_provider="gemini"
+        )
+        
+        assert isinstance(result, VideoObject)
+        # ID is base64 encoded with provider info, so only its "video_" prefix is checked
+        assert result.id.startswith("video_")
+        assert result.status == "processing"
+        assert result.object == "video"
+
+
+    def test_transform_video_create_response_with_cost_tracking(self):
+        """Test that the requested duration is copied into usage["duration_seconds"] for cost tracking."""
+        # Mock response: carries no duration, so the value must come from request_data
+        mock_response = Mock(spec=httpx.Response)
+        mock_response.json.return_value = {
+            "name": "operations/generate_1234567890",
+        }
+        
+        # Request data with durationSeconds in parameters
+        request_data = {
+            "instances": [{"prompt": "A test video"}],
+            "parameters": {
+                "durationSeconds": 5,
+                "aspectRatio": "16:9"
+            }
+        }
+        
+        result = self.config.transform_video_create_response(
+            model="gemini/veo-3.0-generate-preview",
+            raw_response=mock_response,
+            logging_obj=self.mock_logging_obj,
+            custom_llm_provider="gemini",
+            request_data=request_data
+        )
+        
+        assert isinstance(result, VideoObject)
+        assert result.usage is not None, "Usage should be set"
+        assert "duration_seconds" in result.usage, "duration_seconds should be in usage"
+        assert result.usage["duration_seconds"] == 5.0, f"Expected 5.0, got {result.usage['duration_seconds']}"
+
+    def test_transform_video_create_response_cost_tracking_with_different_durations(self):
+        """The duration recorded in usage must follow whatever durationSeconds the request carried."""
+        # One mocked operation response is reused for every case.
+        raw = Mock(spec=httpx.Response)
+        raw.json.return_value = {
+            "name": "operations/generate_1234567890",
+        }
+
+        # (model, durationSeconds sent in the request, duration expected in usage)
+        cases = (
+            ("gemini/veo-3.1-generate-preview", 8, 8.0),
+            ("gemini/veo-3.1-fast-generate-preview", 4, 4.0),
+        )
+        for model_name, requested_seconds, expected_seconds in cases:
+            video = self.config.transform_video_create_response(
+                model=model_name,
+                raw_response=raw,
+                logging_obj=self.mock_logging_obj,
+                custom_llm_provider="gemini",
+                request_data={
+                    "instances": [{"prompt": "Test"}],
+                    "parameters": {"durationSeconds": requested_seconds},
+                },
+            )
+
+            assert video.usage["duration_seconds"] == expected_seconds
+
+    def test_transform_video_create_response_cost_tracking_no_duration(self):
+        """Test that usage["duration_seconds"] defaults to 8.0 when the request has no durationSeconds."""
+        # Mock response
+        mock_response = Mock(spec=httpx.Response)
+        mock_response.json.return_value = {
+            "name": "operations/generate_1234567890",
+        }
+        
+        # Request data without durationSeconds (should default to 8 seconds for Google Veo)
+        request_data = {
+            "instances": [{"prompt": "A test video"}],
+            "parameters": {
+                "aspectRatio": "16:9"
+            }
+        }
+        
+        result = self.config.transform_video_create_response(
+            model="gemini/veo-3.0-generate-preview",
+            raw_response=mock_response,
+            logging_obj=self.mock_logging_obj,
+            custom_llm_provider="gemini",
+            request_data=request_data
+        )
+        
+        assert isinstance(result, VideoObject)
+        # When no duration is provided, it defaults to 8 seconds (Google Veo default)
+        assert result.usage is not None
+        assert "duration_seconds" in result.usage
+        assert result.usage["duration_seconds"] == 8.0, "Should default to 8 seconds when not provided (Google Veo default)"
+
+    def test_transform_video_status_retrieve_request(self):
+        """Check that the status URL contains the operation name and "v1beta", with empty params."""
+        # NOTE(review): this looks like a "provider::operation::model" identifier in
+        # already-decoded form - confirm against the video ID encoding helper.
+        video_id = "gemini::operations/generate_1234567890::veo-3.0"
+        
+        url, params = self.config.transform_video_status_retrieve_request(
+            video_id=video_id,
+            api_base="https://generativelanguage.googleapis.com",
+            litellm_params=GenericLiteLLMParams(),
+            headers={}
+        )
+        
+        assert "operations/generate_1234567890" in url
+        assert "v1beta" in url
+        assert params == {}
+
+    def test_transform_video_status_retrieve_response_processing(self):
+        """Check that an operation with "done": False maps to status "processing"."""
+        mock_response = Mock(spec=httpx.Response)
+        mock_response.json.return_value = {
+            "name": "operations/generate_1234567890",
+            "done": False,
+            "metadata": {
+                "createTime": "2024-11-04T10:00:00.123456Z"
+            }
+        }
+        
+        result = self.config.transform_video_status_retrieve_response(
+            raw_response=mock_response,
+            logging_obj=self.mock_logging_obj,
+            custom_llm_provider="gemini"
+        )
+        
+        assert isinstance(result, VideoObject)
+        assert result.status == "processing"
+
+    def test_transform_video_status_retrieve_response_completed(self):
+        """Check that an operation with "done": True and a generated sample maps to status "completed"."""
+        mock_response = Mock(spec=httpx.Response)
+        # Finished operation: the video URI is nested under
+        # response.generateVideoResponse.generatedSamples[0].video.uri
+        mock_response.json.return_value = {
+            "name": "operations/generate_1234567890",
+            "done": True,
+            "metadata": {
+                "createTime": "2024-11-04T10:00:00.123456Z"
+            },
+            "response": {
+                "generateVideoResponse": {
+                    "generatedSamples": [
+                        {
+                            "video": {
+                                "uri": "files/abc123xyz"
+                            }
+                        }
+                    ]
+                }
+            }
+        }
+        
+        result = self.config.transform_video_status_retrieve_response(
+            raw_response=mock_response,
+            logging_obj=self.mock_logging_obj,
+            custom_llm_provider="gemini"
+        )
+        
+        assert isinstance(result, VideoObject)
+        assert result.status == "completed"
+
+    @patch('litellm.module_level_client')
+    def test_transform_video_content_request(self, mock_client):
+        """Check that the download URL contains the file URI taken from a completed operation.
+
+        litellm.module_level_client is replaced by a mock whose get() returns a
+        finished operation, so no network call is made.
+        NOTE(review): presumably the config fetches the operation status through
+        this client to find the file URI - confirm in the transformation module.
+        """
+        video_id = "gemini::operations/generate_1234567890::veo-3.0"
+        
+        # Mock the status response
+        mock_status_response = Mock(spec=httpx.Response)
+        mock_status_response.json.return_value = {
+            "name": "operations/generate_1234567890",
+            "done": True,
+            "response": {
+                "generateVideoResponse": {
+                    "generatedSamples": [
+                        {
+                            "video": {
+                                "uri": "files/abc123xyz"
+                            }
+                        }
+                    ]
+                }
+            }
+        }
+        # raise_for_status is stubbed so the mocked response is treated as successful
+        mock_status_response.raise_for_status = Mock()
+        mock_client.get.return_value = mock_status_response
+        
+        url, params = self.config.transform_video_content_request(
+            video_id=video_id,
+            api_base="https://generativelanguage.googleapis.com",
+            litellm_params=GenericLiteLLMParams(),
+            headers={}
+        )
+        
+        # Should return download URL (may or may not include :download suffix)
+        assert "files/abc123xyz" in url
+        # Params are empty for Gemini file URIs
+        assert params == {}
+
+    def test_transform_video_content_response_bytes(self):
+        """Check that the content response is returned as the raw bytes of the HTTP body."""
+        mock_response = Mock(spec=httpx.Response)
+        mock_response.headers = httpx.Headers({
+            "content-type": "video/mp4"
+        })
+        mock_response.content = b"fake_video_data"
+        
+        result = self.config.transform_video_content_response(
+            raw_response=mock_response,
+            logging_obj=self.mock_logging_obj
+        )
+        
+        # The payload must come back unchanged
+        assert result == b"fake_video_data"
+
+    def test_video_remix_not_supported(self):
+        """Building a remix request must fail with NotImplementedError."""
+        remix_kwargs = {
+            "video_id": "test_id",
+            "prompt": "test prompt",
+            "api_base": "https://test.com",
+            "litellm_params": GenericLiteLLMParams(),
+            "headers": {},
+        }
+
+        with pytest.raises(NotImplementedError, match="Video remix is not supported"):
+            self.config.transform_video_remix_request(**remix_kwargs)
+
+    def test_video_list_not_supported(self):
+        """Building a list request must fail with NotImplementedError."""
+        list_kwargs = {
+            "api_base": "https://test.com",
+            "litellm_params": GenericLiteLLMParams(),
+            "headers": {},
+        }
+
+        with pytest.raises(NotImplementedError, match="Video list is not supported"):
+            self.config.transform_video_list_request(**list_kwargs)
+
+    def test_video_delete_not_supported(self):
+        """Building a delete request must fail with NotImplementedError."""
+        delete_kwargs = {
+            "video_id": "test_id",
+            "api_base": "https://test.com",
+            "litellm_params": GenericLiteLLMParams(),
+            "headers": {},
+        }
+
+        with pytest.raises(NotImplementedError, match="Video delete is not supported"):
+            self.config.transform_video_delete_request(**delete_kwargs)
+
+
+class TestGeminiVideoIntegration:
+    """Integration tests for Gemini video generation workflow."""
+
+    def test_full_workflow_mock(self):
+        """Walk through create request, create response and status response using mocked HTTP responses.
+
+        No network call is made: each step feeds a hand-built payload into the
+        matching transform method and checks the result.
+        """
+        config = GeminiVideoConfig()
+        mock_logging_obj = Mock()
+        
+        # Step 1: Create request with parameters
+        prompt = "A beautiful sunset over mountains"
+        api_base = "https://generativelanguage.googleapis.com/v1beta/models/veo-3.0-generate-preview:predictLongRunning"
+        data, files, url = config.transform_video_create_request(
+            model="veo-3.0-generate-preview",
+            prompt=prompt,
+            api_base=api_base,
+            video_create_optional_request_params={
+                "aspectRatio": "16:9",
+                "durationSeconds": 8
+            },
+            litellm_params=GenericLiteLLMParams(),
+            headers={}
+        )
+        
+        # Verify instances and parameters structure
+        assert data["instances"][0]["prompt"] == prompt
+        assert data["parameters"]["aspectRatio"] == "16:9"
+        assert data["parameters"]["durationSeconds"] == 8
+        
+        # Step 2: Parse create response (operation accepted, not finished yet)
+        mock_create_response = Mock(spec=httpx.Response)
+        mock_create_response.json.return_value = {
+            "name": "operations/generate_abc123",
+            "metadata": {
+                "createTime": "2024-11-04T10:00:00.123456Z"
+            }
+        }
+        
+        video_obj = config.transform_video_create_response(
+            model="veo-3.0-generate-preview",
+            raw_response=mock_create_response,
+            logging_obj=mock_logging_obj,
+            custom_llm_provider="gemini"
+        )
+        
+        assert video_obj.status == "processing"
+        assert video_obj.id.startswith("video_")
+        
+        # Step 3: Check status (completed)
+        mock_status_response = Mock(spec=httpx.Response)
+        mock_status_response.json.return_value = {
+            "name": "operations/generate_abc123",
+            "done": True,
+            "metadata": {
+                "createTime": "2024-11-04T10:00:00.123456Z"
+            },
+            "response": {
+                "generateVideoResponse": {
+                    "generatedSamples": [
+                        {
+                            "video": {
+                                "uri": "files/video123"
+                            }
+                        }
+                    ]
+                }
+            }
+        }
+        
+        status_obj = config.transform_video_status_retrieve_response(
+            raw_response=mock_status_response,
+            logging_obj=mock_logging_obj,
+            custom_llm_provider="gemini"
+        )
+        
+        assert status_obj.status == "completed"
+
+
+class TestGeminiVideoCostTracking:
+    """Test cost tracking for Gemini video generation."""
+    
+    def test_cost_calculation_with_duration(self):
+        """Cost must equal the per-second price times the duration for each Veo model."""
+        # (model, duration in seconds, price per second in USD)
+        pricing_cases = [
+            ("gemini/veo-2.0-generate-001", 5.0, 0.35),            # VEO 2.0 -> $1.75
+            ("gemini/veo-3.0-generate-preview", 8.0, 0.75),        # VEO 3.0 -> $6.00
+            ("gemini/veo-3.1-generate-preview", 10.0, 0.40),       # VEO 3.1 Standard -> $4.00
+            ("gemini/veo-3.1-fast-generate-preview", 6.0, 0.15),   # VEO 3.1 Fast -> $0.90
+        ]
+
+        for model_name, seconds, price_per_second in pricing_cases:
+            actual = video_generation_cost(
+                model=model_name,
+                duration_seconds=seconds,
+                custom_llm_provider="gemini",
+            )
+            expected = price_per_second * seconds
+            # Compare with a tolerance because the values are floats.
+            assert abs(actual - expected) < 0.001, f"Expected ${expected}, got ${actual}"
+    
+    def test_cost_calculation_end_to_end(self):
+        """Test complete cost tracking flow: request -> response -> cost calculation.
+
+        The duration is read back from the VideoObject's usage instead of being
+        hard-coded, so the test fails if it is lost anywhere in between.
+        """
+        config = GeminiVideoConfig()
+        mock_logging_obj = Mock()
+        
+        # Create request with duration
+        request_data = {
+            "instances": [{"prompt": "A beautiful sunset"}],
+            "parameters": {"durationSeconds": 5}
+        }
+        
+        # Mock response
+        mock_response = Mock(spec=httpx.Response)
+        mock_response.json.return_value = {
+            "name": "operations/generate_test123",
+        }
+        
+        # Transform response
+        video_obj = config.transform_video_create_response(
+            model="gemini/veo-3.0-generate-preview",
+            raw_response=mock_response,
+            logging_obj=mock_logging_obj,
+            custom_llm_provider="gemini",
+            request_data=request_data
+        )
+        
+        # Verify usage has duration
+        assert video_obj.usage is not None
+        assert "duration_seconds" in video_obj.usage
+        duration = video_obj.usage["duration_seconds"]
+        
+        # Calculate cost using the duration from usage
+        cost = video_generation_cost(
+            model="gemini/veo-3.0-generate-preview",
+            duration_seconds=duration,
+            custom_llm_provider="gemini"
+        )
+        
+        # Verify cost calculation (VEO 3.0 is $0.75/second)
+        expected_cost = 0.75 * 5.0  # $3.75
+        assert abs(cost - expected_cost) < 0.001, f"Expected ${expected_cost}, got ${cost}"
+
+
+# Allow running this file directly: hands this file to pytest in verbose mode.
+if __name__ == "__main__":
+    pytest.main([__file__, "-v"])
+
--- a/tests/test_litellm/llms/vertex_ai/videos/__init__.py
+++ b/tests/test_litellm/llms/vertex_ai/videos/__init__.py
@ -0,0 +1,4 @@
+"""
+Tests for Vertex AI video generation.
+"""
+
--- a/tests/test_litellm/llms/vertex_ai/videos/test_vertex_video_transformation.py
+++ b/tests/test_litellm/llms/vertex_ai/videos/test_vertex_video_transformation.py
@ -0,0 +1,550 @@
+"""
+Tests for Vertex AI (Veo) video generation transformation.
+"""
+import json
+import os
+import pytest
+from unittest.mock import Mock, MagicMock, patch
+import httpx
+import base64
+
+from litellm.llms.vertex_ai.videos.transformation import (
+    VertexAIVideoConfig,
+    _convert_image_to_vertex_format,
+)
+from litellm.types.videos.main import VideoObject
+from litellm.types.router import GenericLiteLLMParams
+
+
+class TestVertexAIVideoConfig:
+    """Test VertexAIVideoConfig transformation class."""
+
+    def setup_method(self):
+        """Setup test fixtures."""
+        self.config = VertexAIVideoConfig()
+        self.mock_logging_obj = Mock()
+
+    def test_get_supported_openai_params(self):
+        """Test that correct params are supported."""
+        params = self.config.get_supported_openai_params("veo-002")
+
+        assert "model" in params
+        assert "prompt" in params
+        assert "input_reference" in params
+        assert "seconds" in params
+        assert "size" in params
+
+    @patch.object(VertexAIVideoConfig, 'get_access_token')
+    def test_validate_environment(self, mock_get_access_token):
+        """Test environment validation for Vertex AI."""
+        # Mock the authentication
+        mock_get_access_token.return_value = ("mock-access-token", "test-project")
+        
+        headers = {}
+        litellm_params = {"vertex_project": "test-project"}
+        
+        result = self.config.validate_environment(
+            headers=headers,
+            model="veo-002",
+            api_key=None,
+            litellm_params=litellm_params
+        )
+
+        # Should add Authorization header
+        assert "Authorization" in result
+        assert result["Authorization"] == "Bearer mock-access-token"
+        assert "Content-Type" in result
+
+    def test_get_complete_url(self):
+        """Test URL construction for Vertex AI video generation."""
+        litellm_params = {
+            "vertex_project": "test-project",
+            "vertex_location": "us-central1",
+        }
+
+        url = self.config.get_complete_url(
+            model="vertex_ai/veo-002", api_base=None, litellm_params=litellm_params
+        )
+
+        expected = "https://us-central1-aiplatform.googleapis.com/v1/projects/test-project/locations/us-central1/publishers/google/models/veo-002"
+        assert url == expected
+        # Should NOT include endpoint - that's added by transform methods
+        assert not url.endswith(":predictLongRunning")
+
+    def test_get_complete_url_with_custom_api_base(self):
+        """Test URL construction with custom API base."""
+        litellm_params = {
+            "vertex_project": "test-project",
+            "vertex_location": "us-west1",
+        }
+
+        url = self.config.get_complete_url(
+            model="veo-002",
+            api_base="https://custom-endpoint.example.com",
+            litellm_params=litellm_params,
+        )
+
+        assert url.startswith("https://custom-endpoint.example.com")
+        assert "test-project" in url
+        assert "us-west1" in url
+        assert "veo-002" in url
+        # Should NOT include endpoint
+        assert not url.endswith(":predictLongRunning")
+
+    def test_get_complete_url_missing_project(self):
+        """Test get_complete_url when vertex_project is absent from litellm_params.
+
+        Either outcome is accepted: a non-None URL is returned, or a ValueError
+        whose message contains "vertex_project is required" is raised.
+        NOTE(review): because both branches pass, this test cannot fail on a
+        missing-project regression; consider pinning the environment.
+        """
+        litellm_params = {}
+
+        # Note: The method might not raise if vertex_project can be fetched from env
+        # This test verifies the behavior when completely missing
+        try:
+            url = self.config.get_complete_url(
+                model="veo-002", api_base=None, litellm_params=litellm_params
+            )
+            # If no error is raised, vertex_project was obtained from environment
+            # In that case, just verify a URL was returned
+            assert url is not None
+        except ValueError as e:
+            # Expected behavior when vertex_project is truly missing
+            assert "vertex_project is required" in str(e)
+
+    def test_get_complete_url_default_location(self):
+        """Test URL construction with default location."""
+        litellm_params = {"vertex_project": "test-project"}
+
+        url = self.config.get_complete_url(
+            model="veo-002", api_base=None, litellm_params=litellm_params
+        )
+
+        # Should default to us-central1
+        assert "us-central1" in url
+        # Should NOT include endpoint
+        assert not url.endswith(":predictLongRunning")
+
+    def test_transform_video_create_request(self):
+        """Test transformation of video creation request."""
+        prompt = "A cat playing with a ball of yarn"
+        api_base = "https://us-central1-aiplatform.googleapis.com/v1/projects/test-project/locations/us-central1/publishers/google/models/veo-002"
+
+        data, files, url = self.config.transform_video_create_request(
+            model="veo-002",
+            prompt=prompt,
+            api_base=api_base,
+            video_create_optional_request_params={},
+            litellm_params=GenericLiteLLMParams(),
+            headers={},
+        )
+
+        # Check Vertex AI format
+        assert "instances" in data
+        assert len(data["instances"]) == 1
+        assert data["instances"][0]["prompt"] == prompt
+
+        # Parameters should not be present when empty
+        assert "parameters" not in data or data["parameters"] == {}
+
+        # Check URL has :predictLongRunning appended
+        assert url.endswith(":predictLongRunning")
+        assert api_base in url
+
+        # Check no files are uploaded
+        assert files == []
+
+    def test_transform_video_create_request_with_parameters(self):
+        """Test video creation request with aspect ratio and duration."""
+        prompt = "A dog running in a park"
+        api_base = "https://us-central1-aiplatform.googleapis.com/v1/projects/test-project/locations/us-central1/publishers/google/models/veo-002"
+
+        data, files, url = self.config.transform_video_create_request(
+            model="veo-002",
+            prompt=prompt,
+            api_base=api_base,
+            video_create_optional_request_params={
+                "aspectRatio": "16:9",
+                "durationSeconds": 8,
+            },
+            litellm_params=GenericLiteLLMParams(),
+            headers={},
+        )
+
+        assert data["instances"][0]["prompt"] == prompt
+        assert data["parameters"]["aspectRatio"] == "16:9"
+        assert data["parameters"]["durationSeconds"] == 8
+        assert url.endswith(":predictLongRunning")
+
+    def test_transform_video_create_request_with_image(self):
+        """Test video creation request with image input."""
+        prompt = "Extend this image with animation"
+        api_base = "https://us-central1-aiplatform.googleapis.com/v1/projects/test-project/locations/us-central1/publishers/google/models/veo-002"
+
+        # Create a mock image file
+        mock_image = Mock()
+        mock_image.read.return_value = b"fake_image_data"
+        mock_image.seek = Mock()
+
+        with patch(
+            "litellm.llms.vertex_ai.videos.transformation.ImageEditRequestUtils.get_image_content_type",
+            return_value="image/jpeg",
+        ):
+            data, files, url = self.config.transform_video_create_request(
+                model="veo-002",
+                prompt=prompt,
+                api_base=api_base,
+                video_create_optional_request_params={"image": mock_image},
+                litellm_params=GenericLiteLLMParams(),
+                headers={},
+            )
+
+        # Check image was converted to base64
+        assert "image" in data["instances"][0]
+        assert "bytesBase64Encoded" in data["instances"][0]["image"]
+        assert "mimeType" in data["instances"][0]["image"]
+        assert data["instances"][0]["image"]["mimeType"] == "image/jpeg"
+        assert url.endswith(":predictLongRunning")
+
+    def test_map_openai_params(self):
+        """Test parameter mapping from OpenAI to Vertex AI format."""
+        openai_params = {"seconds": "8", "size": "1280x720"}
+
+        mapped = self.config.map_openai_params(
+            video_create_optional_params=openai_params,
+            model="veo-002",
+            drop_params=False,
+        )
+
+        assert mapped["durationSeconds"] == 8
+        assert mapped["aspectRatio"] == "16:9"
+
+    def test_map_openai_params_default_duration(self):
+        """Test that durationSeconds is omitted when not provided."""
+        openai_params = {"size": "1280x720"}
+
+        mapped = self.config.map_openai_params(
+            video_create_optional_params=openai_params,
+            model="veo-002",
+            drop_params=False,
+        )
+
+        assert mapped["aspectRatio"] == "16:9"
+        assert "durationSeconds" not in mapped
+
+    def test_map_openai_params_size_conversions(self):
+        """Test size to aspect ratio conversions."""
+        test_cases = [
+            ("1280x720", "16:9"),
+            ("1920x1080", "16:9"),
+            ("720x1280", "9:16"),
+            ("1080x1920", "9:16"),
+            ("unknown", "16:9"),  # Default
+        ]
+
+        for size, expected_ratio in test_cases:
+            mapped = self.config.map_openai_params(
+                video_create_optional_params={"size": size},
+                model="veo-002",
+                drop_params=False,
+            )
+            assert mapped["aspectRatio"] == expected_ratio
+
+    def test_transform_video_create_response(self):
+        """Test transformation of video creation response."""
+        # Mock response with operation name
+        mock_response = Mock(spec=httpx.Response)
+        mock_response.json.return_value = {
+            "name": "projects/test-project/locations/us-central1/publishers/google/models/veo-002/operations/12345",
+            "metadata": {"createTime": "2024-01-15T10:30:00.000Z"},
+        }
+
+        video_obj = self.config.transform_video_create_response(
+            model="vertex_ai/veo-002",
+            raw_response=mock_response,
+            logging_obj=self.mock_logging_obj,
+            custom_llm_provider="vertex_ai",
+        )
+
+        assert isinstance(video_obj, VideoObject)
+        assert video_obj.status == "processing"
+        assert video_obj.object == "video"
+        # Video ID is encoded with provider info, so just check it's not empty
+        assert video_obj.id
+        assert len(video_obj.id) > 0
+
+    def test_transform_video_create_response_missing_operation_name(self):
+        """Test that missing operation name raises error."""
+        mock_response = Mock(spec=httpx.Response)
+        mock_response.json.return_value = {}
+
+        with pytest.raises(ValueError, match="No operation name in Veo response"):
+            self.config.transform_video_create_response(
+                model="veo-002",
+                raw_response=mock_response,
+                logging_obj=self.mock_logging_obj,
+            )
+
+    def test_transform_video_status_retrieve_request(self):
+        """Test transformation of video status retrieve request."""
+        operation_name = "projects/test-project/locations/us-central1/publishers/google/models/veo-002/operations/12345"
+        
+        # Provide an api_base that would be returned from get_complete_url
+        api_base = "https://us-central1-aiplatform.googleapis.com/v1/projects/test-project/locations/us-central1/publishers/google/models/veo-002"
+
+        url, params = self.config.transform_video_status_retrieve_request(
+            video_id=operation_name,
+            api_base=api_base,
+            litellm_params=GenericLiteLLMParams(),
+            headers={},
+        )
+
+        # Check URL contains fetchPredictOperation endpoint
+        assert "fetchPredictOperation" in url
+        assert "test-project" in url
+        assert "us-central1" in url
+        assert "veo-002" in url
+
+        # Check params contain operation name
+        assert params["operationName"] == operation_name
+
+    def test_transform_video_status_retrieve_request_invalid_format(self):
+        """Test that invalid operation name format raises error."""
+        invalid_operation_name = "invalid/operation/name"
+
+        with pytest.raises(ValueError, match="Invalid operation name format"):
+            self.config.transform_video_status_retrieve_request(
+                video_id=invalid_operation_name,
+                api_base=None,
+                litellm_params=GenericLiteLLMParams(),
+                headers={},
+            )
+
+    def test_transform_video_status_retrieve_response_processing(self):
+        """Test transformation of status response while processing."""
+        mock_response = Mock(spec=httpx.Response)
+        mock_response.json.return_value = {
+            "name": "projects/test-project/locations/us-central1/publishers/google/models/veo-002/operations/12345",
+            "done": False,
+            "metadata": {"createTime": "2024-01-15T10:30:00.000Z"},
+        }
+
+        video_obj = self.config.transform_video_status_retrieve_response(
+            raw_response=mock_response,
+            logging_obj=self.mock_logging_obj,
+            custom_llm_provider="vertex_ai",
+        )
+
+        assert isinstance(video_obj, VideoObject)
+        assert video_obj.status == "processing"
+
+    def test_transform_video_status_retrieve_response_completed(self):
+        """Test transformation of status response when completed."""
+        mock_response = Mock(spec=httpx.Response)
+        mock_response.json.return_value = {
+            "name": "projects/test-project/locations/us-central1/publishers/google/models/veo-002/operations/12345",
+            "done": True,
+            "metadata": {"createTime": "2024-01-15T10:30:00.000Z"},
+            "response": {
+                "@type": "type.googleapis.com/cloud.ai.large_models.vision.GenerateVideoResponse",
+                "raiMediaFilteredCount": 0,
+                "videos": [
+                    {
+                        "bytesBase64Encoded": base64.b64encode(
+                            b"fake_video_data"
+                        ).decode(),
+                        "mimeType": "video/mp4",
+                    }
+                ],
+            },
+        }
+
+        video_obj = self.config.transform_video_status_retrieve_response(
+            raw_response=mock_response,
+            logging_obj=self.mock_logging_obj,
+            custom_llm_provider="vertex_ai",
+        )
+
+        assert isinstance(video_obj, VideoObject)
+        assert video_obj.status == "completed"
+
+    def test_transform_video_status_retrieve_response_error(self):
+        """Test transformation of status response when an error is returned."""
+        mock_response = Mock(spec=httpx.Response)
+        mock_response.json.return_value = {
+            "name": "projects/test-project/locations/us-central1/publishers/google/models/veo-002/operations/12345",
+            "done": True,
+            "metadata": {"createTime": "2024-01-15T10:30:00.000Z"},
+            "error": {
+                "code": 3,
+                "message": "Unsupported output video duration 3 seconds, supported durations are [8,5,6,7] for feature text_to_video.",
+            },
+        }
+
+        video_obj = self.config.transform_video_status_retrieve_response(
+            raw_response=mock_response,
+            logging_obj=self.mock_logging_obj,
+            custom_llm_provider="vertex_ai",
+        )
+
+        assert isinstance(video_obj, VideoObject)
+        assert video_obj.status == "failed"
+        assert video_obj.error == mock_response.json.return_value["error"]
+
+    def test_transform_video_content_request(self):
+        """Test transformation of video content request."""
+        operation_name = "projects/test-project/locations/us-central1/publishers/google/models/veo-002/operations/12345"
+        api_base = "https://us-central1-aiplatform.googleapis.com/v1/projects/test-project/locations/us-central1/publishers/google/models/veo-002"
+
+        url, params = self.config.transform_video_content_request(
+            video_id=operation_name,
+            api_base=api_base,
+            litellm_params=GenericLiteLLMParams(),
+            headers={},
+        )
+
+        # Should use same fetchPredictOperation endpoint
+        assert "fetchPredictOperation" in url
+        assert params["operationName"] == operation_name
+
+    def test_transform_video_content_response(self):
+        """Test transformation of video content response."""
+        fake_video_bytes = b"fake_video_data_12345"
+        encoded_video = base64.b64encode(fake_video_bytes).decode()
+
+        mock_response = Mock(spec=httpx.Response)
+        mock_response.json.return_value = {
+            "name": "projects/test-project/locations/us-central1/publishers/google/models/veo-002/operations/12345",
+            "done": True,
+            "response": {
+                "@type": "type.googleapis.com/cloud.ai.large_models.vision.GenerateVideoResponse",
+                "videos": [
+                    {"bytesBase64Encoded": encoded_video, "mimeType": "video/mp4"}
+                ],
+            },
+        }
+
+        video_bytes = self.config.transform_video_content_response(
+            raw_response=mock_response, logging_obj=self.mock_logging_obj
+        )
+
+        assert isinstance(video_bytes, bytes)
+        assert video_bytes == fake_video_bytes
+
+    def test_transform_video_content_response_not_complete(self):
+        """Test that incomplete video raises error."""
+        mock_response = Mock(spec=httpx.Response)
+        mock_response.json.return_value = {
+            "name": "projects/test-project/locations/us-central1/publishers/google/models/veo-002/operations/12345",
+            "done": False,
+        }
+
+        with pytest.raises(
+            ValueError, match="Video generation is not complete yet"
+        ):
+            self.config.transform_video_content_response(
+                raw_response=mock_response, logging_obj=self.mock_logging_obj
+            )
+
+    def test_transform_video_content_response_missing_video_data(self):
+        """Test that missing video data raises error."""
+        mock_response = Mock(spec=httpx.Response)
+        mock_response.json.return_value = {
+            "name": "projects/test-project/locations/us-central1/publishers/google/models/veo-002/operations/12345",
+            "done": True,
+            "response": {"videos": []},
+        }
+
+        with pytest.raises(ValueError, match="No video data found"):
+            self.config.transform_video_content_response(
+                raw_response=mock_response, logging_obj=self.mock_logging_obj
+            )
+
+    def test_transform_video_remix_request_not_supported(self):
+        """Test that video remix raises NotImplementedError."""
+        with pytest.raises(
+            NotImplementedError, match="Video remix is not supported"
+        ):
+            self.config.transform_video_remix_request(
+                video_id="test-video-id",
+                prompt="new prompt",
+                api_base="https://example.com",
+                litellm_params=GenericLiteLLMParams(),
+                headers={},
+            )
+
+    def test_transform_video_list_request_not_supported(self):
+        """Test that video list raises NotImplementedError."""
+        with pytest.raises(NotImplementedError, match="Video list is not supported"):
+            self.config.transform_video_list_request(
+                api_base="https://example.com",
+                litellm_params=GenericLiteLLMParams(),
+                headers={},
+            )
+
+    def test_transform_video_delete_request_not_supported(self):
+        """Test that video delete raises NotImplementedError."""
+        with pytest.raises(
+            NotImplementedError, match="Video delete is not supported"
+        ):
+            self.config.transform_video_delete_request(
+                video_id="test-video-id",
+                api_base="https://example.com",
+                litellm_params=GenericLiteLLMParams(),
+                headers={},
+            )
+
+    def test_get_error_class(self):
+        """Test error class generation."""
+        error = self.config.get_error_class(
+            error_message="Test error", status_code=500, headers={}
+        )
+
+        # Should return VertexAIError
+        from litellm.llms.vertex_ai.common_utils import VertexAIError
+
+        assert isinstance(error, VertexAIError)
+        assert error.status_code == 500
+        assert "Test error" in str(error)
+
+
+class TestConvertImageToVertexFormat:
+    """Test the _convert_image_to_vertex_format helper function."""
+
+    def test_convert_image_to_vertex_format(self):
+        """Test image conversion to Vertex AI format."""
+        fake_image_data = b"fake_jpeg_image_data"
+        mock_image = Mock()
+        mock_image.read.return_value = fake_image_data
+        mock_image.seek = Mock()
+
+        with patch(
+            "litellm.llms.vertex_ai.videos.transformation.ImageEditRequestUtils.get_image_content_type",
+            return_value="image/jpeg",
+        ):
+            result = _convert_image_to_vertex_format(mock_image)
+
+        assert "bytesBase64Encoded" in result
+        assert "mimeType" in result
+        assert result["mimeType"] == "image/jpeg"
+
+        # Verify base64 encoding
+        decoded = base64.b64decode(result["bytesBase64Encoded"])
+        assert decoded == fake_image_data
+
+    def test_convert_image_to_vertex_format_with_seek(self):
+        """Test image conversion with seek support."""
+        fake_image_data = b"fake_png_image_data"
+        mock_image = Mock()
+        mock_image.read.return_value = fake_image_data
+        mock_image.seek = Mock()
+
+        with patch(
+            "litellm.llms.vertex_ai.videos.transformation.ImageEditRequestUtils.get_image_content_type",
+            return_value="image/png",
+        ):
+            result = _convert_image_to_vertex_format(mock_image)
+
+        # Verify seek was called
+        mock_image.seek.assert_called_once_with(0)
+
+        assert result["mimeType"] == "image/png"
+        decoded = base64.b64decode(result["bytesBase64Encoded"])
+        assert decoded == fake_image_data
+
--- a/tests/test_litellm/test_video_generation.py
+++ b/tests/test_litellm/test_video_generation.py
@ -150,9 +150,10 @@ class TestVideoGeneration:
        config = OpenAIVideoConfig()
        
        # Test request transformation
-        data, files = config.transform_video_create_request(
+        data, files, returned_api_base = config.transform_video_create_request(
            model="sora-2",
            prompt="Test video prompt",
+            api_base="https://api.openai.com/v1/videos",
            video_create_optional_request_params={
                "seconds": "8",
                "size": "720x1280"
@ -166,6 +167,7 @@ class TestVideoGeneration:
        assert data["seconds"] == "8"
        assert data["size"] == "720x1280"
        assert files == []
+        assert returned_api_base == "https://api.openai.com/v1/videos"

    def test_video_generation_response_transformation(self):
        """Test video generation response transformation."""
@ -228,9 +230,10 @@ class TestVideoGeneration:
        mock_file = MagicMock()
        mock_file.read.return_value = b"fake_image_data"
        
-        data, files = config.transform_video_create_request(
+        data, files, returned_api_base = config.transform_video_create_request(
            model="sora-2",
            prompt="Test video with image",
+            api_base="https://api.openai.com/v1/videos",
            video_create_optional_request_params={
                "input_reference": mock_file,
                "seconds": "8",
@ -291,42 +294,29 @@ class TestVideoGeneration:
        assert mapped_params["user"] == "test-user"

    def test_video_generation_unsupported_parameters(self):
-        """Test video generation with unsupported parameters."""
+        """Test video generation with provider-specific parameters via extra_body."""
        from litellm.videos.utils import VideoGenerationRequestUtils
        
-        # Test unsupported parameter detection
-        with pytest.raises(litellm.UnsupportedParamsError):
-            VideoGenerationRequestUtils.get_optional_params_video_generation(
-                model="sora-2",
-                video_generation_provider_config=OpenAIVideoConfig(),
-                video_generation_optional_params={
-                    "unsupported_param": "value"
+        # Test that provider-specific parameters can be passed via extra_body
+        # This allows support for Vertex AI and Gemini specific parameters
+        result = VideoGenerationRequestUtils.get_optional_params_video_generation(
+            model="sora-2",
+            video_generation_provider_config=OpenAIVideoConfig(),
+            video_generation_optional_params={
+                "seconds": "8",
+                "extra_body": {
+                    "vertex_ai_param": "value",
+                    "gemini_param": "value2"
                }
-            )
-
-    def test_video_generation_request_utils(self):
-        """Test video generation request utilities."""
-        from litellm.videos.utils import VideoGenerationRequestUtils
+            }
+        )
        
-        # Test parameter filtering
-        params = {
-            "prompt": "Test video",
-            "model": "sora-2",
-            "seconds": "8",
-            "size": "720x1280",
-            "user": "test-user",
-            "invalid_param": "should_be_filtered"
-        }
-        
-        filtered_params = VideoGenerationRequestUtils.get_requested_video_generation_optional_param(params)
-        
-        # Should only contain valid parameters
-        assert "prompt" not in filtered_params  # prompt is required, not optional
-        assert "seconds" in filtered_params
-        assert "size" in filtered_params
-        assert "user" in filtered_params
-        assert "invalid_param" not in filtered_params
-        # Note: model is included in the filtered params as it's part of the TypedDict
+        # extra_body params should be merged into the result
+        assert result["seconds"] == "8"
+        assert result["vertex_ai_param"] == "value"
+        assert result["gemini_param"] == "value2"
+        # extra_body itself should be removed from the result
+        assert "extra_body" not in result

    def test_video_generation_types(self):
        """Test video generation type definitions."""
--- a/ui/litellm-dashboard/package-lock.json
+++ b/ui/litellm-dashboard/package-lock.json
@ -23233,4 +23233,4 @@
      }
    }
  }
-}
+}
--- a/ui/litellm-dashboard/src/components/add_model/add_model_modes.tsx
+++ b/ui/litellm-dashboard/src/components/add_model/add_model_modes.tsx
@ -6,6 +6,7 @@ export const TEST_MODES = [
  { value: "audio_speech", label: "Audio Speech - /audio/speech" },
  { value: "audio_transcription", label: "Audio Transcription - /audio/transcriptions" },
  { value: "image_generation", label: "Image Generation - /images/generations" },
+  { value: "video_generation", label: "Video Generation - /videos" },
  { value: "rerank", label: "Rerank - /rerank" },
  { value: "realtime", label: "Realtime - /realtime" },
  { value: "batch", label: "Batch - /batch" },
--- a/ui/litellm-dashboard/src/components/chat_ui/mode_endpoint_mapping.tsx
+++ b/ui/litellm-dashboard/src/components/chat_ui/mode_endpoint_mapping.tsx
@ -5,6 +5,7 @@ export enum ModelMode {
  AUDIO_SPEECH = "audio_speech",
  AUDIO_TRANSCRIPTION = "audio_transcription",
  IMAGE_GENERATION = "image_generation",
+  VIDEO_GENERATION = "video_generation",
  CHAT = "chat",
  RESPONSES = "responses",
  IMAGE_EDITS = "image_edits",
@ -15,6 +16,7 @@ export enum ModelMode {
 // Define an enum for the endpoint types your UI calls
 export enum EndpointType {
  IMAGE = "image",
+  VIDEO = "video",
  CHAT = "chat",
  RESPONSES = "responses",
  IMAGE_EDITS = "image_edits",
@ -28,6 +30,7 @@ export enum EndpointType {
 // Create a mapping between the model mode and the corresponding endpoint type
 export const litellmModeMapping: Record<ModelMode, EndpointType> = {
  [ModelMode.IMAGE_GENERATION]: EndpointType.IMAGE,
+  [ModelMode.VIDEO_GENERATION]: EndpointType.VIDEO,
  [ModelMode.CHAT]: EndpointType.CHAT,
  [ModelMode.RESPONSES]: EndpointType.RESPONSES,
  [ModelMode.IMAGE_EDITS]: EndpointType.IMAGE_EDITS,