From 26cd194d971bcf9237ae0dc6bdfad96191b7b4f1 Mon Sep 17 00:00:00 2001 From: Krrish Dholakia Date: Fri, 12 Dec 2025 11:55:20 -0800 Subject: [PATCH] feat: final improvements for prompt management api --- .../mock_prompt_management_server/README.md | 293 +++++++++ .../mock_prompt_management_server.py | 390 ++++++++++++ .../generic_prompt_management_api.md | 576 ++++++++++++++++++ docs/my-website/docs/providers/gemini.md | 3 + .../docs/proxy/prompt_management.md | 55 +- docs/my-website/sidebars.js | 7 + 6 files changed, 1323 insertions(+), 1 deletion(-) create mode 100644 cookbook/mock_prompt_management_server/README.md create mode 100644 cookbook/mock_prompt_management_server/mock_prompt_management_server.py create mode 100644 docs/my-website/docs/adding_provider/generic_prompt_management_api.md diff --git a/cookbook/mock_prompt_management_server/README.md b/cookbook/mock_prompt_management_server/README.md new file mode 100644 index 0000000000..9ec76baacf --- /dev/null +++ b/cookbook/mock_prompt_management_server/README.md @@ -0,0 +1,293 @@ +# Mock Prompt Management Server + +A reference implementation of the [LiteLLM Generic Prompt Management API](https://docs.litellm.ai/docs/adding_provider/generic_prompt_management_api). + +This FastAPI server demonstrates how to build a prompt management API that integrates with LiteLLM without requiring a PR to the LiteLLM repository. + +## Quick Start + +### 1. Install Dependencies + +```bash +pip install fastapi uvicorn pydantic +``` + +### 2. Start the Server + +```bash +python mock_prompt_management_server.py +``` + +The server will start on `http://localhost:8080` + +### 3. 
Test the Endpoint + +```bash +# Get a prompt +curl "http://localhost:8080/beta/litellm_prompt_management?prompt_id=hello-world-prompt" + +# Get a prompt with authentication +curl "http://localhost:8080/beta/litellm_prompt_management?prompt_id=hello-world-prompt" \ + -H "Authorization: Bearer test-token-12345" + +# List all prompts +curl "http://localhost:8080/prompts" + +# Get prompt variables +curl "http://localhost:8080/prompts/hello-world-prompt/variables" +``` + +## Using with LiteLLM + +### Configuration + +Create a `config.yaml` file: + +```yaml +model_list: + - model_name: gpt-3.5-turbo + litellm_params: + model: openai/gpt-3.5-turbo + api_key: os.environ/OPENAI_API_KEY + +prompts: + - prompt_id: "hello-world-prompt" + litellm_params: + prompt_integration: "generic_prompt_management" + api_base: http://localhost:8080 + api_key: test-token-12345 +``` + +### Start LiteLLM Proxy + +```bash +litellm --config config.yaml +``` + +### Make a Request + +```bash +curl http://0.0.0.0:4000/v1/chat/completions \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer sk-1234" \ + -d '{ + "model": "gpt-3.5-turbo", + "prompt_id": "hello-world-prompt", + "prompt_variables": { + "domain": "data science", + "task": "analyzing customer behavior" + }, + "messages": [ + {"role": "user", "content": "Please help me get started"} + ] + }' +``` + +## Available Prompts + +The server includes several example prompts: + +| Prompt ID | Description | Variables | +|-----------|-------------|-----------| +| `hello-world-prompt` | Basic helpful assistant | `domain`, `task` | +| `code-review-prompt` | Code review assistant | `years_experience`, `language`, `code` | +| `customer-support-prompt` | Customer support agent | `company_name`, `customer_message` | +| `data-analysis-prompt` | Data analysis expert | `analysis_type`, `dataset_name`, `data` | +| `creative-writing-prompt` | Creative writing assistant | `genre`, `length`, `topic` | + +## Authentication + +The server supports 
optional Bearer token authentication. Valid tokens for testing: + +- `test-token-12345` +- `dev-token-67890` +- `prod-token-abcdef` + +If no `Authorization` header is provided, requests are allowed (for testing purposes). + +## API Endpoints + +### LiteLLM Spec Endpoints + +#### `GET /beta/litellm_prompt_management` + +Get a prompt by ID (required by LiteLLM). + +**Query Parameters:** +- `prompt_id` (required): The prompt ID +- `project_name` (optional): Project filter +- `slug` (optional): Slug filter +- `version` (optional): Version filter + +**Response:** +```json +{ + "prompt_id": "hello-world-prompt", + "prompt_template": [ + { + "role": "system", + "content": "You are a helpful assistant specialized in {domain}." + }, + { + "role": "user", + "content": "Help me with: {task}" + } + ], + "prompt_template_model": "gpt-4", + "prompt_template_optional_params": { + "temperature": 0.7, + "max_tokens": 500 + } +} +``` + +### Convenience Endpoints (Not in LiteLLM Spec) + +#### `GET /health` + +Health check endpoint. + +#### `GET /prompts` + +List all available prompts. + +#### `GET /prompts/{prompt_id}/variables` + +Get all variables used in a prompt template. + +#### `POST /prompts` + +Create a new prompt (in-memory only, for testing). + +## Example: Full Integration Test + +### 1. Start the Mock Server + +```bash +python mock_prompt_management_server.py +``` + +### 2. Test with Python + +```python +from litellm import completion + +# The completion will: +# 1. Fetch the prompt from your API +# 2. Replace {domain} with "machine learning" +# 3. Replace {task} with "building a recommendation system" +# 4. Merge with your messages +# 5. 
Use the model and params from the prompt + +response = completion( + model="gpt-4", + prompt_id="hello-world-prompt", + prompt_variables={ + "domain": "machine learning", + "task": "building a recommendation system" + }, + messages=[ + {"role": "user", "content": "I have user behavior data from the past year."} + ], + # Configure the generic prompt manager + generic_prompt_config={ + "api_base": "http://localhost:8080", + "api_key": "test-token-12345", + } +) + +print(response.choices[0].message.content) +``` + +## Customization + +### Adding New Prompts + +Edit the `PROMPTS_DB` dictionary in `mock_prompt_management_server.py`: + +```python +PROMPTS_DB = { + "my-custom-prompt": { + "prompt_id": "my-custom-prompt", + "prompt_template": [ + { + "role": "system", + "content": "You are a {role}." + }, + { + "role": "user", + "content": "{user_input}" + } + ], + "prompt_template_model": "gpt-4", + "prompt_template_optional_params": { + "temperature": 0.8, + "max_tokens": 1000 + } + } +} +``` + +### Using a Database + +Replace the `PROMPTS_DB` dictionary with database queries: + +```python +@app.get("/beta/litellm_prompt_management") +async def get_prompt(prompt_id: str): + # Fetch from database + prompt = await db.prompts.find_one({"prompt_id": prompt_id}) + + if not prompt: + raise HTTPException(status_code=404, detail="Prompt not found") + + return PromptResponse(**prompt) +``` + +### Adding Access Control + +Use the custom query parameters for access control: + +```python +@app.get("/beta/litellm_prompt_management") +async def get_prompt( + prompt_id: str, + project_name: Optional[str] = None, + user_id: Optional[str] = None, + authorization: Optional[str] = Header(None) +): + token = verify_api_key(authorization) + + # Check if user has access to this project + if not has_project_access(token, project_name): + raise HTTPException(status_code=403, detail="Access denied") + + # Fetch and return prompt + ... 
+``` + +## Production Considerations + +Before deploying to production: + +1. **Use a real database** instead of in-memory storage +2. **Implement proper authentication** with JWT tokens or API keys +3. **Add rate limiting** to prevent abuse +4. **Use HTTPS** for encrypted communication +5. **Add logging and monitoring** for observability +6. **Implement caching** for frequently accessed prompts +7. **Add versioning** for prompt management +8. **Implement access control** based on teams/users +9. **Add input validation** for all parameters +10. **Use environment variables** for configuration + +## Related Documentation + +- [Generic Prompt Management API Documentation](https://docs.litellm.ai/docs/adding_provider/generic_prompt_management_api) +- [LiteLLM Prompt Management](https://docs.litellm.ai/docs/proxy/prompt_management) +- [Generic Guardrail API](https://docs.litellm.ai/docs/adding_provider/generic_guardrail_api) + +## Questions? + +This is a reference implementation for the LiteLLM Generic Prompt Management API. For questions or issues, please open an issue on the [LiteLLM GitHub repository](https://github.com/BerriAI/litellm). + diff --git a/cookbook/mock_prompt_management_server/mock_prompt_management_server.py b/cookbook/mock_prompt_management_server/mock_prompt_management_server.py new file mode 100644 index 0000000000..8f00e0a5ed --- /dev/null +++ b/cookbook/mock_prompt_management_server/mock_prompt_management_server.py @@ -0,0 +1,390 @@ +#!/usr/bin/env python3 +""" +Mock Prompt Management API Server + +This is a FastAPI server that implements the LiteLLM Generic Prompt Management API +for testing and demonstration purposes. 
+ +Usage: + python mock_prompt_management_server.py + +The server will start on http://localhost:8080 + +Test the endpoint: + curl "http://localhost:8080/beta/litellm_prompt_management?prompt_id=hello-world-prompt" +""" + +import os +import json +from typing import Any, Dict, List, Optional + +from fastapi import FastAPI, HTTPException, Header, Query, status +from fastapi.responses import JSONResponse +from pydantic import BaseModel, Field + +# ============================================================================ +# Response Models +# ============================================================================ + + +class MessageContent(BaseModel): + """A single message in the prompt template""" + + role: str = Field(..., description="Message role (system, user, assistant)") + content: str = Field( + ..., description="Message content with optional {variable} placeholders" + ) + + +class PromptResponse(BaseModel): + """Response format for the prompt management API""" + + prompt_id: str = Field(..., description="The ID of the prompt") + prompt_template: List[MessageContent] = Field( + ..., description="Array of messages in OpenAI format" + ) + prompt_template_model: Optional[str] = Field( + None, description="Optional model to use for this prompt" + ) + prompt_template_optional_params: Optional[Dict[str, Any]] = Field( + None, description="Optional parameters like temperature, max_tokens, etc." 
+ ) + + +# ============================================================================ +# Mock Prompt Database +# ============================================================================ + +PROMPTS_DB = { + "hello-world-prompt": { + "prompt_id": "hello-world-prompt", + "prompt_template": [ + { + "role": "system", + "content": "You are a helpful assistant specialized in {domain}.", + }, + {"role": "user", "content": "Help me with: {task}"}, + ], + "prompt_template_model": "gpt-4", + "prompt_template_optional_params": {"temperature": 0.7, "max_tokens": 500}, + }, + "code-review-prompt": { + "prompt_id": "code-review-prompt", + "prompt_template": [ + { + "role": "system", + "content": "You are an expert code reviewer with {years_experience} years of experience in {language}.", + }, + { + "role": "user", + "content": "Please review the following code for bugs, security issues, and best practices:\n\n{code}", + }, + ], + "prompt_template_model": "gpt-4-turbo", + "prompt_template_optional_params": { + "temperature": 0.3, + "max_tokens": 1500, + }, + }, + "customer-support-prompt": { + "prompt_id": "customer-support-prompt", + "prompt_template": [ + { + "role": "system", + "content": "You are a friendly customer support agent for {company_name}. 
Always be professional, empathetic, and solution-oriented.", + }, + { + "role": "user", + "content": "Customer inquiry: {customer_message}", + }, + ], + "prompt_template_model": "gpt-3.5-turbo", + "prompt_template_optional_params": { + "temperature": 0.8, + "max_tokens": 800, + "top_p": 0.9, + }, + }, + "data-analysis-prompt": { + "prompt_id": "data-analysis-prompt", + "prompt_template": [ + { + "role": "system", + "content": "You are a data scientist expert in {analysis_type} analysis.", + }, + { + "role": "user", + "content": "Analyze the following data and provide insights:\n\nDataset: {dataset_name}\nData: {data}", + }, + ], + "prompt_template_model": "gpt-4", + "prompt_template_optional_params": { + "temperature": 0.5, + "max_tokens": 2000, + }, + }, + "creative-writing-prompt": { + "prompt_id": "creative-writing-prompt", + "prompt_template": [ + { + "role": "system", + "content": "You are a creative writer specializing in {genre} fiction.", + }, + { + "role": "user", + "content": "Write a {length} story about: {topic}", + }, + ], + "prompt_template_model": "gpt-4", + "prompt_template_optional_params": { + "temperature": 0.9, + "max_tokens": 3000, + "top_p": 0.95, + }, + }, +} + +# Valid API tokens for authentication (in production, use a secure token store) +VALID_API_TOKENS = { + "test-token-12345", + "dev-token-67890", + "prod-token-abcdef", +} + +# ============================================================================ +# FastAPI App +# ============================================================================ + +app = FastAPI( + title="Mock Prompt Management API", + description="A mock server implementing the LiteLLM Generic Prompt Management API", + version="1.0.0", +) + + +def verify_api_key(authorization: Optional[str] = Header(None)) -> bool: + """ + Verify the API key from the Authorization header. 
+ + Args: + authorization: Authorization header (Bearer token) + + Returns: + True if valid, raises HTTPException if invalid + """ + if authorization is None: + # Allow requests without authentication for testing + return True + + # Extract token from "Bearer " + if not authorization.startswith("Bearer "): + raise HTTPException( + status_code=status.HTTP_401_UNAUTHORIZED, + detail="Invalid authorization header format. Expected 'Bearer '", + ) + + token = authorization.replace("Bearer ", "").strip() + + if token not in VALID_API_TOKENS: + raise HTTPException( + status_code=status.HTTP_401_UNAUTHORIZED, + detail="Invalid API key", + ) + + return True + + +@app.get("/beta/litellm_prompt_management", response_model=PromptResponse) +async def get_prompt( + prompt_id: str = Query(..., description="The ID of the prompt to fetch"), + project_name: Optional[str] = Query( + None, description="Optional project name filter" + ), + slug: Optional[str] = Query(None, description="Optional slug filter"), + version: Optional[str] = Query(None, description="Optional version filter"), + authorization: Optional[str] = Header(None), +) -> PromptResponse: + """ + Get a prompt by ID with optional filtering. + + This endpoint implements the LiteLLM Generic Prompt Management API specification. 
+ + Args: + prompt_id: The ID of the prompt to fetch + project_name: Optional project name for filtering + slug: Optional slug for filtering + version: Optional version for filtering + authorization: Optional Bearer token for authentication + + Returns: + PromptResponse with the prompt template and configuration + + Raises: + HTTPException: 401 if authentication fails, 404 if prompt not found + """ + # Verify authentication + verify_api_key(authorization) + + # Log the request parameters (useful for debugging) + print(f"Fetching prompt: {prompt_id}") + if project_name: + print(f" Project: {project_name}") + if slug: + print(f" Slug: {slug}") + if version: + print(f" Version: {version}") + + # Check if prompt exists + if prompt_id not in PROMPTS_DB: + raise HTTPException( + status_code=status.HTTP_404_NOT_FOUND, + detail=f"Prompt '{prompt_id}' not found. Available prompts: {list(PROMPTS_DB.keys())}", + ) + + # Get the prompt from the database + prompt_data = PROMPTS_DB[prompt_id] + + # Optional: Apply filtering based on project_name, slug, or version + # In a real implementation, you might use these to filter prompts by access control + # or to fetch specific versions from your database + + return PromptResponse(**prompt_data) + + +@app.get("/health") +async def health_check(): + """Health check endpoint""" + return { + "status": "healthy", + "service": "mock-prompt-management-api", + "version": "1.0.0", + } + + +@app.get("/prompts") +async def list_prompts(authorization: Optional[str] = Header(None)): + """ + List all available prompts. + + This is a convenience endpoint (not part of the LiteLLM spec) for + discovering available prompts. 
+ """ + # Verify authentication + verify_api_key(authorization) + + prompts_list = [ + { + "prompt_id": pid, + "model": p.get("prompt_template_model"), + "has_variables": any( + "{" in msg.get("content", "") for msg in p.get("prompt_template", []) + ), + } + for pid, p in PROMPTS_DB.items() + ] + + return {"prompts": prompts_list, "total": len(prompts_list)} + + +@app.get("/prompts/{prompt_id}/variables") +async def get_prompt_variables( + prompt_id: str, authorization: Optional[str] = Header(None) +): + """ + Get all variables in a prompt template. + + This is a convenience endpoint (not part of the LiteLLM spec) for + discovering what variables a prompt expects. + """ + # Verify authentication + verify_api_key(authorization) + + if prompt_id not in PROMPTS_DB: + raise HTTPException( + status_code=status.HTTP_404_NOT_FOUND, + detail=f"Prompt '{prompt_id}' not found", + ) + + prompt_data = PROMPTS_DB[prompt_id] + variables = set() + + # Extract variables from the prompt template + import re + + for message in prompt_data["prompt_template"]: + content = message.get("content", "") + # Find all {variable} patterns + found_vars = re.findall(r"\{(\w+)\}", content) + variables.update(found_vars) + + return { + "prompt_id": prompt_id, + "variables": sorted(list(variables)), + "example_usage": { + "prompt_id": prompt_id, + "prompt_variables": {var: f"<{var}_value>" for var in variables}, + }, + } + + +@app.post("/prompts") +async def create_prompt( + prompt: PromptResponse, authorization: Optional[str] = Header(None) +): + """ + Create a new prompt (convenience endpoint for testing). + + This is NOT part of the LiteLLM spec - it's just for testing purposes. 
+ """ + # Verify authentication + verify_api_key(authorization) + + if prompt.prompt_id in PROMPTS_DB: + raise HTTPException( + status_code=status.HTTP_409_CONFLICT, + detail=f"Prompt '{prompt.prompt_id}' already exists", + ) + + PROMPTS_DB[prompt.prompt_id] = prompt.dict() + + return { + "status": "created", + "prompt_id": prompt.prompt_id, + "message": "Prompt created successfully (in-memory only)", + } + + +# ============================================================================ +# Main +# ============================================================================ + +if __name__ == "__main__": + import uvicorn + + print("=" * 70) + print("Mock Prompt Management API Server") + print("=" * 70) + print(f"\nStarting server on http://localhost:8080") + print(f"\nAvailable prompts: {len(PROMPTS_DB)}") + for prompt_id in PROMPTS_DB.keys(): + print(f" - {prompt_id}") + print(f"\nValid API tokens: {len(VALID_API_TOKENS)}") + print(" - test-token-12345") + print(" - dev-token-67890") + print(" - prod-token-abcdef") + print("\nEndpoints:") + print(" GET /beta/litellm_prompt_management?prompt_id= (LiteLLM spec)") + print(" GET /health (health check)") + print(" GET /prompts (list all prompts)") + print( + " GET /prompts/{id}/variables (get prompt variables)" + ) + print(" POST /prompts (create prompt)") + print("\nExample usage:") + print( + ' curl "http://localhost:8080/beta/litellm_prompt_management?prompt_id=hello-world-prompt"' + ) + print("\nPress CTRL+C to stop the server") + print("=" * 70) + + uvicorn.run(app, host="0.0.0.0", port=8080, log_level="info") diff --git a/docs/my-website/docs/adding_provider/generic_prompt_management_api.md b/docs/my-website/docs/adding_provider/generic_prompt_management_api.md new file mode 100644 index 0000000000..d1b119d94c --- /dev/null +++ b/docs/my-website/docs/adding_provider/generic_prompt_management_api.md @@ -0,0 +1,576 @@ +# [BETA] Generic Prompt Management API - Integrate Without a PR + +## The Problem + +As a prompt 
management provider, integrating with LiteLLM traditionally requires: +- Making a PR to the LiteLLM repository +- Waiting for review and merge +- Maintaining provider-specific code in LiteLLM's codebase +- Updating the integration for changes to your API + +## The Solution + +The **Generic Prompt Management API** lets you integrate with LiteLLM **instantly** by implementing a simple API endpoint. No PR required. + +### Key Benefits + +1. **No PR Needed** - Deploy and integrate immediately +2. **Simple Contract** - One GET endpoint, standard JSON response +3. **Variable Substitution** - Support for prompt variables with `{variable}` syntax +4. **Custom Parameters** - Pass provider-specific query params via config +5. **Full Control** - You own and maintain your prompt management API +6. **Model & Parameters Override** - Optionally override model and parameters from your prompts + +## Get Started in 3 Steps + +### Step 1: Configure LiteLLM + +Add to your `config.yaml`: + +```yaml +prompts: + - prompt_id: "simple_prompt" + litellm_params: + prompt_integration: "generic_prompt_management" + api_base: http://localhost:8080 + api_key: os.environ/YOUR_API_KEY +``` + +### Step 2: Implement Your API Endpoint + +```python +from fastapi import FastAPI +from pydantic import BaseModel + +app = FastAPI() + +@app.get("/beta/litellm_prompt_management") +async def get_prompt(prompt_id: str): + return { + "prompt_id": prompt_id, + "prompt_template": [ + {"role": "system", "content": "You are a helpful assistant."}, + {"role": "user", "content": "Help me with {task}"} + ], + "prompt_template_model": "gpt-4", + "prompt_template_optional_params": {"temperature": 0.7} + } +``` + +### Step 3: Use in Your App + +```python +from litellm import completion + +response = completion( + model="gpt-4", + prompt_id="simple_prompt", + prompt_variables={"task": "data analysis"}, + messages=[{"role": "user", "content": "I have sales data"}] +) +``` + +That's it! 
LiteLLM fetches your prompt, applies variables, and makes the request. + +## API Contract + +### Endpoint + +Implement `GET /beta/litellm_prompt_management` + +### Request Format + +Your endpoint will receive a GET request with query parameters: + +``` +GET /beta/litellm_prompt_management?prompt_id={prompt_id}&{custom_params} +``` + +**Query Parameters:** +- `prompt_id` (required): The ID of the prompt to fetch +- Custom parameters: Any additional parameters you configured in `provider_specific_query_params` + +**Example:** +``` +GET /beta/litellm_prompt_management?prompt_id=hello-world-prompt-2bac&project_name=litellm&slug=hello-world-prompt-2bac +``` + +### Response Format + +```json +{ + "prompt_id": "hello-world-prompt-2bac", + "prompt_template": [ + { + "role": "system", + "content": "You are a helpful assistant specialized in {domain}." + }, + { + "role": "user", + "content": "Help me with {task}" + } + ], + "prompt_template_model": "gpt-4", + "prompt_template_optional_params": { + "temperature": 0.7, + "max_tokens": 500, + "top_p": 0.9 + } +} +``` + +**Response Fields:** +- `prompt_id` (string, required): The ID of the prompt +- `prompt_template` (array, required): Array of OpenAI-format messages with optional `{variable}` placeholders +- `prompt_template_model` (string, optional): Model to use for this prompt (overrides client model unless `ignore_prompt_manager_model: true`) +- `prompt_template_optional_params` (object, optional): Additional parameters like temperature, max_tokens, etc. 
(merged with client params unless `ignore_prompt_manager_optional_params: true`) + +## LiteLLM Configuration + +Add to `config.yaml`: + +```yaml +model_list: + - model_name: gpt-3.5-turbo + litellm_params: + model: openai/gpt-3.5-turbo + api_key: os.environ/OPENAI_API_KEY + +prompts: + - prompt_id: "simple_prompt" + litellm_params: + prompt_integration: "generic_prompt_management" + provider_specific_query_params: + project_name: litellm + slug: hello-world-prompt-2bac + api_base: http://localhost:8080 + api_key: os.environ/YOUR_PROMPT_API_KEY # optional + ignore_prompt_manager_model: true # optional, keep client's model + ignore_prompt_manager_optional_params: true # optional, don't merge prompt manager's params (e.g. temperature, max_tokens, etc.) +``` + +### Configuration Parameters + +- `prompt_integration`: Must be `"generic_prompt_management"` +- `provider_specific_query_params`: Custom query parameters sent to your API (optional) +- `api_base`: Base URL of your prompt management API +- `api_key`: Optional API key for authentication (sent as `Bearer` token) +- `ignore_prompt_manager_model`: If `true`, use the model specified by client instead of prompt's model (default: `false`) +- `ignore_prompt_manager_optional_params`: If `true`, don't merge prompt's optional params with client params (default: `false`) + +## Usage + +### Using with LiteLLM SDK + +**Basic usage with prompt ID:** + +```python +from litellm import completion + +response = completion( + model="gpt-4", + prompt_id="simple_prompt", + messages=[{"role": "user", "content": "Additional message"}] +) +``` + +**With prompt variables:** + +```python +response = completion( + model="gpt-4", + prompt_id="simple_prompt", + prompt_variables={ + "domain": "data science", + "task": "analyzing customer churn" + }, + messages=[{"role": "user", "content": "Please provide a detailed analysis"}] +) +``` + +The prompt template will have `{domain}` replaced with "data science" and `{task}` replaced with 
"analyzing customer churn". + +### Using with LiteLLM Proxy + +**1. Start the proxy with your config:** + +```bash +litellm --config /path/to/config.yaml +``` + +**2. Make requests with prompt_id:** + +```bash +curl http://0.0.0.0:4000/v1/chat/completions \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer sk-1234" \ + -d '{ + "model": "gpt-4", + "prompt_id": "simple_prompt", + "prompt_variables": { + "domain": "healthcare", + "task": "patient risk assessment" + }, + "messages": [ + {"role": "user", "content": "Analyze the following data..."} + ] + }' +``` + +**3. Using with OpenAI SDK:** + +```python +from openai import OpenAI + +client = OpenAI( + base_url="http://0.0.0.0:4000", + api_key="sk-1234" +) + +response = client.chat.completions.create( + model="gpt-4", + messages=[ + {"role": "user", "content": "Analyze the data"} + ], + extra_body={ + "prompt_id": "simple_prompt", + "prompt_variables": { + "domain": "finance", + "task": "fraud detection" + } + } +) +``` + +## Implementation Example + +See [mock_prompt_management_server.py](https://github.com/BerriAI/litellm/blob/main/cookbook/mock_prompt_management_server/mock_prompt_management_server.py) for a complete reference implementation with multiple example prompts, authentication, and convenience endpoints. + +**Minimal FastAPI example:** + +```python +from fastapi import FastAPI, HTTPException, Header +from typing import Optional, Dict, Any, List +from pydantic import BaseModel + +app = FastAPI() + +# In-memory prompt storage (replace with your database) +PROMPTS = { + "hello-world-prompt": { + "prompt_id": "hello-world-prompt", + "prompt_template": [ + { + "role": "system", + "content": "You are a helpful assistant specialized in {domain}." 
+ }, + { + "role": "user", + "content": "Help me with: {task}" + } + ], + "prompt_template_model": "gpt-4", + "prompt_template_optional_params": { + "temperature": 0.7, + "max_tokens": 500 + } + }, + "code-review-prompt": { + "prompt_id": "code-review-prompt", + "prompt_template": [ + { + "role": "system", + "content": "You are an expert code reviewer. Review code for {language}." + }, + { + "role": "user", + "content": "Review the following code:\n\n{code}" + } + ], + "prompt_template_model": "gpt-4-turbo", + "prompt_template_optional_params": { + "temperature": 0.3, + "max_tokens": 1000 + } + } +} + +class PromptResponse(BaseModel): + prompt_id: str + prompt_template: List[Dict[str, str]] + prompt_template_model: Optional[str] = None + prompt_template_optional_params: Optional[Dict[str, Any]] = None + +@app.get("/beta/litellm_prompt_management", response_model=PromptResponse) +async def get_prompt( + prompt_id: str, + authorization: Optional[str] = Header(None), + project_name: Optional[str] = None, + slug: Optional[str] = None, +): + """ + Get a prompt by ID with optional filtering by project_name and slug. 
+ + Args: + prompt_id: The ID of the prompt to fetch + authorization: Optional Bearer token for authentication + project_name: Optional project name filter + slug: Optional slug filter + """ + + # Optional: Validate authorization + if authorization: + token = authorization.replace("Bearer ", "") + # Validate your token here + if not is_valid_token(token): + raise HTTPException(status_code=401, detail="Invalid API key") + + # Optional: Apply additional filtering based on custom params + if project_name or slug: + # You can use these parameters to filter or validate access + # For example, check if the user has access to this project + pass + + # Fetch the prompt from your storage + if prompt_id not in PROMPTS: + raise HTTPException( + status_code=404, + detail=f"Prompt '{prompt_id}' not found" + ) + + prompt_data = PROMPTS[prompt_id] + + return PromptResponse(**prompt_data) + +def is_valid_token(token: str) -> bool: + """Validate API token - implement your logic here""" + # Example: Check against your database or secret store + valid_tokens = ["your-secret-token", "another-valid-token"] + return token in valid_tokens + +# Optional: Health check endpoint +@app.get("/health") +async def health_check(): + return {"status": "healthy"} + +# Optional: List all prompts endpoint +@app.get("/prompts") +async def list_prompts(authorization: Optional[str] = Header(None)): + """List all available prompts""" + if authorization: + token = authorization.replace("Bearer ", "") + if not is_valid_token(token): + raise HTTPException(status_code=401, detail="Invalid API key") + + return { + "prompts": [ + {"prompt_id": pid, "model": p.get("prompt_template_model")} + for pid, p in PROMPTS.items() + ] + } + +if __name__ == "__main__": + import uvicorn + uvicorn.run(app, host="0.0.0.0", port=8080) +``` + +### Running the Example Server + +1. Install dependencies: +```bash +pip install fastapi uvicorn +``` + +2. Save the code above to `prompt_server.py` + +3. 
Run the server: +```bash +python prompt_server.py +``` + +4. Test the endpoint: +```bash +curl "http://localhost:8080/beta/litellm_prompt_management?prompt_id=hello-world-prompt&project_name=litellm&slug=hello-world-prompt-2bac" +``` + +Expected response: +```json +{ + "prompt_id": "hello-world-prompt", + "prompt_template": [ + { + "role": "system", + "content": "You are a helpful assistant specialized in {domain}." + }, + { + "role": "user", + "content": "Help me with: {task}" + } + ], + "prompt_template_model": "gpt-4", + "prompt_template_optional_params": { + "temperature": 0.7, + "max_tokens": 500 + } +} +``` + +## Advanced Features + +### Variable Substitution + +LiteLLM automatically substitutes variables in your prompt templates using the `{variable}` syntax. Both `{variable}` and `{{variable}}` formats are supported. + +**Example prompt template:** +```json +{ + "prompt_template": [ + { + "role": "system", + "content": "You are an expert in {domain} with {years} years of experience." + } + ] +} +``` + +**Client request:** +```python +completion( + model="gpt-4", + prompt_id="expert_prompt", + prompt_variables={ + "domain": "machine learning", + "years": "10" + } +) +``` + +**Result:** +``` +"You are an expert in machine learning with 10 years of experience." +``` + +### Caching + +LiteLLM automatically caches fetched prompts in memory. The cache key includes: +- `prompt_id` +- `prompt_label` (if provided) +- `prompt_version` (if provided) + +This means your API endpoint is only called once per unique prompt configuration. + +### Model Override Behavior + +**Default behavior (without `ignore_prompt_manager_model`):** +```yaml +prompts: + - prompt_id: "my_prompt" + litellm_params: + prompt_integration: "generic_prompt_management" + api_base: http://localhost:8080 +``` + +If your API returns `"prompt_template_model": "gpt-4"`, LiteLLM will use `gpt-4` regardless of what the client specified. 
+ +**With `ignore_prompt_manager_model: true`:** +```yaml +prompts: + - prompt_id: "my_prompt" + litellm_params: + prompt_integration: "generic_prompt_management" + api_base: http://localhost:8080 + ignore_prompt_manager_model: true +``` + +LiteLLM will use the model specified by the client, ignoring the prompt's model. + +### Parameter Merging Behavior + +**Default behavior (without `ignore_prompt_manager_optional_params`):** + +Client params are merged with prompt params, with prompt params taking precedence: +```python +# Prompt returns: {"temperature": 0.7, "max_tokens": 500} +# Client sends: {"temperature": 0.9, "top_p": 0.95} +# Final params: {"temperature": 0.7, "max_tokens": 500, "top_p": 0.95} +``` + +**With `ignore_prompt_manager_optional_params: true`:** + +Only client params are used: +```python +# Prompt returns: {"temperature": 0.7, "max_tokens": 500} +# Client sends: {"temperature": 0.9, "top_p": 0.95} +# Final params: {"temperature": 0.9, "top_p": 0.95} +``` + +## Security Considerations + +1. **Authentication**: Use the `api_key` parameter to secure your prompt management API +2. **Authorization**: Implement team/user-based access control using the custom query parameters +3. **Rate Limiting**: Add rate limiting to prevent abuse of your API +4. **Input Validation**: Validate all query parameters before processing +5. **HTTPS**: Always use HTTPS in production for encrypted communication +6. 
**Secrets**: Store API keys in environment variables, not in config files + +## Use Cases + +✅ **Use Generic Prompt Management API when:** +- You want instant integration without waiting for PRs +- You maintain your own prompt management service +- You need full control over prompt versioning and updates +- You want to build custom prompt management features +- You need to integrate with your internal systems + +✅ **Common scenarios:** +- Internal prompt management system for your organization +- Multi-tenant prompt management with team-based access control +- A/B testing different prompt versions +- Prompt experimentation and analytics +- Integration with existing prompt engineering workflows + +## When to Use This + +✅ **Use Generic Prompt Management API when:** +- You want instant integration without waiting for PRs +- You maintain your own prompt management service +- You need full control over updates and features +- You want custom prompt storage and versioning logic + +❌ **Make a PR when:** +- You want deeper integration with LiteLLM internals +- Your integration requires complex LiteLLM-specific logic +- You want to be featured as a built-in provider +- You're building a reusable integration for the community + +## Troubleshooting + +### Prompt not found +- Verify the `prompt_id` matches exactly (case-sensitive) +- Check that your API endpoint is accessible from LiteLLM +- Verify authentication if using `api_key` + +### Variables not substituted +- Ensure variables use `{variable}` or `{{variable}}` syntax +- Check that variable names in `prompt_variables` match template exactly +- Variables are case-sensitive + +### Model not being overridden +- Check if `ignore_prompt_manager_model: true` is set in config +- Verify your API is returning `prompt_template_model` in the response + +### Parameters not being applied +- Check if `ignore_prompt_manager_optional_params: true` is set +- Verify your API is returning `prompt_template_optional_params` +- Ensure 
parameter names match OpenAI's parameter names + +## Questions? + +This is a **beta API**. We're actively improving it based on feedback. Open an issue or PR if you need additional capabilities. + +## Related Documentation + +- [Prompt Management Overview](../proxy/prompt_management.md) +- [Generic Guardrail API](./generic_guardrail_api.md) +- [LiteLLM Proxy Setup](../proxy/quick_start.md) + diff --git a/docs/my-website/docs/providers/gemini.md b/docs/my-website/docs/providers/gemini.md index 32dea2069b..4e2ea45925 100644 --- a/docs/my-website/docs/providers/gemini.md +++ b/docs/my-website/docs/providers/gemini.md @@ -1185,6 +1185,9 @@ When responding to Computer Use tool calls, include the URL and screenshot: + + + ## Thought Signatures Thought signatures are encrypted representations of the model's internal reasoning process for a given turn in a conversation. By passing thought signatures back to the model in subsequent requests, you provide it with the context of its previous thoughts, allowing it to build upon its reasoning and maintain a coherent line of inquiry. diff --git a/docs/my-website/docs/proxy/prompt_management.md b/docs/my-website/docs/proxy/prompt_management.md index 0c7ff96f53..fae9b7386a 100644 --- a/docs/my-website/docs/proxy/prompt_management.md +++ b/docs/my-website/docs/proxy/prompt_management.md @@ -11,6 +11,7 @@ Run experiments or change the specific model (e.g. 
from gpt-4o to gpt4o-mini fin | Native LiteLLM GitOps (.prompt files) | [Get Started](native_litellm_prompt) | | Langfuse | [Get Started](https://langfuse.com/docs/prompts/get-started) | | Humanloop | [Get Started](../observability/humanloop) | +| Generic Prompt Management API | [Get Started](../adding_provider/generic_prompt_management_api) | ## Onboarding Prompts via config.yaml @@ -34,7 +35,7 @@ prompts: - prompt_id: "my_prompt_id" litellm_params: prompt_id: "my_prompt_id" - prompt_integration: "dotprompt" # or langfuse, bitbucket, gitlab, custom + prompt_integration: "dotprompt" # or langfuse, bitbucket, gitlab, generic_prompt_management, custom # integration-specific parameters below ``` @@ -46,6 +47,7 @@ The `prompt_integration` field determines where and how prompts are loaded: - **`langfuse`**: Fetch prompts from Langfuse prompt management - **`bitbucket`**: Load from BitBucket repository `.prompt` files (team-based access control) - **`gitlab`**: Load from GitLab repository `.prompt` files (team-based access control) +- **`generic_prompt_management`**: Integrate any prompt management system via a simple API endpoint (no PR required) - **`custom`**: Use your own custom prompt management implementation Each integration has its own configuration parameters and access control mechanisms. @@ -207,6 +209,57 @@ System: You are a helpful assistant. 
User: {{user_message}} ``` + + + + +```yaml +prompts: + - prompt_id: "simple_prompt" + litellm_params: + prompt_integration: "generic_prompt_management" + provider_specific_query_params: + project_name: litellm + slug: hello-world-prompt-2bac + api_base: http://localhost:8080 + api_key: os.environ/PROMPT_MANAGEMENT_API_KEY + ignore_prompt_manager_model: true # optional + ignore_prompt_manager_optional_params: true # optional +``` + +**What you need to implement:** + +A GET endpoint at `/beta/litellm_prompt_management` that returns: + +```json +{ + "prompt_id": "simple_prompt", + "prompt_template": [ + { + "role": "system", + "content": "You are a helpful assistant." + }, + { + "role": "user", + "content": "Help me with {task}" + } + ], + "prompt_template_model": "gpt-4", + "prompt_template_optional_params": { + "temperature": 0.7, + "max_tokens": 500 + } +} +``` + +**Benefits:** +- No PR required - integrate any prompt management system +- Full control over your prompt storage and versioning +- Support for variable substitution with `{variable}` syntax +- Custom query parameters for filtering and access control + +**Learn more:** [Generic Prompt Management API Documentation](../adding_provider/generic_prompt_management_api) + diff --git a/docs/my-website/sidebars.js b/docs/my-website/sidebars.js index 3b0f399f8e..bde5b8927a 100644 --- a/docs/my-website/sidebars.js +++ b/docs/my-website/sidebars.js @@ -96,6 +96,13 @@ const sidebars = { type: "category", label: "[Beta] Prompt Management", items: [ + { + type: "category", + label: "Contributing to Prompt Management", + items: [ + "adding_provider/generic_prompt_management_api", + ] + }, "proxy/litellm_prompt_management", "proxy/custom_prompt_management", "proxy/native_litellm_prompt",