diff --git a/docker/Dockerfile.health_check b/docker/Dockerfile.health_check
new file mode 100644
index 0000000000..de62e4bd72
--- /dev/null
+++ b/docker/Dockerfile.health_check
@@ -0,0 +1,16 @@
+FROM python:3.11-slim
+
+WORKDIR /app
+
+# Copy health check script and requirements
+COPY scripts/health_check/health_check_client.py /app/health_check_client.py
+COPY scripts/health_check/health_check_requirements.txt /app/requirements.txt
+
+# Install dependencies
+RUN pip install --no-cache-dir -r requirements.txt
+
+# Make script executable
+RUN chmod +x /app/health_check_client.py
+
+# Set entrypoint
+ENTRYPOINT ["python", "/app/health_check_client.py"]
diff --git a/scripts/health_check/health_check_client.py b/scripts/health_check/health_check_client.py
new file mode 100644
index 0000000000..337c754a75
--- /dev/null
+++ b/scripts/health_check/health_check_client.py
@@ -0,0 +1,424 @@
+#!/usr/bin/env python3
+"""
+LiteLLM Health Check Client
+
+A sentinel health check tool that tests all configured models on a LiteLLM proxy.
+Similar to HRT's health check system, this script:
+- Can read models from YAML config file (like HRT) or fetch from proxy API
+- Sends a simple test request to each model concurrently
+- Reports health status for each model
+- Supports both chat/completion and embedding models
+"""
+
+import asyncio
+import json
+import os
+import sys
+import time
+from typing import Dict, List, Optional, Tuple
+
+import httpx
+import yaml
+
+
+class LiteLLMHealthCheckClient:
+    """Client for health checking LiteLLM proxy models."""
+
+    def __init__(
+        self,
+        base_url: str,
+        api_key: str,
+        timeout: int = 120,  # Match Go implementation's 120s timeout
+        completion_prompt: str = "Say this is a test",  # Match Go implementation
+        embedding_text: str = "This is a test for vectorization.",  # Match Go implementation
+    ):
+        """
+        Initialize the health check client.
+
+        Args:
+            base_url: Base URL of the LiteLLM proxy (e.g., https://litellm.example.com)
+            api_key: API key for authentication
+            timeout: Request timeout in seconds (default: 120, matching Go implementation)
+            completion_prompt: Test prompt for chat/completion models
+            embedding_text: Test text for embedding models
+        """
+        self.base_url = base_url.rstrip("/")
+        self.api_key = api_key
+        self.timeout = timeout
+        self.completion_prompt = completion_prompt
+        self.embedding_text = embedding_text
+        self.headers = {
+            "Authorization": f"Bearer {api_key}",
+            "Content-Type": "application/json",
+        }
+
+    def load_models_from_yaml(self, yaml_path: str) -> List[Dict]:
+        """
+        Load models from a YAML config file (similar to Go implementation).
+
+        ``model_info`` is read from the model entry itself, which is where
+        LiteLLM proxy configs place it; a ``model_info`` nested under
+        ``litellm_params`` is still honoured as a fallback.
+
+        Args:
+            yaml_path: Path to the YAML config file
+
+        Returns:
+            List of model dictionaries with 'id', 'mode' and 'provider' keys.
+            Empty list if the file cannot be read or parsed.
+        """
+        try:
+            with open(yaml_path, "r", encoding="utf-8") as f:
+                # An empty file parses to None; treat it as an empty config.
+                config = yaml.safe_load(f) or {}
+
+            models = []
+            for entry in config.get("model_list") or []:
+                model_name = entry.get("model_name", "")
+                litellm_params = entry.get("litellm_params") or {}
+                model_info = (
+                    entry.get("model_info")
+                    or litellm_params.get("model_info")
+                    or {}
+                )
+                mode = model_info.get("mode", "")
+
+                # Use model_name as the ID (this is what gets sent to the API)
+                models.append(
+                    {
+                        "id": model_name,
+                        "mode": mode.lower() if mode else "",
+                        "provider": model_info.get("provider", ""),
+                    }
+                )
+
+            return models
+        except Exception as e:
+            print(f"Error loading models from YAML file {yaml_path}: {e}", file=sys.stderr)
+            return []
+
+    async def fetch_models(self, client: httpx.AsyncClient) -> List[Dict]:
+        """
+        Fetch all available models from the proxy API.
+
+        Returns:
+            List of model dictionaries with 'id' and 'mode' keys
+        """
+        try:
+            # Try /v1/models first (OpenAI-compatible endpoint)
+            response = await client.get(
+                f"{self.base_url}/v1/models",
+                headers=self.headers,
+                timeout=self.timeout,
+            )
+            response.raise_for_status()
+            data = response.json()
+            models_data = data.get("data", [])
+            models = []
+            for m in models_data:
+                models.append({"id": m["id"], "mode": "", "provider": ""})
+            return models
+        except Exception as e:
+            print(f"Error fetching models from /v1/models: {e}", file=sys.stderr)
+            # Fallback to /model/info endpoint which has more details
+            try:
+                response = await client.get(
+                    f"{self.base_url}/model/info",
+                    headers=self.headers,
+                    timeout=self.timeout,
+                )
+                response.raise_for_status()
+                data = response.json()
+                if isinstance(data, dict) and "data" in data:
+                    models_data = data["data"]
+                elif isinstance(data, list):
+                    models_data = data
+                else:
+                    models_data = []
+
+                models = []
+                for m in models_data:
+                    model_info = m.get("model_info") or {}
+                    mode = model_info.get("mode", "")
+                    models.append(
+                        {
+                            "id": m.get("model_name", m.get("id", "unknown")),
+                            "mode": mode.lower() if mode else "",
+                            "provider": model_info.get("provider", ""),
+                        }
+                    )
+                return models
+            except Exception as e2:
+                print(f"Error fetching models from /model/info: {e2}", file=sys.stderr)
+                return []
+
+    async def check_model_health(
+        self, client: httpx.AsyncClient, model: Dict
+    ) -> Tuple[str, Dict]:
+        """
+        Check health of a single model by sending a test request.
+
+        Args:
+            client: HTTP client
+            model: Model dictionary with 'id' and 'mode' keys
+
+        Returns:
+            Tuple of (model_id, result_dict)
+        """
+        model_id = model["id"]
+        mode = model.get("mode", "")
+
+        # perf_counter is monotonic, so wall-clock adjustments cannot skew timings
+        start_time = time.perf_counter()
+        result = {
+            "model": model_id,
+            "healthy": False,
+            "error": None,
+            "response_time_ms": None,
+            "mode": mode,
+        }
+
+        try:
+            # Determine if this is an embedding model
+            # Check mode first (from config), then fall back to name-based detection
+            # ("embed" also covers "embedding" and "text-embedding")
+            is_embedding = mode == "embedding" or "embed" in model_id.lower()
+
+            if is_embedding:
+                # Test embedding endpoint (matching Go implementation)
+                embedding_response = await client.post(
+                    f"{self.base_url}/v1/embeddings",
+                    headers=self.headers,
+                    json={
+                        "model": model_id,
+                        "input": self.embedding_text,
+                    },
+                    timeout=self.timeout,
+                )
+                embedding_response.raise_for_status()
+                embedding_data = embedding_response.json()
+                dimensions = 0
+                if "data" in embedding_data and len(embedding_data["data"]) > 0:
+                    dimensions = len(embedding_data["data"][0].get("embedding", []))
+
+                result["healthy"] = True
+                result["mode"] = "embedding"
+                result["dimensions"] = dimensions
+            else:
+                # Test chat completion endpoint (matching Go implementation)
+                completion_response = await client.post(
+                    f"{self.base_url}/v1/chat/completions",
+                    headers=self.headers,
+                    json={
+                        "model": model_id,
+                        "messages": [
+                            {"role": "user", "content": self.completion_prompt}
+                        ],
+                        "max_tokens": 10,  # Minimal tokens for health check
+                    },
+                    timeout=self.timeout,
+                )
+                completion_response.raise_for_status()
+                completion_data = completion_response.json()
+                response_text = ""
+                choices = completion_data.get("choices") or []
+                if choices:
+                    # "message" or "content" can be null in a successful
+                    # response; treat that as empty text rather than failing.
+                    message = choices[0].get("message") or {}
+                    response_text = message.get("content") or ""
+
+                result["healthy"] = True
+                result["mode"] = "chat"
+                result["response_text"] = response_text[:100]  # Truncate for display
+
+            elapsed_ms = (time.perf_counter() - start_time) * 1000
+            result["response_time_ms"] = round(elapsed_ms, 2)
+
+        except httpx.HTTPStatusError as e:
+            result["error"] = f"HTTP {e.response.status_code}: {e.response.text[:200]}"
+        except httpx.TimeoutException:
+            result["error"] = f"Request timeout after {self.timeout}s"
+        except Exception as e:
+            result["error"] = str(e)[:200]
+
+        return model_id, result
+
+    async def run_health_checks(
+        self,
+        models: Optional[List[Dict]] = None,
+        models_only: Optional[List[str]] = None,
+    ) -> Dict[str, Dict]:
+        """
+        Run health checks on all models concurrently.
+
+        Args:
+            models: Optional list of models to check. If None, fetches from proxy.
+            models_only: Optional list of model IDs to check. If set, only these
+                models are health-checked (must exist in the models list).
+
+        Returns:
+            Dictionary mapping model_id to health check result
+        """
+        async with httpx.AsyncClient() as client:
+            if models is None:
+                models = await self.fetch_models(client)
+
+            if not models:
+                print("No models found to health check", file=sys.stderr)
+                return {}
+
+            if models_only:
+                allowlist = {m.strip() for m in models_only if m and m.strip()}
+                models = [m for m in models if m.get("id") in allowlist]
+                print(
+                    f"Filtering to only check {len(models)} models: {', '.join(sorted(allowlist))}",
+                    file=sys.stderr,
+                )
+                if not models:
+                    print(
+                        "No models matched LITELLM_MODELS_ONLY filter",
+                        file=sys.stderr,
+                    )
+                    return {}
+
+            print(f"Running health checks on {len(models)} models...", file=sys.stderr)
+
+            # Run all health checks concurrently
+            tasks = [self.check_model_health(client, model) for model in models]
+            results_list = await asyncio.gather(*tasks, return_exceptions=True)
+
+            # Convert to dictionary format
+            results = {}
+            for result in results_list:
+                if isinstance(result, Exception):
+                    print(
+                        f"Exception in health check task: {result}", file=sys.stderr
+                    )
+                    continue
+                # Type narrowing: after checking it's not an Exception, it's a Tuple
+                if isinstance(result, tuple) and len(result) == 2:
+                    model_id, result_dict = result
+                    results[model_id] = result_dict
+
+            return results
+
+    def print_results(self, results: Dict[str, Dict], json_output: bool = False):
+        """
+        Print health check results and exit with the overall status.
+
+        The process exits with 0 only if at least one model was checked and
+        every checked model is healthy; otherwise it exits with 1. This holds
+        for both the human-readable and the JSON output.
+
+        Args:
+            results: Dictionary of health check results
+            json_output: If True, output as JSON
+        """
+        healthy_count = sum(1 for r in results.values() if r.get("healthy"))
+        unhealthy_count = len(results) - healthy_count
+        # An empty result set means nothing was verified (e.g. the proxy was
+        # unreachable), so it must not be reported as healthy.
+        exit_code = 0 if results and unhealthy_count == 0 else 1
+
+        if json_output:
+            print(json.dumps(results, indent=2))
+            sys.exit(exit_code)
+
+        # Print detailed results for each model (matching Go output format)
+        print(f"\n{'='*60}", file=sys.stderr)
+        print("Starting health check queries\n", file=sys.stderr)
+
+        for model_id, result in results.items():
+            if result.get("healthy"):
+                if result.get("mode") == "embedding":
+                    dimensions = result.get("dimensions", 0)
+                    print(
+                        f"---- {model_id} ----\n✅ Success. "
+                        f"Generated embedding vector with {dimensions} dimensions.\n\n",
+                        file=sys.stderr,
+                    )
+                else:
+                    response_text = result.get("response_text", "")
+                    print(
+                        f"---- {model_id} ----\n✅ Success. "
+                        f"Response:\n{response_text}\n\n",
+                        file=sys.stderr,
+                    )
+            else:
+                error = result.get("error") or "Unknown error"
+                print(f"---- {model_id} ----\n❌ ERROR: {error}\n\n", file=sys.stderr)
+
+        print(f"{'='*60}", file=sys.stderr)
+        print("Health Check Summary", file=sys.stderr)
+        print(f"{'='*60}", file=sys.stderr)
+        print(f"Total models: {len(results)}", file=sys.stderr)
+        print(f"Healthy: {healthy_count}", file=sys.stderr)
+        print(f"Unhealthy: {unhealthy_count}", file=sys.stderr)
+        print(f"{'='*60}\n", file=sys.stderr)
+
+        sys.exit(exit_code)
+
+
+async def main():
+    """Main entry point."""
+    base_url = os.environ.get("LITELLM_BASE_URL", "http://localhost:4000")
+    api_key = os.environ.get("LITELLM_API_KEY", "sk-1234")
+    yaml_path = os.environ.get("LITELLM_MODELS_YAML")
+
+    if not base_url:
+        print("Error: LITELLM_BASE_URL environment variable not set", file=sys.stderr)
+        sys.exit(1)
+
+    if not api_key:
+        print("Error: LITELLM_API_KEY environment variable not set", file=sys.stderr)
+        sys.exit(1)
+
+    timeout_raw = os.environ.get("LITELLM_TIMEOUT", "120")  # Match Go's 120s default
+    try:
+        timeout = int(timeout_raw)
+    except ValueError:
+        timeout = 0
+    if timeout <= 0:
+        print(
+            f"Error: LITELLM_TIMEOUT must be a positive integer, got {timeout_raw!r}",
+            file=sys.stderr,
+        )
+        sys.exit(1)
+
+    completion_prompt = os.environ.get(
+        "LITELLM_COMPLETION_PROMPT", "Say this is a test"
+    )
+    embedding_text = os.environ.get(
+        "LITELLM_EMBEDDING_TEXT", "This is a test for vectorization."
+    )
+    json_output = os.environ.get("LITELLM_JSON_OUTPUT", "").lower() == "true"
+    # Optional: only health-check these model IDs (comma-separated). E.g.:
+    #   LITELLM_MODELS_ONLY=claude-3.7-sonnet,claude-3.5-sonnet,claude-4.5-haiku
+    models_only_raw = os.environ.get("LITELLM_MODELS_ONLY", "")
+    models_only = [m.strip() for m in models_only_raw.split(",") if m.strip()] or None
+
+    client = LiteLLMHealthCheckClient(
+        base_url=base_url,
+        api_key=api_key,
+        timeout=timeout,
+        completion_prompt=completion_prompt,
+        embedding_text=embedding_text,
+    )
+
+    # Load models from YAML if provided, otherwise fetch from API
+    models = None
+    if yaml_path:
+        models = client.load_models_from_yaml(yaml_path)
+        if models:
+            print(
+                f"Successfully loaded {len(models)} models from {yaml_path}",
+                file=sys.stderr,
+            )
+
+    results = await client.run_health_checks(models=models, models_only=models_only)
+    client.print_results(results, json_output=json_output)
+
+
+if __name__ == "__main__":
+    asyncio.run(main())
diff --git a/scripts/health_check/health_check_client_README.md b/scripts/health_check/health_check_client_README.md
new file mode 100644
index 0000000000..e3132499e0
--- /dev/null
+++ b/scripts/health_check/health_check_client_README.md
@@ -0,0 +1,246 @@
+# LiteLLM Health Check Client
+
+A health check tool for testing all configured models on a LiteLLM proxy. Tests each model with completion/embedding requests and reports health status, errors, and response times.
+
+## Features
+
+- **YAML Config Support**: Reads models from YAML config file OR fetches from proxy API
+- **Smart Mode Detection**: Detects embedding vs chat models from config or model name
+- **Concurrent Testing**: Tests all models concurrently using asyncio
+- **Containerized**: Docker image for easy deployment
+- **Parallel Execution**: Supports parallel execution for stress testing
+- **Configurable**: Customizable timeouts (default 120s) and test prompts
+
+## Quick Start
+
+### As a Python Script
+
+**Option 1: Fetch models from proxy API**
+```bash
+export LITELLM_BASE_URL="https://litellm.example.com"
+export LITELLM_API_KEY="your-api-key"
+python scripts/health_check/health_check_client.py
+```
+
+**Option 2: Use YAML config file**
+```bash
+export LITELLM_BASE_URL="https://litellm.example.com"
+export LITELLM_API_KEY="your-api-key"
+export LITELLM_MODELS_YAML="/path/to/config.yaml"
+python scripts/health_check/health_check_client.py
+```
+
+### As a Docker Container
+
+1. Build the Docker image:
+
+```bash
+docker build -f docker/Dockerfile.health_check -t litellm/litellm-health-check:latest .
+```
+
+2. Run a single health check:
+
+```bash
+docker run --rm \
+  -e LITELLM_BASE_URL="https://litellm.example.com" \
+  -e LITELLM_API_KEY="your-api-key" \
+  litellm/litellm-health-check:latest
+```
+
+### Parallel Execution (Stress Testing)
+
+Run multiple health check containers in parallel:
+
+**PowerShell:**
+```powershell
+$env:LITELLM_BASE_URL="https://litellm.example.com"
+$env:LITELLM_API_KEY="your-api-key"
+.\scripts\health_check\run_parallel_health_checks.ps1 16
+```
+
+**Bash/Shell:**
+```bash
+export LITELLM_BASE_URL="https://litellm.example.com"
+export LITELLM_API_KEY="your-api-key"
+./scripts/health_check/run_parallel_health_checks.sh 16
+```
+
+
+## Configuration
+
+### Environment Variables
+
+- `LITELLM_BASE_URL` (required): Base URL of the LiteLLM proxy
+  - Example: `https://litellm.example.com`
+- `LITELLM_API_KEY` (required): API key for authentication
+- `LITELLM_MODELS_YAML` (optional): Path to YAML config file with model_list
+  - If provided, reads models from YAML instead of fetching from API
+  - Example: `/path/to/config.yaml`
+- `LITELLM_TIMEOUT` (optional): Request timeout in seconds (default: 120)
+- `LITELLM_COMPLETION_PROMPT` (optional): Test prompt for chat/completion models (default: "Say this is a test")
+- `LITELLM_EMBEDDING_TEXT` (optional): Test text for embedding models (default: "This is a test for vectorization.")
+- `LITELLM_JSON_OUTPUT` (optional): Output results as JSON (default: false)
+
+## Output
+
+### Standard Output (Human-Readable)
+
+Example output format (written to stderr):
+
+```
+============================================================
+Starting health check queries
+
+---- gpt-4o ----
+✅ Success. Response:
+This is a test
+
+---- text-embedding-3-small ----
+✅ Success. Generated embedding vector with 1536 dimensions.
+
+---- gpt-5-codex ----
+❌ ERROR: HTTP 503: Service unavailable
+
+============================================================
+Health Check Summary
+============================================================
+Total models: 47
+Healthy: 45
+Unhealthy: 2
+============================================================
+```
+
+Exit code: `0` if all models are healthy, `1` if any models are unhealthy.
+
+### JSON Output
+
+When `LITELLM_JSON_OUTPUT=true`, outputs JSON:
+
+```json
+{
+  "gpt-4o": {
+    "model": "gpt-4o",
+    "healthy": true,
+    "error": null,
+    "response_time_ms": 245.67,
+    "mode": "chat",
+    "response_text": "This is a test"
+  },
+  "text-embedding-3-small": {
+    "model": "text-embedding-3-small",
+    "healthy": true,
+    "error": null,
+    "response_time_ms": 123.45,
+    "mode": "embedding",
+    "dimensions": 1536
+  }
+}
+```
+
+## How It Works
+
+1. **Model Discovery**:
+   - If `LITELLM_MODELS_YAML` is set: Reads models from YAML config file
+   - Otherwise: Queries `/v1/models` (OpenAI-compatible), falling back to `/model/info` if that request fails, to get all configured models
+2. **Mode Detection**:
+   - Checks `mode` field from YAML config, or falls back to model name patterns (embedding, embed, text-embedding)
+3. **Concurrent Testing**:
+   - Chat models: `POST /v1/chat/completions` with configurable prompt (default: "Say this is a test")
+   - Embedding models: `POST /v1/embeddings` with configurable text (default: "This is a test for vectorization.")
+4. **Reporting**: Health status, errors, response times, and response details are reported
+
+## Use Cases
+
+### 1. Regular Health Monitoring
+
+Run as a cron job or scheduled task:
+
+```bash
+# Cron job: Run every 5 minutes
+*/5 * * * * /path/to/health_check.sh
+```
+
+### 2. Load/Stress Testing
+
+Run multiple health checks in parallel:
+
+**PowerShell:**
+```powershell
+.\scripts\health_check\run_parallel_health_checks.ps1 16
+```
+
+### 3. CI/CD Integration
+
+Add to your deployment pipeline:
+
+```yaml
+# GitHub Actions example
+- name: Health Check
+  run: |
+    docker run --rm \
+      -e LITELLM_BASE_URL="${{ secrets.LITELLM_BASE_URL }}" \
+      -e LITELLM_API_KEY="${{ secrets.LITELLM_API_KEY }}" \
+      litellm/litellm-health-check:latest
+```
+
+### 4. Kubernetes Deployment
+
+Deploy as a CronJob:
+
+```yaml
+apiVersion: batch/v1
+kind: CronJob
+metadata:
+  name: litellm-health-check
+spec:
+  schedule: "*/5 * * * *"  # Every 5 minutes
+  jobTemplate:
+    spec:
+      template:
+        spec:
+          containers:
+          - name: health-check
+            image: litellm/litellm-health-check:latest
+            env:
+            - name: LITELLM_BASE_URL
+              value: "https://litellm.example.com"
+            - name: LITELLM_API_KEY
+              valueFrom:
+                secretKeyRef:
+                  name: litellm-secrets
+                  key: api-key
+          restartPolicy: OnFailure
+```
+
+## Troubleshooting
+
+### No Models Found
+
+- Verify `LITELLM_BASE_URL` is correct
+- Check that the API key has permissions to list models
+- Ensure the proxy is running and accessible
+- If using YAML, verify `LITELLM_MODELS_YAML` path is correct
+
+### Timeout Errors
+
+- Increase `LITELLM_TIMEOUT` for slower models (default is 120s)
+- Check network connectivity to the proxy
+- Verify proxy isn't overloaded
+
+### Authentication Errors
+
+- Verify `LITELLM_API_KEY` is correct
+- Check API key has not expired
+- Ensure the key has necessary permissions
+
+## Dependencies
+
+- Python 3.11+
+- httpx (for async HTTP requests)
+- pyyaml (for YAML config file support)
+- Docker or Podman (for containerized execution)
+- PowerShell 7+ or Bash (for the parallel execution scripts)
+
+## License
+
+Same as LiteLLM project.
diff --git a/scripts/health_check/health_check_requirements.txt b/scripts/health_check/health_check_requirements.txt
new file mode 100644
index 0000000000..c9d2650c88
--- /dev/null
+++ b/scripts/health_check/health_check_requirements.txt
@@ -0,0 +1,2 @@
+httpx>=0.24.0
+pyyaml>=6.0
diff --git a/scripts/health_check/run_parallel_health_checks.ps1 b/scripts/health_check/run_parallel_health_checks.ps1
new file mode 100644
index 0000000000..856e7f20ec
--- /dev/null
+++ b/scripts/health_check/run_parallel_health_checks.ps1
@@ -0,0 +1,75 @@
+# Parallel LiteLLM Health Check Runner (PowerShell version)
+#
+# This script runs multiple health check containers in parallel.
+# Requires PowerShell 7+ (ForEach-Object -Parallel is unavailable in Windows PowerShell 5.1).
+#
+# Usage:
+#   $env:LITELLM_BASE_URL="https://litellm.example.com"
+#   $env:LITELLM_API_KEY="your-api-key"
+#   .\run_parallel_health_checks.ps1 [num_parallel_jobs] [image_name] [container_runtime]
+#
+# Defaults:
+#   - num_parallel_jobs: 16
+#   - image_name: litellm/litellm-health-check:latest
+#   - container_runtime: docker
+
+#Requires -Version 7.0
+
+param(
+    # 1..0 enumerates two values in PowerShell, so a job count below 1 must be rejected
+    [ValidateRange(1, [int]::MaxValue)]
+    [int]$NumParallelJobs = 16,
+    [string]$ImageName = "litellm/litellm-health-check:latest",
+    [string]$ContainerRuntime = "docker"
+)
+
+# Set defaults for environment variables if not provided
+if (-not $env:LITELLM_BASE_URL) {
+    $env:LITELLM_BASE_URL = "https://litellm-perf-cache-and-router.onrender.com"
+    Write-Warning "LITELLM_BASE_URL not set, using default: $env:LITELLM_BASE_URL"
+}
+
+if (-not $env:LITELLM_API_KEY) {
+    $env:LITELLM_API_KEY = "sk-1234"
+    Write-Warning "LITELLM_API_KEY not set, using default: $env:LITELLM_API_KEY"
+}
+
+# Check if container runtime is available
+$runtimeExists = Get-Command $ContainerRuntime -ErrorAction SilentlyContinue
+if (-not $runtimeExists) {
+    Write-Error "Error: $ContainerRuntime is not installed"
+    exit 1
+}
+
+Write-Host "Running $NumParallelJobs parallel health check containers..." -ForegroundColor Yellow
+Write-Host "Using image: $ImageName" -ForegroundColor Yellow
+Write-Host "Container runtime: $ContainerRuntime" -ForegroundColor Yellow
+Write-Host "LiteLLM Base URL: $env:LITELLM_BASE_URL" -ForegroundColor Cyan
+Write-Host ""
+Write-Host "NOTE: This will run continuously. Press Ctrl+C to stop." -ForegroundColor Red
+Write-Host ""
+Write-Host "Troubleshooting:" -ForegroundColor Yellow
+Write-Host " - If you see 'All connection attempts failed', check:" -ForegroundColor Yellow
+Write-Host " 1. Is the LiteLLM proxy running on the expected port?" -ForegroundColor Yellow
+Write-Host " 2. Set LITELLM_BASE_URL to the correct URL (e.g., http://host.docker.internal:PORT)" -ForegroundColor Yellow
+Write-Host " 3. On Linux, you may need to use the host IP instead of host.docker.internal" -ForegroundColor Yellow
+Write-Host ""
+
+# Run parallel health checks
+# This creates an infinite loop that keeps spawning containers
+# Each container tests all models, then exits, and a new one starts
+while ($true) {
+    # Start up to NumParallelJobs containers in parallel
+    1..$NumParallelJobs | ForEach-Object -Parallel {
+        $runtime = $using:ContainerRuntime
+        $imageName = $using:ImageName
+        $baseUrl = $env:LITELLM_BASE_URL
+        $apiKey = $env:LITELLM_API_KEY
+
+        & $runtime run --rm `
+            -e LITELLM_BASE_URL="$baseUrl" `
+            -e LITELLM_API_KEY="$apiKey" `
+            -e LITELLM_JSON_OUTPUT="true" `
+            $imageName
+    } -ThrottleLimit $NumParallelJobs
+}
diff --git a/scripts/health_check/run_parallel_health_checks.sh b/scripts/health_check/run_parallel_health_checks.sh
new file mode 100644
index 0000000000..9b6c5d9f39
--- /dev/null
+++ b/scripts/health_check/run_parallel_health_checks.sh
@@ -0,0 +1,85 @@
+#!/bin/bash
+# Parallel LiteLLM Health Check Runner (Bash version)
+#
+# This script runs multiple health check containers in parallel.
+#
+# Usage:
+#   export LITELLM_BASE_URL="https://litellm.example.com"
+#   export LITELLM_API_KEY="your-api-key"
+#   ./run_parallel_health_checks.sh [num_parallel_jobs] [image_name] [container_runtime]
+#
+# Defaults:
+#   - num_parallel_jobs: 16
+#   - image_name: litellm/litellm-health-check:latest
+#   - container_runtime: docker
+
+set -e
+
+# Default values
+NUM_PARALLEL_JOBS="${1:-16}"
+IMAGE_NAME="${2:-litellm/litellm-health-check:latest}"
+CONTAINER_RUNTIME="${3:-docker}"
+
+# A non-numeric or zero job count would make the loop below spin without starting any container
+if ! [[ "$NUM_PARALLEL_JOBS" =~ ^[1-9][0-9]*$ ]]; then
+    echo "Error: num_parallel_jobs must be a positive integer, got '$NUM_PARALLEL_JOBS'" >&2
+    exit 1
+fi
+
+# Set defaults for environment variables if not provided
+if [ -z "$LITELLM_BASE_URL" ]; then
+    export LITELLM_BASE_URL="https://litellm-perf-cache-and-router.onrender.com"
+    echo "Warning: LITELLM_BASE_URL not set, using default: $LITELLM_BASE_URL" >&2
+fi
+
+if [ -z "$LITELLM_API_KEY" ]; then
+    export LITELLM_API_KEY="sk-1234"
+    echo "Warning: LITELLM_API_KEY not set, using default: $LITELLM_API_KEY" >&2
+fi
+
+# Check if container runtime is available
+if ! command -v "$CONTAINER_RUNTIME" &> /dev/null; then
+    echo "Error: $CONTAINER_RUNTIME is not installed" >&2
+    exit 1
+fi
+
+# Print configuration
+echo "Running $NUM_PARALLEL_JOBS parallel health check containers..."
+echo "Using image: $IMAGE_NAME"
+echo "Container runtime: $CONTAINER_RUNTIME"
+echo "LiteLLM Base URL: $LITELLM_BASE_URL"
+echo ""
+echo "NOTE: This will run continuously. Press Ctrl+C to stop."
+echo ""
+echo "Troubleshooting:"
+echo " - If you see 'All connection attempts failed', check:"
+echo " 1. Is the LiteLLM proxy running on the expected port?"
+echo " 2. Set LITELLM_BASE_URL to the correct URL (e.g., http://host.docker.internal:PORT)"
+echo " 3. On Linux, you may need to use the host IP instead of host.docker.internal"
+echo ""
+
+# Function to run a single health check container
+run_health_check() {
+    "$CONTAINER_RUNTIME" run --rm \
+        -e LITELLM_BASE_URL="$LITELLM_BASE_URL" \
+        -e LITELLM_API_KEY="$LITELLM_API_KEY" \
+        -e LITELLM_JSON_OUTPUT="true" \
+        "$IMAGE_NAME"
+}
+
+# Run parallel health checks
+# This creates an infinite loop that keeps spawning containers
+# Each container tests all models, then exits, and a new one starts
+while true; do
+    # Start containers in parallel using background jobs
+    pids=()
+    for ((i=1; i<=NUM_PARALLEL_JOBS; i++)); do
+        run_health_check &
+        pids+=("$!")
+    done
+
+    # Wait for all background jobs to complete
+    for pid in "${pids[@]}"; do
+        wait "$pid" 2>/dev/null || true
+    done
+done