Add health endpoint tests to CI with database and Redis support (#17877)

- Add database and Redis setup to the litellm_mapped_tests_proxy job in CircleCI
- Create shared test helpers in tests/test_litellm/proxy/conftest.py for proxy test setup
- Refactor health endpoint tests to use the shared helpers from conftest
- Support automatic Redis cache configuration when REDIS_HOST is set
- Ensure a minimal config is created when Redis/database is needed
2025-12-12 07:35:50 -08:00 · 2025-12-12 07:35:50 -08:00 · c9063d13b1
commit c9063d13b1
parent 762b429d6c
3 changed files with 357 additions and 4 deletions
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@ -1388,6 +1388,53 @@ jobs:
    resource_class: xlarge
    steps:
      - setup_litellm_test_deps
+      # dockerize is used by the two "Wait for ..." steps below to block until a TCP port accepts connections.
+      - run:
+          name: Install dockerize
+          command: |
+            wget https://github.com/jwilder/dockerize/releases/download/v0.6.1/dockerize-linux-amd64-v0.6.1.tar.gz
+            sudo tar -C /usr/local/bin -xzvf dockerize-linux-amd64-v0.6.1.tar.gz
+            rm dockerize-linux-amd64-v0.6.1.tar.gz
+      # Throwaway Postgres 14 container; user, password and database name must match the DATABASE_URL exported below.
+      - run:
+          name: Start PostgreSQL Database
+          command: |
+            docker run -d \
+              --name postgres-db \
+              -e POSTGRES_USER=postgres \
+              -e POSTGRES_PASSWORD=postgres \
+              -e POSTGRES_DB=circle_test \
+              -p 5432:5432 \
+              postgres:14
+      # NOTE(review): this waits (up to 1 minute) only for port 5432 to accept TCP connections;
+      # it does not check that Postgres is ready to serve queries.
+      - run:
+          name: Wait for PostgreSQL to be ready
+          command: dockerize -wait tcp://localhost:5432 -timeout 1m
+      # Throwaway Redis container published on the default port 6379, without a password.
+      - run:
+          name: Start Redis
+          command: |
+            docker run -d \
+              --name redis-db \
+              -p 6379:6379 \
+              redis:7-alpine
+      - run:
+          name: Wait for Redis to be ready
+          command: dockerize -wait tcp://localhost:6379 -timeout 1m
+      # The exports are appended to $BASH_ENV so that later steps (the prisma setup and "Run proxy tests")
+      # see DATABASE_URL / REDIS_HOST / REDIS_PORT; CircleCI loads $BASH_ENV at the start of each run step.
+      # The proxy test helpers only enable the Redis cache when REDIS_HOST is set.
+      - run:
+          name: Set DATABASE_URL and Redis environment variables
+          command: |
+            echo 'export DATABASE_URL="postgresql://postgres:postgres@localhost:5432/circle_test"' >> $BASH_ENV
+            echo 'export REDIS_HOST="localhost"' >> $BASH_ENV
+            echo 'export REDIS_PORT="6379"' >> $BASH_ENV
+            source $BASH_ENV
+      - run:
+          name: Install prisma for database setup
+          command: |
+            pip install prisma==0.11.0
+      # "set +e" disables exit-on-error, so a non-zero exit from docker/entrypoint.sh does not fail this step.
+      # NOTE(review): presumably a deliberate best-effort; confirm, because a failed database setup
+      # would then only surface later as test failures.
+      - run:
+          name: Run prisma ./docker/entrypoint.sh
+          command: |
+            set +e
+            chmod +x docker/entrypoint.sh
+            ./docker/entrypoint.sh
+            set -e
      - run:
          name: Run proxy tests
          command: |
--- a/tests/test_litellm/proxy/conftest.py
+++ b/tests/test_litellm/proxy/conftest.py
@ -0,0 +1,166 @@
+"""
+Shared fixtures and helpers for proxy tests.
+
+This module provides reusable utilities for creating proxy test clients
+with database and Redis cache configuration.
+"""
+import asyncio
+import os
+import tempfile
+from typing import Dict, Optional
+
+import pytest
+import yaml
+from fastapi.testclient import TestClient
+
+
+def build_cache_config(enable_cache: bool = True) -> Optional[Dict]:
+    """
+    Derive the Redis cache settings from the process environment.
+
+    Reads REDIS_HOST, REDIS_PORT (default "6379") and REDIS_PASSWORD.
+
+    Args:
+        enable_cache: When False, no cache settings are produced.
+
+    Returns:
+        dict: ``{"cache": True, "cache_params": {...}}`` when caching is enabled
+        and REDIS_HOST is non-empty; otherwise None. ``cache_params`` holds
+        "type", "host", "port" and, only if REDIS_PASSWORD is non-empty,
+        "password".
+    """
+    host = os.environ.get("REDIS_HOST")
+    if not (enable_cache and host):
+        return None
+
+    # A purely numeric port becomes an int; anything else is passed through unchanged.
+    raw_port = os.environ.get("REDIS_PORT", "6379")
+    port = int(raw_port) if raw_port.isdigit() else raw_port
+
+    params = {"type": "redis", "host": host, "port": port}
+
+    password = os.environ.get("REDIS_PASSWORD")
+    if password:
+        params["password"] = password
+
+    return {"cache": True, "cache_params": params}
+
+
+def build_minimal_proxy_config(database_url: Optional[str] = None, **init_options) -> Dict:
+    """
+    Assemble a minimal proxy configuration as a plain dictionary.
+
+    Args:
+        database_url: Database URL to use; when empty, DATABASE_URL from the
+            environment is used instead.
+        **init_options: Extra options:
+            - master_key: stored under general_settings (default: "sk-1234")
+            - enable_cache: forwarded to build_cache_config (default: True)
+            - success_callback: copied into litellm_settings when not None
+            - any other key except "debug" is copied into litellm_settings,
+              unless that key is already present there
+
+    Returns:
+        dict: ``{"general_settings": {...}, "litellm_settings": {...}}``,
+        suitable for dumping to a YAML config file.
+    """
+    general_settings = {"master_key": init_options.get("master_key", "sk-1234")}
+    litellm_settings = {}
+
+    # Database: explicit argument first, environment second.
+    resolved_db_url = database_url or os.getenv("DATABASE_URL")
+    if resolved_db_url:
+        general_settings["database_url"] = resolved_db_url
+
+    # Redis cache settings are only present when REDIS_HOST is available.
+    redis_settings = build_cache_config(enable_cache=init_options.get("enable_cache", True))
+    if redis_settings:
+        litellm_settings.update(redis_settings)
+
+    callback = init_options.get("success_callback")
+    if callback is not None:
+        litellm_settings["success_callback"] = callback
+
+    # Everything else is passed through, without overriding what was set above.
+    reserved = {"master_key", "debug", "success_callback", "database_url", "enable_cache"}
+    for name, value in init_options.items():
+        if name in reserved or name in litellm_settings:
+            continue
+        litellm_settings[name] = value
+
+    return {
+        "general_settings": general_settings,
+        "litellm_settings": litellm_settings,
+    }
+
+
+def set_proxy_environment_variables(monkeypatch, database_url: Optional[str] = None) -> None:
+    """
+    Register the database and Redis environment variables through monkeypatch.
+
+    DATABASE_URL is set from ``database_url`` or, failing that, from the current
+    environment. The Redis variables are only touched when REDIS_HOST is
+    non-empty: REDIS_HOST and REDIS_PORT (default "6379") are always set, and
+    REDIS_PASSWORD only when it is non-empty.
+
+    Args:
+        monkeypatch: pytest monkeypatch fixture (only ``setenv`` is used)
+        database_url: Optional database URL (falls back to DATABASE_URL env var)
+    """
+    effective_db_url = database_url or os.environ.get("DATABASE_URL")
+    if effective_db_url:
+        monkeypatch.setenv("DATABASE_URL", effective_db_url)
+
+    host = os.environ.get("REDIS_HOST")
+    if not host:
+        # No Redis configured: leave the Redis variables alone.
+        return
+
+    monkeypatch.setenv("REDIS_HOST", host)
+    monkeypatch.setenv("REDIS_PORT", os.environ.get("REDIS_PORT", "6379"))
+
+    password = os.environ.get("REDIS_PASSWORD")
+    if password:
+        monkeypatch.setenv("REDIS_PASSWORD", password)
+
+
+def create_proxy_test_client(monkeypatch, database_url: Optional[str] = None, **init_options) -> TestClient:
+    """
+    Create a proxy TestClient with optional database and Redis cache configuration.
+
+    The proxy is initialised from ``test_configs/test_config_no_auth.yaml`` next
+    to this file, unless a database/Redis is configured or that file is missing.
+    In that case a minimal config is generated with build_minimal_proxy_config
+    and written to a temporary ``.yaml`` file. The temporary file is kept for
+    the lifetime of the test process and removed at interpreter exit.
+
+    Args:
+        monkeypatch: pytest monkeypatch fixture
+        database_url: Optional database URL (falls back to DATABASE_URL env var)
+        **init_options: Additional configuration options:
+            - master_key: API key for authentication (default: "sk-1234")
+            - enable_cache: Whether to enable Redis cache (default: True)
+            - success_callback: Callback function for success events
+            - debug: Enable debug mode
+
+    Returns:
+        TestClient: FastAPI test client for the proxy server
+    """
+    # Local stdlib imports keep this helper self-contained.
+    import atexit
+    import contextlib
+
+    from litellm.proxy.proxy_server import cleanup_router_config_variables, initialize, app
+
+    cleanup_router_config_variables()
+
+    # Default config shipped alongside the proxy tests
+    test_dir = os.path.dirname(os.path.abspath(__file__))
+    default_config_fp = os.path.join(test_dir, "test_configs", "test_config_no_auth.yaml")
+
+    # Use truthiness, like build_cache_config / build_minimal_proxy_config do:
+    # an empty REDIS_HOST or DATABASE_URL configures nothing, so it must not
+    # force a generated config either.
+    enable_cache = init_options.get("enable_cache", True)
+    needs_redis = bool(enable_cache and os.getenv("REDIS_HOST"))
+    needs_db = bool(database_url or os.getenv("DATABASE_URL"))
+
+    # Generate a minimal config if:
+    # 1. We need Redis/database settings that might not be in the default config, OR
+    # 2. The default config file doesn't exist
+    if needs_redis or needs_db or not os.path.exists(default_config_fp):
+        minimal_config = build_minimal_proxy_config(database_url=database_url, **init_options)
+
+        with tempfile.NamedTemporaryFile(
+            mode="w", suffix=".yaml", delete=False, encoding="utf-8"
+        ) as f:
+            yaml.dump(minimal_config, f)
+            config_fp = f.name
+
+        def _remove_generated_config(path: str = config_fp) -> None:
+            """Delete the generated config file, ignoring an already-missing file."""
+            with contextlib.suppress(OSError):
+                os.unlink(path)
+
+        # delete=False keeps the file readable by the proxy after the handle is
+        # closed; remove it at interpreter exit so test runs do not accumulate
+        # stray temp files. It is not deleted earlier because the proxy may
+        # read the config path again after initialize().
+        atexit.register(_remove_generated_config)
+    else:
+        config_fp = default_config_fp
+
+    # Set environment variables
+    set_proxy_environment_variables(monkeypatch, database_url=database_url)
+
+    # Initialize proxy
+    asyncio.run(initialize(config=config_fp, debug=init_options.get("debug", False)))
+    return TestClient(app)
+
--- a/tests/test_litellm/proxy/health_endpoints/test_health_endpoints.py
+++ b/tests/test_litellm/proxy/health_endpoints/test_health_endpoints.py
@ -1,7 +1,6 @@
-import asyncio
-import json
 import os
 import sys
+import time
 from datetime import datetime, timedelta
 from unittest.mock import MagicMock, patch, AsyncMock

@ -11,14 +10,15 @@ sys.path.insert(

 import pytest
 from prisma.errors import ClientNotConnectedError, HTTPClientClosedError, PrismaError
-
-from litellm.proxy._types import ProxyErrorTypes, ProxyException
 from litellm.proxy.health_endpoints._health_endpoints import (
    _db_health_readiness_check,
    db_health_cache,
    health_services_endpoint,
 )

+# Import shared proxy test helpers from conftest
+from tests.test_litellm.proxy.conftest import create_proxy_test_client
+

@pytest.mark.asyncio
@pytest.mark.parametrize(
@ -126,3 +126,143 @@ async def test_health_services_endpoint_sqs(status, error_message):
        assert result["message"] == error_message
        mock_instance.async_health_check.assert_awaited_once()

+
+@pytest.fixture(scope="function")
+def proxy_client(monkeypatch):
+    """
+    Per-test fixture yielding a TestClient bound to the proxy's FastAPI app.
+
+    The client comes from create_proxy_test_client and is entered as a context
+    manager for the duration of the test. No real HTTP server is started:
+    TestClient drives the app in-process, and entering the context is what
+    runs the app's lifespan (startup/shutdown) handlers.
+
+    Environment-driven behaviour (handled by create_proxy_test_client):
+    - DATABASE_URL: when set, it is written into the generated proxy config,
+      so export it before running the tests to exercise database access.
+    - REDIS_HOST: when set, Redis cache settings are added to the generated
+      proxy config.
+    """
+    test_client = create_proxy_test_client(monkeypatch)
+    with test_client:
+        yield test_client
+
+
+def _assert_liveness_ok(client, path, max_duration_ms=100):
+    """
+    GET ``path`` and assert the liveness contract shared by both endpoints.
+
+    Checks, in order: HTTP 200, a JSON body equal to "I'm alive!", and a
+    round-trip time below ``max_duration_ms``. The measured duration is
+    printed afterwards for visibility in CI logs.
+
+    Args:
+        client: test client exposing ``get(path)``
+        path: endpoint path to request
+        max_duration_ms: upper bound for the request duration in milliseconds
+    """
+    # Measure the time taken for the health check call only
+    start_time = time.perf_counter()
+    response = client.get(path)
+    duration_ms = (time.perf_counter() - start_time) * 1000
+
+    assert response.status_code == 200, f"Expected 200 OK, got {response.status_code}: {response.text}"
+
+    # FastAPI JSON-encodes the returned string
+    assert response.json() == "I'm alive!", f"Expected 'I'm alive!' message, got: {response.json()}"
+
+    # Orchestration systems poll this endpoint frequently, so it must stay fast
+    assert duration_ms < max_duration_ms, (
+        f"Health check took {duration_ms:.2f}ms, expected < {max_duration_ms}ms for a simple endpoint"
+    )
+
+    print(f"\n{path} response time: {duration_ms:.2f}ms")
+
+
+def test_health_liveliness_endpoint(proxy_client):
+    """
+    Test that /health/liveliness endpoint returns 200 OK with "I'm alive!" message.
+    This is a critical orchestration endpoint that must be simple and fast (< 100ms).
+    """
+    _assert_liveness_ok(proxy_client, "/health/liveliness")
+
+
+def test_health_liveness_endpoint(proxy_client):
+    """
+    Test that /health/liveness endpoint (Kubernetes standard name) also works,
+    with the same response and the same < 100ms bound as /health/liveliness.
+    """
+    _assert_liveness_ok(proxy_client, "/health/liveness")
+
+
+@pytest.mark.skipif(
+    not os.getenv("DATABASE_URL"),
+    reason="DATABASE_URL is not set; the readiness test requires a database",
+)
+def test_health_readiness_with_database(proxy_client):
+    """
+    Test /health/readiness endpoint when database is available.
+    This test requires DATABASE_URL to be set in the environment and is
+    skipped otherwise.
+
+    Checks that the endpoint answers 200 within 500ms, that the response
+    contains "status" and "litellm_version", and, when a "db" field is
+    reported, that its value is one of the expected states. The full
+    response is printed for visibility in CI logs.
+
+    Example:
+        DATABASE_URL=postgresql://user:pass@localhost:5432/litellm pytest tests/test_litellm/proxy/health_endpoints/test_health_endpoints.py::test_health_readiness_with_database -v
+    """
+    # Measure the time taken for the health check call only
+    start_time = time.perf_counter()
+    response = proxy_client.get("/health/readiness")
+    duration_ms = (time.perf_counter() - start_time) * 1000
+
+    # Assert response status
+    assert response.status_code == 200, f"Expected 200 OK, got {response.status_code}: {response.text}"
+
+    # Verify response is fast (readiness includes DB check, so < 500ms is reasonable)
+    # This is critical for orchestration systems (Kubernetes) that poll frequently
+    assert duration_ms < 500, f"Health check took {duration_ms:.2f}ms, expected < 500ms for readiness endpoint (includes DB check)"
+
+    # Assert response contains expected fields
+    response_data = response.json()
+    assert "status" in response_data, "Response should contain 'status' field"
+    assert "litellm_version" in response_data, "Response should contain 'litellm_version' field"
+
+    # Display all health endpoint response fields
+    print("\n" + "-"*60)
+    print("HEALTH ENDPOINT RESPONSE")
+    print("-"*60)
+    print(f"Status: {response_data.get('status', 'unknown')}")
+    print(f"Database: {response_data.get('db', 'not reported')}")
+    print(f"LiteLLM Version: {response_data.get('litellm_version', 'unknown')}")
+    print(f"Success Callbacks: {response_data.get('success_callbacks', [])}")
+    print(f"Cache: {response_data.get('cache', 'none')}")
+    print(f"Use AioHTTP Transport: {response_data.get('use_aiohttp_transport', 'unknown')}")
+    print(f"Response time: {duration_ms:.2f}ms")
+
+    if "db" not in response_data:
+        print("\n⚠ Database status not reported in health endpoint response")
+    else:
+        db_status = response_data["db"]
+        print(f"\n✓ Database status from endpoint: {db_status}")
+
+        # Only the set of allowed values is enforced here; a "disconnected"
+        # database is reported in the output but does not fail the test.
+        assert db_status in ["connected", "disconnected", "unknown"], \
+            f"Unexpected db status: {db_status}"
+
+        if db_status == "connected":
+            print("  ✓ Database connection confirmed by proxy server")
+        elif db_status == "disconnected":
+            print("  ⚠ Database reported as disconnected by proxy server")
+        else:
+            print(f"  ? Database status: {db_status}")
+
+    print("="*60 + "\n")
+