diff --git a/.github/observatory/litellm_config.yaml b/.github/observatory/litellm_config.yaml
new file mode 100644
index 0000000000..fe95c023bc
--- /dev/null
+++ b/.github/observatory/litellm_config.yaml
@@ -0,0 +1,19 @@
+# LiteLLM Observatory Test Configuration
+# This config is used by CI to spin up a temporary LiteLLM instance
+# for running observatory tests against RC/stable releases.
+#
+# Add model definitions for the providers you want to test.
+# Provider API keys are injected via environment variables in CI.
+
+model_list:
+  - model_name: gpt-4o
+    litellm_params:
+      model: azure/gpt-4o
+      api_key: os.environ/AZURE_API_KEY
+      api_base: os.environ/AZURE_API_BASE
+
+  - model_name: gpt-4o-mini
+    litellm_params:
+      model: azure/gpt-4o-mini
+      api_key: os.environ/AZURE_API_KEY
+      api_base: os.environ/AZURE_API_BASE
diff --git a/.github/workflows/ghcr_deploy.yml b/.github/workflows/ghcr_deploy.yml
index f67538a427..c317309d91 100644
--- a/.github/workflows/ghcr_deploy.yml
+++ b/.github/workflows/ghcr_deploy.yml
@@ -299,6 +299,15 @@ jobs:
             ${{ github.event.inputs.release_type == 'stable' && format('{0}/berriai/litellm-spend_logs:main-stable', env.REGISTRY) || '' }}
           platforms: local,linux/amd64,linux/arm64,linux/arm64/v8
 
+  run-observatory-tests:
+    if: github.event.inputs.release_type == 'rc' || github.event.inputs.release_type == 'stable'
+    needs: [docker-hub-deploy]
+    uses: ./.github/workflows/run_observatory_tests.yml
+    with:
+      tag: ${{ github.event.inputs.tag }}
+      commit_hash: ${{ github.event.inputs.commit_hash }}
+    secrets: inherit
+
   build-and-push-helm-chart:
     if: github.event.inputs.release_type != 'dev'
     needs: [docker-hub-deploy, build-and-push-image, build-and-push-image-database]
diff --git a/.github/workflows/run_observatory_tests.yml b/.github/workflows/run_observatory_tests.yml
new file mode 100644
--- /dev/null
+++ b/.github/workflows/run_observatory_tests.yml
@@ -0,0 +1,185 @@
+# Runs the Observatory test suite against a released LiteLLM Docker image.
+#
+# Flow: start the image locally, expose it through a cloudflared quick tunnel,
+# ask the Observatory service to test the tunnel URL, then poll the
+# Observatory queue until it drains.
+#
+# Secrets and caller-supplied inputs reach shell steps through `env:` rather
+# than `${{ }}` interpolation inside `run:`, so their values are never parsed
+# as shell syntax.
+name: Run Observatory Tests
+on:
+  workflow_dispatch:
+    inputs:
+      tag:
+        description: "Docker image tag to test (e.g. v1.61.0.rc1)"
+        required: true
+        type: string
+      commit_hash:
+        description: "Commit hash (defaults to HEAD of current branch)"
+        required: false
+        type: string
+  workflow_call:
+    inputs:
+      tag:
+        description: "Docker image tag to test"
+        required: true
+        type: string
+      commit_hash:
+        description: "Commit hash of the release"
+        required: true
+        type: string
+
+env:
+  LITELLM_MASTER_KEY: ${{ secrets.LITELLM_MASTER_KEY_STAGING }}
+
+jobs:
+  observatory-tests:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+        with:
+          ref: ${{ inputs.commit_hash || github.sha }}
+
+      - name: Start LiteLLM container
+        env:
+          IMAGE_TAG: ${{ inputs.tag }}
+          AZURE_API_KEY: ${{ secrets.AZURE_API_KEY }}
+          AZURE_API_BASE: ${{ secrets.AZURE_API_BASE }}
+        run: |
+          # `-e NAME` without a value forwards NAME from this step's
+          # environment, keeping secret values off the docker command line.
+          docker run -d \
+            --name litellm-rc \
+            -p 4000:4000 \
+            -v "${GITHUB_WORKSPACE}/.github/observatory/litellm_config.yaml:/app/config.yaml" \
+            -e LITELLM_MASTER_KEY \
+            -e AZURE_API_KEY \
+            -e AZURE_API_BASE \
+            "litellm/litellm:${IMAGE_TAG}" \
+            --config /app/config.yaml --port 4000
+
+      - name: Wait for LiteLLM health check
+        run: |
+          echo "Waiting for LiteLLM to be ready..."
+          for i in $(seq 1 30); do
+            if curl -s -f http://localhost:4000/health/liveliness > /dev/null 2>&1; then
+              echo "LiteLLM is healthy"
+              exit 0
+            fi
+            echo "Attempt $i/30 - not ready yet, waiting 10s..."
+            sleep 10
+          done
+          echo "LiteLLM failed to start within 5 minutes"
+          docker logs litellm-rc
+          exit 1
+
+      - name: Start cloudflared tunnel
+        run: |
+          # Install cloudflared. -f makes an HTTP error fail the step instead
+          # of saving the error page as the executable.
+          # NOTE(review): "latest" is unpinned; consider pinning a release.
+          curl -fsSL https://github.com/cloudflare/cloudflared/releases/latest/download/cloudflared-linux-amd64 -o /usr/local/bin/cloudflared
+          chmod +x /usr/local/bin/cloudflared
+
+          # Start a quick tunnel (no account needed) and capture the URL
+          cloudflared tunnel --url http://localhost:4000 --no-autoupdate > /tmp/cloudflared.log 2>&1 &
+          CLOUDFLARED_PID=$!
+          echo "CLOUDFLARED_PID=$CLOUDFLARED_PID" >> "$GITHUB_ENV"
+
+          # Wait for tunnel URL to appear in logs
+          echo "Waiting for tunnel URL..."
+          for i in $(seq 1 30); do
+            TUNNEL_URL=$(grep -oP 'https://[a-z0-9-]+\.trycloudflare\.com' /tmp/cloudflared.log | head -1 || true)
+            if [ -n "$TUNNEL_URL" ]; then
+              echo "Tunnel URL: $TUNNEL_URL"
+              echo "TUNNEL_URL=$TUNNEL_URL" >> "$GITHUB_ENV"
+              exit 0
+            fi
+            sleep 2
+          done
+          echo "Failed to get tunnel URL"
+          cat /tmp/cloudflared.log
+          exit 1
+
+      - name: Verify tunnel connectivity
+        run: |
+          echo "Testing tunnel at ${TUNNEL_URL}..."
+          curl -sf "${TUNNEL_URL}/health/liveliness"
+          echo "Tunnel is working"
+
+      - name: Trigger observatory test run
+        env:
+          OBSERVATORY_URL: ${{ secrets.OBSERVATORY_URL }}
+          OBSERVATORY_API_KEY: ${{ secrets.OBSERVATORY_API_KEY }}
+        run: |
+          # Build the request body with jq so every value is JSON-escaped.
+          PAYLOAD=$(jq -n \
+            --arg deployment_url "$TUNNEL_URL" \
+            --arg api_key "$LITELLM_MASTER_KEY" \
+            '{
+              deployment_url: $deployment_url,
+              api_key: $api_key,
+              test_suite: "TestOAIAzureRelease",
+              models: ["gpt-4o-mini", "gpt-4o"]
+            }')
+          RESPONSE=$(curl -s -w "\n%{http_code}" -X POST "${OBSERVATORY_URL}/run-test" \
+            -H "Content-Type: application/json" \
+            -H "Authorization: Bearer ${OBSERVATORY_API_KEY}" \
+            -d "$PAYLOAD")
+          HTTP_CODE=$(echo "$RESPONSE" | tail -1)
+          BODY=$(echo "$RESPONSE" | head -n -1)
+          echo "Response ($HTTP_CODE): $BODY"
+          # Only a 2xx status means the run was accepted.
+          if [ "$HTTP_CODE" -lt 200 ] || [ "$HTTP_CODE" -ge 300 ]; then
+            echo "Failed to trigger test run"
+            exit 1
+          fi
+
+      - name: Poll for test completion
+        env:
+          OBSERVATORY_URL: ${{ secrets.OBSERVATORY_URL }}
+          OBSERVATORY_API_KEY: ${{ secrets.OBSERVATORY_API_KEY }}
+        run: |
+          TIMEOUT=900  # 15 minutes
+          INTERVAL=30
+          ELAPSED=0
+          # NOTE(review): a drained queue is the only completion signal; this
+          # step never inspects pass/fail results. Confirm that is intended.
+          while [ "$ELAPSED" -lt "$TIMEOUT" ]; do
+            # -f fails the step on an HTTP error instead of letting the error
+            # body fall through the `// 0` defaults below and read as
+            # "queue empty". --retry absorbs transient failures.
+            STATUS=$(curl -sf --retry 3 "${OBSERVATORY_URL}/queue-status" \
+              -H "Authorization: Bearer ${OBSERVATORY_API_KEY}")
+            echo "Queue status (${ELAPSED}s elapsed): $STATUS"
+
+            PENDING=$(echo "$STATUS" | jq -r '.pending // 0')
+            ACTIVE=$(echo "$STATUS" | jq -r '.active // 0')
+
+            if [ "$PENDING" = "0" ] && [ "$ACTIVE" = "0" ]; then
+              echo "All tests completed"
+              exit 0
+            fi
+
+            sleep "$INTERVAL"
+            ELAPSED=$((ELAPSED + INTERVAL))
+          done
+          echo "Timed out waiting for tests to complete after ${TIMEOUT}s"
+          exit 1
+
+      - name: Print LiteLLM logs on failure
+        if: failure()
+        run: |
+          docker logs litellm-rc
+          cat /tmp/cloudflared.log 2>/dev/null || true
+
+      - name: Stop tunnel and container
+        if: always()
+        run: |
+          # Best-effort teardown so the public tunnel to the proxy is closed.
+          if [ -n "${CLOUDFLARED_PID:-}" ]; then
+            kill "$CLOUDFLARED_PID" 2>/dev/null || true
+          fi
+          docker rm -f litellm-rc 2>/dev/null || true
diff --git a/.gitignore b/.gitignore
index c43df98a9e..76cf6fdba2 100644
--- a/.gitignore
+++ b/.gitignore
@@ -89,6 +89,7 @@ tests/test_custom_dir/*
 test.py
 
 litellm_config.yaml
+!.github/observatory/litellm_config.yaml
 .cursor
 .vscode/launch.json
 litellm/proxy/to_delete_loadtest_work/*