Add observatory test workflow for RC/stable releases
- New reusable workflow that spins up a LiteLLM container from the release image, exposes it via a cloudflared tunnel, and triggers test runs on the Railway-hosted observatory
- Integrates into ghcr_deploy.yml for RC and stable releases
- Can also be triggered manually via workflow_dispatch
- Adds a placeholder litellm_config.yaml for observatory test models

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
8053be60df
commit
d7dd7ef33b
19
.github/observatory/litellm_config.yaml
vendored
Normal file
19
.github/observatory/litellm_config.yaml
vendored
Normal file
@ -0,0 +1,19 @@
|
||||
# LiteLLM Observatory Test Configuration
#
# CI mounts this file into a temporary LiteLLM instance that the observatory
# tests run against for RC/stable releases.
#
# Add one entry under model_list for each provider model that should be
# tested. Provider API keys are not stored here; CI injects them through
# environment variables, referenced below as os.environ/<NAME>.

model_list:
  - model_name: gpt-4o
    litellm_params:
      model: azure/gpt-4o
      api_key: os.environ/AZURE_API_KEY
      api_base: os.environ/AZURE_API_BASE

  - model_name: gpt-4o-mini
    litellm_params:
      model: azure/gpt-4o-mini
      api_key: os.environ/AZURE_API_KEY
      api_base: os.environ/AZURE_API_BASE
|
||||
9
.github/workflows/ghcr_deploy.yml
vendored
9
.github/workflows/ghcr_deploy.yml
vendored
@ -299,6 +299,15 @@ jobs:
|
||||
${{ github.event.inputs.release_type == 'stable' && format('{0}/berriai/litellm-spend_logs:main-stable', env.REGISTRY) || '' }}
|
||||
platforms: local,linux/amd64,linux/arm64,linux/arm64/v8
|
||||
|
||||
# Calls the reusable observatory workflow once the Docker Hub deploy job has
# finished, but only when the release type is "rc" or "stable".
run-observatory-tests:
  needs: docker-hub-deploy
  if: ${{ github.event.inputs.release_type == 'rc' || github.event.inputs.release_type == 'stable' }}
  uses: ./.github/workflows/run_observatory_tests.yml
  # Forward all of the caller's secrets to the called workflow.
  secrets: inherit
  with:
    tag: ${{ github.event.inputs.tag }}
    commit_hash: ${{ github.event.inputs.commit_hash }}
|
||||
|
||||
build-and-push-helm-chart:
|
||||
if: github.event.inputs.release_type != 'dev'
|
||||
needs: [docker-hub-deploy, build-and-push-image, build-and-push-image-database]
|
||||
|
||||
144
.github/workflows/run_observatory_tests.yml
vendored
Normal file
144
.github/workflows/run_observatory_tests.yml
vendored
Normal file
@ -0,0 +1,144 @@
|
||||
# Starts a LiteLLM proxy from the litellm/litellm image for the given tag,
# exposes it through a cloudflared quick tunnel, asks the observatory service
# to run its test suite against that tunnel, and polls the observatory queue
# until it is empty or the timeout expires.
name: Run Observatory Tests

on:
  workflow_dispatch:
    inputs:
      tag:
        description: "Docker image tag to test (e.g. v1.61.0.rc1)"
        required: true
        type: string
      commit_hash:
        description: "Commit hash (defaults to HEAD of current branch)"
        required: false
        type: string
  workflow_call:
    inputs:
      tag:
        description: "Docker image tag to test"
        required: true
        type: string
      commit_hash:
        description: "Commit hash of the release"
        required: true
        type: string

env:
  # Exported to every step; also forwarded into the container and sent to the
  # observatory as the key it should use against the proxy.
  LITELLM_MASTER_KEY: ${{ secrets.LITELLM_MASTER_KEY_STAGING }}

jobs:
  observatory-tests:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
        with:
          ref: ${{ inputs.commit_hash || github.sha }}

      - name: Start LiteLLM container
        # Inputs and secrets are handed to the shell as environment variables
        # instead of being spliced into the script text, so quotes or shell
        # metacharacters in their values cannot alter the command.
        env:
          IMAGE_TAG: ${{ inputs.tag }}
          AZURE_API_KEY: ${{ secrets.AZURE_API_KEY }}
          AZURE_API_BASE: ${{ secrets.AZURE_API_BASE }}
        run: |
          # "-e NAME" without a value forwards NAME from this step's environment.
          docker run -d \
            --name litellm-rc \
            -p 4000:4000 \
            -v "${GITHUB_WORKSPACE}/.github/observatory/litellm_config.yaml:/app/config.yaml" \
            -e LITELLM_MASTER_KEY \
            -e AZURE_API_KEY \
            -e AZURE_API_BASE \
            "litellm/litellm:${IMAGE_TAG}" \
            --config /app/config.yaml --port 4000

      - name: Wait for LiteLLM health check
        run: |
          echo "Waiting for LiteLLM to be ready..."
          # 30 attempts x 10s = 5 minutes.
          for i in $(seq 1 30); do
            if curl -s -f http://localhost:4000/health/liveliness > /dev/null 2>&1; then
              echo "LiteLLM is healthy"
              exit 0
            fi
            echo "Attempt $i/30 - not ready yet, waiting 10s..."
            sleep 10
          done
          echo "LiteLLM failed to start within 5 minutes"
          docker logs litellm-rc
          exit 1

      - name: Start cloudflared tunnel
        run: |
          # Install cloudflared. -f makes curl fail on an HTTP error instead of
          # saving the error page as the binary.
          curl -fsSL https://github.com/cloudflare/cloudflared/releases/latest/download/cloudflared-linux-amd64 -o /usr/local/bin/cloudflared
          chmod +x /usr/local/bin/cloudflared

          # Start a quick tunnel (no account needed) and capture the URL
          cloudflared tunnel --url http://localhost:4000 --no-autoupdate > /tmp/cloudflared.log 2>&1 &
          CLOUDFLARED_PID=$!
          # Exported so the cleanup step can stop the tunnel.
          echo "CLOUDFLARED_PID=$CLOUDFLARED_PID" >> "$GITHUB_ENV"

          # Wait for tunnel URL to appear in logs
          echo "Waiting for tunnel URL..."
          for i in $(seq 1 30); do
            TUNNEL_URL=$(grep -oP 'https://[a-z0-9-]+\.trycloudflare\.com' /tmp/cloudflared.log | head -1 || true)
            if [ -n "$TUNNEL_URL" ]; then
              echo "Tunnel URL: $TUNNEL_URL"
              echo "TUNNEL_URL=$TUNNEL_URL" >> "$GITHUB_ENV"
              exit 0
            fi
            sleep 2
          done
          echo "Failed to get tunnel URL"
          cat /tmp/cloudflared.log
          exit 1

      - name: Verify tunnel connectivity
        run: |
          echo "Testing tunnel at ${TUNNEL_URL}..."
          # The tunnel may not be reachable the instant its URL is logged, so
          # retry a few times instead of failing on the first miss.
          for i in $(seq 1 10); do
            if curl -sf "${TUNNEL_URL}/health/liveliness" > /dev/null; then
              echo "Tunnel is working"
              exit 0
            fi
            echo "Attempt $i/10 - tunnel not reachable yet, waiting 3s..."
            sleep 3
          done
          echo "Tunnel did not become reachable"
          exit 1

      - name: Trigger observatory test run
        env:
          OBSERVATORY_URL: ${{ secrets.OBSERVATORY_URL }}
          OBSERVATORY_API_KEY: ${{ secrets.OBSERVATORY_API_KEY }}
        run: |
          # Build the request body with jq so the values are JSON-escaped.
          PAYLOAD=$(jq -n \
            --arg deployment_url "$TUNNEL_URL" \
            --arg api_key "$LITELLM_MASTER_KEY" \
            '{
              deployment_url: $deployment_url,
              api_key: $api_key,
              test_suite: "TestOAIAzureRelease",
              models: ["gpt-4o-mini", "gpt-4o"]
            }')
          # -w appends the status code on its own final line of the output.
          RESPONSE=$(curl -s -w "\n%{http_code}" -X POST "${OBSERVATORY_URL}/run-test" \
            -H "Content-Type: application/json" \
            -H "Authorization: Bearer ${OBSERVATORY_API_KEY}" \
            -d "$PAYLOAD")
          HTTP_CODE=$(echo "$RESPONSE" | tail -1)
          BODY=$(echo "$RESPONSE" | head -n -1)
          echo "Response ($HTTP_CODE): $BODY"
          # Anything other than 2xx (including an unfollowed redirect) means
          # the run was not accepted.
          if [ "$HTTP_CODE" -lt 200 ] || [ "$HTTP_CODE" -ge 300 ]; then
            echo "Failed to trigger test run"
            exit 1
          fi

      - name: Poll for test completion
        env:
          OBSERVATORY_URL: ${{ secrets.OBSERVATORY_URL }}
          OBSERVATORY_API_KEY: ${{ secrets.OBSERVATORY_API_KEY }}
        run: |
          TIMEOUT=900  # 15 minutes
          INTERVAL=30
          ELAPSED=0
          while [ "$ELAPSED" -lt "$TIMEOUT" ]; do
            # -f makes curl fail on HTTP errors, so an error body (which has no
            # pending/active fields) is not mistaken for an empty queue.
            if STATUS=$(curl -sf "${OBSERVATORY_URL}/queue-status" \
                -H "Authorization: Bearer ${OBSERVATORY_API_KEY}"); then
              echo "Queue status (${ELAPSED}s elapsed): $STATUS"

              # A body jq cannot read yields "unknown", which keeps polling
              # rather than aborting the step or reporting completion.
              PENDING=$(echo "$STATUS" | jq -r '.pending // 0' 2>/dev/null || echo "unknown")
              ACTIVE=$(echo "$STATUS" | jq -r '.active // 0' 2>/dev/null || echo "unknown")

              if [ "$PENDING" = "0" ] && [ "$ACTIVE" = "0" ]; then
                echo "All tests completed"
                exit 0
              fi
            else
              echo "Queue status request failed (${ELAPSED}s elapsed), retrying..."
            fi

            sleep "$INTERVAL"
            ELAPSED=$((ELAPSED + INTERVAL))
          done
          echo "Timed out waiting for tests to complete after ${TIMEOUT}s"
          exit 1

      - name: Print LiteLLM logs on failure
        if: failure()
        run: |
          # Keep going if the container was never created so the tunnel log is
          # still printed.
          docker logs litellm-rc || true
          cat /tmp/cloudflared.log 2>/dev/null || true

      - name: Stop tunnel and container
        if: always()
        run: |
          # Best-effort teardown; either resource may not exist if an earlier
          # step failed.
          if [ -n "${CLOUDFLARED_PID:-}" ]; then
            kill "$CLOUDFLARED_PID" 2>/dev/null || true
          fi
          docker rm -f litellm-rc 2>/dev/null || true
|
||||
1
.gitignore
vendored
1
.gitignore
vendored
@ -89,6 +89,7 @@ tests/test_custom_dir/*
|
||||
test.py
|
||||
|
||||
litellm_config.yaml
|
||||
!.github/observatory/litellm_config.yaml
|
||||
.cursor
|
||||
.vscode/launch.json
|
||||
litellm/proxy/to_delete_loadtest_work/*
|
||||
|
||||
Loading…
Reference in New Issue
Block a user