// litellm/tests/pass_through_tests/test_vertex.test.js

const { VertexAI, RequestOptions } = require('@google-cloud/vertexai');
const fs = require('fs');
const path = require('path');
const os = require('os');
const { writeFileSync } = require('fs');


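// Keep a reference to the real fetch (falling back to node-fetch when the
// runtime has no global fetch) so the patched version below can delegate to it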
const originalFetch = global.fetch || require('node-fetch');

const { runVertexRequestOrSkip } = require('./vertex_test_helpers');

// Monkey-patch the fetch used internally
/**
 * Replacement for the global fetch that downgrades requests aimed at
 * https://localhost:4000 to plain HTTP, logs the final target, and then
 * delegates to the original fetch.
 *
 * @param {string|URL|*} url - Request target. Strings and URL instances are
 *   inspected (and rewritten when they match); any other value is forwarded
 *   untouched.
 * @param {object} [options] - Passed through to the original fetch unchanged.
 * @returns {Promise<*>} Whatever the original fetch resolves to.
 */
global.fetch = async function patchedFetch(url, options) {
    let target = url;

    // fetch() accepts more than strings. Only strings and URL objects carry a
    // directly rewritable address; calling startsWith on anything else would
    // throw, so other inputs are forwarded as-is.
    if (typeof url === 'string' || url instanceof URL) {
        target = String(url);
        // Modify the URL to use HTTP instead of HTTPS
        if (target.startsWith('https://localhost:4000')) {
            target = target.replace('https://', 'http://');
        }
    }

    console.log('Patched fetch sending request to:', target);
    return originalFetch(target, options);
};

/**
 * Builds a service-account credentials file and points
 * GOOGLE_APPLICATION_CREDENTIALS at it.
 *
 * The data starts from vertex_key.json located next to this file (when it
 * exists and contains a JSON object). private_key_id and private_key are then
 * overwritten from VERTEX_AI_PRIVATE_KEY_ID and VERTEX_AI_PRIVATE_KEY (empty
 * string when unset; literal "\n" sequences in the key become real newlines).
 * The result is written to a new file inside a freshly created directory
 * under the OS temp dir.
 *
 * @returns {void}
 */
function loadVertexAiCredentials() {
    console.log("loading vertex ai credentials");
    const filepath = path.dirname(__filename);
    const vertexKeyPath = path.join(filepath, "vertex_key.json");

    // Initialize default empty service account data
    let serviceAccountKeyData = {};

    // Try to read existing vertex_key.json
    try {
        const content = fs.readFileSync(vertexKeyPath, 'utf8');
        if (content && content.trim()) {
            const parsed = JSON.parse(content);
            // Only adopt a JSON object: `null` would make the property
            // assignments below throw, and an array would drop them on
            // serialisation.
            if (parsed !== null && typeof parsed === 'object' && !Array.isArray(parsed)) {
                serviceAccountKeyData = parsed;
            }
        }
    } catch (error) {
        // A missing file is expected; any other failure (bad JSON, permission
        // problem) stays non-fatal but is reported instead of being hidden.
        if (error.code !== 'ENOENT') {
            console.warn(`Ignoring unreadable ${vertexKeyPath}: ${error.message}`);
        }
    }

    // Update with environment variables
    const privateKeyId = process.env.VERTEX_AI_PRIVATE_KEY_ID || "";
    const privateKey = (process.env.VERTEX_AI_PRIVATE_KEY || "").replace(/\\n/g, "\n");

    serviceAccountKeyData.private_key_id = privateKeyId;
    serviceAccountKeyData.private_key = privateKey;

    // The file holds a private key, so avoid a guessable, default-permission
    // path in the shared temp dir: create a uniquely named directory and
    // restrict the file to its owner.
    const tempDir = fs.mkdtempSync(path.join(os.tmpdir(), 'vertex-credentials-'));
    const tempFilePath = path.join(tempDir, 'credentials.json');
    writeFileSync(tempFilePath, JSON.stringify(serviceAccountKeyData, null, 2), { mode: 0o600 });

    // Set environment variable
    process.env.GOOGLE_APPLICATION_CREDENTIALS = tempFilePath;
}

// Run credential loading before tests, remembering whatever
// GOOGLE_APPLICATION_CREDENTIALS held beforehand so it can be restored.
let previousCredentialsPath;
beforeAll(() => {
    previousCredentialsPath = process.env.GOOGLE_APPLICATION_CREDENTIALS;
    loadVertexAiCredentials();
});

// The generated credentials file contains a private key, so do not leave it
// behind once the suite is done.
afterAll(() => {
    const generatedPath = process.env.GOOGLE_APPLICATION_CREDENTIALS;

    // Only delete a file this suite produced: the path must differ from the
    // pre-existing value and live under the OS temp dir. This protects a
    // developer's real credentials file if loading failed before the
    // variable was reassigned.
    const isGeneratedTempFile =
        Boolean(generatedPath) &&
        generatedPath !== previousCredentialsPath &&
        generatedPath.startsWith(os.tmpdir() + path.sep);
    if (isGeneratedTempFile) {
        fs.rmSync(generatedPath, { force: true });
    }

    // Avoid leaving the variable pointing at a file that no longer exists.
    if (previousCredentialsPath === undefined) {
        delete process.env.GOOGLE_APPLICATION_CREDENTIALS;
    } else {
        process.env.GOOGLE_APPLICATION_CREDENTIALS = previousCredentialsPath;
    }
});

// Configure Jest to retry flaky tests up to 3 times (useful for 429 rate limiting)
jest.retryTimes(3);

// Vertex requests can exceed Jest's default 5s test timeout in CI / under
// load, so every test in this file is given this larger budget.
const VERTEX_TEST_TIMEOUT_MS = 30000;

describe('Vertex AI Tests', () => {
    // Creates a generative model client that targets the
    // localhost:4000/vertex-ai endpoint and sends the LiteLLM API key as a
    // custom header on every request.
    const buildGenerativeModel = () => {
        const client = new VertexAI({
            project: 'litellm-ci-cd',
            location: 'global',
            apiEndpoint: "localhost:4000/vertex-ai"
        });
        const headers = new Headers({ "x-litellm-api-key": "sk-1234" });

        return client.getGenerativeModel(
            { model: 'gemini-3.1-flash-lite' },
            { customHeaders: headers }
        );
    };

    // Wraps a prompt string in the single-turn user request shape.
    const userRequest = (text) => ({
        contents: [{ role: 'user', parts: [{ text }] }],
    });

    // Streaming path: every chunk and the aggregated response must be defined.
    test(
        'should successfully generate content from Vertex AI',
        async () => {
            const model = buildGenerativeModel();
            const prompt = userRequest('How are you doing today tell me your name?');

            const streamed = await runVertexRequestOrSkip(
                () => model.generateContentStream(prompt)
            );
            // A null result ends the test early (presumably the helper's
            // "skip" signal — confirm in vertex_test_helpers).
            if (streamed === null) {
                return;
            }

            expect(streamed).toBeDefined();

            for await (const chunk of streamed.stream) {
                console.log('stream chunk:', JSON.stringify(chunk));
                expect(chunk).toBeDefined();
            }

            const finalResponse = await streamed.response;
            console.log('aggregated response:', JSON.stringify(finalResponse));
            expect(finalResponse).toBeDefined();
        },
        VERTEX_TEST_TIMEOUT_MS
    );

    // Non-streaming path: the result and its response must be defined.
    test(
        'should successfully generate non-streaming content from Vertex AI',
        async () => {
            const model = buildGenerativeModel();
            const prompt = userRequest('What is 2+2?');

            const outcome = await runVertexRequestOrSkip(
                () => model.generateContent(prompt)
            );
            // Same early exit as the streaming test when the helper yields null.
            if (outcome === null) {
                return;
            }

            expect(outcome).toBeDefined();
            expect(outcome.response).toBeDefined();
            console.log('non-streaming response:', JSON.stringify(outcome.response));
        },
        VERTEX_TEST_TIMEOUT_MS
    );
});