fix(status): avoid build attempts during device probe

This commit is contained in:
cocoon 2026-04-07 23:18:58 +08:00
parent c2f3a40372
commit 26e3d0c077
3 changed files with 38 additions and 8 deletions

View File

@ -461,10 +461,10 @@ async function showStatus(): Promise<void> {
} }
// Device / GPU info // Device / GPU info
console.log(`\n${c.bold}Device${c.reset}`);
try { try {
const llm = getDefaultLlamaCpp(); const llm = getDefaultLlamaCpp();
const device = await llm.getDeviceInfo(); const device = await llm.getDeviceInfo({ allowBuild: false });
console.log(`\n${c.bold}Device${c.reset}`);
if (device.gpu) { if (device.gpu) {
console.log(` GPU: ${c.green}${device.gpu}${c.reset} (offloading: ${device.gpuOffloading ? 'yes' : 'no'})`); console.log(` GPU: ${c.green}${device.gpu}${c.reset} (offloading: ${device.gpuOffloading ? 'yes' : 'no'})`);
if (device.gpuDevices.length > 0) { if (device.gpuDevices.length > 0) {
@ -486,8 +486,11 @@ async function showStatus(): Promise<void> {
console.log(` ${c.dim}Tip: Install CUDA, Vulkan, or Metal support for GPU acceleration.${c.reset}`); console.log(` ${c.dim}Tip: Install CUDA, Vulkan, or Metal support for GPU acceleration.${c.reset}`);
} }
console.log(` CPU: ${device.cpuCores} math cores`); console.log(` CPU: ${device.cpuCores} math cores`);
} catch { } catch (error) {
// Don't fail status if LLM init fails console.log(` Status: ${c.dim}skipped${c.reset} (status probe does not build llama.cpp backends)`);
if (error instanceof Error && error.message) {
console.log(` ${c.dim}${error.message}${c.reset}`);
}
} }
// Tips section // Tips section

View File

@ -550,7 +550,7 @@ export class LlamaCpp implements LLM {
/** /**
* Initialize the llama instance (lazy) * Initialize the llama instance (lazy)
*/ */
private async ensureLlama(): Promise<Llama> { private async ensureLlama(allowBuild = true): Promise<Llama> {
if (!this.llama) { if (!this.llama) {
// Allow override via QMD_LLAMA_GPU: "false" | "off" | "none" forces CPU // Allow override via QMD_LLAMA_GPU: "false" | "off" | "none" forces CPU
const gpuOverride = (process.env.QMD_LLAMA_GPU ?? "").toLowerCase(); const gpuOverride = (process.env.QMD_LLAMA_GPU ?? "").toLowerCase();
@ -558,9 +558,10 @@ export class LlamaCpp implements LLM {
const loadLlama = async (gpu: "auto" | false) => const loadLlama = async (gpu: "auto" | false) =>
await getLlama({ await getLlama({
build: "autoAttempt", build: allowBuild ? "autoAttempt" : "never",
logLevel: LlamaLogLevel.error, logLevel: LlamaLogLevel.error,
gpu, gpu,
skipDownload: !allowBuild,
}); });
let llama: Llama; let llama: Llama;
@ -1244,14 +1245,14 @@ export class LlamaCpp implements LLM {
* Get device/GPU info for status display. * Get device/GPU info for status display.
* Initializes llama if not already done. * Initializes llama if not already done.
*/ */
async getDeviceInfo(): Promise<{ async getDeviceInfo(options: { allowBuild?: boolean } = {}): Promise<{
gpu: string | false; gpu: string | false;
gpuOffloading: boolean; gpuOffloading: boolean;
gpuDevices: string[]; gpuDevices: string[];
vram?: { total: number; used: number; free: number }; vram?: { total: number; used: number; free: number };
cpuCores: number; cpuCores: number;
}> { }> {
const llama = await this.ensureLlama(); const llama = await this.ensureLlama(options.allowBuild ?? true);
const gpuDevices = await llama.getGpuDeviceNames(); const gpuDevices = await llama.getGpuDeviceNames();
let vram: { total: number; used: number; free: number } | undefined; let vram: { total: number; used: number; free: number } | undefined;
if (llama.gpu) { if (llama.gpu) {

View File

@ -193,6 +193,32 @@ describe("LlamaCpp rerank deduping", () => {
}); });
}); });
describe("LlamaCpp.getDeviceInfo", () => {
  // Verifies that the allowBuild option given to getDeviceInfo is forwarded
  // to ensureLlama, and that the stubbed llama's fields are reported back
  // in the device-info result.
  test("can skip build attempts for status probes", async () => {
    // Minimal stand-in for a llama instance; no real backend is loaded.
    const stubLlama = {
      gpu: "metal",
      supportsGpuOffloading: true,
      cpuMathCores: 8,
      getGpuDeviceNames: vi.fn().mockResolvedValue(["Apple GPU"]),
      getVramState: vi.fn().mockResolvedValue({ total: 1024, used: 256, free: 768 }),
    };
    const ensureLlamaMock = vi.fn().mockResolvedValue(stubLlama);

    // Cast to any so the ensureLlama method can be replaced with the mock.
    const instance = new LlamaCpp({}) as any;
    instance.ensureLlama = ensureLlamaMock;

    const info = await instance.getDeviceInfo({ allowBuild: false });

    // The probe must pass allowBuild=false through to ensureLlama.
    expect(instance.ensureLlama).toHaveBeenCalledWith(false);
    expect(info).toEqual({
      gpu: "metal",
      gpuOffloading: true,
      gpuDevices: ["Apple GPU"],
      vram: { total: 1024, used: 256, free: 768 },
      cpuCores: 8,
    });
  });
});
// ============================================================================= // =============================================================================
// Integration Tests (require actual models) // Integration Tests (require actual models)
// ============================================================================= // =============================================================================