diff --git a/CHANGELOG.md b/CHANGELOG.md index d530dfa..fedaa0f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,9 @@ `.toLowerCase()` call made indexed paths unreachable on case-sensitive filesystems (Linux). `qmd update` automatically migrates legacy lowercase paths without re-embedding. +- CLI: make `qmd status` skip native `node-llama-cpp` device probing by + default so status stays safe on machines with broken or unsupported GPU + drivers. Set `QMD_STATUS_DEVICE_PROBE=1` to opt in. ## [2.1.0] - 2026-04-05 diff --git a/src/cli/qmd.ts b/src/cli/qmd.ts index 50ae764..bfcd392 100755 --- a/src/cli/qmd.ts +++ b/src/cli/qmd.ts @@ -468,35 +468,40 @@ async function showStatus(): Promise { } // Device / GPU info - console.log(`\n${c.bold}Device${c.reset}`); - try { - const llm = getDefaultLlamaCpp(); - const device = await llm.getDeviceInfo({ allowBuild: false }); - if (device.gpu) { - console.log(` GPU: ${c.green}${device.gpu}${c.reset} (offloading: ${device.gpuOffloading ? 'yes' : 'no'})`); - if (device.gpuDevices.length > 0) { - // Deduplicate and count GPUs - const counts = new Map(); - for (const name of device.gpuDevices) { - counts.set(name, (counts.get(name) || 0) + 1); + // Important: probing node-llama-cpp can abort the whole process on machines with + // incompatible GPU drivers (for example Vulkan loader present but no usable driver). + // Keep `qmd status` safe by default and make the expensive/native probe opt-in. + if (process.env.QMD_STATUS_DEVICE_PROBE === "1") { + console.log(`\n${c.bold}Device${c.reset}`); + try { + const llm = getDefaultLlamaCpp(); + const device = await llm.getDeviceInfo({ allowBuild: false }); + if (device.gpu) { + console.log(` GPU: ${c.green}${device.gpu}${c.reset} (offloading: ${device.gpuOffloading ? 'yes' : 'no'})`); + if (device.gpuDevices.length > 0) { + // Deduplicate and count GPUs + const counts = new Map(); + for (const name of device.gpuDevices) { + counts.set(name, (counts.get(name) || 0) + 1); + } + const deviceStr = Array.from(counts.entries()) + .map(([name, count]) => count > 1 ? `${count}× ${name}` : name) + .join(', '); + console.log(` Devices: ${deviceStr}`); } - const deviceStr = Array.from(counts.entries()) - .map(([name, count]) => count > 1 ? `${count}× ${name}` : name) - .join(', '); - console.log(` Devices: ${deviceStr}`); + if (device.vram) { + console.log(` VRAM: ${formatBytes(device.vram.free)} free / ${formatBytes(device.vram.total)} total`); + } + } else { + console.log(` GPU: ${c.yellow}none${c.reset} (running on CPU — models will be slow)`); + console.log(` ${c.dim}Tip: Install CUDA, Vulkan, or Metal support for GPU acceleration.${c.reset}`); } - if (device.vram) { - console.log(` VRAM: ${formatBytes(device.vram.free)} free / ${formatBytes(device.vram.total)} total`); + console.log(` CPU: ${device.cpuCores} math cores`); + } catch (error) { + console.log(` Status: ${c.dim}probe failed${c.reset}`); + if (error instanceof Error && error.message) { + console.log(` ${c.dim}${error.message}${c.reset}`); } - } else { - console.log(` GPU: ${c.yellow}none${c.reset} (running on CPU — models will be slow)`); - console.log(` ${c.dim}Tip: Install CUDA, Vulkan, or Metal support for GPU acceleration.${c.reset}`); - } - console.log(` CPU: ${device.cpuCores} math cores`); - } catch (error) { - console.log(` Status: ${c.dim}skipped${c.reset} (status probe does not build llama.cpp backends)`); - if (error instanceof Error && error.message) { - console.log(` ${c.dim}${error.message}${c.reset}`); } } diff --git a/test/cli.test.ts b/test/cli.test.ts index 6b953f3..2e49deb 100644 --- a/test/cli.test.ts +++ b/test/cli.test.ts @@ -381,6 +381,12 @@ describe("CLI Status Command", () => { // Should show collection info expect(stdout).toContain("Collection"); }); + + test("skips device probing by default", async () => { + const { stdout, exitCode } = await runQmd(["status"]); + expect(exitCode).toBe(0); + expect(stdout).not.toContain("Device"); + }); }); describe("CLI Search Command", () => {