fix(status): avoid build attempts during device probe
This commit is contained in:
parent
c2f3a40372
commit
26e3d0c077
@ -461,10 +461,10 @@ async function showStatus(): Promise<void> {
|
||||
}
|
||||
|
||||
// Device / GPU info
|
||||
console.log(`\n${c.bold}Device${c.reset}`);
|
||||
try {
|
||||
const llm = getDefaultLlamaCpp();
|
||||
const device = await llm.getDeviceInfo();
|
||||
console.log(`\n${c.bold}Device${c.reset}`);
|
||||
const device = await llm.getDeviceInfo({ allowBuild: false });
|
||||
if (device.gpu) {
|
||||
console.log(` GPU: ${c.green}${device.gpu}${c.reset} (offloading: ${device.gpuOffloading ? 'yes' : 'no'})`);
|
||||
if (device.gpuDevices.length > 0) {
|
||||
@ -486,8 +486,11 @@ async function showStatus(): Promise<void> {
|
||||
console.log(` ${c.dim}Tip: Install CUDA, Vulkan, or Metal support for GPU acceleration.${c.reset}`);
|
||||
}
|
||||
console.log(` CPU: ${device.cpuCores} math cores`);
|
||||
} catch {
|
||||
// Don't fail status if LLM init fails
|
||||
} catch (error) {
|
||||
console.log(` Status: ${c.dim}skipped${c.reset} (status probe does not build llama.cpp backends)`);
|
||||
if (error instanceof Error && error.message) {
|
||||
console.log(` ${c.dim}${error.message}${c.reset}`);
|
||||
}
|
||||
}
|
||||
|
||||
// Tips section
|
||||
|
||||
@ -550,7 +550,7 @@ export class LlamaCpp implements LLM {
|
||||
/**
|
||||
* Initialize the llama instance (lazy)
|
||||
*/
|
||||
private async ensureLlama(): Promise<Llama> {
|
||||
private async ensureLlama(allowBuild = true): Promise<Llama> {
|
||||
if (!this.llama) {
|
||||
// Allow override via QMD_LLAMA_GPU: "false" | "off" | "none" forces CPU
|
||||
const gpuOverride = (process.env.QMD_LLAMA_GPU ?? "").toLowerCase();
|
||||
@ -558,9 +558,10 @@ export class LlamaCpp implements LLM {
|
||||
|
||||
const loadLlama = async (gpu: "auto" | false) =>
|
||||
await getLlama({
|
||||
build: "autoAttempt",
|
||||
build: allowBuild ? "autoAttempt" : "never",
|
||||
logLevel: LlamaLogLevel.error,
|
||||
gpu,
|
||||
skipDownload: !allowBuild,
|
||||
});
|
||||
|
||||
let llama: Llama;
|
||||
@ -1244,14 +1245,14 @@ export class LlamaCpp implements LLM {
|
||||
* Get device/GPU info for status display.
|
||||
* Initializes llama if not already done.
|
||||
*/
|
||||
async getDeviceInfo(): Promise<{
|
||||
async getDeviceInfo(options: { allowBuild?: boolean } = {}): Promise<{
|
||||
gpu: string | false;
|
||||
gpuOffloading: boolean;
|
||||
gpuDevices: string[];
|
||||
vram?: { total: number; used: number; free: number };
|
||||
cpuCores: number;
|
||||
}> {
|
||||
const llama = await this.ensureLlama();
|
||||
const llama = await this.ensureLlama(options.allowBuild ?? true);
|
||||
const gpuDevices = await llama.getGpuDeviceNames();
|
||||
let vram: { total: number; used: number; free: number } | undefined;
|
||||
if (llama.gpu) {
|
||||
|
||||
@ -193,6 +193,32 @@ describe("LlamaCpp rerank deduping", () => {
|
||||
});
|
||||
});
|
||||
|
||||
describe("LlamaCpp.getDeviceInfo", () => {
  // A status probe passes { allowBuild: false }; this checks that the flag is
  // forwarded to ensureLlama and that the device summary is assembled from
  // the llama instance that ensureLlama resolves to.
  test("can skip build attempts for status probes", async () => {
    // Minimal stand-in for a llama instance: only the members this test
    // expects getDeviceInfo to consult are provided.
    const stubLlama = {
      gpu: "metal",
      supportsGpuOffloading: true,
      cpuMathCores: 8,
      getGpuDeviceNames: vi.fn().mockResolvedValue(["Apple GPU"]),
      getVramState: vi.fn().mockResolvedValue({ total: 1024, used: 256, free: 768 }),
    };
    const ensureLlamaMock = vi.fn().mockResolvedValue(stubLlama);

    // ensureLlama is private, so the instance is cast to `any` to swap it
    // for the mock; no real llama.cpp backend is loaded or built.
    const instance = new LlamaCpp({}) as any;
    instance.ensureLlama = ensureLlamaMock;

    const info = await instance.getDeviceInfo({ allowBuild: false });

    // The option must arrive at ensureLlama as a plain boolean argument.
    expect(instance.ensureLlama).toHaveBeenCalledWith(false);

    const expectedInfo = {
      gpu: "metal",
      gpuOffloading: true,
      gpuDevices: ["Apple GPU"],
      vram: { total: 1024, used: 256, free: 768 },
      cpuCores: 8,
    };
    expect(info).toEqual(expectedInfo);
  });
});
|
||||
|
||||
// =============================================================================
|
||||
// Integration Tests (require actual models)
|
||||
// =============================================================================
|
||||
|
||||
Loading…
Reference in New Issue
Block a user