fix(cli): keep status from importing llama

This commit is contained in:
Tobi Lütke 2026-05-09 18:12:37 +00:00
parent d58fedf4b5
commit 3d991b2a47
No known key found for this signature in database
3 changed files with 49 additions and 9 deletions

View File

@ -12,6 +12,9 @@
- CLI: make `qmd status` skip native `node-llama-cpp` device probing by
default so status stays safe on machines with broken or unsupported GPU
drivers. Set `QMD_STATUS_DEVICE_PROBE=1` to opt in.
- CLI: lazy-load `node-llama-cpp` so lightweight commands such as
`qmd status` do not import native ML dependencies or trigger llama.cpp
builds on ARM/no-GPU machines. #491
## [2.1.0] - 2026-04-05

View File

@ -4,16 +4,28 @@
* Provides embeddings, text generation, and reranking using local GGUF models.
*/
import {
getLlama,
resolveModelFile,
LlamaChatSession,
LlamaLogLevel,
type Llama,
type LlamaModel,
type LlamaEmbeddingContext,
type Token as LlamaToken,
import type {
Llama,
LlamaModel,
LlamaEmbeddingContext,
Token as LlamaToken,
} from "node-llama-cpp";
/**
 * Hand-written shape of the node-llama-cpp runtime exports that this file
 * pulls out of the lazily loaded package. Only the members destructured in
 * this file are described, and their option bags are typed loosely.
 */
type NodeLlamaCppModule = {
  /** Log-level constants; only the `error` member is described here. */
  LlamaLogLevel: { error: unknown };
  /** Chat session constructor; the resulting instance exposes `prompt`. */
  LlamaChatSession: new (sessionOptions: { contextSequence: unknown }) => {
    prompt: (text: string, promptOptions?: Record<string, unknown>) => Promise<string>;
  };
  /** Takes a model reference and a cache directory, and resolves to a file path. */
  resolveModelFile: (modelUri: string, directory: string) => Promise<string>;
  /** Resolves to the `Llama` instance for the given options. */
  getLlama: (llamaOptions: Record<string, unknown>) => Promise<Llama>;
};
/**
 * Memoized promise for the dynamic import of the node-llama-cpp package.
 * Stays `null` until the first call to `loadNodeLlamaCpp`.
 */
let nodeLlamaCppImport: Promise<NodeLlamaCppModule> | null = null;

/**
 * Loads the node-llama-cpp package on first use and reuses that same import
 * promise afterwards, so modules that never call this do not load the package.
 *
 * The promise is stored before anything is awaited, so overlapping callers
 * share one import. Nothing here resets the cache, so a rejected import is
 * returned to later callers as well.
 *
 * @returns The subset of the package's exports described by `NodeLlamaCppModule`.
 */
async function loadNodeLlamaCpp(): Promise<NodeLlamaCppModule> {
  if (nodeLlamaCppImport != null) {
    return nodeLlamaCppImport;
  }
  const pendingImport = import("node-llama-cpp") as Promise<NodeLlamaCppModule>;
  nodeLlamaCppImport = pendingImport;
  return pendingImport;
}
import { homedir } from "os";
import { join } from "path";
import { existsSync, mkdirSync, statSync, unlinkSync, readdirSync, readFileSync, writeFileSync, openSync, readSync, closeSync } from "fs";
@ -344,6 +356,7 @@ export async function pullModels(
}
}
const { resolveModelFile } = await loadNodeLlamaCpp();
const path = await resolveModelFile(model, cacheDir);
validateGgufFile(path, model);
const sizeBytes = existsSync(path) ? statSync(path).size : 0;
@ -619,6 +632,7 @@ export class LlamaCpp implements LLM {
if (!this.llama) {
const gpuMode = resolveLlamaGpuMode();
const { getLlama, LlamaLogLevel } = await loadNodeLlamaCpp();
const loadLlama = async (gpu: LlamaGpuMode) =>
await getLlama({
build: allowBuild ? "autoAttempt" : "never",
@ -661,6 +675,7 @@ export class LlamaCpp implements LLM {
private async resolveModel(modelUri: string): Promise<string> {
this.ensureModelCacheDir();
// resolveModelFile handles HF URIs and downloads to the cache dir
const { resolveModelFile } = await loadNodeLlamaCpp();
const modelPath = await resolveModelFile(modelUri, this.modelCacheDir);
validateGgufFile(modelPath, modelUri);
return modelPath;
@ -1079,6 +1094,7 @@ export class LlamaCpp implements LLM {
// Create fresh context -> sequence -> session for each call
const context = await this.generateModel!.createContext();
const sequence = context.getSequence();
const { LlamaChatSession } = await loadNodeLlamaCpp();
const session = new LlamaChatSession({ contextSequence: sequence });
const maxTokens = options.maxTokens ?? 150;
@ -1158,6 +1174,7 @@ export class LlamaCpp implements LLM {
contextSize: this.expandContextSize,
});
const sequence = genContext.getSequence();
const { LlamaChatSession } = await loadNodeLlamaCpp();
const session = new LlamaChatSession({ contextSequence: sequence });
try {

View File

@ -0,0 +1,20 @@
import { describe, expect, test } from "vitest";
import { readFileSync } from "fs";
import { join } from "path";
describe("LLM module loading", () => {
  // Checks the source text of src/llm.ts rather than runtime behavior.
  test("node-llama-cpp is only dynamically imported by LLM operations", () => {
    // Resolved against the working directory, so the test run is expected to
    // start at the repository root.
    const source = readFileSync(join(process.cwd(), "src", "llm.ts"), "utf-8");

    // Static value import: `import` not followed by `type`, then an import
    // clause, then the package specifier. The clause is limited to identifier
    // characters, `*`, braces, commas and whitespace. Because quotes are
    // excluded, a match cannot begin at an unrelated earlier import and run
    // across its specifier into the type-only import of this package.
    const staticValueImport = /import\s+(?!type\b)[\w$*\s{},]*?from\s+["']node-llama-cpp["']/;
    // Bare side-effect import, which has no `from` clause. The dynamic
    // `import(...)` form is not matched because of its parenthesis.
    const sideEffectImport = /import\s*["']node-llama-cpp["']/;

    expect(source).not.toMatch(staticValueImport);
    expect(source).not.toMatch(sideEffectImport);
    expect(source).toContain('import("node-llama-cpp")');
  });

  // Importing the CLI entry module must resolve and expose its helper exports.
  test("importing the CLI for lightweight commands succeeds", async () => {
    const mod = await import("../src/cli/qmd.ts");
    expect(mod).toMatchObject({
      buildEditorUri: expect.any(Function),
      termLink: expect.any(Function),
    });
  });
});