Lazy-load node-llama-cpp in src/llm.ts

What this patch does:
  * src/llm.ts: the static value import of "node-llama-cpp" becomes a
    type-only import. A module-level helper, loadNodeLlamaCpp(), performs a
    dynamic import() on first use and caches the resulting promise in
    nodeLlamaCppImport, so every later call returns the same promise.
  * Each call site that needs a runtime export (pullModels, the llama
    initialisation inside LlamaCpp, resolveModel, and the two places that
    construct a LlamaChatSession) now destructures it from
    `await loadNodeLlamaCpp()` immediately before use.
  * NodeLlamaCppModule is a hand-written structural type listing only the four
    exports the file uses; the dynamic import is asserted to that type.
  * CHANGELOG.md: adds an entry for the change.
  * test/cli-lazy-llm-import.test.ts: a source-text check that src/llm.ts has
    no non-type static import from "node-llama-cpp" and does contain a dynamic
    import("node-llama-cpp"), plus a check that src/cli/qmd.ts imports cleanly
    and exposes buildEditorUri and termLink as functions.

NOTE(review): this copy of the patch was rebuilt from a flattened version.
Line breaks, indentation, blank context lines and the wrap points of the
existing CHANGELOG entry are reconstructed; hunk line counts match the @@
headers, but confirm whitespace against the original commit before applying.
NOTE(review): the generic type arguments (Record<string, unknown>,
Promise<Llama>, Promise<string>, Promise<NodeLlamaCppModule>) were absent in
the flattened copy and are restored from how the values are used; confirm
against the original commit.
NOTE(review): because the promise itself is cached, a failed import() leaves a
rejected promise in nodeLlamaCppImport and later calls reject again without
retrying. Presumably acceptable for a missing native module; confirm.
NOTE(review): the test regex uses a lazy [\s\S]*? between `import` and the
module specifier, so it can span several statements. It would also match if
any non-type import statement appeared earlier in src/llm.ts than a
`from "node-llama-cpp"` clause.

diff --git a/CHANGELOG.md b/CHANGELOG.md
index fedaa0f..fbfcde6 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -12,6 +12,9 @@
 - CLI: make `qmd status` skip native `node-llama-cpp` device probing by
   default so status stays safe on machines with broken or unsupported
   GPU drivers. Set `QMD_STATUS_DEVICE_PROBE=1` to opt in.
+- CLI: lazy-load `node-llama-cpp` so lightweight commands such as
+  `qmd status` do not import native ML dependencies or trigger llama.cpp
+  builds on ARM/no-GPU machines. #491
 
 ## [2.1.0] - 2026-04-05
 
diff --git a/src/llm.ts b/src/llm.ts
index 7cccc3f..7d2bbe0 100644
--- a/src/llm.ts
+++ b/src/llm.ts
@@ -4,16 +4,28 @@
  * Provides embeddings, text generation, and reranking using local GGUF models.
  */
 
-import {
-  getLlama,
-  resolveModelFile,
-  LlamaChatSession,
-  LlamaLogLevel,
-  type Llama,
-  type LlamaModel,
-  type LlamaEmbeddingContext,
-  type Token as LlamaToken,
+import type {
+  Llama,
+  LlamaModel,
+  LlamaEmbeddingContext,
+  Token as LlamaToken,
 } from "node-llama-cpp";
+
+type NodeLlamaCppModule = {
+  getLlama: (options: Record<string, unknown>) => Promise<Llama>;
+  resolveModelFile: (model: string, cacheDir: string) => Promise<string>;
+  LlamaChatSession: new (options: { contextSequence: unknown }) => {
+    prompt: (prompt: string, options?: Record<string, unknown>) => Promise<string>;
+  };
+  LlamaLogLevel: { error: unknown };
+};
+
+let nodeLlamaCppImport: Promise<NodeLlamaCppModule> | null = null;
+async function loadNodeLlamaCpp(): Promise<NodeLlamaCppModule> {
+  nodeLlamaCppImport ??= import("node-llama-cpp") as Promise<NodeLlamaCppModule>;
+  return nodeLlamaCppImport;
+}
+
 import { homedir } from "os";
 import { join } from "path";
 import { existsSync, mkdirSync, statSync, unlinkSync, readdirSync, readFileSync, writeFileSync, openSync, readSync, closeSync } from "fs";
@@ -344,6 +356,7 @@ export async function pullModels(
       }
     }
 
+    const { resolveModelFile } = await loadNodeLlamaCpp();
     const path = await resolveModelFile(model, cacheDir);
     validateGgufFile(path, model);
     const sizeBytes = existsSync(path) ? statSync(path).size : 0;
@@ -619,6 +632,7 @@ export class LlamaCpp implements LLM {
 
     if (!this.llama) {
       const gpuMode = resolveLlamaGpuMode();
+      const { getLlama, LlamaLogLevel } = await loadNodeLlamaCpp();
       const loadLlama = async (gpu: LlamaGpuMode) =>
         await getLlama({
           build: allowBuild ? "autoAttempt" : "never",
@@ -661,6 +675,7 @@ export class LlamaCpp implements LLM {
   private async resolveModel(modelUri: string): Promise<string> {
     this.ensureModelCacheDir();
     // resolveModelFile handles HF URIs and downloads to the cache dir
+    const { resolveModelFile } = await loadNodeLlamaCpp();
     const modelPath = await resolveModelFile(modelUri, this.modelCacheDir);
     validateGgufFile(modelPath, modelUri);
     return modelPath;
@@ -1079,6 +1094,7 @@ export class LlamaCpp implements LLM {
     // Create fresh context -> sequence -> session for each call
     const context = await this.generateModel!.createContext();
     const sequence = context.getSequence();
+    const { LlamaChatSession } = await loadNodeLlamaCpp();
     const session = new LlamaChatSession({ contextSequence: sequence });
 
     const maxTokens = options.maxTokens ?? 150;
@@ -1158,6 +1174,7 @@ export class LlamaCpp implements LLM {
       contextSize: this.expandContextSize,
     });
     const sequence = genContext.getSequence();
+    const { LlamaChatSession } = await loadNodeLlamaCpp();
     const session = new LlamaChatSession({ contextSequence: sequence });
 
     try {
diff --git a/test/cli-lazy-llm-import.test.ts b/test/cli-lazy-llm-import.test.ts
new file mode 100644
index 0000000..5df3a09
--- /dev/null
+++ b/test/cli-lazy-llm-import.test.ts
@@ -0,0 +1,20 @@
+import { describe, expect, test } from "vitest";
+import { readFileSync } from "fs";
+import { join } from "path";
+
+describe("LLM module loading", () => {
+  test("node-llama-cpp is only dynamically imported by LLM operations", () => {
+    const source = readFileSync(join(process.cwd(), "src", "llm.ts"), "utf-8");
+
+    expect(source).not.toMatch(/import\s+(?!type\b)[\s\S]*?from\s+["']node-llama-cpp["']/);
+    expect(source).toContain('import("node-llama-cpp")');
+  });
+
+  test("importing the CLI for lightweight commands succeeds", async () => {
+    const mod = await import("../src/cli/qmd.ts");
+    expect(mod).toMatchObject({
+      buildEditorUri: expect.any(Function),
+      termLink: expect.any(Function),
+    });
+  });
+});