fix(cli): keep status from importing llama
This commit is contained in:
parent
d58fedf4b5
commit
3d991b2a47
@ -12,6 +12,9 @@
|
||||
- CLI: make `qmd status` skip native `node-llama-cpp` device probing by
|
||||
default so status stays safe on machines with broken or unsupported GPU
|
||||
drivers. Set `QMD_STATUS_DEVICE_PROBE=1` to opt in.
|
||||
- CLI: lazy-load `node-llama-cpp` so lightweight commands such as
|
||||
`qmd status` do not import native ML dependencies or trigger llama.cpp
|
||||
builds on ARM/no-GPU machines. #491
|
||||
|
||||
## [2.1.0] - 2026-04-05
|
||||
|
||||
|
||||
35
src/llm.ts
35
src/llm.ts
@ -4,16 +4,28 @@
|
||||
* Provides embeddings, text generation, and reranking using local GGUF models.
|
||||
*/
|
||||
|
||||
import {
|
||||
getLlama,
|
||||
resolveModelFile,
|
||||
LlamaChatSession,
|
||||
LlamaLogLevel,
|
||||
type Llama,
|
||||
type LlamaModel,
|
||||
type LlamaEmbeddingContext,
|
||||
type Token as LlamaToken,
|
||||
import type {
|
||||
Llama,
|
||||
LlamaModel,
|
||||
LlamaEmbeddingContext,
|
||||
Token as LlamaToken,
|
||||
} from "node-llama-cpp";
|
||||
|
||||
/**
 * The runtime exports of `node-llama-cpp` that this file obtains through the
 * lazy loader.
 *
 * The shape is derived from the package's own declarations rather than
 * re-declared by hand, so call sites are checked against the real signatures
 * (`getLlama` options, `LlamaChatSession` constructor/prompt options,
 * `LlamaLogLevel` members) and cannot silently drift when the dependency
 * changes. `typeof import(...)` in a type position is erased at compile time,
 * so it does not load the native module.
 */
type NodeLlamaCppModule = Pick<
  typeof import("node-llama-cpp"),
  "getLlama" | "resolveModelFile" | "LlamaChatSession" | "LlamaLogLevel"
>;
|
||||
|
||||
/**
 * Memoized promise for the `node-llama-cpp` module. Remains `null` until the
 * first call to `loadNodeLlamaCpp()`.
 */
let nodeLlamaCppImport: Promise<NodeLlamaCppModule> | null = null;

/**
 * Load `node-llama-cpp` on demand via a dynamic `import()`.
 *
 * The first call starts the import and stores the resulting promise; every
 * later call returns that same stored promise, so the import expression is
 * evaluated at most once. Note that the stored promise is never cleared, so a
 * rejected import is also returned to all subsequent callers.
 *
 * @returns A promise for the module's runtime exports used by this file.
 */
async function loadNodeLlamaCpp(): Promise<NodeLlamaCppModule> {
  const pending =
    nodeLlamaCppImport ?? (import("node-llama-cpp") as Promise<NodeLlamaCppModule>);
  nodeLlamaCppImport = pending;
  return pending;
}
|
||||
|
||||
import { homedir } from "os";
|
||||
import { join } from "path";
|
||||
import { existsSync, mkdirSync, statSync, unlinkSync, readdirSync, readFileSync, writeFileSync, openSync, readSync, closeSync } from "fs";
|
||||
@ -344,6 +356,7 @@ export async function pullModels(
|
||||
}
|
||||
}
|
||||
|
||||
const { resolveModelFile } = await loadNodeLlamaCpp();
|
||||
const path = await resolveModelFile(model, cacheDir);
|
||||
validateGgufFile(path, model);
|
||||
const sizeBytes = existsSync(path) ? statSync(path).size : 0;
|
||||
@ -619,6 +632,7 @@ export class LlamaCpp implements LLM {
|
||||
if (!this.llama) {
|
||||
const gpuMode = resolveLlamaGpuMode();
|
||||
|
||||
const { getLlama, LlamaLogLevel } = await loadNodeLlamaCpp();
|
||||
const loadLlama = async (gpu: LlamaGpuMode) =>
|
||||
await getLlama({
|
||||
build: allowBuild ? "autoAttempt" : "never",
|
||||
@ -661,6 +675,7 @@ export class LlamaCpp implements LLM {
|
||||
private async resolveModel(modelUri: string): Promise<string> {
|
||||
this.ensureModelCacheDir();
|
||||
// resolveModelFile handles HF URIs and downloads to the cache dir
|
||||
const { resolveModelFile } = await loadNodeLlamaCpp();
|
||||
const modelPath = await resolveModelFile(modelUri, this.modelCacheDir);
|
||||
validateGgufFile(modelPath, modelUri);
|
||||
return modelPath;
|
||||
@ -1079,6 +1094,7 @@ export class LlamaCpp implements LLM {
|
||||
// Create fresh context -> sequence -> session for each call
|
||||
const context = await this.generateModel!.createContext();
|
||||
const sequence = context.getSequence();
|
||||
const { LlamaChatSession } = await loadNodeLlamaCpp();
|
||||
const session = new LlamaChatSession({ contextSequence: sequence });
|
||||
|
||||
const maxTokens = options.maxTokens ?? 150;
|
||||
@ -1158,6 +1174,7 @@ export class LlamaCpp implements LLM {
|
||||
contextSize: this.expandContextSize,
|
||||
});
|
||||
const sequence = genContext.getSequence();
|
||||
const { LlamaChatSession } = await loadNodeLlamaCpp();
|
||||
const session = new LlamaChatSession({ contextSequence: sequence });
|
||||
|
||||
try {
|
||||
|
||||
20
test/cli-lazy-llm-import.test.ts
Normal file
20
test/cli-lazy-llm-import.test.ts
Normal file
@ -0,0 +1,20 @@
|
||||
import { describe, expect, test } from "vitest";
|
||||
import { readFileSync } from "fs";
|
||||
import { join } from "path";
|
||||
|
||||
describe("LLM module loading", () => {
  test("node-llama-cpp is only dynamically imported by LLM operations", () => {
    const source = readFileSync(join(process.cwd(), "src", "llm.ts"), "utf-8");

    // A static *value* import of node-llama-cpp, confined to one statement:
    //  - `^[ \t]*` with the `m` flag anchors at the start of a line so the word
    //    "import" inside comments or identifiers is not picked up.
    //  - `(?!\s*type\b)` rejects `import type ...` even when `\s+` backtracks
    //    over extra whitespace.
    //  - `[^'";]*?` cannot cross a quote or semicolon, so the match can never
    //    begin at an unrelated earlier import and run on into the type-only
    //    import of node-llama-cpp.
    const staticValueImport =
      /^[ \t]*import\s+(?!\s*type\b)[^'";]*?from\s+["']node-llama-cpp["']/m;
    // A bare side-effect import (`import "node-llama-cpp"`) loads the native
    // module just the same and has no `from` clause.
    const sideEffectImport = /^[ \t]*import\s*["']node-llama-cpp["']/m;

    expect(source).not.toMatch(staticValueImport);
    expect(source).not.toMatch(sideEffectImport);
    expect(source).toContain('import("node-llama-cpp")');
  });

  test("importing the CLI for lightweight commands succeeds", async () => {
    // Importing the CLI entry module must not fail; the two exports checked
    // below confirm the module was actually evaluated.
    const mod = await import("../src/cli/qmd.ts");
    expect(mod).toMatchObject({
      buildEditorUri: expect.any(Function),
      termLink: expect.any(Function),
    });
  });
});
|
||||
Loading…
Reference in New Issue
Block a user