From 50ce17bbfa643cb9c806e6bc6df3706444480af4 Mon Sep 17 00:00:00 2001
From: JohnRichardEnders
Date: Fri, 3 Apr 2026 17:10:47 +0200
Subject: [PATCH] feat(llm): resolve models as config > env > default

Separate hardcoded default from env var in DEFAULT_EMBED_MODEL so the
constructor can resolve: config param > env var > hardcoded default.
Also add env var support for QMD_GENERATE_MODEL and QMD_RERANK_MODEL.
An empty env var is treated as unset.

Tests clear and restore all three QMD_*_MODEL variables so a value set
in the developer's shell cannot change the outcome.

Co-Authored-By: Claude Opus 4.6 (1M context)
---
 src/llm.ts       | 13 +++++++------
 test/llm.test.ts | 47 +++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 54 insertions(+), 6 deletions(-)

diff --git a/src/llm.ts b/src/llm.ts
index dde9548..b8f5334 100644
--- a/src/llm.ts
+++ b/src/llm.ts
@@ -193,7 +193,7 @@ export type RerankDocument = {
 // HuggingFace model URIs for node-llama-cpp
 // Format: hf://
 // Override via QMD_EMBED_MODEL env var (e.g. hf:Qwen/Qwen3-Embedding-0.6B-GGUF/Qwen3-Embedding-0.6B-Q8_0.gguf)
-const DEFAULT_EMBED_MODEL = process.env.QMD_EMBED_MODEL ?? "hf:ggml-org/embeddinggemma-300M-GGUF/embeddinggemma-300M-Q8_0.gguf";
+const DEFAULT_EMBED_MODEL = "hf:ggml-org/embeddinggemma-300M-GGUF/embeddinggemma-300M-Q8_0.gguf";
 const DEFAULT_RERANK_MODEL = "hf:ggml-org/Qwen3-Reranker-0.6B-Q8_0-GGUF/qwen3-reranker-0.6b-q8_0.gguf";
 // const DEFAULT_GENERATE_MODEL = "hf:ggml-org/Qwen3-0.6B-GGUF/Qwen3-0.6B-Q8_0.gguf";
 const DEFAULT_GENERATE_MODEL = "hf:tobil/qmd-query-expansion-1.7B-gguf/qmd-query-expansion-1.7B-q4_k_m.gguf";
@@ -436,9 +436,11 @@ export class LlamaCpp implements LLM {
 
 
   constructor(config: LlamaCppConfig = {}) {
-    this.embedModelUri = config.embedModel || DEFAULT_EMBED_MODEL;
-    this.generateModelUri = config.generateModel || DEFAULT_GENERATE_MODEL;
-    this.rerankModelUri = config.rerankModel || DEFAULT_RERANK_MODEL;
+    // Model resolution order: explicit config > QMD_*_MODEL env var > hardcoded default.
+    // `||` rather than `??` so an empty string (e.g. `QMD_EMBED_MODEL=`) counts as unset.
+    this.embedModelUri = config.embedModel || process.env.QMD_EMBED_MODEL || DEFAULT_EMBED_MODEL;
+    this.generateModelUri = config.generateModel || process.env.QMD_GENERATE_MODEL || DEFAULT_GENERATE_MODEL;
+    this.rerankModelUri = config.rerankModel || process.env.QMD_RERANK_MODEL || DEFAULT_RERANK_MODEL;
     this.modelCacheDir = config.modelCacheDir || MODEL_CACHE_DIR;
     this.expandContextSize = resolveExpandContextSize(config.expandContextSize);
     this.inactivityTimeoutMs = config.inactivityTimeoutMs ?? DEFAULT_INACTIVITY_TIMEOUT_MS;
@@ -1559,8 +1561,7 @@ let defaultLlamaCpp: LlamaCpp | null = null;
  */
 export function getDefaultLlamaCpp(): LlamaCpp {
   if (!defaultLlamaCpp) {
-    const embedModel = process.env.QMD_EMBED_MODEL;
-    defaultLlamaCpp = new LlamaCpp(embedModel ? { embedModel } : {});
+    defaultLlamaCpp = new LlamaCpp();
   }
   return defaultLlamaCpp;
 }
diff --git a/test/llm.test.ts b/test/llm.test.ts
index b5de9e0..d336036 100644
--- a/test/llm.test.ts
+++ b/test/llm.test.ts
@@ -117,6 +117,53 @@ describe("LlamaCpp expand context size config", () => {
   });
 });
 
+describe("LlamaCpp model resolution (config > env > default)", () => {
+  const HARDCODED_EMBED = "hf:ggml-org/embeddinggemma-300M-GGUF/embeddinggemma-300M-Q8_0.gguf";
+  const HARDCODED_RERANK = "hf:ggml-org/Qwen3-Reranker-0.6B-Q8_0-GGUF/qwen3-reranker-0.6b-q8_0.gguf";
+  const HARDCODED_GENERATE = "hf:tobil/qmd-query-expansion-1.7B-gguf/qmd-query-expansion-1.7B-q4_k_m.gguf";
+  const FROM_ENV = {
+    QMD_EMBED_MODEL: "hf:env/embed.gguf",
+    QMD_RERANK_MODEL: "hf:env/rerank.gguf",
+    QMD_GENERATE_MODEL: "hf:env/generate.gguf",
+  };
+
+  // Resolves the three model URIs with exactly `env` applied to the QMD_*_MODEL variables,
+  // restoring the previous environment afterwards so tests cannot leak into each other.
+  function resolve(env: Partial<typeof FROM_ENV>, config: ConstructorParameters<typeof LlamaCpp>[0] = {}) {
+    const names = Object.keys(FROM_ENV);
+    const saved = names.map((name) => process.env[name]);
+    for (const name of names) delete process.env[name];
+    Object.assign(process.env, env);
+    try {
+      const llm = new LlamaCpp(config) as any;
+      return { embed: llm.embedModelUri, rerank: llm.rerankModelUri, generate: llm.generateModelUri };
+    } finally {
+      names.forEach((name, i) => {
+        const previous = saved[i];
+        if (previous === undefined) delete process.env[name];
+        else process.env[name] = previous;
+      });
+    }
+  }
+
+  test("uses hardcoded default when no config or env is set", () => {
+    expect(resolve({})).toEqual({ embed: HARDCODED_EMBED, rerank: HARDCODED_RERANK, generate: HARDCODED_GENERATE });
+  });
+
+  test("env var overrides hardcoded default", () => {
+    expect(resolve(FROM_ENV)).toEqual({ embed: FROM_ENV.QMD_EMBED_MODEL, rerank: FROM_ENV.QMD_RERANK_MODEL, generate: FROM_ENV.QMD_GENERATE_MODEL });
+  });
+
+  test("config overrides env var", () => {
+    const config = { embedModel: "hf:config/embed.gguf", rerankModel: "hf:config/rerank.gguf", generateModel: "hf:config/generate.gguf" };
+    expect(resolve(FROM_ENV, config)).toEqual({ embed: config.embedModel, rerank: config.rerankModel, generate: config.generateModel });
+  });
+
+  test("treats an empty env var as unset", () => {
+    expect(resolve({ QMD_EMBED_MODEL: "" }).embed).toBe(HARDCODED_EMBED);
+  });
+});
+
 describe("LlamaCpp rerank deduping", () => {
   test("deduplicates identical document texts before scoring", async () => {
     const llm = new LlamaCpp({}) as any;