From 3d991b2a476992f89639f72e37c926371239a8c4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tobi=20L=C3=BCtke?= Date: Sat, 9 May 2026 18:12:37 +0000 Subject: [PATCH 01/17] fix(cli): keep status from importing llama --- CHANGELOG.md | 3 +++ src/llm.ts | 35 ++++++++++++++++++++++++-------- test/cli-lazy-llm-import.test.ts | 20 ++++++++++++++++++ 3 files changed, 49 insertions(+), 9 deletions(-) create mode 100644 test/cli-lazy-llm-import.test.ts diff --git a/CHANGELOG.md b/CHANGELOG.md index fedaa0f..fbfcde6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,6 +12,9 @@ - CLI: make `qmd status` skip native `node-llama-cpp` device probing by default so status stays safe on machines with broken or unsupported GPU drivers. Set `QMD_STATUS_DEVICE_PROBE=1` to opt in. +- CLI: lazy-load `node-llama-cpp` so lightweight commands such as + `qmd status` do not import native ML dependencies or trigger llama.cpp + builds on ARM/no-GPU machines. #491 ## [2.1.0] - 2026-04-05 diff --git a/src/llm.ts b/src/llm.ts index 7cccc3f..7d2bbe0 100644 --- a/src/llm.ts +++ b/src/llm.ts @@ -4,16 +4,28 @@ * Provides embeddings, text generation, and reranking using local GGUF models. */ -import { - getLlama, - resolveModelFile, - LlamaChatSession, - LlamaLogLevel, - type Llama, - type LlamaModel, - type LlamaEmbeddingContext, - type Token as LlamaToken, +import type { + Llama, + LlamaModel, + LlamaEmbeddingContext, + Token as LlamaToken, } from "node-llama-cpp"; + +type NodeLlamaCppModule = { + getLlama: (options: Record) => Promise; + resolveModelFile: (model: string, cacheDir: string) => Promise; + LlamaChatSession: new (options: { contextSequence: unknown }) => { + prompt: (prompt: string, options?: Record) => Promise; + }; + LlamaLogLevel: { error: unknown }; +}; + +let nodeLlamaCppImport: Promise | null = null; +async function loadNodeLlamaCpp(): Promise { + nodeLlamaCppImport ??= import("node-llama-cpp") as Promise; + return nodeLlamaCppImport; +} + import { homedir } from "os"; import { join } from "path"; import { existsSync, mkdirSync, statSync, unlinkSync, readdirSync, readFileSync, writeFileSync, openSync, readSync, closeSync } from "fs"; @@ -344,6 +356,7 @@ export async function pullModels( } } + const { resolveModelFile } = await loadNodeLlamaCpp(); const path = await resolveModelFile(model, cacheDir); validateGgufFile(path, model); const sizeBytes = existsSync(path) ? statSync(path).size : 0; @@ -619,6 +632,7 @@ export class LlamaCpp implements LLM { if (!this.llama) { const gpuMode = resolveLlamaGpuMode(); + const { getLlama, LlamaLogLevel } = await loadNodeLlamaCpp(); const loadLlama = async (gpu: LlamaGpuMode) => await getLlama({ build: allowBuild ? "autoAttempt" : "never", @@ -661,6 +675,7 @@ export class LlamaCpp implements LLM { private async resolveModel(modelUri: string): Promise { this.ensureModelCacheDir(); // resolveModelFile handles HF URIs and downloads to the cache dir + const { resolveModelFile } = await loadNodeLlamaCpp(); const modelPath = await resolveModelFile(modelUri, this.modelCacheDir); validateGgufFile(modelPath, modelUri); return modelPath; @@ -1079,6 +1094,7 @@ export class LlamaCpp implements LLM { // Create fresh context -> sequence -> session for each call const context = await this.generateModel!.createContext(); const sequence = context.getSequence(); + const { LlamaChatSession } = await loadNodeLlamaCpp(); const session = new LlamaChatSession({ contextSequence: sequence }); const maxTokens = options.maxTokens ?? 150; @@ -1158,6 +1174,7 @@ export class LlamaCpp implements LLM { contextSize: this.expandContextSize, }); const sequence = genContext.getSequence(); + const { LlamaChatSession } = await loadNodeLlamaCpp(); const session = new LlamaChatSession({ contextSequence: sequence }); try { diff --git a/test/cli-lazy-llm-import.test.ts b/test/cli-lazy-llm-import.test.ts new file mode 100644 index 0000000..5df3a09 --- /dev/null +++ b/test/cli-lazy-llm-import.test.ts @@ -0,0 +1,20 @@ +import { describe, expect, test } from "vitest"; +import { readFileSync } from "fs"; +import { join } from "path"; + +describe("LLM module loading", () => { + test("node-llama-cpp is only dynamically imported by LLM operations", () => { + const source = readFileSync(join(process.cwd(), "src", "llm.ts"), "utf-8"); + + expect(source).not.toMatch(/import\s+(?!type\b)[\s\S]*?from\s+["']node-llama-cpp["']/); + expect(source).toContain('import("node-llama-cpp")'); + }); + + test("importing the CLI for lightweight commands succeeds", async () => { + const mod = await import("../src/cli/qmd.ts"); + expect(mod).toMatchObject({ + buildEditorUri: expect.any(Function), + termLink: expect.any(Function), + }); + }); +}); From d045a8bab6f1e484a3d0b37ec8d88d9442085fb2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tobi=20L=C3=BCtke?= Date: Sat, 9 May 2026 18:12:37 +0000 Subject: [PATCH 02/17] fix(search): support CJK FTS queries --- src/store.ts | 131 ++++++++++++++++++++++++++++++++++++++++----- test/store.test.ts | 77 ++++++++++++++++++++++---- 2 files changed, 184 insertions(+), 24 deletions(-) diff --git a/src/store.ts b/src/store.ts index 1f296f7..d6d5cd7 100644 --- a/src/store.ts +++ b/src/store.ts @@ -733,6 +733,73 @@ export function verifySqliteVecLoaded(db: Database): void { let _sqliteVecAvailable: boolean | null = null; +const CJK_CHAR_PATTERN = /[\p{Script=Han}\p{Script=Hiragana}\p{Script=Katakana}\p{Script=Hangul}]/u; +const CJK_RUN_PATTERN = /[\p{Script=Han}\p{Script=Hiragana}\p{Script=Katakana}\p{Script=Hangul}]+/gu; +const FTS_CJK_NORMALIZED_VERSION = "1"; + +/** + * FTS5's unicode61 tokenizer does not segment CJK text into searchable words. + * Normalize CJK runs by spacing every character so exact CJK queries can be + * translated into phrase queries while Latin text keeps the default tokenizer. + */ +export function normalizeCjkForFTS(text: string): string { + return text.replace(CJK_RUN_PATTERN, run => ` ${Array.from(run).join(' ')} `); +} + +function containsCjk(text: string): boolean { + return CJK_CHAR_PATTERN.test(text); +} + +function sanitizeFTS5Phrase(phrase: string): string { + return normalizeCjkForFTS(phrase) + .split(/\s+/) + .map(t => sanitizeFTS5Term(t)) + .filter(t => t) + .join(' '); +} + +function rebuildFTSForCjkNormalization(db: Database): void { + const version = db.prepare(`SELECT value FROM store_config WHERE key = 'fts_cjk_normalized_version'`).get() as { value?: string } | undefined; + if (version?.value === FTS_CJK_NORMALIZED_VERSION) return; + + try { + db.exec(`DELETE FROM documents_fts WHERE rowid >= 0`); + } catch { + // Some older/corrupt FTS5 shadow-table states can reject bulk deletes even + // though reads still work. Recreate the virtual table; documents_fts is a + // derived index, so rebuilding it from documents/content is safe. + db.exec(`DROP TABLE IF EXISTS documents_fts`); + db.exec(` + CREATE VIRTUAL TABLE documents_fts USING fts5( + filepath, title, body, + tokenize='porter unicode61' + ) + `); + } + const rows = db.prepare(` + SELECT d.id, d.collection, d.path, d.title, content.doc as body + FROM documents d + JOIN content ON content.hash = d.hash + WHERE d.active = 1 + `).all() as { id: number; collection: string; path: string; title: string; body: string }[]; + const insert = db.prepare(`INSERT INTO documents_fts(rowid, filepath, title, body) VALUES (?, ?, ?, ?)`); + const rebuild = db.transaction(() => { + for (const row of rows) { + insert.run( + row.id, + normalizeCjkForFTS(`${row.collection}/${row.path}`), + normalizeCjkForFTS(row.title), + normalizeCjkForFTS(row.body) + ); + } + }); + rebuild(); + db.prepare(` + INSERT OR REPLACE INTO store_config(key, value) + VALUES ('fts_cjk_normalized_version', ?) + `).run(FTS_CJK_NORMALIZED_VERSION); +} + function initializeDatabase(db: Database): void { try { loadSqliteVec(db); @@ -838,9 +905,12 @@ function initializeDatabase(db: Database): void { ) `); - // Triggers to keep FTS in sync + // Triggers keep FTS in sync for callers that write directly to documents. + // Production indexing paths rebuild entries in TypeScript so CJK text can be + // normalized before it reaches the unicode61 tokenizer. + db.exec(`DROP TRIGGER IF EXISTS documents_ai`); db.exec(` - CREATE TRIGGER IF NOT EXISTS documents_ai AFTER INSERT ON documents + CREATE TRIGGER documents_ai AFTER INSERT ON documents WHEN new.active = 1 BEGIN INSERT INTO documents_fts(rowid, filepath, title, body) @@ -853,14 +923,16 @@ function initializeDatabase(db: Database): void { END `); + db.exec(`DROP TRIGGER IF EXISTS documents_ad`); db.exec(` - CREATE TRIGGER IF NOT EXISTS documents_ad AFTER DELETE ON documents BEGIN + CREATE TRIGGER documents_ad AFTER DELETE ON documents BEGIN DELETE FROM documents_fts WHERE rowid = old.id; END `); + db.exec(`DROP TRIGGER IF EXISTS documents_au`); db.exec(` - CREATE TRIGGER IF NOT EXISTS documents_au AFTER UPDATE ON documents + CREATE TRIGGER documents_au AFTER UPDATE ON documents BEGIN -- Delete from FTS if no longer active DELETE FROM documents_fts WHERE rowid = old.id AND new.active = 0; @@ -875,6 +947,8 @@ function initializeDatabase(db: Database): void { WHERE new.active = 1; END `); + + rebuildFTSForCjkNormalization(db); } // ============================================================================= @@ -2077,6 +2151,28 @@ export function insertContent(db: Database, hash: string, content: string, creat .run(hash, content, createdAt); } +function rebuildDocumentFTS(db: Database, documentId: number): void { + const row = db.prepare(` + SELECT d.id, d.collection, d.path, d.title, content.doc as body + FROM documents d + JOIN content ON content.hash = d.hash + WHERE d.id = ? AND d.active = 1 + `).get(documentId) as { id: number; collection: string; path: string; title: string; body: string } | undefined; + + db.prepare(`DELETE FROM documents_fts WHERE rowid = ?`).run(documentId); + if (!row) return; + + db.prepare(` + INSERT INTO documents_fts(rowid, filepath, title, body) + VALUES (?, ?, ?, ?) + `).run( + row.id, + normalizeCjkForFTS(`${row.collection}/${row.path}`), + normalizeCjkForFTS(row.title), + normalizeCjkForFTS(row.body) + ); +} + /** * Insert a new document into the documents table. */ @@ -2098,6 +2194,9 @@ export function insertDocument( modified_at = excluded.modified_at, active = 1 `).run(collectionName, path, title, hash, createdAt, modifiedAt); + + const row = db.prepare(`SELECT id FROM documents WHERE collection = ? AND path = ?`).get(collectionName, path) as { id: number } | undefined; + if (row) rebuildDocumentFTS(db, row.id); } /** @@ -2148,15 +2247,7 @@ export function findOrMigrateLegacyDocument( if (result.changes === 0) return false; - // FTS5 does not reliably update via the documents_au trigger's - // INSERT OR REPLACE. Manually rebuild the FTS entry. - db.prepare(`DELETE FROM documents_fts WHERE rowid = ?`).run(legacy.id); - db.prepare(` - INSERT INTO documents_fts(rowid, filepath, title, body) - SELECT id, collection || '/' || path, title, - (SELECT doc FROM content WHERE hash = documents.hash) - FROM documents WHERE id = ? - `).run(legacy.id); + rebuildDocumentFTS(db, legacy.id); return true; }); @@ -2177,6 +2268,7 @@ export function updateDocumentTitle( ): void { db.prepare(`UPDATE documents SET title = ?, modified_at = ? WHERE id = ?`) .run(title, modifiedAt, documentId); + rebuildDocumentFTS(db, documentId); } /** @@ -2192,6 +2284,7 @@ export function updateDocument( ): void { db.prepare(`UPDATE documents SET title = ?, hash = ?, modified_at = ? WHERE id = ?`) .run(title, hash, modifiedAt, documentId); + rebuildDocumentFTS(db, documentId); } /** @@ -2940,7 +3033,7 @@ function buildFTS5Query(query: string): string | null { const phrase = s.slice(start, i).trim(); i++; // skip closing quote if (phrase.length > 0) { - const sanitized = phrase.split(/\s+/).map(t => sanitizeFTS5Term(t)).filter(t => t).join(' '); + const sanitized = sanitizeFTS5Phrase(phrase); if (sanitized) { const ftsPhrase = `"${sanitized}"`; // Exact phrase, no prefix match if (negated) { @@ -2968,6 +3061,16 @@ function buildFTS5Query(query: string): string | null { positive.push(ftsPhrase); } } + } else if (containsCjk(term)) { + const sanitized = sanitizeFTS5Phrase(term); + if (sanitized) { + const ftsPhrase = `"${sanitized}"`; // CJK phrase over character tokens + if (negated) { + negative.push(ftsPhrase); + } else { + positive.push(ftsPhrase); + } + } } else { const sanitized = sanitizeFTS5Term(term); if (sanitized) { diff --git a/test/store.test.ts b/test/store.test.ts index 848ec96..a172064 100644 --- a/test/store.test.ts +++ b/test/store.test.ts @@ -48,6 +48,8 @@ import { syncConfigToDb, STRONG_SIGNAL_MIN_SCORE, STRONG_SIGNAL_MIN_GAP, + insertContent, + insertDocument, generateEmbeddings, type Store, type DocumentResult, @@ -156,18 +158,18 @@ async function insertTestDocument( const hash = opts.hash || await hashContent(body); // Insert content (with OR IGNORE for deduplication) - db.prepare(` - INSERT OR IGNORE INTO content (hash, doc, created_at) - VALUES (?, ?, ?) - `).run(hash, body, now); + insertContent(db, hash, body, now); - // Insert document - const result = db.prepare(` - INSERT INTO documents (collection, path, title, hash, created_at, modified_at, active) - VALUES (?, ?, ?, ?, ?, ?, ?) - `).run(collectionName, path, title, hash, now, now, active); + insertDocument(db, collectionName, path, title, hash, now, now); + const row = db.prepare(` + SELECT id FROM documents WHERE collection = ? AND path = ? + `).get(collectionName, path) as { id: number } | undefined; - return Number(result.lastInsertRowid); + if (active === 0 && row) { + db.prepare(`UPDATE documents SET active = 0 WHERE id = ?`).run(row.id); + } + + return row?.id ?? 0; } /** Sync YAML config file to SQLite store_collections in the current test store */ @@ -1250,6 +1252,61 @@ describe("FTS Search", () => { await cleanupTestDb(store); }); + test("searchFTS finds CJK documents by exact and mixed queries", async () => { + const store = await createTestStore(); + const collectionName = await createTestCollection(); + + await insertTestDocument(store.db, collectionName, { + name: "zh", + title: "中文检索说明", + body: "这里介绍 vector 数据库和关键词检索。", + displayPath: "cjk/zh.md", + }); + await insertTestDocument(store.db, collectionName, { + name: "ja", + title: "日本語検索メモ", + body: "この文書は検索品質とトークン化について説明します。", + displayPath: "cjk/ja.md", + }); + await insertTestDocument(store.db, collectionName, { + name: "ko", + title: "한국어 검색 노트", + body: "이 문서는 검색 품질과 토큰화 문제를 설명합니다.", + displayPath: "cjk/ko.md", + }); + + expect(store.searchFTS("关键词检索", 10).map(r => r.displayPath)).toContain(`${collectionName}/cjk/zh.md`); + expect(store.searchFTS("検索品質", 10).map(r => r.displayPath)).toContain(`${collectionName}/cjk/ja.md`); + expect(store.searchFTS("검색 품질", 10).map(r => r.displayPath)).toContain(`${collectionName}/cjk/ko.md`); + expect(store.searchFTS("vector 关键词", 10).map(r => r.displayPath)).toContain(`${collectionName}/cjk/zh.md`); + + await cleanupTestDb(store); + }); + + test("searchFTS keeps English behavior while indexing CJK text", async () => { + const store = await createTestStore(); + const collectionName = await createTestCollection(); + + await insertTestDocument(store.db, collectionName, { + name: "english", + title: "Vector Search Notes", + body: "The quick brown fox explains vector search and BM25 ranking.", + displayPath: "english.md", + }); + await insertTestDocument(store.db, collectionName, { + name: "zh", + title: "中文检索说明", + body: "这里介绍向量数据库和关键词检索。", + displayPath: "zh.md", + }); + + const foxResults = store.searchFTS("quick fox", 10); + expect(foxResults.map(r => r.displayPath)).toContain(`${collectionName}/english.md`); + expect(foxResults.map(r => r.displayPath)).not.toContain(`${collectionName}/zh.md`); + + await cleanupTestDb(store); + }); + test("searchFTS handles special characters in query", async () => { const store = await createTestStore(); const collectionName = await createTestCollection(); From 5b9f4728495d482d59ae4b4d3152743c00335183 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tobi=20L=C3=BCtke?= Date: Sat, 9 May 2026 18:12:37 +0000 Subject: [PATCH 03/17] fix(embed): honor collection filter --- CHANGELOG.md | 5 +++ src/cli/qmd.ts | 14 +++++-- src/index.ts | 3 ++ src/store.ts | 95 ++++++++++++++++++++++++++++++++++++++++-------- test/sdk.test.ts | 86 +++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 185 insertions(+), 18 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index fbfcde6..fee012c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,11 @@ ### Fixes +- Embedding: `qmd embed -c ` now scopes pending-doc selection + to the requested collection instead of embedding global pending work. + Scoped `--force` clears only collection-owned vectors, preserves shared + hashes referenced by sibling collections, and drops `vectors_vec` only + when the scoped clear empties all vectors. - GPU: respect explicit `QMD_LLAMA_GPU=metal|vulkan|cuda` backend overrides instead of always using auto GPU selection. #529 - Fix: preserve original filename case in `handelize()`. The previous `.toLowerCase()` call made indexed paths unreachable on case-sensitive diff --git a/src/cli/qmd.ts b/src/cli/qmd.ts index f42d1be..4ceecbe 100755 --- a/src/cli/qmd.ts +++ b/src/cli/qmd.ts @@ -1684,7 +1684,7 @@ function parseChunkStrategy(value: unknown): ChunkStrategy | undefined { async function vectorIndex( model: string = DEFAULT_EMBED_MODEL_URI, force: boolean = false, - batchOptions?: { maxDocsPerBatch?: number; maxBatchBytes?: number; chunkStrategy?: ChunkStrategy }, + batchOptions?: { maxDocsPerBatch?: number; maxBatchBytes?: number; chunkStrategy?: ChunkStrategy; collection?: string }, ): Promise { const storeInstance = getStore(); const db = storeInstance.db; @@ -1694,7 +1694,7 @@ async function vectorIndex( } // Check if there's work to do before starting - const hashesToEmbed = getHashesNeedingEmbedding(db); + const hashesToEmbed = getHashesNeedingEmbedding(db, batchOptions?.collection); if (hashesToEmbed === 0 && !force) { console.log(`${c.green}✓ All content hashes already have embeddings.${c.reset}`); closeDb(); @@ -1715,6 +1715,7 @@ async function vectorIndex( const result = await generateEmbeddings(storeInstance, { force, model, + collection: batchOptions?.collection, maxDocsPerBatch: batchOptions?.maxDocsPerBatch, maxBatchBytes: batchOptions?.maxBatchBytes, chunkStrategy: batchOptions?.chunkStrategy, @@ -2727,7 +2728,7 @@ function showHelp(): void { console.log("Maintenance:"); console.log(" qmd status - View index + collection health"); console.log(" qmd update [--pull] - Re-index collections (optionally git pull first)"); - console.log(" qmd embed [-f] - Generate/refresh vector embeddings"); + console.log(" qmd embed [-f] [-c ] - Generate/refresh vector embeddings"); console.log(" --max-docs-per-batch - Cap docs loaded into memory per embedding batch"); console.log(" --max-batch-mb - Cap UTF-8 MB loaded into memory per embedding batch"); console.log(" qmd cleanup - Clear caches, vacuum DB"); @@ -3120,10 +3121,17 @@ if (isMain) { const maxDocsPerBatch = parseEmbedBatchOption("maxDocsPerBatch", cli.values["max-docs-per-batch"]); const maxBatchMb = parseEmbedBatchOption("maxBatchBytes", cli.values["max-batch-mb"]); const embedChunkStrategy = parseChunkStrategy(cli.values["chunk-strategy"]); + // Validate -c against configured collections before dispatching, so a + // typo errors with "Collection not found: X" instead of silently + // reporting success because no pending docs match a nonexistent name. + // embed operates on a single collection; only the first value is used. + const embedValidatedCollections = resolveCollectionFilter(cli.opts.collection, false); + const embedCollection = embedValidatedCollections[0]; await vectorIndex(DEFAULT_EMBED_MODEL_URI, !!cli.values.force, { maxDocsPerBatch, maxBatchBytes: maxBatchMb === undefined ? undefined : maxBatchMb * 1024 * 1024, chunkStrategy: embedChunkStrategy, + collection: embedCollection, }); } catch (error) { console.error(error instanceof Error ? error.message : String(error)); diff --git a/src/index.ts b/src/index.ts index 6772347..3de13a5 100644 --- a/src/index.ts +++ b/src/index.ts @@ -290,6 +290,8 @@ export interface QMDStore { embed(options?: { force?: boolean; model?: string; + /** Restrict embedding to documents in one collection. */ + collection?: string; maxDocsPerBatch?: number; maxBatchBytes?: number; chunkStrategy?: ChunkStrategy; @@ -516,6 +518,7 @@ export async function createStore(options: StoreOptions): Promise { return generateEmbeddings(internal, { force: embedOpts?.force, model: embedOpts?.model, + collection: embedOpts?.collection, maxDocsPerBatch: embedOpts?.maxDocsPerBatch, maxBatchBytes: embedOpts?.maxBatchBytes, chunkStrategy: embedOpts?.chunkStrategy, diff --git a/src/store.ts b/src/store.ts index d6d5cd7..71dc887 100644 --- a/src/store.ts +++ b/src/store.ts @@ -1374,6 +1374,11 @@ export type EmbedResult = { export type EmbedOptions = { force?: boolean; model?: string; + /** + * Restrict embedding to documents in a single collection. + * When omitted, all pending documents across every collection are embedded. + */ + collection?: string; maxDocsPerBatch?: number; maxBatchBytes?: number; chunkStrategy?: ChunkStrategy; @@ -1415,16 +1420,18 @@ function resolveEmbedOptions(options?: EmbedOptions): Required { } }); + test("store.embed scopes pending documents to the requested collection", async () => { + const store = await createStore({ + dbPath: freshDbPath(), + config: { + collections: { + docs: { path: docsDir, pattern: "**/*.md" }, + notes: { path: notesDir, pattern: "**/*.md" }, + }, + }, + }); + + const fakeLlm = createFakeEmbedLlm(); + setDefaultLlamaCpp(createFakeTokenizer() as any); + store.internal.llm = fakeLlm as any; + + try { + await store.update(); + const result = await store.embed({ collection: "docs" }); + + const vectorCounts = store.internal.db.prepare(` + SELECT d.collection, COUNT(DISTINCT v.hash) AS count + FROM documents d + LEFT JOIN content_vectors v ON v.hash = d.hash AND v.seq = 0 + WHERE d.active = 1 + GROUP BY d.collection + ORDER BY d.collection + `).all() as Array<{ collection: string; count: number }>; + + expect(result.docsProcessed).toBe(3); + expect(result.chunksEmbedded).toBe(3); + expect(vectorCounts).toEqual([ + { collection: "docs", count: 3 }, + { collection: "notes", count: 0 }, + ]); + } finally { + setDefaultLlamaCpp(null); + await store.close(); + } + }); + + test("store.embed with force only clears the requested collection", async () => { + const store = await createStore({ + dbPath: freshDbPath(), + config: { + collections: { + docs: { path: docsDir, pattern: "**/*.md" }, + notes: { path: notesDir, pattern: "**/*.md" }, + }, + }, + }); + + const fakeLlm = createFakeEmbedLlm(); + setDefaultLlamaCpp(createFakeTokenizer() as any); + store.internal.llm = fakeLlm as any; + + const vectorCounts = () => store.internal.db.prepare(` + SELECT d.collection, COUNT(DISTINCT v.hash) AS count + FROM documents d + LEFT JOIN content_vectors v ON v.hash = d.hash AND v.seq = 0 + WHERE d.active = 1 + GROUP BY d.collection + ORDER BY d.collection + `).all() as Array<{ collection: string; count: number }>; + + try { + await store.update(); + await store.embed(); + expect(vectorCounts()).toEqual([ + { collection: "docs", count: 3 }, + { collection: "notes", count: 3 }, + ]); + + const result = await store.embed({ force: true, collection: "docs" }); + + expect(result.docsProcessed).toBe(3); + expect(result.chunksEmbedded).toBe(3); + expect(vectorCounts()).toEqual([ + { collection: "docs", count: 3 }, + { collection: "notes", count: 3 }, + ]); + } finally { + setDefaultLlamaCpp(null); + await store.close(); + } + }); + test("store.embed rejects invalid batch limits", async () => { const store = await createStore({ dbPath: freshDbPath(), From 92aaded36e4aaf76f753dceb9e12dccae6b4704a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tobi=20L=C3=BCtke?= Date: Sat, 9 May 2026 17:53:44 +0000 Subject: [PATCH 04/17] fix(store): preserve inactive docs during orphan cleanup --- CHANGELOG.md | 3 ++ src/store.ts | 6 ++-- test/store.test.ts | 71 +++++++++++++++++++++++++++++++++++++++++++++- 3 files changed, 77 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index fee012c..4931d92 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -20,6 +20,9 @@ - CLI: lazy-load `node-llama-cpp` so lightweight commands such as `qmd status` do not import native ML dependencies or trigger llama.cpp builds on ARM/no-GPU machines. #491 +- Store: keep content rows referenced by inactive documents during orphan + cleanup so `qmd update` preserves soft-deleted tombstones for removed + files. #585 ## [2.1.0] - 2026-04-05 diff --git a/src/store.ts b/src/store.ts index 71dc887..f5dd47a 100644 --- a/src/store.ts +++ b/src/store.ts @@ -2032,13 +2032,15 @@ export function deleteInactiveDocuments(db: Database): number { } /** - * Remove orphaned content hashes that are not referenced by any active document. + * Remove orphaned content hashes that are not referenced by any document. + * Inactive documents are soft-deleted tombstones, so their content rows must + * remain referenced until deleteInactiveDocuments() hard-deletes them. * Returns the number of orphaned content hashes deleted. */ export function cleanupOrphanedContent(db: Database): number { const result = db.prepare(` DELETE FROM content - WHERE hash NOT IN (SELECT DISTINCT hash FROM documents WHERE active = 1) + WHERE hash NOT IN (SELECT DISTINCT hash FROM documents) `).run(); return result.changes; } diff --git a/test/store.test.ts b/test/store.test.ts index a172064..2ed0b06 100644 --- a/test/store.test.ts +++ b/test/store.test.ts @@ -9,7 +9,7 @@ import { describe, test, expect, beforeAll, afterAll, beforeEach, afterEach, vi } from "vitest"; import { openDatabase, loadSqliteVec } from "../src/db.js"; import type { Database } from "../src/db.js"; -import { unlink, mkdtemp, rmdir, writeFile } from "node:fs/promises"; +import { unlink, mkdtemp, rmdir, writeFile, rm } from "node:fs/promises"; import { tmpdir } from "node:os"; import { join } from "node:path"; import YAML from "yaml"; @@ -51,6 +51,7 @@ import { insertContent, insertDocument, generateEmbeddings, + reindexCollection, type Store, type DocumentResult, type SearchResult, @@ -2313,6 +2314,33 @@ describe("Vector Table", () => { await cleanupTestDb(store); }); + + test("insertEmbedding is idempotent for an existing vec0 hash_seq (#598)", async () => { + const store = await createTestStore(); + store.ensureVecTable(2); + + const hash = "existinghashseq"; + const first = new Float32Array([0.1, 0.2]); + const second = new Float32Array([0.3, 0.4]); + const now = new Date().toISOString(); + + store.db.prepare(`INSERT INTO vectors_vec (hash_seq, embedding) VALUES (?, ?)`).run(`${hash}_0`, first); + + // Reproduces sqlite-vec's broken conflict handling: vec0 does not honor OR REPLACE. + expect(() => { + store.db.prepare(`INSERT OR REPLACE INTO vectors_vec (hash_seq, embedding) VALUES (?, ?)`).run(`${hash}_0`, second); + }).toThrow(/UNIQUE constraint failed/i); + + // QMD must therefore use DELETE + INSERT when upserting the vector row. + expect(() => store.insertEmbedding(hash, 0, 0, second, "test-model", now)).not.toThrow(); + + const vectorCount = store.db.prepare(`SELECT COUNT(*) AS count FROM vectors_vec WHERE hash_seq = ?`).get(`${hash}_0`) as { count: number }; + const metadataCount = store.db.prepare(`SELECT COUNT(*) AS count FROM content_vectors WHERE hash = ? AND seq = 0`).get(hash) as { count: number }; + expect(vectorCount.count).toBe(1); + expect(metadataCount.count).toBe(1); + + await cleanupTestDb(store); + }); }); // ============================================================================= @@ -2320,6 +2348,47 @@ describe("Vector Table", () => { // ============================================================================= describe("Integration", () => { + test("reindexCollection soft-deletes removed files and preserves inactive content (#585)", async () => { + const store = await createTestStore(); + const collectionDir = await mkdtemp(join(testDir, "orphan-regression-")); + const collectionName = "orphan-regression"; + + try { + for (let i = 1; i <= 5; i++) { + await writeFile(join(collectionDir, `doc-${i}.md`), `# Doc ${i}\n\nUnique body ${i}`); + } + + await createTestCollection({ pwd: collectionDir, glob: "**/*.md", name: collectionName }); + + const initial = await reindexCollection(store, collectionDir, "**/*.md", collectionName); + expect(initial.indexed).toBe(5); + expect(initial.removed).toBe(0); + + await rm(join(collectionDir, "doc-3.md")); + await rm(join(collectionDir, "doc-4.md")); + await rm(join(collectionDir, "doc-5.md")); + + const afterDelete = await reindexCollection(store, collectionDir, "**/*.md", collectionName); + expect(afterDelete.removed).toBe(3); + + const counts = store.db.prepare(` + SELECT + SUM(CASE WHEN active = 1 THEN 1 ELSE 0 END) AS active, + SUM(CASE WHEN active = 0 THEN 1 ELSE 0 END) AS inactive, + COUNT(*) AS total + FROM documents + WHERE collection = ? + `).get(collectionName) as { active: number; inactive: number; total: number }; + const contentCount = store.db.prepare(`SELECT COUNT(*) AS count FROM content`).get() as { count: number }; + + expect(counts).toEqual({ active: 2, inactive: 3, total: 5 }); + expect(contentCount.count).toBe(5); + } finally { + await rm(collectionDir, { recursive: true, force: true }); + await cleanupTestDb(store); + } + }); + test("full document lifecycle: create, search, retrieve", async () => { const store = await createTestStore(); const collectionName = await createTestCollection({ pwd: "/test/notes", glob: "**/*.md" }); From 004714af48650295318c6a9acabde237850e5fae Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tobi=20L=C3=BCtke?= Date: Sat, 9 May 2026 17:52:42 +0000 Subject: [PATCH 05/17] Fix hybrid RRF weighting by query type --- CHANGELOG.md | 1 + src/store.ts | 20 ++++++++++++++++++-- test/store.test.ts | 34 ++++++++++++++++++++++++++++++++++ 3 files changed, 53 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4931d92..54d84bd 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,7 @@ Scoped `--force` clears only collection-owned vectors, preserves shared hashes referenced by sibling collections, and drops `vectors_vec` only when the scoped clear empties all vectors. +- Hybrid search: weight RRF lists by query type so original FTS and original vector evidence get the intended 2x boost, instead of accidentally boosting the first lexical expansion. #591 - GPU: respect explicit `QMD_LLAMA_GPU=metal|vulkan|cuda` backend overrides instead of always using auto GPU selection. #529 - Fix: preserve original filename case in `handelize()`. The previous `.toLowerCase()` call made indexed paths unreachable on case-sensitive diff --git a/src/store.ts b/src/store.ts index f5dd47a..5adafd9 100644 --- a/src/store.ts +++ b/src/store.ts @@ -4158,6 +4158,21 @@ export type RankedListMeta = { query: string; }; +/** + * RRF list weights for hybridQuery. + * + * Original-query retrieval paths are the primary evidence and get 2x weight: + * - original FTS + * - original vector search + * + * Expansion-derived lists (lex/vec/hyde) stay at 1x regardless of list order, + * so a lex expansion inserted before original vector search cannot steal the + * original vector boost. + */ +export function getHybridRrfWeights(rankedListMeta: RankedListMeta[]): number[] { + return rankedListMeta.map(meta => meta.queryType === "original" ? 2.0 : 1.0); +} + /** * Hybrid search: BM25 + vector + query expansion + RRF + chunked reranking. * @@ -4289,8 +4304,9 @@ export async function hybridQuery( } } - // Step 4: RRF fusion — first 2 lists (original FTS + first vec) get 2x weight - const weights = rankedLists.map((_, i) => i < 2 ? 2.0 : 1.0); + // Step 4: RRF fusion — original-query FTS and vector lists get 2x weight; + // expansion-derived lists stay at 1x independent of insertion order. + const weights = getHybridRrfWeights(rankedListMeta); const fused = reciprocalRankFusion(rankedLists, weights); const rrfTraceByFile = explain ? buildRrfTrace(rankedLists, weights, rankedListMeta) : null; const candidates = fused.slice(0, candidateLimit); diff --git a/test/store.test.ts b/test/store.test.ts index 2ed0b06..24b5a10 100644 --- a/test/store.test.ts +++ b/test/store.test.ts @@ -52,10 +52,12 @@ import { insertDocument, generateEmbeddings, reindexCollection, + getHybridRrfWeights, type Store, type DocumentResult, type SearchResult, type RankedResult, + type RankedListMeta, } from "../src/store.js"; import type { CollectionConfig } from "../src/collections.js"; @@ -2046,6 +2048,38 @@ describe("Reciprocal Rank Fusion", () => { expect(fused[0]!.file).toBe("doc1"); }); + test("hybrid RRF weights boost original vector evidence over expansion-only hits", () => { + const originalFtsOnly = makeResult("original-fts-only.md", 0.95); + const expansionOnly = makeResult("lex-expansion-only.md", 0.95); + const originalVector = makeResult("original-vector.md", 0.95); + + // Mirrors hybridQuery's common list order when a lex expansion exists: + // original FTS, lex expansion FTS, original vector. + const rankedLists = [ + [originalFtsOnly], + [expansionOnly], + [originalVector], + ]; + const rankedListMeta: RankedListMeta[] = [ + { source: "fts", queryType: "original", query: "user query" }, + { source: "fts", queryType: "lex", query: "lex expansion" }, + { source: "vec", queryType: "original", query: "user query" }, + ]; + + const positionBasedWeights = rankedLists.map((_, i) => i < 2 ? 2.0 : 1.0); + const buggyOrder = reciprocalRankFusion(rankedLists, positionBasedWeights); + + expect(buggyOrder.findIndex(r => r.file === "lex-expansion-only.md")) + .toBeLessThan(buggyOrder.findIndex(r => r.file === "original-vector.md")); + + const semanticWeights = getHybridRrfWeights(rankedListMeta); + const fixedOrder = reciprocalRankFusion(rankedLists, semanticWeights); + + expect(semanticWeights).toEqual([2.0, 1.0, 2.0]); + expect(fixedOrder.findIndex(r => r.file === "original-vector.md")) + .toBeLessThan(fixedOrder.findIndex(r => r.file === "lex-expansion-only.md")); + }); + test("RRF adds top-rank bonus", () => { // doc1 is #1 in list1, doc2 is #2 in list1 const list1 = [makeResult("doc1", 0.9), makeResult("doc2", 0.8)]; From 3f055e705db8ad45f30251c8ef7ff3839bafd00d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tobi=20L=C3=BCtke?= Date: Sat, 9 May 2026 17:53:58 +0000 Subject: [PATCH 06/17] Fix AST grammar packaging for Bun installs --- CHANGELOG.md | 3 +++ bun.lock | 22 +++++++++++++++------- package.json | 16 +++++++++------- scripts/check-package-grammars.mjs | 29 +++++++++++++++++++++++++++++ src/ast.ts | 30 +++++++++++++++++++++--------- test/ast.test.ts | 12 +++++++++++- test/package.test.ts | 27 +++++++++++++++++++++++++++ 7 files changed, 115 insertions(+), 24 deletions(-) create mode 100644 scripts/check-package-grammars.mjs create mode 100644 test/package.test.ts diff --git a/CHANGELOG.md b/CHANGELOG.md index 54d84bd..cde9802 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -24,6 +24,9 @@ - Store: keep content rows referenced by inactive documents during orphan cleanup so `qmd update` preserves soft-deleted tombstones for removed files. #585 +- Packaging: install AST grammar WASM packages as required dependencies so + Bun global installs include TypeScript/TSX/JavaScript grammars, and add a + `smoke:package-grammars` verification command. #595 ## [2.1.0] - 2026-04-05 diff --git a/bun.lock b/bun.lock index a96f096..651b00f 100644 --- a/bun.lock +++ b/bun.lock @@ -11,6 +11,10 @@ "node-llama-cpp": "3.18.1", "picomatch": "4.0.4", "sqlite-vec": "0.1.9", + "tree-sitter-go": "0.23.4", + "tree-sitter-python": "0.23.4", + "tree-sitter-rust": "0.24.0", + "tree-sitter-typescript": "0.23.2", "web-tree-sitter": "0.26.7", "yaml": "2.8.3", "zod": "4.2.1", @@ -26,10 +30,6 @@ "sqlite-vec-linux-arm64": "0.1.9", "sqlite-vec-linux-x64": "0.1.9", "sqlite-vec-windows-x64": "0.1.9", - "tree-sitter-go": "0.23.4", - "tree-sitter-python": "0.23.4", - "tree-sitter-rust": "0.24.0", - "tree-sitter-typescript": "0.23.2", }, "peerDependencies": { "typescript": "^5.9.3", @@ -509,7 +509,7 @@ "node-abi": ["node-abi@3.87.0", "", { "dependencies": { "semver": "^7.3.5" } }, "sha512-+CGM1L1CgmtheLcBuleyYOn7NWPVu0s0EJH2C4puxgEZb9h8QpR9G2dBfZJOAUhi7VQxuBPMd0hiISWcTyiYyQ=="], - "node-addon-api": ["node-addon-api@8.5.0", "", {}, "sha512-/bRZty2mXUIFY/xU5HLvveNHlswNJej+RnxBjOMkidWfwZzgTbPG1E3K5TOxRLOR+5hX7bSofy8yf1hZevMS8A=="], + "node-addon-api": ["node-addon-api@8.7.0", "", {}, "sha512-9MdFxmkKaOYVTV+XVRG8ArDwwQ77XIgIPyKASB1k3JPq3M8fGQQQE3YpMOrKm6g//Ktx8ivZr8xo1Qmtqub+GA=="], "node-api-headers": ["node-api-headers@1.8.0", "", {}, "sha512-jfnmiKWjRAGbdD1yQS28bknFM1tbHC1oucyuMPjmkEs+kpiu76aRs40WlTmBmyEgzDM76ge1DQ7XJ3R5deiVjQ=="], @@ -773,8 +773,6 @@ "micromatch/picomatch": ["picomatch@2.3.1", "", {}, "sha512-JU3teHTNjmE2VCGFzuY8EXzCDVwEqB2a8fsIvwaStHhAWJEeVd1o1QD80CU6+ZdEXXSLbSsuLwJjkCBWqRQUVA=="], - "node-llama-cpp/node-addon-api": ["node-addon-api@8.7.0", "", {}, "sha512-9MdFxmkKaOYVTV+XVRG8ArDwwQ77XIgIPyKASB1k3JPq3M8fGQQQE3YpMOrKm6g//Ktx8ivZr8xo1Qmtqub+GA=="], - "ora/cli-spinners": ["cli-spinners@3.4.0", "", {}, "sha512-bXfOC4QcT1tKXGorxL3wbJm6XJPDqEnij2gQ2m7ESQuE+/z9YFIWnl/5RpTiKWbMq3EVKR4fRLJGn6DVfu0mpw=="], "postcss/nanoid": ["nanoid@3.3.11", "", { "bin": { "nanoid": "bin/nanoid.cjs" } }, "sha512-N8SpfPUnUp1bK+PMYW8qSWdl9U+wwNWI4QKxOYDy9JAro3WMX7p2OeVRF9v+347pnakNevPmiHhNmZ2HbFA76w=="], @@ -793,6 +791,16 @@ "tinyglobby/picomatch": ["picomatch@4.0.3", "", {}, "sha512-5gTmgEY/sqK6gFXLIsQNH19lWb4ebPDLA4SdLP7dsWkIXHWlG66oPuVvXSGFPppYZz8ZDZq0dYYrbHfBCVUb1Q=="], + "tree-sitter-go/node-addon-api": ["node-addon-api@8.5.0", "", {}, "sha512-/bRZty2mXUIFY/xU5HLvveNHlswNJej+RnxBjOMkidWfwZzgTbPG1E3K5TOxRLOR+5hX7bSofy8yf1hZevMS8A=="], + + "tree-sitter-javascript/node-addon-api": ["node-addon-api@8.5.0", "", {}, "sha512-/bRZty2mXUIFY/xU5HLvveNHlswNJej+RnxBjOMkidWfwZzgTbPG1E3K5TOxRLOR+5hX7bSofy8yf1hZevMS8A=="], + + "tree-sitter-python/node-addon-api": ["node-addon-api@8.5.0", "", {}, "sha512-/bRZty2mXUIFY/xU5HLvveNHlswNJej+RnxBjOMkidWfwZzgTbPG1E3K5TOxRLOR+5hX7bSofy8yf1hZevMS8A=="], + + "tree-sitter-rust/node-addon-api": ["node-addon-api@8.5.0", "", {}, "sha512-/bRZty2mXUIFY/xU5HLvveNHlswNJej+RnxBjOMkidWfwZzgTbPG1E3K5TOxRLOR+5hX7bSofy8yf1hZevMS8A=="], + + "tree-sitter-typescript/node-addon-api": ["node-addon-api@8.5.0", "", {}, "sha512-/bRZty2mXUIFY/xU5HLvveNHlswNJej+RnxBjOMkidWfwZzgTbPG1E3K5TOxRLOR+5hX7bSofy8yf1hZevMS8A=="], + "vite/picomatch": ["picomatch@4.0.3", "", {}, "sha512-5gTmgEY/sqK6gFXLIsQNH19lWb4ebPDLA4SdLP7dsWkIXHWlG66oPuVvXSGFPppYZz8ZDZq0dYYrbHfBCVUb1Q=="], "vitest/picomatch": ["picomatch@4.0.3", "", {}, "sha512-5gTmgEY/sqK6gFXLIsQNH19lWb4ebPDLA4SdLP7dsWkIXHWlG66oPuVvXSGFPppYZz8ZDZq0dYYrbHfBCVUb1Q=="], diff --git a/package.json b/package.json index 0ec04c9..59a878a 100644 --- a/package.json +++ b/package.json @@ -17,6 +17,7 @@ "files": [ "bin/", "dist/", + "scripts/check-package-grammars.mjs", "LICENSE", "CHANGELOG.md" ], @@ -31,7 +32,8 @@ "vsearch": "tsx src/cli/qmd.ts vsearch", "rerank": "tsx src/cli/qmd.ts rerank", "inspector": "npx @modelcontextprotocol/inspector tsx src/cli/qmd.ts mcp", - "release": "./scripts/release.sh" + "release": "./scripts/release.sh", + "smoke:package-grammars": "node scripts/check-package-grammars.mjs" }, "publishConfig": { "access": "public" @@ -53,18 +55,18 @@ "sqlite-vec": "0.1.9", "web-tree-sitter": "0.26.7", "yaml": "2.8.3", - "zod": "4.2.1" + "zod": "4.2.1", + "tree-sitter-go": "0.23.4", + "tree-sitter-python": "0.23.4", + "tree-sitter-rust": "0.24.0", + "tree-sitter-typescript": "0.23.2" }, "optionalDependencies": { "sqlite-vec-darwin-arm64": "0.1.9", "sqlite-vec-darwin-x64": "0.1.9", "sqlite-vec-linux-arm64": "0.1.9", "sqlite-vec-linux-x64": "0.1.9", - "sqlite-vec-windows-x64": "0.1.9", - "tree-sitter-go": "0.23.4", - "tree-sitter-python": "0.23.4", - "tree-sitter-rust": "0.24.0", - "tree-sitter-typescript": "0.23.2" + "sqlite-vec-windows-x64": "0.1.9" }, "devDependencies": { "@types/better-sqlite3": "7.6.13", diff --git a/scripts/check-package-grammars.mjs b/scripts/check-package-grammars.mjs new file mode 100644 index 0000000..45d7854 --- /dev/null +++ b/scripts/check-package-grammars.mjs @@ -0,0 +1,29 @@ +#!/usr/bin/env node +import { createRequire } from "node:module"; + +const require = createRequire(import.meta.url); + +const grammars = [ + "tree-sitter-typescript/tree-sitter-typescript.wasm", + "tree-sitter-typescript/tree-sitter-tsx.wasm", + "tree-sitter-python/tree-sitter-python.wasm", + "tree-sitter-go/tree-sitter-go.wasm", + "tree-sitter-rust/tree-sitter-rust.wasm", +]; + +let ok = true; +for (const grammar of grammars) { + try { + const resolved = require.resolve(grammar); + console.log(`ok ${grammar} -> ${resolved}`); + } catch (err) { + ok = false; + console.error(`missing ${grammar}`); + console.error(err instanceof Error ? err.message : String(err)); + } +} + +if (!ok) { + console.error("\nAST grammar package smoke check failed. Run `bun install` locally or repair a broken global install with the matching `bun add tree-sitter-...@` command shown by `qmd status`."); + process.exit(1); +} diff --git a/src/ast.ts b/src/ast.ts index 5f8194e..a83dbc1 100644 --- a/src/ast.ts +++ b/src/ast.ts @@ -63,15 +63,22 @@ export function detectLanguage(filepath: string): SupportedLanguage | null { /** * Maps language to the npm package and wasm filename for the grammar. */ -const GRAMMAR_MAP: Record = { - typescript: { pkg: "tree-sitter-typescript", wasm: "tree-sitter-typescript.wasm" }, - tsx: { pkg: "tree-sitter-typescript", wasm: "tree-sitter-tsx.wasm" }, - javascript: { pkg: "tree-sitter-typescript", wasm: "tree-sitter-typescript.wasm" }, - python: { pkg: "tree-sitter-python", wasm: "tree-sitter-python.wasm" }, - go: { pkg: "tree-sitter-go", wasm: "tree-sitter-go.wasm" }, - rust: { pkg: "tree-sitter-rust", wasm: "tree-sitter-rust.wasm" }, +const GRAMMAR_MAP: Record = { + typescript: { pkg: "tree-sitter-typescript", wasm: "tree-sitter-typescript.wasm", version: "0.23.2" }, + tsx: { pkg: "tree-sitter-typescript", wasm: "tree-sitter-tsx.wasm", version: "0.23.2" }, + javascript: { pkg: "tree-sitter-typescript", wasm: "tree-sitter-typescript.wasm", version: "0.23.2" }, + python: { pkg: "tree-sitter-python", wasm: "tree-sitter-python.wasm", version: "0.23.4" }, + go: { pkg: "tree-sitter-go", wasm: "tree-sitter-go.wasm", version: "0.23.4" }, + rust: { pkg: "tree-sitter-rust", wasm: "tree-sitter-rust.wasm", version: "0.24.0" }, }; +export function formatGrammarLoadError(language: SupportedLanguage, err: unknown): string { + const grammar = GRAMMAR_MAP[language]; + const detail = err instanceof Error ? err.message : String(err); + return `${grammar.pkg}/${grammar.wasm} failed to load (${detail}); falling back to regex chunking. ` + + `Repair a broken global install with: bun add ${grammar.pkg}@${grammar.version}`; +} + // ============================================================================= // Per-Language Query Definitions // ============================================================================= @@ -176,6 +183,9 @@ let initPromise: Promise | null = null; /** Languages that have already failed to load — warn only once per process. */ const failedLanguages = new Set(); +/** Last grammar load error by language, for status output. */ +const grammarLoadErrors = new Map(); + /** Cached grammar load promises. */ const grammarCache = new Map>(); @@ -228,7 +238,9 @@ async function loadGrammar(language: SupportedLanguage): Promise { // Should either return some partial break points or empty array — not throw expect(Array.isArray(points)).toBe(true); }); + + test("explains missing grammar packages with a repair command", () => { + const msg = formatGrammarLoadError( + "typescript", + new Error("Cannot find module 'tree-sitter-typescript/tree-sitter-typescript.wasm'"), + ); + expect(msg).toContain("tree-sitter-typescript"); + expect(msg).toContain("bun add tree-sitter-typescript@0.23.2"); + expect(msg).toContain("falling back to regex"); + }); }); // ============================================================================= diff --git a/test/package.test.ts b/test/package.test.ts new file mode 100644 index 0000000..018087d --- /dev/null +++ b/test/package.test.ts @@ -0,0 +1,27 @@ +import { describe, expect, test } from "vitest"; +import { readFileSync } from "node:fs"; +import { join } from "node:path"; + +const root = new URL("..", import.meta.url); +const pkg = JSON.parse(readFileSync(new URL("package.json", root), "utf8")); + +describe("package grammar distribution", () => { + test("installs AST grammar wasm packages as required runtime dependencies", () => { + for (const dep of ["tree-sitter-typescript", "tree-sitter-python", "tree-sitter-go", "tree-sitter-rust"]) { + expect(pkg.dependencies, `${dep} should be a required dependency`).toHaveProperty(dep); + expect(pkg.optionalDependencies ?? {}, `${dep} should not be optional`).not.toHaveProperty(dep); + } + }); + + test("documents a packaging smoke check for grammar wasm availability", () => { + expect(pkg.scripts, "package.json scripts").toHaveProperty("smoke:package-grammars"); + expect(String(pkg.scripts["smoke:package-grammars"])).toContain("check-package-grammars"); + + expect(pkg.files, "published package files").toContain("scripts/check-package-grammars.mjs"); + + const scriptPath = join(root.pathname, "scripts", "check-package-grammars.mjs"); + const script = readFileSync(scriptPath, "utf8"); + expect(script).toContain("tree-sitter-typescript/tree-sitter-typescript.wasm"); + expect(script).toContain("tree-sitter-typescript/tree-sitter-tsx.wasm"); + }); +}); From 3653f6015c32e0487704eed5b25022eea5f8cf28 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tobi=20L=C3=BCtke?= Date: Sat, 9 May 2026 17:54:08 +0000 Subject: [PATCH 07/17] Fix MCP stdio native log pollution --- CHANGELOG.md | 1 + bin/qmd | 10 ++++++++ test/cli.test.ts | 66 +++++++++++++++++++++++++++++++++++++++++++++++- 3 files changed, 76 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index cde9802..1af3da0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ hashes referenced by sibling collections, and drops `vectors_vec` only when the scoped clear empties all vectors. - Hybrid search: weight RRF lists by query type so original FTS and original vector evidence get the intended 2x boost, instead of accidentally boosting the first lexical expansion. #591 +- MCP: seed llama.cpp/GGML quiet env vars before launching `qmd mcp` so native logs cannot pollute stdio JSON-RPC framing. #593 - GPU: respect explicit `QMD_LLAMA_GPU=metal|vulkan|cuda` backend overrides instead of always using auto GPU selection. #529 - Fix: preserve original filename case in `handelize()`. The previous `.toLowerCase()` call made indexed paths unreachable on case-sensitive diff --git a/bin/qmd b/bin/qmd index f658b3b..7522b2e 100755 --- a/bin/qmd +++ b/bin/qmd @@ -15,6 +15,16 @@ done # to avoid native module ABI mismatches (e.g., better-sqlite3 compiled for bun vs node) DIR="$(cd -P "$(dirname "$SOURCE")/.." && pwd)" +# MCP stdio reserves stdout exclusively for JSON-RPC frames. node-llama-cpp +# / llama.cpp / ggml can write native logs directly to stdout before JS-level +# log handlers are attached, so seed the native quiet env before Node/Bun imports +# the CLI and its LLM modules. Preserve explicit user values when provided. +if [ "$1" = "mcp" ]; then + export LLAMA_LOG_LEVEL="${LLAMA_LOG_LEVEL:-error}" + export GGML_LOG_LEVEL="${GGML_LOG_LEVEL:-error}" + export GGML_BACKEND_SILENT="${GGML_BACKEND_SILENT:-1}" +fi + # Detect the package manager that installed dependencies by checking lockfiles. # $BUN_INSTALL is intentionally NOT checked — it only indicates that bun exists # on the system, not that it was used to install this package (see #361). diff --git a/test/cli.test.ts b/test/cli.test.ts index 2e49deb..5748676 100644 --- a/test/cli.test.ts +++ b/test/cli.test.ts @@ -6,7 +6,7 @@ */ import { describe, test, expect, beforeAll, afterAll, beforeEach } from "vitest"; -import { mkdtemp, rm, writeFile, mkdir } from "fs/promises"; +import { chmod, copyFile, mkdtemp, rm, writeFile, mkdir } from "fs/promises"; import { existsSync, lstatSync, readFileSync, symlinkSync, writeFileSync, unlinkSync } from "fs"; import { tmpdir } from "os"; import { join, dirname } from "path"; @@ -1601,3 +1601,67 @@ describe("mcp http daemon", () => { try { unlinkSync(pidPath()); } catch {} }); }); + +// ============================================================================= +// MCP stdio stdout hygiene +// ============================================================================= + +describe("mcp stdio launcher", () => { + test("sets native llama/ggml quiet env before Node starts so stdout stays JSON-RPC only", async () => { + const tempPackage = await mkdtemp(join(tmpdir(), "qmd-bin-mcp-")); + try { + await mkdir(join(tempPackage, "bin"), { recursive: true }); + await mkdir(join(tempPackage, "dist", "cli"), { recursive: true }); + await mkdir(join(tempPackage, "fake-bin"), { recursive: true }); + + const qmdBin = join(tempPackage, "bin", "qmd"); + await copyFile(join(projectRoot, "bin", "qmd"), qmdBin); + await chmod(qmdBin, 0o755); + + // Force the wrapper down the Node branch, then put our fake `node` first + // in PATH. The fake node behaves like the native llama/ggml layer: it + // writes a non-JSON stdout line unless qmd pre-seeded the documented + // quiet env vars before launching JS. + await writeFile(join(tempPackage, "package-lock.json"), "{}\n"); + const fakeNode = join(tempPackage, "fake-bin", "node"); + await writeFile(fakeNode, `#!/bin/sh +if [ "\${GGML_BACKEND_SILENT:-}" != "1" ]; then + printf 'llama.cpp native log on stdout\\n' +fi +printf '{"jsonrpc":"2.0","id":1,"result":{"ok":true}}\\n' +`); + await chmod(fakeNode, 0o755); + + const proc = spawn(qmdBin, ["mcp"], { + cwd: tempPackage, + env: { + ...process.env, + PATH: `${join(tempPackage, "fake-bin")}:${process.env.PATH}`, + LLAMA_LOG_LEVEL: "", + GGML_LOG_LEVEL: "", + GGML_BACKEND_SILENT: "", + }, + stdio: ["ignore", "pipe", "pipe"], + }); + + let stdout = ""; + let stderr = ""; + proc.stdout?.on("data", (chunk: Buffer) => { stdout += chunk.toString(); }); + proc.stderr?.on("data", (chunk: Buffer) => { stderr += chunk.toString(); }); + const exitCode = await new Promise((resolve, reject) => { + proc.once("error", reject); + proc.on("close", (code) => resolve(code ?? 1)); + }); + + expect(exitCode).toBe(0); + expect(stderr).toBe(""); + const lines = stdout.trim().split("\n").filter(Boolean); + expect(lines.length).toBeGreaterThan(0); + for (const line of lines) { + expect(() => JSON.parse(line)).not.toThrow(); + } + } finally { + await rm(tempPackage, { recursive: true, force: true }); + } + }); +}); From 656707c6b40ede862983f8563a93e070cbd9a00d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tobi=20L=C3=BCtke?= Date: Sat, 9 May 2026 17:56:03 +0000 Subject: [PATCH 08/17] Fix Node ESM index path normalization --- CHANGELOG.md | 1 + src/cli/qmd.ts | 6 ++---- src/collections.ts | 6 ++---- test/esm-ambiguous-module.test.ts | 27 +++++++++++++++++++++++++++ 4 files changed, 32 insertions(+), 8 deletions(-) create mode 100644 test/esm-ambiguous-module.test.ts diff --git a/CHANGELOG.md b/CHANGELOG.md index 1af3da0..4191964 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,7 @@ when the scoped clear empties all vectors. - Hybrid search: weight RRF lists by query type so original FTS and original vector evidence get the intended 2x boost, instead of accidentally boosting the first lexical expansion. #591 - MCP: seed llama.cpp/GGML quiet env vars before launching `qmd mcp` so native logs cannot pollute stdio JSON-RPC framing. #593 +- CLI: remove CommonJS `require()` calls from ESM index path normalization so `qmd --index ` no longer crashes with `ERR_AMBIGUOUS_MODULE_SYNTAX` on Node 22+. #634 - GPU: respect explicit `QMD_LLAMA_GPU=metal|vulkan|cuda` backend overrides instead of always using auto GPU selection. #529 - Fix: preserve original filename case in `handelize()`. The previous `.toLowerCase()` call made indexed paths unreachable on case-sensitive diff --git a/src/cli/qmd.ts b/src/cli/qmd.ts index 4ceecbe..0c3a1e1 100755 --- a/src/cli/qmd.ts +++ b/src/cli/qmd.ts @@ -3,7 +3,7 @@ import type { Database } from "../db.js"; import fastGlob from "fast-glob"; import { execSync, spawn as nodeSpawn } from "child_process"; import { fileURLToPath } from "url"; -import { dirname, join as pathJoin, relative as relativePath } from "path"; +import { dirname, join as pathJoin, relative as relativePath, resolve as pathResolve } from "path"; import { parseArgs } from "util"; import { readFileSync, realpathSync, statSync, existsSync, unlinkSync, writeFileSync, openSync, closeSync, mkdirSync, lstatSync, rmSync, symlinkSync, readlinkSync } from "fs"; import { createInterface } from "readline/promises"; @@ -173,9 +173,7 @@ function setIndexName(name: string | null): void { let normalizedName = name; // Normalize relative paths to prevent malformed database paths if (name && name.includes('/')) { - const { resolve } = require('path'); - const { cwd } = require('process'); - const absolutePath = resolve(cwd(), name); + const absolutePath = pathResolve(process.cwd(), name); // Replace path separators with underscores to create a valid filename normalizedName = absolutePath.replace(/\//g, '_').replace(/^_/, ''); } diff --git a/src/collections.ts b/src/collections.ts index e68ff65..a295de7 100644 --- a/src/collections.ts +++ b/src/collections.ts @@ -6,7 +6,7 @@ */ import { existsSync, mkdirSync, readFileSync, writeFileSync } from "fs"; -import { join, dirname } from "path"; +import { join, dirname, resolve } from "path"; import { homedir } from "os"; import YAML from "yaml"; @@ -101,9 +101,7 @@ export function setConfigSource(source?: { configPath?: string; config?: Collect export function setConfigIndexName(name: string): void { // Resolve relative paths to absolute paths and sanitize for use as filename if (name.includes('/')) { - const { resolve } = require('path'); - const { cwd } = require('process'); - const absolutePath = resolve(cwd(), name); + const absolutePath = resolve(process.cwd(), name); // Replace path separators with underscores to create a valid filename currentIndexName = absolutePath.replace(/\//g, '_').replace(/^_/, ''); } else { diff --git a/test/esm-ambiguous-module.test.ts b/test/esm-ambiguous-module.test.ts new file mode 100644 index 0000000..80e61b7 --- /dev/null +++ b/test/esm-ambiguous-module.test.ts @@ -0,0 +1,27 @@ +import { describe, expect, test } from "vitest"; +import { execFileSync } from "child_process"; +import { mkdtempSync } from "fs"; +import { tmpdir } from "os"; +import { dirname, join, resolve } from "path"; +import { fileURLToPath } from "url"; + +const repoRoot = resolve(dirname(fileURLToPath(import.meta.url)), ".."); + +describe("Node ESM entrypoints", () => { + test("CLI --index path normalizes via setIndexName/setConfigIndexName under Node 22+", () => { + execFileSync("bun", ["run", "build"], { + cwd: repoRoot, + encoding: "utf-8", + stdio: "pipe", + }); + + const indexPath = join(mkdtempSync(join(tmpdir(), "qmd-index-")), "nested", "idx"); + const output = execFileSync("node", ["dist/cli/qmd.js", "--index", indexPath, "--version"], { + cwd: repoRoot, + encoding: "utf-8", + stdio: "pipe", + }); + + expect(output).toContain("qmd "); + }, 120_000); +}); From dff6513693647495869942240b1a9efb5236412c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tobi=20L=C3=BCtke?= Date: Sat, 9 May 2026 17:56:25 +0000 Subject: [PATCH 09/17] Preserve document IDs across case-only renames --- src/store.ts | 14 ++++++----- test/store.test.ts | 63 ++++++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 69 insertions(+), 8 deletions(-) diff --git a/src/store.ts b/src/store.ts index 5adafd9..6ec8ac1 100644 --- a/src/store.ts +++ b/src/store.ts @@ -2226,8 +2226,8 @@ export function findActiveDocument( } /** - * Find an active document, falling back to a legacy lowercase path. - * If found under the legacy path, renames it in-place and rebuilds the + * Find an active document, falling back to a case-insensitive path match. + * If found under a different casing, renames it in-place and rebuilds the * FTS entry. Embeddings are keyed by content hash, so the rename is * safe — no re-embedding required. * @@ -2242,10 +2242,12 @@ export function findOrMigrateLegacyDocument( const existing = findActiveDocument(db, collectionName, path); if (existing) return existing; - const legacyPath = path.toLowerCase(); - if (legacyPath === path) return null; - - const legacy = findActiveDocument(db, collectionName, legacyPath); + const legacy = db.prepare(` + SELECT id, hash, title FROM documents + WHERE collection = ? AND path COLLATE NOCASE = ? AND active = 1 + ORDER BY id + LIMIT 1 + `).get(collectionName, path) as { id: number; hash: string; title: string } | undefined; if (!legacy) return null; // Wrap rename + FTS rebuild in a transaction for atomicity. diff --git a/test/store.test.ts b/test/store.test.ts index 24b5a10..9f82624 100644 --- a/test/store.test.ts +++ b/test/store.test.ts @@ -9,7 +9,7 @@ import { describe, test, expect, beforeAll, afterAll, beforeEach, afterEach, vi } from "vitest"; import { openDatabase, loadSqliteVec } from "../src/db.js"; import type { Database } from "../src/db.js"; -import { unlink, mkdtemp, rmdir, writeFile, rm } from "node:fs/promises"; +import { unlink, mkdtemp, rmdir, writeFile, rm, mkdir, rename } from "node:fs/promises"; import { tmpdir } from "node:os"; import { join } from "node:path"; import YAML from "yaml"; @@ -46,12 +46,12 @@ import { normalizeDocid, isDocid, syncConfigToDb, + reindexCollection, STRONG_SIGNAL_MIN_SCORE, STRONG_SIGNAL_MIN_GAP, insertContent, insertDocument, generateEmbeddings, - reindexCollection, getHybridRrfWeights, type Store, type DocumentResult, @@ -2112,6 +2112,65 @@ describe("Reciprocal Rank Fusion", () => { }); }); +// ============================================================================= +// Reindex Collection Tests +// ============================================================================= + +describe("Reindex Collection", () => { + test("preserves document id and embeddings when file path changes only by case", async () => { + const store = await createTestStore(); + const collectionName = "docs"; + const collectionPath = join(testDir, `case-rename-${Date.now()}-${Math.random().toString(36).slice(2)}`); + await mkdir(collectionPath, { recursive: true }); + + const originalPath = join(collectionPath, "README.md"); + const renamedPath = join(collectionPath, "readme.md"); + const body = "# Case Rename\n\nContent that should keep the same embedding."; + await writeFile(originalPath, body); + + const firstResult = await reindexCollection(store, collectionPath, "**/*.md", collectionName); + expect(firstResult.indexed).toBe(1); + + const before = store.db.prepare(` + SELECT id, path, hash FROM documents + WHERE collection = ? AND active = 1 + `).get(collectionName) as { id: number; path: string; hash: string }; + expect(before.path).toBe("README.md"); + + store.db.prepare(` + INSERT INTO content_vectors (hash, seq, pos, model, embedded_at) + VALUES (?, 0, 0, 'test-model', ?) + `).run(before.hash, new Date().toISOString()); + + await rename(originalPath, renamedPath); + + const secondResult = await reindexCollection(store, collectionPath, "**/*.md", collectionName); + expect(secondResult.indexed).toBe(0); + expect(secondResult.unchanged).toBe(1); + expect(secondResult.removed).toBe(0); + + const afterRows = store.db.prepare(` + SELECT id, path, hash, active FROM documents + WHERE collection = ? + ORDER BY id + `).all(collectionName) as { id: number; path: string; hash: string; active: number }[]; + expect(afterRows).toHaveLength(1); + expect(afterRows[0]).toMatchObject({ id: before.id, path: "readme.md", hash: before.hash, active: 1 }); + + const vectorCount = store.db.prepare(` + SELECT COUNT(*) AS count FROM content_vectors WHERE hash = ? + `).get(before.hash) as { count: number }; + expect(vectorCount.count).toBe(1); + + const ftsRows = store.db.prepare(` + SELECT rowid, filepath FROM documents_fts WHERE rowid = ? + `).all(before.id) as { rowid: number; filepath: string }[]; + expect(ftsRows).toEqual([{ rowid: before.id, filepath: "docs/readme.md" }]); + + await cleanupTestDb(store); + }); +}); + // ============================================================================= // Index Status Tests // ============================================================================= From e8229d8bfb0f59da9a5def5d009a90502abac44b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tobi=20L=C3=BCtke?= Date: Sat, 9 May 2026 17:56:28 +0000 Subject: [PATCH 10/17] Fix Windows CUDA context parallelism --- CHANGELOG.md | 1 + README.md | 2 ++ src/llm.ts | 44 ++++++++++++++++++++++++++++++++++++++++---- test/llm.test.ts | 40 ++++++++++++++++++++++++++++++++++++++++ 4 files changed, 83 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4191964..7014380 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,6 +12,7 @@ - Hybrid search: weight RRF lists by query type so original FTS and original vector evidence get the intended 2x boost, instead of accidentally boosting the first lexical expansion. #591 - MCP: seed llama.cpp/GGML quiet env vars before launching `qmd mcp` so native logs cannot pollute stdio JSON-RPC framing. #593 - CLI: remove CommonJS `require()` calls from ESM index path normalization so `qmd --index ` no longer crashes with `ERR_AMBIGUOUS_MODULE_SYNTAX` on Node 22+. #634 +- Windows CUDA: serialize llama.cpp embedding/reranking contexts by default to avoid intermittent `ggml-cuda.cu:98` crashes in `qmd query`; set `QMD_EMBED_PARALLELISM` to opt back into parallel contexts if your driver is stable. #519 - GPU: respect explicit `QMD_LLAMA_GPU=metal|vulkan|cuda` backend overrides instead of always using auto GPU selection. #529 - Fix: preserve original filename case in `handelize()`. The previous `.toLowerCase()` call made indexed paths unreachable on case-sensitive diff --git a/README.md b/README.md index 6f31844..02e4b1e 100644 --- a/README.md +++ b/README.md @@ -797,6 +797,8 @@ llm_cache -- Cached LLM responses (query expansion, rerank scores) | Variable | Default | Description | |----------|---------|-------------| | `XDG_CACHE_HOME` | `~/.cache` | Cache directory location | +| `QMD_LLAMA_GPU` | `auto` | Force llama.cpp GPU backend (`metal`, `vulkan`, `cuda`) or disable GPU with `false` | +| `QMD_EMBED_PARALLELISM` | automatic | Override embedding/reranking context parallelism (1-8). Windows CUDA defaults to `1` because parallel CUDA contexts can crash with `ggml-cuda.cu:98`; use Vulkan or raise this only if your driver is stable. | ## How It Works diff --git a/src/llm.ts b/src/llm.ts index 7d2bbe0..d469d36 100644 --- a/src/llm.ts +++ b/src/llm.ts @@ -451,7 +451,41 @@ export type LlamaCppConfig = { const DEFAULT_INACTIVITY_TIMEOUT_MS = 5 * 60 * 1000; const DEFAULT_EXPAND_CONTEXT_SIZE = 2048; -type LlamaGpuMode = "auto" | "metal" | "vulkan" | "cuda" | false; +export type LlamaGpuMode = "auto" | "metal" | "vulkan" | "cuda" | false; + +type ParallelismOptions = { + gpu: string | false; + platform?: NodeJS.Platform; + computed: number; + envValue?: string; +}; + +export function resolveParallelismOverride(envValue = process.env.QMD_EMBED_PARALLELISM): number | undefined { + const normalized = envValue?.trim() ?? ""; + if (!normalized) return undefined; + + const parsed = Number(normalized); + if (!Number.isInteger(parsed) || parsed < 1) { + process.stderr.write(`QMD Warning: invalid QMD_EMBED_PARALLELISM="${envValue}", using automatic parallelism.\n`); + return undefined; + } + + return Math.min(8, parsed); +} + +export function resolveSafeParallelism(options: ParallelismOptions): number { + const override = resolveParallelismOverride(options.envValue); + if (override !== undefined) return override; + + // node-llama-cpp/llama.cpp CUDA on Windows is unstable with multiple + // simultaneous contexts (ggml-cuda.cu:98 in #519). Vulkan and CPU do not + // show the same failure mode, so only serialize Windows CUDA by default. + if ((options.platform ?? process.platform) === "win32" && options.gpu === "cuda") { + return 1; + } + + return Math.max(1, options.computed); +} export function resolveLlamaGpuMode(envValue = process.env.QMD_LLAMA_GPU): LlamaGpuMode { const normalized = envValue?.trim().toLowerCase() ?? ""; @@ -726,16 +760,18 @@ export class LlamaCpp implements LLM { const vram = await llama.getVramState(); const freeMB = vram.free / (1024 * 1024); const maxByVram = Math.floor((freeMB * 0.25) / perContextMB); - return Math.max(1, Math.min(8, maxByVram)); + const computed = Math.max(1, Math.min(8, maxByVram)); + return resolveSafeParallelism({ gpu: llama.gpu, computed }); } catch { - return 2; + return resolveSafeParallelism({ gpu: llama.gpu, computed: 2 }); } } // CPU: split cores across contexts. At least 4 threads per context. const cores = llama.cpuMathCores || 4; const maxContexts = Math.floor(cores / 4); - return Math.max(1, Math.min(4, maxContexts)); + const computed = Math.max(1, Math.min(4, maxContexts)); + return resolveSafeParallelism({ gpu: false, computed }); } /** diff --git a/test/llm.test.ts b/test/llm.test.ts index 74b6430..3678bad 100644 --- a/test/llm.test.ts +++ b/test/llm.test.ts @@ -13,6 +13,8 @@ import { getDefaultLlamaCpp, disposeDefaultLlamaCpp, resolveLlamaGpuMode, + resolveParallelismOverride, + resolveSafeParallelism, withLLMSession, canUnloadLLM, SessionReleasedError, @@ -88,6 +90,44 @@ describe("QMD_LLAMA_GPU resolution", () => { }); }); +describe("LLM context parallelism safety", () => { + test("defaults Windows CUDA to one context to avoid ggml-cuda.cu:98 crashes", () => { + expect(resolveSafeParallelism({ + gpu: "cuda", + platform: "win32", + computed: 8, + envValue: undefined, + })).toBe(1); + }); + + test("keeps non-Windows and non-CUDA backends on computed parallelism", () => { + expect(resolveSafeParallelism({ gpu: "cuda", platform: "linux", computed: 8 })).toBe(8); + expect(resolveSafeParallelism({ gpu: "vulkan", platform: "win32", computed: 8 })).toBe(8); + expect(resolveSafeParallelism({ gpu: false, platform: "win32", computed: 4 })).toBe(4); + }); + + test("QMD_EMBED_PARALLELISM overrides the Windows CUDA safety default", () => { + expect(resolveSafeParallelism({ + gpu: "cuda", + platform: "win32", + computed: 8, + envValue: "2", + })).toBe(2); + }); + + test("QMD_EMBED_PARALLELISM clamps invalid values and warns", () => { + const stderrSpy = vi.spyOn(process.stderr, "write").mockReturnValue(true); + try { + expect(resolveParallelismOverride("0")).toBeUndefined(); + expect(resolveParallelismOverride("bad")).toBeUndefined(); + expect(stderrSpy).toHaveBeenCalledTimes(2); + expect(String(stderrSpy.mock.calls[0]?.[0] || "")).toContain("QMD_EMBED_PARALLELISM"); + } finally { + stderrSpy.mockRestore(); + } + }); +}); + describe("LlamaCpp expand context size config", () => { const defaultExpandContextSize = 2048; From b77559223025cbcff3f992df0bf01147497c3bab Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tobi=20L=C3=BCtke?= Date: Sat, 9 May 2026 18:00:37 +0000 Subject: [PATCH 11/17] fix mcp --index store selection --- CHANGELOG.md | 1 + src/cli/qmd.ts | 9 +++---- src/mcp/server.ts | 15 ++++++++---- test/cli.test.ts | 61 ++++++++++++++++++++++++++++++++++++++++++++--- 4 files changed, 75 insertions(+), 11 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7014380..4cedf35 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,6 +13,7 @@ - MCP: seed llama.cpp/GGML quiet env vars before launching `qmd mcp` so native logs cannot pollute stdio JSON-RPC framing. #593 - CLI: remove CommonJS `require()` calls from ESM index path normalization so `qmd --index ` no longer crashes with `ERR_AMBIGUOUS_MODULE_SYNTAX` on Node 22+. #634 - Windows CUDA: serialize llama.cpp embedding/reranking contexts by default to avoid intermittent `ggml-cuda.cu:98` crashes in `qmd query`; set `QMD_EMBED_PARALLELISM` to opt back into parallel contexts if your driver is stable. #519 +- MCP: make `qmd mcp --index ` use the selected index for both foreground and daemon HTTP servers instead of falling back to the default store. #343 - GPU: respect explicit `QMD_LLAMA_GPU=metal|vulkan|cuda` backend overrides instead of always using auto GPU selection. #529 - Fix: preserve original filename case in `handelize()`. The previous `.toLowerCase()` call made indexed paths unreachable on case-sensitive diff --git a/src/cli/qmd.ts b/src/cli/qmd.ts index 0c3a1e1..bbef459 100755 --- a/src/cli/qmd.ts +++ b/src/cli/qmd.ts @@ -3253,9 +3253,10 @@ if (isMain) { const logPath = resolve(cacheDir, "mcp.log"); const logFd = openSync(logPath, "w"); // truncate — fresh log per daemon run const selfPath = fileURLToPath(import.meta.url); + const indexArgs = cli.values.index ? ["--index", String(cli.values.index)] : []; const spawnArgs = selfPath.endsWith(".ts") - ? ["--import", pathJoin(dirname(selfPath), "..", "..", "node_modules", "tsx", "dist", "esm", "index.mjs"), selfPath, "mcp", "--http", "--port", String(port)] - : [selfPath, "mcp", "--http", "--port", String(port)]; + ? ["--import", pathJoin(dirname(selfPath), "..", "..", "node_modules", "tsx", "dist", "esm", "index.mjs"), selfPath, ...indexArgs, "mcp", "--http", "--port", String(port)] + : [selfPath, ...indexArgs, "mcp", "--http", "--port", String(port)]; const child = nodeSpawn(process.execPath, spawnArgs, { stdio: ["ignore", logFd, logFd], detached: true, @@ -3275,7 +3276,7 @@ if (isMain) { process.removeAllListeners("SIGINT"); const { startMcpHttpServer } = await import("../mcp/server.js"); try { - await startMcpHttpServer(port); + await startMcpHttpServer(port, { dbPath: getDbPath() }); } catch (e: any) { if (e?.code === "EADDRINUSE") { console.error(`Port ${port} already in use. Try a different port with --port.`); @@ -3286,7 +3287,7 @@ if (isMain) { } else { // Default: stdio transport const { startMcpServer } = await import("../mcp/server.js"); - await startMcpServer(); + await startMcpServer({ dbPath: getDbPath() }); } break; } diff --git a/src/mcp/server.ts b/src/mcp/server.ts index 4fd0d77..a3016e2 100644 --- a/src/mcp/server.ts +++ b/src/mcp/server.ts @@ -538,7 +538,11 @@ Intent-aware lex (C++ performance, not sports): // Transport: stdio (default) // ============================================================================= -export async function startMcpServer(): Promise { +export type McpStartupOptions = { + dbPath?: string; +}; + +export async function startMcpServer(options: McpStartupOptions = {}): Promise { // Opt into production mode when the MCP server is actually started, not // when this module is merely imported for its exports. Importing the module // at the top level flipped the global production flag and broke test @@ -547,7 +551,7 @@ export async function startMcpServer(): Promise { enableProductionMode(); const configPath = getConfigPath(); const store = await createStore({ - dbPath: getDefaultDbPath(), + dbPath: options.dbPath ?? getDefaultDbPath(), ...(existsSync(configPath) ? { configPath } : {}), }); const server = await createMcpServer(store); @@ -569,14 +573,17 @@ export type HttpServerHandle = { * Start MCP server over Streamable HTTP (JSON responses, no SSE). * Binds to localhost only. Returns a handle for shutdown and port discovery. */ -export async function startMcpHttpServer(port: number, options?: { quiet?: boolean }): Promise { +export async function startMcpHttpServer( + port: number, + options: ({ quiet?: boolean } & McpStartupOptions) = {}, +): Promise { // See startMcpServer() for the rationale — flip production mode here so the // HTTP transport resolves the real database path, without leaking state into // callers that only import this module for its exports (e.g. tests). enableProductionMode(); const configPath = getConfigPath(); const store = await createStore({ - dbPath: getDefaultDbPath(), + dbPath: options.dbPath ?? getDefaultDbPath(), ...(existsSync(configPath) ? { configPath } : {}), }); diff --git a/test/cli.test.ts b/test/cli.test.ts index 5748676..40c14c9 100644 --- a/test/cli.test.ts +++ b/test/cli.test.ts @@ -1403,13 +1403,17 @@ describe("mcp http daemon", () => { } /** Spawn a foreground HTTP server (non-blocking) and return the process */ - function spawnHttpServer(port: number): import("child_process").ChildProcess { - const proc = spawn(tsxBin, [qmdScript, "mcp", "--http", "--port", String(port)], { + function spawnHttpServer( + port: number, + options: { args?: string[]; env?: Record } = {}, + ): import("child_process").ChildProcess { + const proc = spawn(tsxBin, [qmdScript, ...(options.args ?? []), "mcp", "--http", "--port", String(port)], { cwd: fixturesDir, env: { ...process.env, INDEX_PATH: daemonDbPath, QMD_CONFIG_DIR: daemonConfigDir, + ...options.env, }, stdio: ["ignore", "pipe", "pipe"], }); @@ -1481,11 +1485,62 @@ describe("mcp http daemon", () => { const body = await res.json(); expect(body.status).toBe("ok"); } finally { + const closed = new Promise(r => proc.once("close", r)); proc.kill("SIGTERM"); - await new Promise(r => proc.on("close", r)); + await closed; } }); + test("foreground HTTP server honors --index when selecting the store", async () => { + const customIndex = "mcp-alt-index"; + const customCacheDir = join(daemonTestDir, `cache-index-${Date.now()}-${Math.random().toString(16).slice(2)}`); + const customConfigDir = join(daemonTestDir, `config-index-${Date.now()}-${Math.random().toString(16).slice(2)}`); + await mkdir(customCacheDir, { recursive: true }); + await mkdir(customConfigDir, { recursive: true }); + + const addResult = await runQmd( + ["--index", customIndex, "collection", "add", fixturesDir, "--name", "mcp-fixtures"], + { + dbPath: daemonDbPath, + configDir: customConfigDir, + env: { + INDEX_PATH: "", + XDG_CACHE_HOME: customCacheDir, + }, + }, + ); + expect(addResult.exitCode).toBe(0); + + const port = randomPort(); + const proc = spawnHttpServer(port, { + args: ["--index", customIndex], + env: { + INDEX_PATH: "", + XDG_CACHE_HOME: customCacheDir, + QMD_CONFIG_DIR: customConfigDir, + }, + }); + + try { + const ready = await waitForServer(port); + expect(ready).toBe(true); + + const res = await fetch(`http://localhost:${port}/query`, { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ searches: [{ type: "lex", query: "authentication" }], limit: 5 }), + }); + expect(res.status).toBe(200); + const body = await res.json(); + const files = body.results.map((r: { file: string }) => r.file); + expect(files.some((file: string) => file.includes("mcp-fixtures/notes/meeting.md"))).toBe(true); + } finally { + const closed = new Promise(r => proc.once("close", r)); + proc.kill("SIGTERM"); + await closed; + } + }, 10000); + // ------------------------------------------------------------------------- // Daemon lifecycle // ------------------------------------------------------------------------- From ddc969a5f48b78c8d451ef1127e1815025030b0c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tobi=20L=C3=BCtke?= Date: Sat, 9 May 2026 18:08:56 +0000 Subject: [PATCH 12/17] fix embed model and qmd home resolution --- CHANGELOG.md | 2 + src/cli/qmd.ts | 8 ++- src/collections.ts | 4 +- src/paths.ts | 5 ++ src/store.ts | 23 ++++--- test/cli.test.ts | 27 +++++++- test/collections-config.test.ts | 15 +++- test/store.test.ts | 118 +++++++++++++++++++++++++++++++- 8 files changed, 184 insertions(+), 18 deletions(-) create mode 100644 src/paths.ts diff --git a/CHANGELOG.md b/CHANGELOG.md index 4cedf35..b2757c8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,6 +14,8 @@ - CLI: remove CommonJS `require()` calls from ESM index path normalization so `qmd --index ` no longer crashes with `ERR_AMBIGUOUS_MODULE_SYNTAX` on Node 22+. #634 - Windows CUDA: serialize llama.cpp embedding/reranking contexts by default to avoid intermittent `ggml-cuda.cu:98` crashes in `qmd query`; set `QMD_EMBED_PARALLELISM` to opt back into parallel contexts if your driver is stable. #519 - MCP: make `qmd mcp --index ` use the selected index for both foreground and daemon HTTP servers instead of falling back to the default store. #343 +- Embedding: respect `QMD_EMBED_MODEL` consistently for vector indexing and vector-backed search, with default-model fallback when unset. +- Config: use one home-directory resolver for YAML config and the default SQLite cache path, avoiding Windows CLI/MCP split-brain when `HOME` is unset. - GPU: respect explicit `QMD_LLAMA_GPU=metal|vulkan|cuda` backend overrides instead of always using auto GPU selection. #529 - Fix: preserve original filename case in `handelize()`. The previous `.toLowerCase()` call made indexed paths unreachable on case-sensitive diff --git a/src/cli/qmd.ts b/src/cli/qmd.ts index bbef459..f576cde 100755 --- a/src/cli/qmd.ts +++ b/src/cli/qmd.ts @@ -1679,8 +1679,12 @@ function parseChunkStrategy(value: unknown): ChunkStrategy | undefined { throw new Error(`--chunk-strategy must be "auto" or "regex" (got "${s}")`); } +export function resolveEmbedModelForCli(): string { + return process.env.QMD_EMBED_MODEL ?? DEFAULT_EMBED_MODEL_URI; +} + async function vectorIndex( - model: string = DEFAULT_EMBED_MODEL_URI, + model: string = resolveEmbedModelForCli(), force: boolean = false, batchOptions?: { maxDocsPerBatch?: number; maxBatchBytes?: number; chunkStrategy?: ChunkStrategy; collection?: string }, ): Promise { @@ -3125,7 +3129,7 @@ if (isMain) { // embed operates on a single collection; only the first value is used. const embedValidatedCollections = resolveCollectionFilter(cli.opts.collection, false); const embedCollection = embedValidatedCollections[0]; - await vectorIndex(DEFAULT_EMBED_MODEL_URI, !!cli.values.force, { + await vectorIndex(resolveEmbedModelForCli(), !!cli.values.force, { maxDocsPerBatch, maxBatchBytes: maxBatchMb === undefined ? undefined : maxBatchMb * 1024 * 1024, chunkStrategy: embedChunkStrategy, diff --git a/src/collections.ts b/src/collections.ts index a295de7..70185c6 100644 --- a/src/collections.ts +++ b/src/collections.ts @@ -7,7 +7,7 @@ import { existsSync, mkdirSync, readFileSync, writeFileSync } from "fs"; import { join, dirname, resolve } from "path"; -import { homedir } from "os"; +import { qmdHomedir } from "./paths.js"; import YAML from "yaml"; // ============================================================================ @@ -118,7 +118,7 @@ function getConfigDir(): string { if (process.env.XDG_CONFIG_HOME) { return join(process.env.XDG_CONFIG_HOME, "qmd"); } - return join(homedir(), ".config", "qmd"); + return join(qmdHomedir(), ".config", "qmd"); } function getConfigFilePath(): string { diff --git a/src/paths.ts b/src/paths.ts new file mode 100644 index 0000000..07c51d3 --- /dev/null +++ b/src/paths.ts @@ -0,0 +1,5 @@ +import { homedir as osHomedir } from "node:os"; + +export function qmdHomedir(): string { + return process.env.HOME || process.env.USERPROFILE || osHomedir() || "/tmp"; +} diff --git a/src/store.ts b/src/store.ts index 6ec8ac1..52dd334 100644 --- a/src/store.ts +++ b/src/store.ts @@ -18,6 +18,7 @@ import { createHash } from "crypto"; import { readFileSync, realpathSync, statSync, mkdirSync } from "node:fs"; // Note: node:path resolve is not imported — we export our own cross-platform resolve() import fastGlob from "fast-glob"; +import { qmdHomedir } from "./paths.js"; import { LlamaCpp, getDefaultLlamaCpp, @@ -38,7 +39,6 @@ import type { // Configuration // ============================================================================= -const HOME = process.env.HOME || process.env.USERPROFILE || "/tmp"; export const DEFAULT_EMBED_MODEL = "embeddinggemma"; export const DEFAULT_RERANK_MODEL = "ExpedientFalcon/qwen3-reranker:0.6b-q8_0"; export const DEFAULT_QUERY_MODEL = "Qwen/Qwen3-1.7B"; @@ -334,7 +334,7 @@ export type ExpandedQuery = { // ============================================================================= export function homedir(): string { - return HOME; + return qmdHomedir(); } /** @@ -1492,7 +1492,8 @@ export async function generateEmbeddings( options?: EmbedOptions ): Promise { const db = store.db; - const model = options?.model ?? DEFAULT_EMBED_MODEL; + const llm = getLlm(store); + const model = options?.model ?? llm.embedModelName ?? DEFAULT_EMBED_MODEL; const now = new Date().toISOString(); const { maxDocsPerBatch, maxBatchBytes } = resolveEmbedOptions(options); const encoder = new TextEncoder(); @@ -1511,8 +1512,7 @@ export async function generateEmbeddings( const startTime = Date.now(); // Use store's LlamaCpp or global singleton, wrapped in a session - const llm = getLlm(store); - const embedModelUri = llm.embedModelName; + const embedModelUri = model; // Create a session manager for this llm instance const result = await withLLMSessionForLlm(llm, async (session) => { @@ -4276,7 +4276,8 @@ export async function hybridQuery( // Batch embed all vector queries in a single call const llm = getLlm(store); - const textsToEmbed = vecQueries.map(q => formatQueryForEmbedding(q.text, llm.embedModelName)); + const embedModel = llm.embedModelName; + const textsToEmbed = vecQueries.map(q => formatQueryForEmbedding(q.text, embedModel)); hooks?.onEmbedStart?.(textsToEmbed.length); const embedStart = Date.now(); const embeddings = await llm.embedBatch(textsToEmbed); @@ -4288,7 +4289,7 @@ export async function hybridQuery( if (!embedding) continue; const vecResults = await store.searchVec( - vecQueries[i]!.text, DEFAULT_EMBED_MODEL, 20, collection, + vecQueries[i]!.text, embedModel, 20, collection, undefined, embedding ); if (vecResults.length > 0) { @@ -4519,10 +4520,11 @@ export async function vectorSearchQuery( options?.hooks?.onExpand?.(query, vecExpanded, Date.now() - expandStart); // Run original + vec/hyde expanded through vector, sequentially — concurrent embed() hangs + const embedModel = getLlm(store).embedModelName; const queryTexts = [query, ...vecExpanded.map(q => q.query)]; const allResults = new Map(); for (const q of queryTexts) { - const vecResults = await store.searchVec(q, DEFAULT_EMBED_MODEL, limit, collection); + const vecResults = await store.searchVec(q, embedModel, limit, collection); for (const r of vecResults) { const existing = allResults.get(r.filepath); if (!existing || r.score > existing.score) { @@ -4660,7 +4662,8 @@ export async function structuredSearch( ); if (vecSearches.length > 0) { const llm = getLlm(store); - const textsToEmbed = vecSearches.map(s => formatQueryForEmbedding(s.query, llm.embedModelName)); + const embedModel = llm.embedModelName; + const textsToEmbed = vecSearches.map(s => formatQueryForEmbedding(s.query, embedModel)); hooks?.onEmbedStart?.(textsToEmbed.length); const embedStart = Date.now(); const embeddings = await llm.embedBatch(textsToEmbed); @@ -4672,7 +4675,7 @@ export async function structuredSearch( for (const coll of collectionList) { const vecResults = await store.searchVec( - vecSearches[i]!.query, DEFAULT_EMBED_MODEL, 20, coll, + vecSearches[i]!.query, embedModel, 20, coll, undefined, embedding ); if (vecResults.length > 0) { diff --git a/test/cli.test.ts b/test/cli.test.ts index 40c14c9..d239347 100644 --- a/test/cli.test.ts +++ b/test/cli.test.ts @@ -13,7 +13,8 @@ import { join, dirname } from "path"; import { fileURLToPath } from "url"; import { spawn } from "child_process"; import { setTimeout as sleep } from "timers/promises"; -import { buildEditorUri, termLink } from "../src/cli/qmd.ts"; +import { buildEditorUri, termLink, resolveEmbedModelForCli } from "../src/cli/qmd.ts"; +import { DEFAULT_EMBED_MODEL_URI } from "../src/llm.ts"; // Test fixtures directory and database path let testDir: string; @@ -243,6 +244,30 @@ describe("CLI Help", () => { }); describe("CLI Embed", () => { + test("prefers QMD_EMBED_MODEL for qmd embed", () => { + const prev = process.env.QMD_EMBED_MODEL; + process.env.QMD_EMBED_MODEL = "hf:env/embed-model.gguf"; + + try { + expect(resolveEmbedModelForCli()).toBe("hf:env/embed-model.gguf"); + } finally { + if (prev === undefined) delete process.env.QMD_EMBED_MODEL; + else process.env.QMD_EMBED_MODEL = prev; + } + }); + + test("falls back to the default embed model when QMD_EMBED_MODEL is unset", () => { + const prev = process.env.QMD_EMBED_MODEL; + delete process.env.QMD_EMBED_MODEL; + + try { + expect(resolveEmbedModelForCli()).toBe(DEFAULT_EMBED_MODEL_URI); + } finally { + if (prev === undefined) delete process.env.QMD_EMBED_MODEL; + else process.env.QMD_EMBED_MODEL = prev; + } + }); + test("rejects invalid --max-docs-per-batch", async () => { const { stderr, exitCode } = await runQmd(["embed", "--max-docs-per-batch", "0"]); expect(exitCode).toBe(1); diff --git a/test/collections-config.test.ts b/test/collections-config.test.ts index b6b15fe..3dd926b 100644 --- a/test/collections-config.test.ts +++ b/test/collections-config.test.ts @@ -7,7 +7,7 @@ import { describe, test, expect, beforeEach, afterEach } from "vitest"; import { join } from "path"; -import { homedir } from "os"; +import { qmdHomedir } from "../src/paths.js"; import { getConfigPath, setConfigIndexName } from "../src/collections.js"; // Save/restore env vars around each test @@ -15,6 +15,8 @@ let savedEnv: Record; beforeEach(() => { savedEnv = { + HOME: process.env.HOME, + USERPROFILE: process.env.USERPROFILE, QMD_CONFIG_DIR: process.env.QMD_CONFIG_DIR, XDG_CONFIG_HOME: process.env.XDG_CONFIG_HOME, }; @@ -38,7 +40,16 @@ describe("getConfigDir via getConfigPath", () => { test("defaults to ~/.config/qmd when no env vars are set", () => { delete process.env.QMD_CONFIG_DIR; delete process.env.XDG_CONFIG_HOME; - expect(getConfigPath()).toBe(join(homedir(), ".config", "qmd", "index.yml")); + expect(getConfigPath()).toBe(join(qmdHomedir(), ".config", "qmd", "index.yml")); + }); + + test("uses the same USERPROFILE fallback as default DB path when HOME is unset", () => { + delete process.env.HOME; + delete process.env.QMD_CONFIG_DIR; + delete process.env.XDG_CONFIG_HOME; + process.env.USERPROFILE = "/Users/windows-user"; + + expect(getConfigPath()).toBe(join("/Users/windows-user", ".config", "qmd", "index.yml")); }); test("QMD_CONFIG_DIR takes highest priority", () => { diff --git a/test/store.test.ts b/test/store.test.ts index 9f82624..8bfaae9 100644 --- a/test/store.test.ts +++ b/test/store.test.ts @@ -53,6 +53,10 @@ import { insertDocument, generateEmbeddings, getHybridRrfWeights, + _resetProductionModeForTesting, + hybridQuery, + structuredSearch, + vectorSearchQuery, type Store, type DocumentResult, type SearchResult, @@ -282,7 +286,9 @@ afterAll(async () => { describe("Store Creation", () => { test("createStore throws without explicit path in test mode", () => { - // In test mode, createStore without path should throw to prevent accidental writes + // In test mode, createStore without path should throw to prevent accidental writes. + // Other tests may enable production mode in the same Bun process, so reset first. + _resetProductionModeForTesting(); const originalIndexPath = process.env.INDEX_PATH; delete process.env.INDEX_PATH; @@ -3021,6 +3027,116 @@ describe("Embedding batching", () => { } }); + test("generateEmbeddings uses the active llm embed model when no explicit model is passed", async () => { + const store = await createTestStore(); + const db = store.db; + const fakeLlm = createFakeEmbedLlm(); + const model = "hf:env/embed-model.gguf"; + + setDefaultLlamaCpp(createFakeTokenizer() as any); + store.llm = { ...fakeLlm, embedModelName: model } as any; + + try { + await insertTestDocument(db, "docs", { name: "one", body: "# One\n\nAlpha" }); + + const result = await generateEmbeddings(store); + + expect(result.chunksEmbedded).toBe(1); + expect(fakeLlm.embedCalls[0]?.options?.model).toBe(model); + expect(fakeLlm.embedBatchModelCalls).toEqual([{ model }]); + expect(db.prepare(`SELECT DISTINCT model FROM content_vectors`).all()).toEqual([{ model }]); + } finally { + setDefaultLlamaCpp(null); + await cleanupTestDb(store); + } + }); + + test("vectorSearchQuery uses the active llm embed model for vector lookups", async () => { + const store = await createTestStore(); + const model = "hf:Qwen/Qwen3-Embedding-0.6B-GGUF/Qwen3-Embedding-0.6B-Q8_0.gguf"; + const searchVecSpy = vi.fn(async () => [] as SearchResult[]) as any; + + store.db.exec(`CREATE TABLE vectors_vec (hash_seq TEXT PRIMARY KEY, embedding BLOB)`); + store.llm = { embedModelName: model } as any; + store.searchVec = searchVecSpy as any; + store.expandQuery = vi.fn(async () => []) as any; + + try { + await vectorSearchQuery(store, "custom query", { limit: 7, minScore: 0 }); + + expect(searchVecSpy).toHaveBeenCalledTimes(1); + expect(searchVecSpy.mock.calls[0]?.[0]).toBe("custom query"); + expect(searchVecSpy.mock.calls[0]?.[1]).toBe(model); + expect(searchVecSpy.mock.calls[0]?.[2]).toBe(7); + } finally { + await cleanupTestDb(store); + } + }); + + test("hybridQuery uses the active llm embed model for precomputed vector lookups", async () => { + const store = await createTestStore(); + const model = "hf:Qwen/Qwen3-Embedding-0.6B-GGUF/Qwen3-Embedding-0.6B-Q8_0.gguf"; + const embedBatchSpy = vi.fn(async (texts: string[]) => texts.map(() => ({ + embedding: [1, 2, 3], + model, + }))); + const searchVecSpy = vi.fn(async () => [] as SearchResult[]) as any; + + store.db.exec(`CREATE TABLE vectors_vec (hash_seq TEXT PRIMARY KEY, embedding BLOB)`); + store.llm = { + embedModelName: model, + embedBatch: embedBatchSpy, + } as any; + store.searchVec = searchVecSpy as any; + store.searchFTS = vi.fn(() => []) as any; + store.expandQuery = vi.fn(async () => []) as any; + + try { + await hybridQuery(store, "hybrid query", { limit: 5, minScore: 0, skipRerank: true }); + + expect(embedBatchSpy).toHaveBeenCalledTimes(1); + expect(searchVecSpy).toHaveBeenCalledTimes(1); + expect(searchVecSpy.mock.calls[0]?.[0]).toBe("hybrid query"); + expect(searchVecSpy.mock.calls[0]?.[1]).toBe(model); + expect(searchVecSpy.mock.calls[0]?.[5]).toEqual([1, 2, 3]); + } finally { + await cleanupTestDb(store); + } + }); + + test("structuredSearch uses the active llm embed model for precomputed vector lookups", async () => { + const store = await createTestStore(); + const model = "hf:Qwen/Qwen3-Embedding-0.6B-GGUF/Qwen3-Embedding-0.6B-Q8_0.gguf"; + const embedBatchSpy = vi.fn(async (texts: string[]) => texts.map(() => ({ + embedding: [1, 2, 3], + model, + }))); + const searchVecSpy = vi.fn(async () => [] as SearchResult[]) as any; + + store.db.exec(`CREATE TABLE vectors_vec (hash_seq TEXT PRIMARY KEY, embedding BLOB)`); + store.llm = { + embedModelName: model, + embedBatch: embedBatchSpy, + } as any; + store.searchVec = searchVecSpy as any; + + try { + await structuredSearch(store, [{ type: "vec", query: "structured query" }], { + limit: 5, + minScore: 0, + skipRerank: true, + }); + + expect(embedBatchSpy).toHaveBeenCalledTimes(1); + expect(searchVecSpy).toHaveBeenCalledTimes(1); + expect(searchVecSpy.mock.calls[0]?.[0]).toBe("structured query"); + expect(searchVecSpy.mock.calls[0]?.[1]).toBe(model); + expect(searchVecSpy.mock.calls[0]?.[5]).toEqual([1, 2, 3]); + } finally { + await cleanupTestDb(store); + } + }); + test("generateEmbeddings rejects invalid batch limits", async () => { const store = await createTestStore(); From e627ca7de66136f4c4ab46ef35fb9a5e1c5f6aba Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tobi=20L=C3=BCtke?= Date: Sat, 9 May 2026 18:20:26 +0000 Subject: [PATCH 13/17] test: allow slow CPU rerank fixture --- test/llm.test.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/llm.test.ts b/test/llm.test.ts index 3678bad..ff22c0c 100644 --- a/test/llm.test.ts +++ b/test/llm.test.ts @@ -694,7 +694,7 @@ describe.skipIf(!!process.env.CI)("LlamaCpp Integration", () => { for (const doc of result.results) { console.log(` ${doc.file}: ${doc.score.toFixed(4)}`); } - }); + }, 30000); }); describe("expandQuery", () => { From 4505d8132e441f90475e0f6ef926a3e44334a5a0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tobi=20L=C3=BCtke?= Date: Sat, 9 May 2026 18:31:26 +0000 Subject: [PATCH 14/17] ci(nix): update node module hashes Refresh fixed-output hashes after moving AST grammar packages into runtime dependencies so Nix CI builds the current locked dependency graph. --- flake.nix | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/flake.nix b/flake.nix index 3645013..7fd8014 100644 --- a/flake.nix +++ b/flake.nix @@ -44,8 +44,8 @@ }); nodeModulesHashes = { - x86_64-linux = "sha256-D0ezO4vqq4iswcAMU2DCql9ZAQvh3me6N9aDB5roq4w="; - aarch64-darwin = "sha256-qU+9KdR/nTocelyANS09I/4yaQ+7s1LvJNqB27IOK/c="; + x86_64-linux = "sha256-zee2c7LS+JxpZOpdWG2qyUKlS7EJq2PL/wSo+AewJ9g="; + aarch64-darwin = "sha256-qL80cpCrl3BbEWqmYStRuTDJlIIAFW1Y71YbJOeu/f0="; # Populate these on first build for additional hosts if/when needed. aarch64-linux = pkgs.lib.fakeHash; From b32ee4e66099a63b324b652e0aa8a8d1054dfb71 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tobi=20L=C3=BCtke?= Date: Sat, 9 May 2026 18:45:56 +0000 Subject: [PATCH 15/17] test: make CI fixture invocations portable --- test/cli.test.ts | 1 + test/esm-ambiguous-module.test.ts | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/test/cli.test.ts b/test/cli.test.ts index d239347..070113b 100644 --- a/test/cli.test.ts +++ b/test/cli.test.ts @@ -1438,6 +1438,7 @@ describe("mcp http daemon", () => { ...process.env, INDEX_PATH: daemonDbPath, QMD_CONFIG_DIR: daemonConfigDir, + PWD: fixturesDir, ...options.env, }, stdio: ["ignore", "pipe", "pipe"], diff --git a/test/esm-ambiguous-module.test.ts b/test/esm-ambiguous-module.test.ts index 80e61b7..d4602af 100644 --- a/test/esm-ambiguous-module.test.ts +++ b/test/esm-ambiguous-module.test.ts @@ -9,7 +9,7 @@ const repoRoot = resolve(dirname(fileURLToPath(import.meta.url)), ".."); describe("Node ESM entrypoints", () => { test("CLI --index path normalizes via setIndexName/setConfigIndexName under Node 22+", () => { - execFileSync("bun", ["run", "build"], { + execFileSync("npm", ["run", "build"], { cwd: repoRoot, encoding: "utf-8", stdio: "pipe", From 669e234d1e4c11fdfde9f6774d7683c6b1fc196b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tobi=20L=C3=BCtke?= Date: Sat, 9 May 2026 18:56:06 +0000 Subject: [PATCH 16/17] test: index MCP HTTP fixture before query --- test/cli.test.ts | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/test/cli.test.ts b/test/cli.test.ts index 070113b..b5758c5 100644 --- a/test/cli.test.ts +++ b/test/cli.test.ts @@ -1537,6 +1537,19 @@ describe("mcp http daemon", () => { ); expect(addResult.exitCode).toBe(0); + const updateResult = await runQmd( + ["--index", customIndex, "update"], + { + dbPath: daemonDbPath, + configDir: customConfigDir, + env: { + INDEX_PATH: "", + XDG_CACHE_HOME: customCacheDir, + }, + }, + ); + expect(updateResult.exitCode).toBe(0); + const port = randomPort(); const proc = spawnHttpServer(port, { args: ["--index", customIndex], From e36ab96567553df07ff1ff42cc933b381f166a35 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tobi=20L=C3=BCtke?= Date: Sat, 9 May 2026 19:03:17 +0000 Subject: [PATCH 17/17] fix: allow HTTP query rerank control --- src/mcp/server.ts | 1 + test/cli.test.ts | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/src/mcp/server.ts b/src/mcp/server.ts index a3016e2..2f5482f 100644 --- a/src/mcp/server.ts +++ b/src/mcp/server.ts @@ -693,6 +693,7 @@ export async function startMcpHttpServer( limit: params.limit ?? 10, minScore: params.minScore ?? 0, intent: params.intent, + rerank: params.rerank, }); // Use first lex or vec query for snippet extraction diff --git a/test/cli.test.ts b/test/cli.test.ts index b5758c5..9c575f8 100644 --- a/test/cli.test.ts +++ b/test/cli.test.ts @@ -1567,7 +1567,7 @@ describe("mcp http daemon", () => { const res = await fetch(`http://localhost:${port}/query`, { method: "POST", headers: { "Content-Type": "application/json" }, - body: JSON.stringify({ searches: [{ type: "lex", query: "authentication" }], limit: 5 }), + body: JSON.stringify({ searches: [{ type: "lex", query: "authentication" }], limit: 5, rerank: false }), }); expect(res.status).toBe(200); const body = await res.json();