From 3d991b2a476992f89639f72e37c926371239a8c4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tobi=20L=C3=BCtke?= <tobi@lutke.com>
Date: Sat, 9 May 2026 18:12:37 +0000
Subject: [PATCH 01/17] fix(cli): keep status from importing llama

---
 CHANGELOG.md                     |  3 +++
 src/llm.ts                       | 35 ++++++++++++++++++++++++--------
 test/cli-lazy-llm-import.test.ts | 20 ++++++++++++++++++
 3 files changed, 49 insertions(+), 9 deletions(-)
 create mode 100644 test/cli-lazy-llm-import.test.ts
diff --git a/CHANGELOG.md b/CHANGELOG.md
index fedaa0f..fbfcde6 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -12,6 +12,9 @@
 - CLI: make `qmd status` skip native `node-llama-cpp` device probing by
   default so status stays safe on machines with broken or unsupported GPU
   drivers. Set `QMD_STATUS_DEVICE_PROBE=1` to opt in.
+- CLI: lazy-load `node-llama-cpp` so lightweight commands such as
+  `qmd status` do not import native ML dependencies or trigger llama.cpp
+  builds on ARM/no-GPU machines. #491
 
 ## [2.1.0] - 2026-04-05
 
diff --git a/src/llm.ts b/src/llm.ts
index 7cccc3f..7d2bbe0 100644
--- a/src/llm.ts
+++ b/src/llm.ts
@@ -4,16 +4,28 @@
  * Provides embeddings, text generation, and reranking using local GGUF models.
  */
 
-import {
-  getLlama,
-  resolveModelFile,
-  LlamaChatSession,
-  LlamaLogLevel,
-  type Llama,
-  type LlamaModel,
-  type LlamaEmbeddingContext,
-  type Token as LlamaToken,
+import type {
+  Llama,
+  LlamaModel,
+  LlamaEmbeddingContext,
+  Token as LlamaToken,
 } from "node-llama-cpp";
+
+/**
+ * Shape of the lazily loaded `node-llama-cpp` module. `typeof import(...)` is a
+ * type-only query that is erased at compile time, so it triggers no module load
+ * while keeping call sites checked against the package's real signatures.
+ */
+type NodeLlamaCppModule = typeof import("node-llama-cpp");
+
+/** Cached module promise, so the native package is requested at most once. */
+let nodeLlamaCppImport: Promise<NodeLlamaCppModule> | null = null;
+
+/** Load `node-llama-cpp` on first use; later calls reuse the cached promise. */
+async function loadNodeLlamaCpp(): Promise<NodeLlamaCppModule> {
+  return (nodeLlamaCppImport ??= import("node-llama-cpp"));
+}
+
 import { homedir } from "os";
 import { join } from "path";
 import { existsSync, mkdirSync, statSync, unlinkSync, readdirSync, readFileSync, writeFileSync, openSync, readSync, closeSync } from "fs";
@@ -344,6 +356,7 @@ export async function pullModels(
       }
     }
 
+    const { resolveModelFile } = await loadNodeLlamaCpp();
     const path = await resolveModelFile(model, cacheDir);
     validateGgufFile(path, model);
     const sizeBytes = existsSync(path) ? statSync(path).size : 0;
@@ -619,6 +632,7 @@ export class LlamaCpp implements LLM {
     if (!this.llama) {
       const gpuMode = resolveLlamaGpuMode();
 
+      const { getLlama, LlamaLogLevel } = await loadNodeLlamaCpp();
       const loadLlama = async (gpu: LlamaGpuMode) =>
         await getLlama({
           build: allowBuild ? "autoAttempt" : "never",
@@ -661,6 +675,7 @@ export class LlamaCpp implements LLM {
   private async resolveModel(modelUri: string): Promise<string> {
     this.ensureModelCacheDir();
     // resolveModelFile handles HF URIs and downloads to the cache dir
+    const { resolveModelFile } = await loadNodeLlamaCpp();
     const modelPath = await resolveModelFile(modelUri, this.modelCacheDir);
     validateGgufFile(modelPath, modelUri);
     return modelPath;
@@ -1079,6 +1094,7 @@ export class LlamaCpp implements LLM {
     // Create fresh context -> sequence -> session for each call
     const context = await this.generateModel!.createContext();
     const sequence = context.getSequence();
+    const { LlamaChatSession } = await loadNodeLlamaCpp();
     const session = new LlamaChatSession({ contextSequence: sequence });
 
     const maxTokens = options.maxTokens ?? 150;
@@ -1158,6 +1174,7 @@ export class LlamaCpp implements LLM {
       contextSize: this.expandContextSize,
     });
     const sequence = genContext.getSequence();
+    const { LlamaChatSession } = await loadNodeLlamaCpp();
     const session = new LlamaChatSession({ contextSequence: sequence });
 
     try {
diff --git a/test/cli-lazy-llm-import.test.ts b/test/cli-lazy-llm-import.test.ts
new file mode 100644
index 0000000..5df3a09
--- /dev/null
+++ b/test/cli-lazy-llm-import.test.ts
@@ -0,0 +1,20 @@
+import { describe, expect, test } from "vitest";
+import { readFileSync } from "fs";
+import { join } from "path";
+
+describe("LLM module loading", () => {
+  test("node-llama-cpp is only dynamically imported by LLM operations", () => {
+    const source = readFileSync(join(process.cwd(), "src", "llm.ts"), "utf-8");
+    // `[^;]` keeps the match inside one statement, so an unrelated earlier value import cannot pair with the type-only `from`.
+    expect(source).not.toMatch(/^[ \t]*import\s+(?!\s*type\b)[^;]*?from\s+["']node-llama-cpp["']/m);
+    expect(source).toContain('import("node-llama-cpp")');
+  });
+
+  test("importing the CLI for lightweight commands succeeds", async () => {
+    const mod = await import("../src/cli/qmd.ts");
+    expect(mod).toMatchObject({
+      buildEditorUri: expect.any(Function),
+      termLink: expect.any(Function),
+    });
+  });
+});

From d045a8bab6f1e484a3d0b37ec8d88d9442085fb2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tobi=20L=C3=BCtke?= <tobi@lutke.com>
Date: Sat, 9 May 2026 18:12:37 +0000
Subject: [PATCH 02/17] fix(search): support CJK FTS queries

---
 src/store.ts       | 131 ++++++++++++++++++++++++++++++++++++++++-----
 test/store.test.ts |  77 ++++++++++++++++++++++----
 2 files changed, 184 insertions(+), 24 deletions(-)

diff --git a/src/store.ts b/src/store.ts
index 1f296f7..d6d5cd7 100644
--- a/src/store.ts
+++ b/src/store.ts
@@ -733,6 +733,73 @@ export function verifySqliteVecLoaded(db: Database): void {
 
 let _sqliteVecAvailable: boolean | null = null;
 
+const CJK_CHAR_PATTERN = /[\p{Script=Han}\p{Script=Hiragana}\p{Script=Katakana}\p{Script=Hangul}]/u;
+const CJK_RUN_PATTERN = /[\p{Script=Han}\p{Script=Hiragana}\p{Script=Katakana}\p{Script=Hangul}]+/gu;
+const FTS_CJK_NORMALIZED_VERSION = "1";
+
+/**
+ * Put every Han/Hiragana/Katakana/Hangul character in its own space-separated
+ * slot (each run is also padded with a space on both sides) so FTS5's unicode61
+ * tokenizer indexes CJK text per character; all other text is left unchanged.
+ */
+export function normalizeCjkForFTS(text: string): string {
+  return text.replace(CJK_RUN_PATTERN, (cjkRun) => ` ${[...cjkRun].join(' ')} `);
+}
+
+function containsCjk(text: string): boolean {
+  return text.search(CJK_CHAR_PATTERN) >= 0; // true when any Han/Hiragana/Katakana/Hangul character occurs
+}
+
+function sanitizeFTS5Phrase(phrase: string): string {
+  // Space out CJK characters first so each one becomes its own phrase token.
+  const pieces = normalizeCjkForFTS(phrase).split(/\s+/);
+  const cleaned = pieces.map(piece => sanitizeFTS5Term(piece));
+  // Drop tokens that sanitize to an empty string, then rejoin with single spaces.
+  return cleaned.filter(token => token).join(' ');
+}
+
+function rebuildFTSForCjkNormalization(db: Database): void { // rebuilds documents_fts with CJK-normalized text, gated by a version marker
+  const version = db.prepare(`SELECT value FROM store_config WHERE key = 'fts_cjk_normalized_version'`).get() as { value?: string } | undefined;
+  if (version?.value === FTS_CJK_NORMALIZED_VERSION) return; // marker matches: index was already rebuilt for this version
+
+  try { // first attempt: empty the existing index in place
+    db.exec(`DELETE FROM documents_fts WHERE rowid >= 0`);
+  } catch {
+    // Some older/corrupt FTS5 shadow-table states can reject bulk deletes even
+    // though reads still work. Recreate the virtual table; documents_fts is a
+    // derived index, so rebuilding it from documents/content is safe.
+    db.exec(`DROP TABLE IF EXISTS documents_fts`);
+    db.exec(`
+      CREATE VIRTUAL TABLE documents_fts USING fts5(
+        filepath, title, body,
+        tokenize='porter unicode61'
+      )
+    `);
+  }
+  const rows = db.prepare(`
+    SELECT d.id, d.collection, d.path, d.title, content.doc as body
+    FROM documents d
+    JOIN content ON content.hash = d.hash
+    WHERE d.active = 1
+  `).all() as { id: number; collection: string; path: string; title: string; body: string }[]; // every active document joined with its stored body
+  const insert = db.prepare(`INSERT INTO documents_fts(rowid, filepath, title, body) VALUES (?, ?, ?, ?)`);
+  const rebuild = db.transaction(() => { // all re-inserts run inside a single transaction
+    for (const row of rows) {
+      insert.run(
+        row.id,
+        normalizeCjkForFTS(`${row.collection}/${row.path}`),
+        normalizeCjkForFTS(row.title),
+        normalizeCjkForFTS(row.body)
+      );
+    }
+  });
+  rebuild();
+  db.prepare(`
+    INSERT OR REPLACE INTO store_config(key, value)
+    VALUES ('fts_cjk_normalized_version', ?)
+  `).run(FTS_CJK_NORMALIZED_VERSION); // stamp the marker last so the early return above applies on later calls
+}
+
 function initializeDatabase(db: Database): void {
   try {
     loadSqliteVec(db);
@@ -838,9 +905,12 @@ function initializeDatabase(db: Database): void {
     )
   `);
 
-  // Triggers to keep FTS in sync
+  // Triggers keep FTS in sync for callers that write directly to documents.
+  // Production indexing paths rebuild entries in TypeScript so CJK text can be
+  // normalized before it reaches the unicode61 tokenizer.
+  db.exec(`DROP TRIGGER IF EXISTS documents_ai`);
   db.exec(`
-    CREATE TRIGGER IF NOT EXISTS documents_ai AFTER INSERT ON documents
+    CREATE TRIGGER documents_ai AFTER INSERT ON documents
     WHEN new.active = 1
     BEGIN
       INSERT INTO documents_fts(rowid, filepath, title, body)
@@ -853,14 +923,16 @@ function initializeDatabase(db: Database): void {
     END
   `);
 
+  db.exec(`DROP TRIGGER IF EXISTS documents_ad`);
   db.exec(`
-    CREATE TRIGGER IF NOT EXISTS documents_ad AFTER DELETE ON documents BEGIN
+    CREATE TRIGGER documents_ad AFTER DELETE ON documents BEGIN
       DELETE FROM documents_fts WHERE rowid = old.id;
     END
   `);
 
+  db.exec(`DROP TRIGGER IF EXISTS documents_au`);
   db.exec(`
-    CREATE TRIGGER IF NOT EXISTS documents_au AFTER UPDATE ON documents
+    CREATE TRIGGER documents_au AFTER UPDATE ON documents
     BEGIN
       -- Delete from FTS if no longer active
       DELETE FROM documents_fts WHERE rowid = old.id AND new.active = 0;
@@ -875,6 +947,8 @@ function initializeDatabase(db: Database): void {
       WHERE new.active = 1;
     END
   `);
+
+  rebuildFTSForCjkNormalization(db);
 }
 
 // =============================================================================
@@ -2077,6 +2151,28 @@ export function insertContent(db: Database, hash: string, content: string, creat
     .run(hash, content, createdAt);
 }
 
+function rebuildDocumentFTS(db: Database, documentId: number): void { // replaces one document's FTS row with CJK-normalized text
+  const row = db.prepare(`
+    SELECT d.id, d.collection, d.path, d.title, content.doc as body
+    FROM documents d
+    JOIN content ON content.hash = d.hash
+    WHERE d.id = ? AND d.active = 1
+  `).get(documentId) as { id: number; collection: string; path: string; title: string; body: string } | undefined; // undefined when the id is missing, inactive, or has no content row
+
+  db.prepare(`DELETE FROM documents_fts WHERE rowid = ?`).run(documentId); // always drop the old entry, even when nothing is re-inserted
+  if (!row) return; // nothing to index: the document stays out of FTS
+
+  db.prepare(`
+    INSERT INTO documents_fts(rowid, filepath, title, body)
+    VALUES (?, ?, ?, ?)
+  `).run(
+    row.id,
+    normalizeCjkForFTS(`${row.collection}/${row.path}`), // filepath column is "<collection>/<path>"
+    normalizeCjkForFTS(row.title),
+    normalizeCjkForFTS(row.body)
+  );
+}
+
 /**
  * Insert a new document into the documents table.
  */
@@ -2098,6 +2194,9 @@ export function insertDocument(
       modified_at = excluded.modified_at,
       active = 1
   `).run(collectionName, path, title, hash, createdAt, modifiedAt);
+
+  const row = db.prepare(`SELECT id FROM documents WHERE collection = ? AND path = ?`).get(collectionName, path) as { id: number } | undefined;
+  if (row) rebuildDocumentFTS(db, row.id);
 }
 
 /**
@@ -2148,15 +2247,7 @@ export function findOrMigrateLegacyDocument(
 
     if (result.changes === 0) return false;
 
-    // FTS5 does not reliably update via the documents_au trigger's
-    // INSERT OR REPLACE. Manually rebuild the FTS entry.
-    db.prepare(`DELETE FROM documents_fts WHERE rowid = ?`).run(legacy.id);
-    db.prepare(`
-      INSERT INTO documents_fts(rowid, filepath, title, body)
-      SELECT id, collection || '/' || path, title,
-             (SELECT doc FROM content WHERE hash = documents.hash)
-      FROM documents WHERE id = ?
-    `).run(legacy.id);
+    rebuildDocumentFTS(db, legacy.id);
 
     return true;
   });
@@ -2177,6 +2268,7 @@ export function updateDocumentTitle(
 ): void {
   db.prepare(`UPDATE documents SET title = ?, modified_at = ? WHERE id = ?`)
     .run(title, modifiedAt, documentId);
+  rebuildDocumentFTS(db, documentId);
 }
 
 /**
@@ -2192,6 +2284,7 @@ export function updateDocument(
 ): void {
   db.prepare(`UPDATE documents SET title = ?, hash = ?, modified_at = ? WHERE id = ?`)
     .run(title, hash, modifiedAt, documentId);
+  rebuildDocumentFTS(db, documentId);
 }
 
 /**
@@ -2940,7 +3033,7 @@ function buildFTS5Query(query: string): string | null {
       const phrase = s.slice(start, i).trim();
       i++; // skip closing quote
       if (phrase.length > 0) {
-        const sanitized = phrase.split(/\s+/).map(t => sanitizeFTS5Term(t)).filter(t => t).join(' ');
+        const sanitized = sanitizeFTS5Phrase(phrase);
         if (sanitized) {
           const ftsPhrase = `"${sanitized}"`;  // Exact phrase, no prefix match
           if (negated) {
@@ -2968,6 +3061,16 @@ function buildFTS5Query(query: string): string | null {
             positive.push(ftsPhrase);
           }
         }
+      } else if (containsCjk(term)) {
+        const sanitized = sanitizeFTS5Phrase(term);
+        if (sanitized) {
+          const ftsPhrase = `"${sanitized}"`;  // CJK phrase over character tokens
+          if (negated) {
+            negative.push(ftsPhrase);
+          } else {
+            positive.push(ftsPhrase);
+          }
+        }
       } else {
         const sanitized = sanitizeFTS5Term(term);
         if (sanitized) {
diff --git a/test/store.test.ts b/test/store.test.ts
index 848ec96..a172064 100644
--- a/test/store.test.ts
+++ b/test/store.test.ts
@@ -48,6 +48,8 @@ import {
   syncConfigToDb,
   STRONG_SIGNAL_MIN_SCORE,
   STRONG_SIGNAL_MIN_GAP,
+  insertContent,
+  insertDocument,
   generateEmbeddings,
   type Store,
   type DocumentResult,
@@ -156,18 +158,18 @@ async function insertTestDocument(
   const hash = opts.hash || await hashContent(body);
 
   // Insert content (with OR IGNORE for deduplication)
-  db.prepare(`
-    INSERT OR IGNORE INTO content (hash, doc, created_at)
-    VALUES (?, ?, ?)
-  `).run(hash, body, now);
+  insertContent(db, hash, body, now);
 
-  // Insert document
-  const result = db.prepare(`
-    INSERT INTO documents (collection, path, title, hash, created_at, modified_at, active)
-    VALUES (?, ?, ?, ?, ?, ?, ?)
-  `).run(collectionName, path, title, hash, now, now, active);
+  insertDocument(db, collectionName, path, title, hash, now, now);
+  const row = db.prepare(`
+    SELECT id FROM documents WHERE collection = ? AND path = ?
+  `).get(collectionName, path) as { id: number } | undefined;
 
-  return Number(result.lastInsertRowid);
+  if (active === 0 && row) {
+    db.prepare(`UPDATE documents SET active = 0 WHERE id = ?`).run(row.id);
+  }
+
+  return row?.id ?? 0;
 }
 
 /** Sync YAML config file to SQLite store_collections in the current test store */
@@ -1250,6 +1252,61 @@ describe("FTS Search", () => {
     await cleanupTestDb(store);
   });
 
+  test("searchFTS finds CJK documents by exact and mixed queries", async () => {
+    const store = await createTestStore();
+    const collectionName = await createTestCollection();
+
+    await insertTestDocument(store.db, collectionName, {
+      name: "zh",
+      title: "中文检索说明",
+      body: "这里介绍 vector 数据库和关键词检索。",
+      displayPath: "cjk/zh.md",
+    });
+    await insertTestDocument(store.db, collectionName, {
+      name: "ja",
+      title: "日本語検索メモ",
+      body: "この文書は検索品質とトークン化について説明します。",
+      displayPath: "cjk/ja.md",
+    });
+    await insertTestDocument(store.db, collectionName, {
+      name: "ko",
+      title: "한국어 검색 노트",
+      body: "이 문서는 검색 품질과 토큰화 문제를 설명합니다.",
+      displayPath: "cjk/ko.md",
+    });
+
+    expect(store.searchFTS("关键词检索", 10).map(r => r.displayPath)).toContain(`${collectionName}/cjk/zh.md`);
+    expect(store.searchFTS("検索品質", 10).map(r => r.displayPath)).toContain(`${collectionName}/cjk/ja.md`);
+    expect(store.searchFTS("검색 품질", 10).map(r => r.displayPath)).toContain(`${collectionName}/cjk/ko.md`);
+    expect(store.searchFTS("vector 关键词", 10).map(r => r.displayPath)).toContain(`${collectionName}/cjk/zh.md`);
+
+    await cleanupTestDb(store);
+  });
+
+  test("searchFTS keeps English behavior while indexing CJK text", async () => {
+    const store = await createTestStore();
+    const collectionName = await createTestCollection();
+
+    await insertTestDocument(store.db, collectionName, {
+      name: "english",
+      title: "Vector Search Notes",
+      body: "The quick brown fox explains vector search and BM25 ranking.",
+      displayPath: "english.md",
+    });
+    await insertTestDocument(store.db, collectionName, {
+      name: "zh",
+      title: "中文检索说明",
+      body: "这里介绍向量数据库和关键词检索。",
+      displayPath: "zh.md",
+    });
+
+    const foxResults = store.searchFTS("quick fox", 10);
+    expect(foxResults.map(r => r.displayPath)).toContain(`${collectionName}/english.md`);
+    expect(foxResults.map(r => r.displayPath)).not.toContain(`${collectionName}/zh.md`);
+
+    await cleanupTestDb(store);
+  });
+
   test("searchFTS handles special characters in query", async () => {
     const store = await createTestStore();
     const collectionName = await createTestCollection();

From 5b9f4728495d482d59ae4b4d3152743c00335183 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tobi=20L=C3=BCtke?= <tobi@lutke.com>
Date: Sat, 9 May 2026 18:12:37 +0000
Subject: [PATCH 03/17] fix(embed): honor collection filter

---
 CHANGELOG.md     |  5 +++
 src/cli/qmd.ts   | 14 +++++--
 src/index.ts     |  3 ++
 src/store.ts     | 95 ++++++++++++++++++++++++++++++++++++++++--------
 test/sdk.test.ts | 86 +++++++++++++++++++++++++++++++++++++++++++
 5 files changed, 185 insertions(+), 18 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index fbfcde6..fee012c 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -4,6 +4,11 @@
 
 ### Fixes
 
+- Embedding: `qmd embed -c <collection>` now scopes pending-doc selection
+  to the requested collection instead of embedding global pending work.
+  Scoped `--force` clears only collection-owned vectors, preserves shared
+  hashes referenced by sibling collections, and drops `vectors_vec` only
+  when the scoped clear empties all vectors.
 - GPU: respect explicit `QMD_LLAMA_GPU=metal|vulkan|cuda` backend overrides instead of always using auto GPU selection. #529
 - Fix: preserve original filename case in `handelize()`. The previous
   `.toLowerCase()` call made indexed paths unreachable on case-sensitive
diff --git a/src/cli/qmd.ts b/src/cli/qmd.ts
index f42d1be..4ceecbe 100755
--- a/src/cli/qmd.ts
+++ b/src/cli/qmd.ts
@@ -1684,7 +1684,7 @@ function parseChunkStrategy(value: unknown): ChunkStrategy | undefined {
 async function vectorIndex(
   model: string = DEFAULT_EMBED_MODEL_URI,
   force: boolean = false,
-  batchOptions?: { maxDocsPerBatch?: number; maxBatchBytes?: number; chunkStrategy?: ChunkStrategy },
+  batchOptions?: { maxDocsPerBatch?: number; maxBatchBytes?: number; chunkStrategy?: ChunkStrategy; collection?: string },
 ): Promise<void> {
   const storeInstance = getStore();
   const db = storeInstance.db;
@@ -1694,7 +1694,7 @@ async function vectorIndex(
   }
 
   // Check if there's work to do before starting
-  const hashesToEmbed = getHashesNeedingEmbedding(db);
+  const hashesToEmbed = getHashesNeedingEmbedding(db, batchOptions?.collection);
   if (hashesToEmbed === 0 && !force) {
     console.log(`${c.green}✓ All content hashes already have embeddings.${c.reset}`);
     closeDb();
@@ -1715,6 +1715,7 @@ async function vectorIndex(
   const result = await generateEmbeddings(storeInstance, {
     force,
     model,
+    collection: batchOptions?.collection,
     maxDocsPerBatch: batchOptions?.maxDocsPerBatch,
     maxBatchBytes: batchOptions?.maxBatchBytes,
     chunkStrategy: batchOptions?.chunkStrategy,
@@ -2727,7 +2728,7 @@ function showHelp(): void {
   console.log("Maintenance:");
   console.log("  qmd status                    - View index + collection health");
   console.log("  qmd update [--pull]           - Re-index collections (optionally git pull first)");
-  console.log("  qmd embed [-f]                - Generate/refresh vector embeddings");
+  console.log("  qmd embed [-f] [-c <name>]    - Generate/refresh vector embeddings");
   console.log("    --max-docs-per-batch <n>    - Cap docs loaded into memory per embedding batch");
   console.log("    --max-batch-mb <n>          - Cap UTF-8 MB loaded into memory per embedding batch");
   console.log("  qmd cleanup                   - Clear caches, vacuum DB");
@@ -3120,10 +3121,17 @@ if (isMain) {
         const maxDocsPerBatch = parseEmbedBatchOption("maxDocsPerBatch", cli.values["max-docs-per-batch"]);
         const maxBatchMb = parseEmbedBatchOption("maxBatchBytes", cli.values["max-batch-mb"]);
         const embedChunkStrategy = parseChunkStrategy(cli.values["chunk-strategy"]);
+        // Validate -c against configured collections before dispatching, so a
+        // typo errors with "Collection not found: X" instead of silently
+        // reporting success because no pending docs match a nonexistent name.
+        // embed operates on a single collection; only the first value is used.
+        const embedValidatedCollections = resolveCollectionFilter(cli.opts.collection, false);
+        const embedCollection = embedValidatedCollections[0];
         await vectorIndex(DEFAULT_EMBED_MODEL_URI, !!cli.values.force, {
           maxDocsPerBatch,
           maxBatchBytes: maxBatchMb === undefined ? undefined : maxBatchMb * 1024 * 1024,
           chunkStrategy: embedChunkStrategy,
+          collection: embedCollection,
         });
       } catch (error) {
         console.error(error instanceof Error ? error.message : String(error));
diff --git a/src/index.ts b/src/index.ts
index 6772347..3de13a5 100644
--- a/src/index.ts
+++ b/src/index.ts
@@ -290,6 +290,8 @@ export interface QMDStore {
   embed(options?: {
     force?: boolean;
     model?: string;
+    /** Restrict embedding to documents in one collection. */
+    collection?: string;
     maxDocsPerBatch?: number;
     maxBatchBytes?: number;
     chunkStrategy?: ChunkStrategy;
@@ -516,6 +518,7 @@ export async function createStore(options: StoreOptions): Promise<QMDStore> {
       return generateEmbeddings(internal, {
         force: embedOpts?.force,
         model: embedOpts?.model,
+        collection: embedOpts?.collection,
         maxDocsPerBatch: embedOpts?.maxDocsPerBatch,
         maxBatchBytes: embedOpts?.maxBatchBytes,
         chunkStrategy: embedOpts?.chunkStrategy,
diff --git a/src/store.ts b/src/store.ts
index d6d5cd7..71dc887 100644
--- a/src/store.ts
+++ b/src/store.ts
@@ -1374,6 +1374,11 @@ export type EmbedResult = {
 export type EmbedOptions = {
   force?: boolean;
   model?: string;
+  /**
+   * Restrict embedding to documents in a single collection.
+   * When omitted, all pending documents across every collection are embedded.
+   */
+  collection?: string;
   maxDocsPerBatch?: number;
   maxBatchBytes?: number;
   chunkStrategy?: ChunkStrategy;
@@ -1415,16 +1420,18 @@ function resolveEmbedOptions(options?: EmbedOptions): Required<Pick<EmbedOptions
   };
 }
 
-function getPendingEmbeddingDocs(db: Database): PendingEmbeddingDoc[] {
-  return db.prepare(`
+function getPendingEmbeddingDocs(db: Database, collection?: string): PendingEmbeddingDoc[] { // active docs whose hash has no seq-0 vector row
+  const collectionFilter = collection ? `AND d.collection = ?` : ``; // fixed SQL fragment; the collection name itself is bound as a parameter
+  const stmt = db.prepare(`
     SELECT d.hash, MIN(d.path) as path, length(CAST(c.doc AS BLOB)) as bytes
     FROM documents d
     JOIN content c ON d.hash = c.hash
     LEFT JOIN content_vectors v ON d.hash = v.hash AND v.seq = 0
-    WHERE d.active = 1 AND v.hash IS NULL
+    WHERE d.active = 1 AND v.hash IS NULL ${collectionFilter}
     GROUP BY d.hash
     ORDER BY MIN(d.path)
-  `).all() as PendingEmbeddingDoc[];
+  `);
+  return (collection ? stmt.all(collection) : stmt.all()) as PendingEmbeddingDoc[]; // bind the name only when the filter placeholder is present
 }
 
 function buildEmbeddingBatches(
@@ -1491,10 +1498,10 @@ export async function generateEmbeddings(
   const encoder = new TextEncoder();
 
   if (options?.force) {
-    clearAllEmbeddings(db);
+    clearAllEmbeddings(db, options?.collection);
   }
 
-  const docsToEmbed = getPendingEmbeddingDocs(db);
+  const docsToEmbed = getPendingEmbeddingDocs(db, options?.collection);
 
   if (docsToEmbed.length === 0) {
     return { docsProcessed: 0, chunksEmbedded: 0, errors: 0, durationMs: 0 };
@@ -1942,13 +1949,15 @@ export type IndexStatus = {
 // Index health
 // =============================================================================
 
-export function getHashesNeedingEmbedding(db: Database): number {
-  const result = db.prepare(`
+export function getHashesNeedingEmbedding(db: Database, collection?: string): number { // counts distinct active hashes with no seq-0 vector row
+  const collectionFilter = collection ? `AND d.collection = ?` : ``; // fixed SQL fragment; the collection name itself is bound as a parameter
+  const stmt = db.prepare(`
     SELECT COUNT(DISTINCT d.hash) as count
     FROM documents d
     LEFT JOIN content_vectors v ON d.hash = v.hash AND v.seq = 0
-    WHERE d.active = 1 AND v.hash IS NULL
-  `).get() as { count: number };
+    WHERE d.active = 1 AND v.hash IS NULL ${collectionFilter}
+  `);
+  const result = (collection ? stmt.get(collection) : stmt.get()) as { count: number }; // bind the name only when the filter placeholder is present
   return result.count;
 }
 
@@ -3315,12 +3324,68 @@ export function getHashesForEmbedding(db: Database): { hash: string; body: strin
 }
 
 /**
- * Clear all embeddings from the database (force re-index).
- * Deletes all rows from content_vectors and drops the vectors_vec table.
+ * Clear embeddings for the whole index, or just for one collection.
+ *
+ * When `collection` is omitted the entire content_vectors table is emptied and
+ * the vectors_vec virtual table is dropped (it is recreated with the right
+ * dimensions on the next embed run).
+ *
+ * When `collection` is provided, only vectors whose hash is referenced
+ * exclusively by active documents in that collection are removed. Hashes
+ * shared with active documents in other collections are left in place so
+ * vector search keeps working there (content_vectors is keyed globally by
+ * content hash; identical document bodies across collections share a row).
+ * The scoped deletes share one transaction so vectors_vec and content_vectors
+ * cannot drift apart. vectors_vec is dropped only if the scoped clear empties
+ * content_vectors, letting the next embed recreate it with current dimensions.
  */
-export function clearAllEmbeddings(db: Database): void {
-  db.exec(`DELETE FROM content_vectors`);
-  db.exec(`DROP TABLE IF EXISTS vectors_vec`);
+export function clearAllEmbeddings(db: Database, collection?: string): void {
+  if (!collection) {
+    db.exec(`DELETE FROM content_vectors`);
+    db.exec(`DROP TABLE IF EXISTS vectors_vec`);
+    return;
+  }
+
+  const exclusiveHashesQuery = `
+    SELECT DISTINCT d.hash
+    FROM documents d
+    WHERE d.collection = ? AND d.active = 1
+      AND NOT EXISTS (
+        SELECT 1 FROM documents d2
+        WHERE d2.hash = d.hash
+          AND d2.active = 1
+          AND d2.collection != d.collection
+      )
+  `;
+
+  // Both tables must change together: if vectors_vec rows were removed but the
+  // matching content_vectors rows survived a failure, those documents would no
+  // longer count as pending and would keep missing vectors until re-forced.
+  const clearScoped = db.transaction(() => {
+    const vecTableExists = db
+      .prepare(`SELECT 1 FROM sqlite_master WHERE type='table' AND name='vectors_vec'`)
+      .get();
+    if (vecTableExists) {
+      const hashSeqRows = db.prepare(`
+        SELECT cv.hash, cv.seq
+        FROM content_vectors cv
+        WHERE cv.hash IN (${exclusiveHashesQuery})
+      `).all(collection) as { hash: string; seq: number }[];
+      const delVec = db.prepare(`DELETE FROM vectors_vec WHERE hash_seq = ?`);
+      for (const row of hashSeqRows) delVec.run(`${row.hash}_${row.seq}`);
+    }
+    db.prepare(`
+      DELETE FROM content_vectors
+      WHERE hash IN (${exclusiveHashesQuery})
+    `).run(collection);
+  });
+  clearScoped();
+
+  const remaining = db.prepare(`SELECT COUNT(*) AS n FROM content_vectors`).get() as { n: number };
+  if (remaining.n === 0) {
+    // No vectors are left for any collection, so the table can be dropped.
+    db.exec(`DROP TABLE IF EXISTS vectors_vec`);
+  }
 }
 
 /**
diff --git a/test/sdk.test.ts b/test/sdk.test.ts
index 689da27..b60e9e7 100644
--- a/test/sdk.test.ts
+++ b/test/sdk.test.ts
@@ -982,6 +982,92 @@ describe("embed", () => {
     }
   });
 
+  test("store.embed scopes pending documents to the requested collection", async () => {
+    const store = await createStore({
+      dbPath: freshDbPath(),
+      config: {
+        collections: {
+          docs: { path: docsDir, pattern: "**/*.md" },
+          notes: { path: notesDir, pattern: "**/*.md" },
+        },
+      },
+    });
+
+    const fakeLlm = createFakeEmbedLlm();
+    setDefaultLlamaCpp(createFakeTokenizer() as any);
+    store.internal.llm = fakeLlm as any;
+
+    try {
+      await store.update();
+      const result = await store.embed({ collection: "docs" });
+
+      const vectorCounts = store.internal.db.prepare(`
+        SELECT d.collection, COUNT(DISTINCT v.hash) AS count
+        FROM documents d
+        LEFT JOIN content_vectors v ON v.hash = d.hash AND v.seq = 0
+        WHERE d.active = 1
+        GROUP BY d.collection
+        ORDER BY d.collection
+      `).all() as Array<{ collection: string; count: number }>;
+
+      expect(result.docsProcessed).toBe(3);
+      expect(result.chunksEmbedded).toBe(3);
+      expect(vectorCounts).toEqual([
+        { collection: "docs", count: 3 },
+        { collection: "notes", count: 0 },
+      ]);
+    } finally {
+      setDefaultLlamaCpp(null);
+      await store.close();
+    }
+  });
+
+  test("store.embed with force only clears the requested collection", async () => {
+    const store = await createStore({
+      dbPath: freshDbPath(),
+      config: {
+        collections: {
+          docs: { path: docsDir, pattern: "**/*.md" },
+          notes: { path: notesDir, pattern: "**/*.md" },
+        },
+      },
+    });
+
+    const fakeLlm = createFakeEmbedLlm();
+    setDefaultLlamaCpp(createFakeTokenizer() as any);
+    store.internal.llm = fakeLlm as any;
+
+    const vectorCounts = () => store.internal.db.prepare(`
+      SELECT d.collection, COUNT(DISTINCT v.hash) AS count
+      FROM documents d
+      LEFT JOIN content_vectors v ON v.hash = d.hash AND v.seq = 0
+      WHERE d.active = 1
+      GROUP BY d.collection
+      ORDER BY d.collection
+    `).all() as Array<{ collection: string; count: number }>;
+
+    try {
+      await store.update();
+      await store.embed();
+      expect(vectorCounts()).toEqual([
+        { collection: "docs", count: 3 },
+        { collection: "notes", count: 3 },
+      ]);
+
+      const result = await store.embed({ force: true, collection: "docs" });
+
+      expect(result.docsProcessed).toBe(3);
+      expect(result.chunksEmbedded).toBe(3);
+      expect(vectorCounts()).toEqual([
+        { collection: "docs", count: 3 },
+        { collection: "notes", count: 3 },
+      ]);
+    } finally {
+      setDefaultLlamaCpp(null);
+      await store.close();
+    }
+  });
+
   test("store.embed rejects invalid batch limits", async () => {
     const store = await createStore({
       dbPath: freshDbPath(),

From 92aaded36e4aaf76f753dceb9e12dccae6b4704a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tobi=20L=C3=BCtke?= <tobi@lutke.com>
Date: Sat, 9 May 2026 17:53:44 +0000
Subject: [PATCH 04/17] fix(store): preserve inactive docs during orphan
 cleanup

---
 CHANGELOG.md       |  3 ++
 src/store.ts       |  6 ++--
 test/store.test.ts | 71 +++++++++++++++++++++++++++++++++++++++++++++-
 3 files changed, 77 insertions(+), 3 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index fee012c..4931d92 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -20,6 +20,9 @@
 - CLI: lazy-load `node-llama-cpp` so lightweight commands such as
   `qmd status` do not import native ML dependencies or trigger llama.cpp
   builds on ARM/no-GPU machines. #491
+- Store: keep content rows referenced by inactive documents during orphan
+  cleanup so `qmd update` preserves soft-deleted tombstones for removed
+  files. #585
 
 ## [2.1.0] - 2026-04-05
 
diff --git a/src/store.ts b/src/store.ts
index 71dc887..f5dd47a 100644
--- a/src/store.ts
+++ b/src/store.ts
@@ -2032,13 +2032,15 @@ export function deleteInactiveDocuments(db: Database): number {
 }
 
 /**
- * Remove orphaned content hashes that are not referenced by any active document.
+ * Remove orphaned content hashes that are not referenced by any document.
+ * Inactive documents are soft-deleted tombstones, so their content rows must
+ * remain referenced until deleteInactiveDocuments() hard-deletes them.
  * Returns the number of orphaned content hashes deleted.
  */
 export function cleanupOrphanedContent(db: Database): number {
   const result = db.prepare(`
     DELETE FROM content
-    WHERE hash NOT IN (SELECT DISTINCT hash FROM documents WHERE active = 1)
+    WHERE hash NOT IN (SELECT DISTINCT hash FROM documents)
   `).run();
   return result.changes;
 }
diff --git a/test/store.test.ts b/test/store.test.ts
index a172064..2ed0b06 100644
--- a/test/store.test.ts
+++ b/test/store.test.ts
@@ -9,7 +9,7 @@
 import { describe, test, expect, beforeAll, afterAll, beforeEach, afterEach, vi } from "vitest";
 import { openDatabase, loadSqliteVec } from "../src/db.js";
 import type { Database } from "../src/db.js";
-import { unlink, mkdtemp, rmdir, writeFile } from "node:fs/promises";
+import { unlink, mkdtemp, rmdir, writeFile, rm } from "node:fs/promises";
 import { tmpdir } from "node:os";
 import { join } from "node:path";
 import YAML from "yaml";
@@ -51,6 +51,7 @@ import {
   insertContent,
   insertDocument,
   generateEmbeddings,
+  reindexCollection,
   type Store,
   type DocumentResult,
   type SearchResult,
@@ -2313,6 +2314,33 @@ describe("Vector Table", () => {
 
     await cleanupTestDb(store);
   });
+
+  test("insertEmbedding is idempotent for an existing vec0 hash_seq (#598)", async () => {
+    const store = await createTestStore();
+    store.ensureVecTable(2);
+
+    const hash = "existinghashseq";
+    const first = new Float32Array([0.1, 0.2]);
+    const second = new Float32Array([0.3, 0.4]);
+    const now = new Date().toISOString();
+
+    store.db.prepare(`INSERT INTO vectors_vec (hash_seq, embedding) VALUES (?, ?)`).run(`${hash}_0`, first);
+
+    // Reproduces sqlite-vec's broken conflict handling: vec0 does not honor OR REPLACE.
+    expect(() => {
+      store.db.prepare(`INSERT OR REPLACE INTO vectors_vec (hash_seq, embedding) VALUES (?, ?)`).run(`${hash}_0`, second);
+    }).toThrow(/UNIQUE constraint failed/i);
+
+    // QMD must therefore use DELETE + INSERT when upserting the vector row.
+    expect(() => store.insertEmbedding(hash, 0, 0, second, "test-model", now)).not.toThrow();
+
+    const vectorCount = store.db.prepare(`SELECT COUNT(*) AS count FROM vectors_vec WHERE hash_seq = ?`).get(`${hash}_0`) as { count: number };
+    const metadataCount = store.db.prepare(`SELECT COUNT(*) AS count FROM content_vectors WHERE hash = ? AND seq = 0`).get(hash) as { count: number };
+    expect(vectorCount.count).toBe(1);
+    expect(metadataCount.count).toBe(1);
+
+    await cleanupTestDb(store);
+  });
 });
 
 // =============================================================================
@@ -2320,6 +2348,47 @@ describe("Vector Table", () => {
 // =============================================================================
 
 describe("Integration", () => {
+  test("reindexCollection soft-deletes removed files and preserves inactive content (#585)", async () => {
+    const store = await createTestStore();
+    const collectionDir = await mkdtemp(join(testDir, "orphan-regression-"));
+    const collectionName = "orphan-regression";
+
+    try {
+      for (let i = 1; i <= 5; i++) {
+        await writeFile(join(collectionDir, `doc-${i}.md`), `# Doc ${i}\n\nUnique body ${i}`);
+      }
+
+      await createTestCollection({ pwd: collectionDir, glob: "**/*.md", name: collectionName });
+
+      const initial = await reindexCollection(store, collectionDir, "**/*.md", collectionName);
+      expect(initial.indexed).toBe(5);
+      expect(initial.removed).toBe(0);
+
+      await rm(join(collectionDir, "doc-3.md"));
+      await rm(join(collectionDir, "doc-4.md"));
+      await rm(join(collectionDir, "doc-5.md"));
+
+      const afterDelete = await reindexCollection(store, collectionDir, "**/*.md", collectionName);
+      expect(afterDelete.removed).toBe(3);
+
+      const counts = store.db.prepare(`
+        SELECT
+          SUM(CASE WHEN active = 1 THEN 1 ELSE 0 END) AS active,
+          SUM(CASE WHEN active = 0 THEN 1 ELSE 0 END) AS inactive,
+          COUNT(*) AS total
+        FROM documents
+        WHERE collection = ?
+      `).get(collectionName) as { active: number; inactive: number; total: number };
+      const contentCount = store.db.prepare(`SELECT COUNT(*) AS count FROM content`).get() as { count: number };
+
+      expect(counts).toEqual({ active: 2, inactive: 3, total: 5 });
+      expect(contentCount.count).toBe(5);
+    } finally {
+      await rm(collectionDir, { recursive: true, force: true });
+      await cleanupTestDb(store);
+    }
+  });
+
   test("full document lifecycle: create, search, retrieve", async () => {
     const store = await createTestStore();
     const collectionName = await createTestCollection({ pwd: "/test/notes", glob: "**/*.md" });

From 004714af48650295318c6a9acabde237850e5fae Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tobi=20L=C3=BCtke?= <tobi@lutke.com>
Date: Sat, 9 May 2026 17:52:42 +0000
Subject: [PATCH 05/17] Fix hybrid RRF weighting by query type

---
 CHANGELOG.md       |  1 +
 src/store.ts       | 20 ++++++++++++++++++--
 test/store.test.ts | 34 ++++++++++++++++++++++++++++++++++
 3 files changed, 53 insertions(+), 2 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 4931d92..54d84bd 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -9,6 +9,7 @@
   Scoped `--force` clears only collection-owned vectors, preserves shared
   hashes referenced by sibling collections, and drops `vectors_vec` only
   when the scoped clear empties all vectors.
+- Hybrid search: weight RRF lists by query type so original FTS and original vector evidence get the intended 2x boost, instead of accidentally boosting the first lexical expansion. #591
 - GPU: respect explicit `QMD_LLAMA_GPU=metal|vulkan|cuda` backend overrides instead of always using auto GPU selection. #529
 - Fix: preserve original filename case in `handelize()`. The previous
   `.toLowerCase()` call made indexed paths unreachable on case-sensitive
diff --git a/src/store.ts b/src/store.ts
index f5dd47a..5adafd9 100644
--- a/src/store.ts
+++ b/src/store.ts
@@ -4158,6 +4158,21 @@ export type RankedListMeta = {
   query: string;
 };
 
+/**
+ * Compute the per-list RRF weights used by hybridQuery.
+ *
+ * A list is weighted by what produced it, not by where it sits in the array:
+ * lists whose queryType is "original" get 2.0 and every other list gets 1.0,
+ * so an expansion list placed ahead of the original vector list cannot take
+ * over its boost. The result has one weight per entry, in the same order.
+ */
+export function getHybridRrfWeights(rankedListMeta: RankedListMeta[]): number[] {
+  return rankedListMeta.map(({ queryType }) => {
+    const fromOriginalQuery = queryType === "original";
+    return fromOriginalQuery ? 2.0 : 1.0;
+  });
+}
+
 /**
  * Hybrid search: BM25 + vector + query expansion + RRF + chunked reranking.
  *
@@ -4289,8 +4304,9 @@ export async function hybridQuery(
     }
   }
 
-  // Step 4: RRF fusion — first 2 lists (original FTS + first vec) get 2x weight
-  const weights = rankedLists.map((_, i) => i < 2 ? 2.0 : 1.0);
+  // Step 4: RRF fusion — original-query FTS and vector lists get 2x weight;
+  // expansion-derived lists stay at 1x independent of insertion order.
+  const weights = getHybridRrfWeights(rankedListMeta);
   const fused = reciprocalRankFusion(rankedLists, weights);
   const rrfTraceByFile = explain ? buildRrfTrace(rankedLists, weights, rankedListMeta) : null;
   const candidates = fused.slice(0, candidateLimit);
diff --git a/test/store.test.ts b/test/store.test.ts
index 2ed0b06..24b5a10 100644
--- a/test/store.test.ts
+++ b/test/store.test.ts
@@ -52,10 +52,12 @@ import {
   insertDocument,
   generateEmbeddings,
   reindexCollection,
+  getHybridRrfWeights,
   type Store,
   type DocumentResult,
   type SearchResult,
   type RankedResult,
+  type RankedListMeta,
 } from "../src/store.js";
 import type { CollectionConfig } from "../src/collections.js";
 
@@ -2046,6 +2048,38 @@ describe("Reciprocal Rank Fusion", () => {
     expect(fused[0]!.file).toBe("doc1");
   });
 
+  test("hybrid RRF weights boost original vector evidence over expansion-only hits", () => {
+    const originalFtsOnly = makeResult("original-fts-only.md", 0.95);
+    const expansionOnly = makeResult("lex-expansion-only.md", 0.95);
+    const originalVector = makeResult("original-vector.md", 0.95);
+
+    // Mirrors hybridQuery's common list order when a lex expansion exists:
+    // original FTS, lex expansion FTS, original vector.
+    const rankedLists = [
+      [originalFtsOnly],
+      [expansionOnly],
+      [originalVector],
+    ];
+    const rankedListMeta: RankedListMeta[] = [
+      { source: "fts", queryType: "original", query: "user query" },
+      { source: "fts", queryType: "lex", query: "lex expansion" },
+      { source: "vec", queryType: "original", query: "user query" },
+    ];
+
+    const positionBasedWeights = rankedLists.map((_, i) => i < 2 ? 2.0 : 1.0);
+    const buggyOrder = reciprocalRankFusion(rankedLists, positionBasedWeights);
+
+    expect(buggyOrder.findIndex(r => r.file === "lex-expansion-only.md"))
+      .toBeLessThan(buggyOrder.findIndex(r => r.file === "original-vector.md"));
+
+    const semanticWeights = getHybridRrfWeights(rankedListMeta);
+    const fixedOrder = reciprocalRankFusion(rankedLists, semanticWeights);
+
+    expect(semanticWeights).toEqual([2.0, 1.0, 2.0]);
+    expect(fixedOrder.findIndex(r => r.file === "original-vector.md"))
+      .toBeLessThan(fixedOrder.findIndex(r => r.file === "lex-expansion-only.md"));
+  });
+
   test("RRF adds top-rank bonus", () => {
     // doc1 is #1 in list1, doc2 is #2 in list1
     const list1 = [makeResult("doc1", 0.9), makeResult("doc2", 0.8)];

From 3f055e705db8ad45f30251c8ef7ff3839bafd00d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tobi=20L=C3=BCtke?= <tobi@lutke.com>
Date: Sat, 9 May 2026 17:53:58 +0000
Subject: [PATCH 06/17] Fix AST grammar packaging for Bun installs

---
 CHANGELOG.md                       |  3 +++
 bun.lock                           | 22 +++++++++++++++-------
 package.json                       | 16 +++++++++-------
 scripts/check-package-grammars.mjs | 29 +++++++++++++++++++++++++++++
 src/ast.ts                         | 30 +++++++++++++++++++++---------
 test/ast.test.ts                   | 12 +++++++++++-
 test/package.test.ts               | 27 +++++++++++++++++++++++++++
 7 files changed, 115 insertions(+), 24 deletions(-)
 create mode 100644 scripts/check-package-grammars.mjs
 create mode 100644 test/package.test.ts

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 54d84bd..cde9802 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -24,6 +24,9 @@
 - Store: keep content rows referenced by inactive documents during orphan
   cleanup so `qmd update` preserves soft-deleted tombstones for removed
   files. #585
+- Packaging: install AST grammar WASM packages as required dependencies so
+  Bun global installs include TypeScript/TSX/JavaScript grammars, and add a
+  `smoke:package-grammars` verification command. #595
 
 ## [2.1.0] - 2026-04-05
 
diff --git a/bun.lock b/bun.lock
index a96f096..651b00f 100644
--- a/bun.lock
+++ b/bun.lock
@@ -11,6 +11,10 @@
         "node-llama-cpp": "3.18.1",
         "picomatch": "4.0.4",
         "sqlite-vec": "0.1.9",
+        "tree-sitter-go": "0.23.4",
+        "tree-sitter-python": "0.23.4",
+        "tree-sitter-rust": "0.24.0",
+        "tree-sitter-typescript": "0.23.2",
         "web-tree-sitter": "0.26.7",
         "yaml": "2.8.3",
         "zod": "4.2.1",
@@ -26,10 +30,6 @@
         "sqlite-vec-linux-arm64": "0.1.9",
         "sqlite-vec-linux-x64": "0.1.9",
         "sqlite-vec-windows-x64": "0.1.9",
-        "tree-sitter-go": "0.23.4",
-        "tree-sitter-python": "0.23.4",
-        "tree-sitter-rust": "0.24.0",
-        "tree-sitter-typescript": "0.23.2",
       },
       "peerDependencies": {
         "typescript": "^5.9.3",
@@ -509,7 +509,7 @@
 
     "node-abi": ["node-abi@3.87.0", "", { "dependencies": { "semver": "^7.3.5" } }, "sha512-+CGM1L1CgmtheLcBuleyYOn7NWPVu0s0EJH2C4puxgEZb9h8QpR9G2dBfZJOAUhi7VQxuBPMd0hiISWcTyiYyQ=="],
 
-    "node-addon-api": ["node-addon-api@8.5.0", "", {}, "sha512-/bRZty2mXUIFY/xU5HLvveNHlswNJej+RnxBjOMkidWfwZzgTbPG1E3K5TOxRLOR+5hX7bSofy8yf1hZevMS8A=="],
+    "node-addon-api": ["node-addon-api@8.7.0", "", {}, "sha512-9MdFxmkKaOYVTV+XVRG8ArDwwQ77XIgIPyKASB1k3JPq3M8fGQQQE3YpMOrKm6g//Ktx8ivZr8xo1Qmtqub+GA=="],
 
     "node-api-headers": ["node-api-headers@1.8.0", "", {}, "sha512-jfnmiKWjRAGbdD1yQS28bknFM1tbHC1oucyuMPjmkEs+kpiu76aRs40WlTmBmyEgzDM76ge1DQ7XJ3R5deiVjQ=="],
 
@@ -773,8 +773,6 @@
 
     "micromatch/picomatch": ["picomatch@2.3.1", "", {}, "sha512-JU3teHTNjmE2VCGFzuY8EXzCDVwEqB2a8fsIvwaStHhAWJEeVd1o1QD80CU6+ZdEXXSLbSsuLwJjkCBWqRQUVA=="],
 
-    "node-llama-cpp/node-addon-api": ["node-addon-api@8.7.0", "", {}, "sha512-9MdFxmkKaOYVTV+XVRG8ArDwwQ77XIgIPyKASB1k3JPq3M8fGQQQE3YpMOrKm6g//Ktx8ivZr8xo1Qmtqub+GA=="],
-
     "ora/cli-spinners": ["cli-spinners@3.4.0", "", {}, "sha512-bXfOC4QcT1tKXGorxL3wbJm6XJPDqEnij2gQ2m7ESQuE+/z9YFIWnl/5RpTiKWbMq3EVKR4fRLJGn6DVfu0mpw=="],
 
     "postcss/nanoid": ["nanoid@3.3.11", "", { "bin": { "nanoid": "bin/nanoid.cjs" } }, "sha512-N8SpfPUnUp1bK+PMYW8qSWdl9U+wwNWI4QKxOYDy9JAro3WMX7p2OeVRF9v+347pnakNevPmiHhNmZ2HbFA76w=="],
@@ -793,6 +791,16 @@
 
     "tinyglobby/picomatch": ["picomatch@4.0.3", "", {}, "sha512-5gTmgEY/sqK6gFXLIsQNH19lWb4ebPDLA4SdLP7dsWkIXHWlG66oPuVvXSGFPppYZz8ZDZq0dYYrbHfBCVUb1Q=="],
 
+    "tree-sitter-go/node-addon-api": ["node-addon-api@8.5.0", "", {}, "sha512-/bRZty2mXUIFY/xU5HLvveNHlswNJej+RnxBjOMkidWfwZzgTbPG1E3K5TOxRLOR+5hX7bSofy8yf1hZevMS8A=="],
+
+    "tree-sitter-javascript/node-addon-api": ["node-addon-api@8.5.0", "", {}, "sha512-/bRZty2mXUIFY/xU5HLvveNHlswNJej+RnxBjOMkidWfwZzgTbPG1E3K5TOxRLOR+5hX7bSofy8yf1hZevMS8A=="],
+
+    "tree-sitter-python/node-addon-api": ["node-addon-api@8.5.0", "", {}, "sha512-/bRZty2mXUIFY/xU5HLvveNHlswNJej+RnxBjOMkidWfwZzgTbPG1E3K5TOxRLOR+5hX7bSofy8yf1hZevMS8A=="],
+
+    "tree-sitter-rust/node-addon-api": ["node-addon-api@8.5.0", "", {}, "sha512-/bRZty2mXUIFY/xU5HLvveNHlswNJej+RnxBjOMkidWfwZzgTbPG1E3K5TOxRLOR+5hX7bSofy8yf1hZevMS8A=="],
+
+    "tree-sitter-typescript/node-addon-api": ["node-addon-api@8.5.0", "", {}, "sha512-/bRZty2mXUIFY/xU5HLvveNHlswNJej+RnxBjOMkidWfwZzgTbPG1E3K5TOxRLOR+5hX7bSofy8yf1hZevMS8A=="],
+
     "vite/picomatch": ["picomatch@4.0.3", "", {}, "sha512-5gTmgEY/sqK6gFXLIsQNH19lWb4ebPDLA4SdLP7dsWkIXHWlG66oPuVvXSGFPppYZz8ZDZq0dYYrbHfBCVUb1Q=="],
 
     "vitest/picomatch": ["picomatch@4.0.3", "", {}, "sha512-5gTmgEY/sqK6gFXLIsQNH19lWb4ebPDLA4SdLP7dsWkIXHWlG66oPuVvXSGFPppYZz8ZDZq0dYYrbHfBCVUb1Q=="],
diff --git a/package.json b/package.json
index 0ec04c9..59a878a 100644
--- a/package.json
+++ b/package.json
@@ -17,6 +17,7 @@
   "files": [
     "bin/",
     "dist/",
+    "scripts/check-package-grammars.mjs",
     "LICENSE",
     "CHANGELOG.md"
   ],
@@ -31,7 +32,8 @@
     "vsearch": "tsx src/cli/qmd.ts vsearch",
     "rerank": "tsx src/cli/qmd.ts rerank",
     "inspector": "npx @modelcontextprotocol/inspector tsx src/cli/qmd.ts mcp",
-    "release": "./scripts/release.sh"
+    "release": "./scripts/release.sh",
+    "smoke:package-grammars": "node scripts/check-package-grammars.mjs"
   },
   "publishConfig": {
     "access": "public"
@@ -53,18 +55,18 @@
     "sqlite-vec": "0.1.9",
     "web-tree-sitter": "0.26.7",
     "yaml": "2.8.3",
-    "zod": "4.2.1"
+    "zod": "4.2.1",
+    "tree-sitter-go": "0.23.4",
+    "tree-sitter-python": "0.23.4",
+    "tree-sitter-rust": "0.24.0",
+    "tree-sitter-typescript": "0.23.2"
   },
   "optionalDependencies": {
     "sqlite-vec-darwin-arm64": "0.1.9",
     "sqlite-vec-darwin-x64": "0.1.9",
     "sqlite-vec-linux-arm64": "0.1.9",
     "sqlite-vec-linux-x64": "0.1.9",
-    "sqlite-vec-windows-x64": "0.1.9",
-    "tree-sitter-go": "0.23.4",
-    "tree-sitter-python": "0.23.4",
-    "tree-sitter-rust": "0.24.0",
-    "tree-sitter-typescript": "0.23.2"
+    "sqlite-vec-windows-x64": "0.1.9"
   },
   "devDependencies": {
     "@types/better-sqlite3": "7.6.13",
diff --git a/scripts/check-package-grammars.mjs b/scripts/check-package-grammars.mjs
new file mode 100644
index 0000000..45d7854
--- /dev/null
+++ b/scripts/check-package-grammars.mjs
@@ -0,0 +1,29 @@
+#!/usr/bin/env node
+// Smoke check: every AST grammar .wasm file must resolve from this install.
+import { createRequire } from "node:module";
+
+const require = createRequire(import.meta.url);
+const grammars = [
+  "tree-sitter-typescript/tree-sitter-typescript.wasm",
+  "tree-sitter-typescript/tree-sitter-tsx.wasm",
+  "tree-sitter-python/tree-sitter-python.wasm",
+  "tree-sitter-go/tree-sitter-go.wasm",
+  "tree-sitter-rust/tree-sitter-rust.wasm",
+];
+
+// Try every grammar (no early exit) so a single run reports all missing files.
+const missing = [];
+for (const grammar of grammars) {
+  try {
+    console.log(`ok ${grammar} -> ${require.resolve(grammar)}`);
+  } catch (err) {
+    missing.push(grammar);
+    console.error(`missing ${grammar}`);
+    console.error(err instanceof Error ? err.message : String(err));
+  }
+}
+
+if (missing.length > 0) {
+  console.error("\nAST grammar package smoke check failed. Run `bun install` locally or repair a broken global install with the matching `bun add tree-sitter-...@<version>` command shown by `qmd status`.");
+  process.exit(1);
+}
diff --git a/src/ast.ts b/src/ast.ts
index 5f8194e..a83dbc1 100644
--- a/src/ast.ts
+++ b/src/ast.ts
@@ -63,15 +63,22 @@ export function detectLanguage(filepath: string): SupportedLanguage | null {
 /**
  * Maps language to the npm package and wasm filename for the grammar.
  */
-const GRAMMAR_MAP: Record<SupportedLanguage, { pkg: string; wasm: string }> = {
-  typescript: { pkg: "tree-sitter-typescript", wasm: "tree-sitter-typescript.wasm" },
-  tsx:        { pkg: "tree-sitter-typescript", wasm: "tree-sitter-tsx.wasm" },
-  javascript: { pkg: "tree-sitter-typescript", wasm: "tree-sitter-typescript.wasm" },
-  python:     { pkg: "tree-sitter-python",     wasm: "tree-sitter-python.wasm" },
-  go:         { pkg: "tree-sitter-go",         wasm: "tree-sitter-go.wasm" },
-  rust:       { pkg: "tree-sitter-rust",        wasm: "tree-sitter-rust.wasm" },
+const GRAMMAR_MAP: Record<SupportedLanguage, { pkg: string; wasm: string; version: string }> = {
+  typescript: { pkg: "tree-sitter-typescript", wasm: "tree-sitter-typescript.wasm", version: "0.23.2" },
+  tsx:        { pkg: "tree-sitter-typescript", wasm: "tree-sitter-tsx.wasm",        version: "0.23.2" },
+  javascript: { pkg: "tree-sitter-typescript", wasm: "tree-sitter-typescript.wasm", version: "0.23.2" },
+  python:     { pkg: "tree-sitter-python",     wasm: "tree-sitter-python.wasm",     version: "0.23.4" },
+  go:         { pkg: "tree-sitter-go",         wasm: "tree-sitter-go.wasm",         version: "0.23.4" },
+  rust:       { pkg: "tree-sitter-rust",       wasm: "tree-sitter-rust.wasm",       version: "0.24.0" },
 };
 
+/** Describe a grammar load failure for `language`, ending with the version-pinned `bun add` repair command. */
+export function formatGrammarLoadError(language: SupportedLanguage, err: unknown): string {
+  const { pkg, wasm, version } = GRAMMAR_MAP[language];
+  const detail = err instanceof Error ? err.message : String(err);
+  return `${pkg}/${wasm} failed to load (${detail}); falling back to regex chunking. Repair a broken global install with: bun add ${pkg}@${version}`;
+}
+
 // =============================================================================
 // Per-Language Query Definitions
 // =============================================================================
@@ -176,6 +183,9 @@ let initPromise: Promise<void> | null = null;
 /** Languages that have already failed to load — warn only once per process. */
 const failedLanguages = new Set<string>();
 
+/** Last grammar load error by language, for status output. */
+const grammarLoadErrors = new Map<SupportedLanguage, string>();
+
 /** Cached grammar load promises. */
 const grammarCache = new Map<string, Promise<LanguageType>>();
 
@@ -228,7 +238,9 @@ async function loadGrammar(language: SupportedLanguage): Promise<LanguageType |
   } catch (err) {
     failedLanguages.add(language);
     grammarCache.delete(wasmKey);
-    console.warn(`[qmd] Failed to load tree-sitter grammar for ${language}: ${err}`);
+    const message = formatGrammarLoadError(language, err);
+    grammarLoadErrors.set(language, message);
+    console.warn(`[qmd] AST grammar unavailable for ${language}: ${message}`);
     return null;
   }
 }
@@ -345,7 +357,7 @@ export async function getASTStatus(): Promise<{
         getQuery(lang, grammar);
         languages.push({ language: lang, available: true });
       } else {
-        languages.push({ language: lang, available: false, error: "grammar failed to load" });
+        languages.push({ language: lang, available: false, error: grammarLoadErrors.get(lang) ?? "grammar failed to load" });
       }
     } catch (err) {
       languages.push({
diff --git a/test/ast.test.ts b/test/ast.test.ts
index f4ed1bd..0d89a7b 100644
--- a/test/ast.test.ts
+++ b/test/ast.test.ts
@@ -6,7 +6,7 @@
  */
 
 import { describe, test, expect } from "vitest";
-import { detectLanguage, getASTBreakPoints, extractSymbols } from "../src/ast.js";
+import { detectLanguage, getASTBreakPoints, extractSymbols, formatGrammarLoadError } from "../src/ast.js";
 import type { SupportedLanguage } from "../src/ast.js";
 
 // =============================================================================
@@ -315,6 +315,16 @@ describe("getASTBreakPoints - error handling", () => {
     // Should either return some partial break points or empty array — not throw
     expect(Array.isArray(points)).toBe(true);
   });
+
+  test("explains missing grammar packages with a repair command", () => {
+    const msg = formatGrammarLoadError(
+      "typescript",
+      new Error("Cannot find module 'tree-sitter-typescript/tree-sitter-typescript.wasm'"),
+    );
+    expect(msg).toContain("tree-sitter-typescript");
+    expect(msg).toContain("bun add tree-sitter-typescript@0.23.2");
+    expect(msg).toContain("falling back to regex");
+  });
 });
 
 // =============================================================================
diff --git a/test/package.test.ts b/test/package.test.ts
new file mode 100644
index 0000000..018087d
--- /dev/null
+++ b/test/package.test.ts
@@ -0,0 +1,27 @@
+import { describe, expect, test } from "vitest";
+import { readFileSync } from "node:fs";
+
+// Repo root as a file: URL. Read files through URL objects rather than
+// `root.pathname`, which stays percent-encoded and is not a valid Windows path.
+const root = new URL("..", import.meta.url);
+const pkg = JSON.parse(readFileSync(new URL("package.json", root), "utf8"));
+
+describe("package grammar distribution", () => {
+  test("installs AST grammar wasm packages as required runtime dependencies", () => {
+    for (const dep of ["tree-sitter-typescript", "tree-sitter-python", "tree-sitter-go", "tree-sitter-rust"]) {
+      expect(pkg.dependencies, `${dep} should be a required dependency`).toHaveProperty(dep);
+      expect(pkg.optionalDependencies ?? {}, `${dep} should not be optional`).not.toHaveProperty(dep);
+    }
+  });
+
+  test("documents a packaging smoke check for grammar wasm availability", () => {
+    expect(pkg.scripts, "package.json scripts").toHaveProperty("smoke:package-grammars");
+    expect(String(pkg.scripts["smoke:package-grammars"])).toContain("check-package-grammars");
+
+    expect(pkg.files, "published package files").toContain("scripts/check-package-grammars.mjs");
+
+    const script = readFileSync(new URL("scripts/check-package-grammars.mjs", root), "utf8");
+    expect(script).toContain("tree-sitter-typescript/tree-sitter-typescript.wasm");
+    expect(script).toContain("tree-sitter-typescript/tree-sitter-tsx.wasm");
+  });
+});

From 3653f6015c32e0487704eed5b25022eea5f8cf28 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tobi=20L=C3=BCtke?= <tobi@lutke.com>
Date: Sat, 9 May 2026 17:54:08 +0000
Subject: [PATCH 07/17] Fix MCP stdio native log pollution

---
 CHANGELOG.md     |  1 +
 bin/qmd          | 10 ++++++++
 test/cli.test.ts | 66 +++++++++++++++++++++++++++++++++++++++++++++++-
 3 files changed, 76 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index cde9802..1af3da0 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -10,6 +10,7 @@
   hashes referenced by sibling collections, and drops `vectors_vec` only
   when the scoped clear empties all vectors.
 - Hybrid search: weight RRF lists by query type so original FTS and original vector evidence get the intended 2x boost, instead of accidentally boosting the first lexical expansion. #591
+- MCP: seed llama.cpp/GGML quiet env vars before launching `qmd mcp` so native logs cannot pollute stdio JSON-RPC framing. #593
 - GPU: respect explicit `QMD_LLAMA_GPU=metal|vulkan|cuda` backend overrides instead of always using auto GPU selection. #529
 - Fix: preserve original filename case in `handelize()`. The previous
   `.toLowerCase()` call made indexed paths unreachable on case-sensitive
diff --git a/bin/qmd b/bin/qmd
index f658b3b..7522b2e 100755
--- a/bin/qmd
+++ b/bin/qmd
@@ -15,6 +15,16 @@ done
 # to avoid native module ABI mismatches (e.g., better-sqlite3 compiled for bun vs node)
 DIR="$(cd -P "$(dirname "$SOURCE")/.." && pwd)"
 
+# MCP stdio reserves stdout exclusively for JSON-RPC frames. node-llama-cpp
+# / llama.cpp / ggml can write native logs directly to stdout before JS-level
+# log handlers are attached, so seed the native quiet env before Node/Bun imports
+# the CLI and its LLM modules. Preserve explicit user values when provided.
+if [ "$1" = "mcp" ]; then
+  export LLAMA_LOG_LEVEL="${LLAMA_LOG_LEVEL:-error}"
+  export GGML_LOG_LEVEL="${GGML_LOG_LEVEL:-error}"
+  export GGML_BACKEND_SILENT="${GGML_BACKEND_SILENT:-1}"
+fi
+
 # Detect the package manager that installed dependencies by checking lockfiles.
 # $BUN_INSTALL is intentionally NOT checked — it only indicates that bun exists
 # on the system, not that it was used to install this package (see #361).
diff --git a/test/cli.test.ts b/test/cli.test.ts
index 2e49deb..5748676 100644
--- a/test/cli.test.ts
+++ b/test/cli.test.ts
@@ -6,7 +6,7 @@
  */
 
 import { describe, test, expect, beforeAll, afterAll, beforeEach } from "vitest";
-import { mkdtemp, rm, writeFile, mkdir } from "fs/promises";
+import { chmod, copyFile, mkdtemp, rm, writeFile, mkdir } from "fs/promises";
 import { existsSync, lstatSync, readFileSync, symlinkSync, writeFileSync, unlinkSync } from "fs";
 import { tmpdir } from "os";
 import { join, dirname } from "path";
@@ -1601,3 +1601,67 @@ describe("mcp http daemon", () => {
     try { unlinkSync(pidPath()); } catch {}
   });
 });
+
+// =============================================================================
+// MCP stdio stdout hygiene
+// =============================================================================
+
+describe("mcp stdio launcher", () => {
+  test("sets native llama/ggml quiet env before Node starts so stdout stays JSON-RPC only", async () => {
+    const tempPackage = await mkdtemp(join(tmpdir(), "qmd-bin-mcp-"));
+    try {
+      await mkdir(join(tempPackage, "bin"), { recursive: true });
+      await mkdir(join(tempPackage, "dist", "cli"), { recursive: true });
+      await mkdir(join(tempPackage, "fake-bin"), { recursive: true });
+
+      const qmdBin = join(tempPackage, "bin", "qmd");
+      await copyFile(join(projectRoot, "bin", "qmd"), qmdBin);
+      await chmod(qmdBin, 0o755);
+
+      // Force the wrapper down the Node branch, then put our fake `node` first
+      // in PATH. The fake node behaves like the native llama/ggml layer: it
+      // writes a non-JSON stdout line unless qmd pre-seeded the documented
+      // quiet env vars before launching JS.
+      await writeFile(join(tempPackage, "package-lock.json"), "{}\n");
+      const fakeNode = join(tempPackage, "fake-bin", "node");
+      await writeFile(fakeNode, `#!/bin/sh
+if [ "\${GGML_BACKEND_SILENT:-}" != "1" ]; then
+  printf 'llama.cpp native log on stdout\\n'
+fi
+printf '{"jsonrpc":"2.0","id":1,"result":{"ok":true}}\\n'
+`);
+      await chmod(fakeNode, 0o755);
+
+      const proc = spawn(qmdBin, ["mcp"], {
+        cwd: tempPackage,
+        env: {
+          ...process.env,
+          PATH: `${join(tempPackage, "fake-bin")}:${process.env.PATH}`,
+          LLAMA_LOG_LEVEL: "",
+          GGML_LOG_LEVEL: "",
+          GGML_BACKEND_SILENT: "",
+        },
+        stdio: ["ignore", "pipe", "pipe"],
+      });
+
+      let stdout = "";
+      let stderr = "";
+      proc.stdout?.on("data", (chunk: Buffer) => { stdout += chunk.toString(); });
+      proc.stderr?.on("data", (chunk: Buffer) => { stderr += chunk.toString(); });
+      const exitCode = await new Promise<number>((resolve, reject) => {
+        proc.once("error", reject);
+        proc.on("close", (code) => resolve(code ?? 1));
+      });
+
+      expect(exitCode).toBe(0);
+      expect(stderr).toBe("");
+      const lines = stdout.trim().split("\n").filter(Boolean);
+      expect(lines.length).toBeGreaterThan(0);
+      for (const line of lines) {
+        expect(() => JSON.parse(line)).not.toThrow();
+      }
+    } finally {
+      await rm(tempPackage, { recursive: true, force: true });
+    }
+  });
+});

From 656707c6b40ede862983f8563a93e070cbd9a00d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tobi=20L=C3=BCtke?= <tobi@lutke.com>
Date: Sat, 9 May 2026 17:56:03 +0000
Subject: [PATCH 08/17] Fix Node ESM index path normalization

---
 CHANGELOG.md                      |  1 +
 src/cli/qmd.ts                    |  6 ++----
 src/collections.ts                |  6 ++----
 test/esm-ambiguous-module.test.ts | 27 +++++++++++++++++++++++++++
 4 files changed, 32 insertions(+), 8 deletions(-)
 create mode 100644 test/esm-ambiguous-module.test.ts

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 1af3da0..4191964 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -11,6 +11,7 @@
   when the scoped clear empties all vectors.
 - Hybrid search: weight RRF lists by query type so original FTS and original vector evidence get the intended 2x boost, instead of accidentally boosting the first lexical expansion. #591
 - MCP: seed llama.cpp/GGML quiet env vars before launching `qmd mcp` so native logs cannot pollute stdio JSON-RPC framing. #593
+- CLI: remove CommonJS `require()` calls from ESM index path normalization so `qmd --index <path>` no longer crashes with `ERR_AMBIGUOUS_MODULE_SYNTAX` on Node 22+. #634
 - GPU: respect explicit `QMD_LLAMA_GPU=metal|vulkan|cuda` backend overrides instead of always using auto GPU selection. #529
 - Fix: preserve original filename case in `handelize()`. The previous
   `.toLowerCase()` call made indexed paths unreachable on case-sensitive
diff --git a/src/cli/qmd.ts b/src/cli/qmd.ts
index 4ceecbe..0c3a1e1 100755
--- a/src/cli/qmd.ts
+++ b/src/cli/qmd.ts
@@ -3,7 +3,7 @@ import type { Database } from "../db.js";
 import fastGlob from "fast-glob";
 import { execSync, spawn as nodeSpawn } from "child_process";
 import { fileURLToPath } from "url";
-import { dirname, join as pathJoin, relative as relativePath } from "path";
+import { dirname, join as pathJoin, relative as relativePath, resolve as pathResolve } from "path";
 import { parseArgs } from "util";
 import { readFileSync, realpathSync, statSync, existsSync, unlinkSync, writeFileSync, openSync, closeSync, mkdirSync, lstatSync, rmSync, symlinkSync, readlinkSync } from "fs";
 import { createInterface } from "readline/promises";
@@ -173,9 +173,7 @@ function setIndexName(name: string | null): void {
   let normalizedName = name;
   // Normalize relative paths to prevent malformed database paths
   if (name && name.includes('/')) {
-    const { resolve } = require('path');
-    const { cwd } = require('process');
-    const absolutePath = resolve(cwd(), name);
+    const absolutePath = pathResolve(process.cwd(), name);
     // Replace path separators with underscores to create a valid filename
     normalizedName = absolutePath.replace(/\//g, '_').replace(/^_/, '');
   }
diff --git a/src/collections.ts b/src/collections.ts
index e68ff65..a295de7 100644
--- a/src/collections.ts
+++ b/src/collections.ts
@@ -6,7 +6,7 @@
  */
 
 import { existsSync, mkdirSync, readFileSync, writeFileSync } from "fs";
-import { join, dirname } from "path";
+import { join, dirname, resolve } from "path";
 import { homedir } from "os";
 import YAML from "yaml";
 
@@ -101,9 +101,7 @@ export function setConfigSource(source?: { configPath?: string; config?: Collect
 export function setConfigIndexName(name: string): void {
   // Resolve relative paths to absolute paths and sanitize for use as filename
   if (name.includes('/')) {
-    const { resolve } = require('path');
-    const { cwd } = require('process');
-    const absolutePath = resolve(cwd(), name);
+    const absolutePath = resolve(process.cwd(), name);
     // Replace path separators with underscores to create a valid filename
     currentIndexName = absolutePath.replace(/\//g, '_').replace(/^_/, '');
   } else {
diff --git a/test/esm-ambiguous-module.test.ts b/test/esm-ambiguous-module.test.ts
new file mode 100644
index 0000000..80e61b7
--- /dev/null
+++ b/test/esm-ambiguous-module.test.ts
@@ -0,0 +1,27 @@
+import { describe, expect, test } from "vitest";
+import { execFileSync } from "child_process";
+import { mkdtempSync, rmSync } from "fs";
+import { tmpdir } from "os";
+import { dirname, join, resolve } from "path";
+import { fileURLToPath } from "url";
+
+const repoRoot = resolve(dirname(fileURLToPath(import.meta.url)), "..");
+const execOptions = { cwd: repoRoot, encoding: "utf-8", stdio: "pipe" } as const;
+
+describe("Node ESM entrypoints", () => {
+  test("CLI --index path normalizes via setIndexName/setConfigIndexName under Node 22+", () => {
+    // Build first so the Node run below exercises the compiled dist/ entrypoint.
+    execFileSync("bun", ["run", "build"], execOptions);
+
+    // mkdtempSync leaves a real directory behind, so remove it even if the CLI run throws.
+    const tempRoot = mkdtempSync(join(tmpdir(), "qmd-index-"));
+    try {
+      // A path-like --index value (one containing "/") is what reaches the normalization code.
+      const indexPath = join(tempRoot, "nested", "idx");
+      const output = execFileSync("node", ["dist/cli/qmd.js", "--index", indexPath, "--version"], execOptions);
+      expect(output).toContain("qmd ");
+    } finally {
+      rmSync(tempRoot, { recursive: true, force: true });
+    }
+  }, 120_000);
+});

From dff6513693647495869942240b1a9efb5236412c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tobi=20L=C3=BCtke?= <tobi@lutke.com>
Date: Sat, 9 May 2026 17:56:25 +0000
Subject: [PATCH 09/17] Preserve document IDs across case-only renames

---
 src/store.ts       | 14 ++++++-----
 test/store.test.ts | 63 ++++++++++++++++++++++++++++++++++++++++++++--
 2 files changed, 69 insertions(+), 8 deletions(-)

diff --git a/src/store.ts b/src/store.ts
index 5adafd9..6ec8ac1 100644
--- a/src/store.ts
+++ b/src/store.ts
@@ -2226,8 +2226,8 @@ export function findActiveDocument(
 }
 
 /**
- * Find an active document, falling back to a legacy lowercase path.
- * If found under the legacy path, renames it in-place and rebuilds the
+ * Find an active document, falling back to a case-insensitive path match.
+ * If found under a different casing, renames it in-place and rebuilds the
  * FTS entry. Embeddings are keyed by content hash, so the rename is
  * safe — no re-embedding required.
  *
@@ -2242,10 +2242,12 @@ export function findOrMigrateLegacyDocument(
   const existing = findActiveDocument(db, collectionName, path);
   if (existing) return existing;
 
-  const legacyPath = path.toLowerCase();
-  if (legacyPath === path) return null;
-
-  const legacy = findActiveDocument(db, collectionName, legacyPath);
+  const legacy = db.prepare(`
+    SELECT id, hash, title FROM documents
+    WHERE collection = ? AND path COLLATE NOCASE = ? AND active = 1
+    ORDER BY id
+    LIMIT 1
+  `).get(collectionName, path) as { id: number; hash: string; title: string } | undefined;
   if (!legacy) return null;
 
   // Wrap rename + FTS rebuild in a transaction for atomicity.
diff --git a/test/store.test.ts b/test/store.test.ts
index 24b5a10..9f82624 100644
--- a/test/store.test.ts
+++ b/test/store.test.ts
@@ -9,7 +9,7 @@
 import { describe, test, expect, beforeAll, afterAll, beforeEach, afterEach, vi } from "vitest";
 import { openDatabase, loadSqliteVec } from "../src/db.js";
 import type { Database } from "../src/db.js";
-import { unlink, mkdtemp, rmdir, writeFile, rm } from "node:fs/promises";
+import { unlink, mkdtemp, rmdir, writeFile, rm, mkdir, rename } from "node:fs/promises";
 import { tmpdir } from "node:os";
 import { join } from "node:path";
 import YAML from "yaml";
@@ -46,12 +46,12 @@ import {
   normalizeDocid,
   isDocid,
   syncConfigToDb,
+  reindexCollection,
   STRONG_SIGNAL_MIN_SCORE,
   STRONG_SIGNAL_MIN_GAP,
   insertContent,
   insertDocument,
   generateEmbeddings,
-  reindexCollection,
   getHybridRrfWeights,
   type Store,
   type DocumentResult,
@@ -2112,6 +2112,65 @@ describe("Reciprocal Rank Fusion", () => {
   });
 });
 
+// =============================================================================
+// Reindex Collection Tests
+// =============================================================================
+
+describe("Reindex Collection", () => {
+  test("preserves document id and embeddings when file path changes only by case", async () => {
+    const store = await createTestStore();
+    const collectionName = "docs";
+    const collectionPath = join(testDir, `case-rename-${Date.now()}-${Math.random().toString(36).slice(2)}`);
+    await mkdir(collectionPath, { recursive: true });
+
+    const originalPath = join(collectionPath, "README.md");
+    const renamedPath = join(collectionPath, "readme.md");
+    const body = "# Case Rename\n\nContent that should keep the same embedding.";
+    await writeFile(originalPath, body);
+
+    const firstResult = await reindexCollection(store, collectionPath, "**/*.md", collectionName);
+    expect(firstResult.indexed).toBe(1);
+
+    const before = store.db.prepare(`
+      SELECT id, path, hash FROM documents
+      WHERE collection = ? AND active = 1
+    `).get(collectionName) as { id: number; path: string; hash: string };
+    expect(before.path).toBe("README.md");
+
+    store.db.prepare(`
+      INSERT INTO content_vectors (hash, seq, pos, model, embedded_at)
+      VALUES (?, 0, 0, 'test-model', ?)
+    `).run(before.hash, new Date().toISOString()); // stand-in vector row keyed by the content hash
+
+    await rename(originalPath, renamedPath); // same file and content, only the casing of the name changes
+
+    const secondResult = await reindexCollection(store, collectionPath, "**/*.md", collectionName);
+    expect(secondResult.indexed).toBe(0);
+    expect(secondResult.unchanged).toBe(1);
+    expect(secondResult.removed).toBe(0);
+
+    const afterRows = store.db.prepare(`
+      SELECT id, path, hash, active FROM documents
+      WHERE collection = ?
+      ORDER BY id
+    `).all(collectionName) as { id: number; path: string; hash: string; active: number }[];
+    expect(afterRows).toHaveLength(1); // still one row: the document was renamed in place, not re-inserted
+    expect(afterRows[0]).toMatchObject({ id: before.id, path: "readme.md", hash: before.hash, active: 1 });
+
+    const vectorCount = store.db.prepare(`
+      SELECT COUNT(*) AS count FROM content_vectors WHERE hash = ?
+    `).get(before.hash) as { count: number };
+    expect(vectorCount.count).toBe(1);
+
+    const ftsRows = store.db.prepare(`
+      SELECT rowid, filepath FROM documents_fts WHERE rowid = ?
+    `).all(before.id) as { rowid: number; filepath: string }[];
+    expect(ftsRows).toEqual([{ rowid: before.id, filepath: "docs/readme.md" }]); // FTS entry carries the new casing
+
+    await cleanupTestDb(store);
+  });
+});
+
 // =============================================================================
 // Index Status Tests
 // =============================================================================

From e8229d8bfb0f59da9a5def5d009a90502abac44b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tobi=20L=C3=BCtke?= <tobi@lutke.com>
Date: Sat, 9 May 2026 17:56:28 +0000
Subject: [PATCH 10/17] Fix Windows CUDA context parallelism

---
 CHANGELOG.md     |  1 +
 README.md        |  2 ++
 src/llm.ts       | 44 ++++++++++++++++++++++++++++++++++++++++----
 test/llm.test.ts | 40 ++++++++++++++++++++++++++++++++++++++++
 4 files changed, 83 insertions(+), 4 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 4191964..7014380 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -12,6 +12,7 @@
 - Hybrid search: weight RRF lists by query type so original FTS and original vector evidence get the intended 2x boost, instead of accidentally boosting the first lexical expansion. #591
 - MCP: seed llama.cpp/GGML quiet env vars before launching `qmd mcp` so native logs cannot pollute stdio JSON-RPC framing. #593
 - CLI: remove CommonJS `require()` calls from ESM index path normalization so `qmd --index <path>` no longer crashes with `ERR_AMBIGUOUS_MODULE_SYNTAX` on Node 22+. #634
+- Windows CUDA: serialize llama.cpp embedding/reranking contexts by default to avoid intermittent `ggml-cuda.cu:98` crashes in `qmd query`; set `QMD_EMBED_PARALLELISM` to opt back into parallel contexts if your driver is stable. #519
 - GPU: respect explicit `QMD_LLAMA_GPU=metal|vulkan|cuda` backend overrides instead of always using auto GPU selection. #529
 - Fix: preserve original filename case in `handelize()`. The previous
   `.toLowerCase()` call made indexed paths unreachable on case-sensitive
diff --git a/README.md b/README.md
index 6f31844..02e4b1e 100644
--- a/README.md
+++ b/README.md
@@ -797,6 +797,8 @@ llm_cache       -- Cached LLM responses (query expansion, rerank scores)
 | Variable | Default | Description |
 |----------|---------|-------------|
 | `XDG_CACHE_HOME` | `~/.cache` | Cache directory location |
+| `QMD_LLAMA_GPU` | `auto` | Force llama.cpp GPU backend (`metal`, `vulkan`, `cuda`) or disable GPU with `false` |
+| `QMD_EMBED_PARALLELISM` | automatic | Override embedding/reranking context parallelism (1-8). Windows CUDA defaults to `1` because parallel CUDA contexts can crash with `ggml-cuda.cu:98`; use Vulkan or raise this only if your driver is stable. |
 
 ## How It Works
 
diff --git a/src/llm.ts b/src/llm.ts
index 7d2bbe0..d469d36 100644
--- a/src/llm.ts
+++ b/src/llm.ts
@@ -451,7 +451,41 @@ export type LlamaCppConfig = {
 const DEFAULT_INACTIVITY_TIMEOUT_MS = 5 * 60 * 1000;
 const DEFAULT_EXPAND_CONTEXT_SIZE = 2048;
 
-type LlamaGpuMode = "auto" | "metal" | "vulkan" | "cuda" | false;
+export type LlamaGpuMode = "auto" | "metal" | "vulkan" | "cuda" | false;
+
+type ParallelismOptions = {
+  gpu: string | false; // backend name as reported by llama.gpu, or false for the CPU path
+  platform?: NodeJS.Platform; // defaults to process.platform when omitted
+  computed: number; // parallelism derived from VRAM or CPU cores before the safety rule is applied
+  envValue?: string; // raw QMD_EMBED_PARALLELISM text; read from process.env when omitted
+};
+
+export function resolveParallelismOverride(envValue = process.env.QMD_EMBED_PARALLELISM): number | undefined {
+  const normalized = envValue?.trim() ?? "";
+  if (!normalized) return undefined; // unset or blank: no override requested
+
+  const parsed = Number(normalized);
+  if (!Number.isInteger(parsed) || parsed < 1) {
+    process.stderr.write(`QMD Warning: invalid QMD_EMBED_PARALLELISM="${envValue}", using automatic parallelism.\n`);
+    return undefined; // invalid input is ignored after the warning, not clamped up to 1
+  }
+
+  return Math.min(8, parsed); // values above 8 are silently capped at 8
+}
+
+export function resolveSafeParallelism(options: ParallelismOptions): number {
+  const override = resolveParallelismOverride(options.envValue);
+  if (override !== undefined) return override; // an explicit override wins, even on Windows CUDA
+
+  // node-llama-cpp/llama.cpp CUDA on Windows is unstable with multiple
+  // simultaneous contexts (ggml-cuda.cu:98 in #519). Vulkan and CPU do not
+  // show the same failure mode, so only serialize Windows CUDA by default.
+  if ((options.platform ?? process.platform) === "win32" && options.gpu === "cuda") {
+    return 1;
+  }
+
+  return Math.max(1, options.computed); // never report fewer than one context
+}
 
 export function resolveLlamaGpuMode(envValue = process.env.QMD_LLAMA_GPU): LlamaGpuMode {
   const normalized = envValue?.trim().toLowerCase() ?? "";
@@ -726,16 +760,18 @@ export class LlamaCpp implements LLM {
         const vram = await llama.getVramState();
         const freeMB = vram.free / (1024 * 1024);
         const maxByVram = Math.floor((freeMB * 0.25) / perContextMB);
-        return Math.max(1, Math.min(8, maxByVram));
+        const computed = Math.max(1, Math.min(8, maxByVram));
+        return resolveSafeParallelism({ gpu: llama.gpu, computed });
       } catch {
-        return 2;
+        return resolveSafeParallelism({ gpu: llama.gpu, computed: 2 });
       }
     }
 
     // CPU: split cores across contexts. At least 4 threads per context.
     const cores = llama.cpuMathCores || 4;
     const maxContexts = Math.floor(cores / 4);
-    return Math.max(1, Math.min(4, maxContexts));
+    const computed = Math.max(1, Math.min(4, maxContexts));
+    return resolveSafeParallelism({ gpu: false, computed });
   }
 
   /**
diff --git a/test/llm.test.ts b/test/llm.test.ts
index 74b6430..3678bad 100644
--- a/test/llm.test.ts
+++ b/test/llm.test.ts
@@ -13,6 +13,8 @@ import {
   getDefaultLlamaCpp,
   disposeDefaultLlamaCpp,
   resolveLlamaGpuMode,
+  resolveParallelismOverride,
+  resolveSafeParallelism,
   withLLMSession,
   canUnloadLLM,
   SessionReleasedError,
@@ -88,6 +90,44 @@ describe("QMD_LLAMA_GPU resolution", () => {
   });
 });
 
+describe("LLM context parallelism safety", () => {
+  test("defaults Windows CUDA to one context to avoid ggml-cuda.cu:98 crashes", () => {
+    expect(resolveSafeParallelism({
+      gpu: "cuda",
+      platform: "win32",
+      computed: 8,
+      envValue: "", // blank = no override; `undefined` would fall back to the real process.env value
+    })).toBe(1);
+  });
+
+  test("keeps non-Windows and non-CUDA backends on computed parallelism", () => {
+    expect(resolveSafeParallelism({ gpu: "cuda", platform: "linux", computed: 8, envValue: "" })).toBe(8);
+    expect(resolveSafeParallelism({ gpu: "vulkan", platform: "win32", computed: 8, envValue: "" })).toBe(8);
+    expect(resolveSafeParallelism({ gpu: false, platform: "win32", computed: 4, envValue: "" })).toBe(4);
+  });
+
+  test("QMD_EMBED_PARALLELISM overrides the Windows CUDA safety default", () => {
+    expect(resolveSafeParallelism({
+      gpu: "cuda",
+      platform: "win32",
+      computed: 8,
+      envValue: "2",
+    })).toBe(2);
+  });
+
+  test("QMD_EMBED_PARALLELISM rejects invalid values and warns", () => {
+    const stderrSpy = vi.spyOn(process.stderr, "write").mockReturnValue(true);
+    try {
+      expect(resolveParallelismOverride("0")).toBeUndefined();
+      expect(resolveParallelismOverride("bad")).toBeUndefined();
+      expect(stderrSpy).toHaveBeenCalledTimes(2);
+      expect(String(stderrSpy.mock.calls[0]?.[0] || "")).toContain("QMD_EMBED_PARALLELISM");
+    } finally {
+      stderrSpy.mockRestore();
+    }
+  });
+});
+
 describe("LlamaCpp expand context size config", () => {
   const defaultExpandContextSize = 2048;
 

From b77559223025cbcff3f992df0bf01147497c3bab Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tobi=20L=C3=BCtke?= <tobi@lutke.com>
Date: Sat, 9 May 2026 18:00:37 +0000
Subject: [PATCH 11/17] fix mcp --index store selection

---
 CHANGELOG.md      |  1 +
 src/cli/qmd.ts    |  9 +++----
 src/mcp/server.ts | 15 ++++++++----
 test/cli.test.ts  | 61 ++++++++++++++++++++++++++++++++++++++++++++---
 4 files changed, 75 insertions(+), 11 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 7014380..4cedf35 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -13,6 +13,7 @@
 - MCP: seed llama.cpp/GGML quiet env vars before launching `qmd mcp` so native logs cannot pollute stdio JSON-RPC framing. #593
 - CLI: remove CommonJS `require()` calls from ESM index path normalization so `qmd --index <path>` no longer crashes with `ERR_AMBIGUOUS_MODULE_SYNTAX` on Node 22+. #634
 - Windows CUDA: serialize llama.cpp embedding/reranking contexts by default to avoid intermittent `ggml-cuda.cu:98` crashes in `qmd query`; set `QMD_EMBED_PARALLELISM` to opt back into parallel contexts if your driver is stable. #519
+- MCP: make `qmd mcp --index <name>` use the selected index for both foreground and daemon HTTP servers instead of falling back to the default store. #343
 - GPU: respect explicit `QMD_LLAMA_GPU=metal|vulkan|cuda` backend overrides instead of always using auto GPU selection. #529
 - Fix: preserve original filename case in `handelize()`. The previous
   `.toLowerCase()` call made indexed paths unreachable on case-sensitive
diff --git a/src/cli/qmd.ts b/src/cli/qmd.ts
index 0c3a1e1..bbef459 100755
--- a/src/cli/qmd.ts
+++ b/src/cli/qmd.ts
@@ -3253,9 +3253,10 @@ if (isMain) {
           const logPath = resolve(cacheDir, "mcp.log");
           const logFd = openSync(logPath, "w"); // truncate — fresh log per daemon run
           const selfPath = fileURLToPath(import.meta.url);
+          const indexArgs = cli.values.index ? ["--index", String(cli.values.index)] : [];
           const spawnArgs = selfPath.endsWith(".ts")
-            ? ["--import", pathJoin(dirname(selfPath), "..", "..", "node_modules", "tsx", "dist", "esm", "index.mjs"), selfPath, "mcp", "--http", "--port", String(port)]
-            : [selfPath, "mcp", "--http", "--port", String(port)];
+            ? ["--import", pathJoin(dirname(selfPath), "..", "..", "node_modules", "tsx", "dist", "esm", "index.mjs"), selfPath, ...indexArgs, "mcp", "--http", "--port", String(port)]
+            : [selfPath, ...indexArgs, "mcp", "--http", "--port", String(port)];
           const child = nodeSpawn(process.execPath, spawnArgs, {
             stdio: ["ignore", logFd, logFd],
             detached: true,
@@ -3275,7 +3276,7 @@ if (isMain) {
         process.removeAllListeners("SIGINT");
         const { startMcpHttpServer } = await import("../mcp/server.js");
         try {
-          await startMcpHttpServer(port);
+          await startMcpHttpServer(port, { dbPath: getDbPath() });
         } catch (e: any) {
           if (e?.code === "EADDRINUSE") {
             console.error(`Port ${port} already in use. Try a different port with --port.`);
@@ -3286,7 +3287,7 @@ if (isMain) {
       } else {
         // Default: stdio transport
         const { startMcpServer } = await import("../mcp/server.js");
-        await startMcpServer();
+        await startMcpServer({ dbPath: getDbPath() });
       }
       break;
     }
diff --git a/src/mcp/server.ts b/src/mcp/server.ts
index 4fd0d77..a3016e2 100644
--- a/src/mcp/server.ts
+++ b/src/mcp/server.ts
@@ -538,7 +538,11 @@ Intent-aware lex (C++ performance, not sports):
 // Transport: stdio (default)
 // =============================================================================
 
-export async function startMcpServer(): Promise<void> {
+export type McpStartupOptions = {
+  dbPath?: string; // database path to open; getDefaultDbPath() is used when omitted
+};
+
+export async function startMcpServer(options: McpStartupOptions = {}): Promise<void> {
   // Opt into production mode when the MCP server is actually started, not
   // when this module is merely imported for its exports. Importing the module
   // at the top level flipped the global production flag and broke test
@@ -547,7 +551,7 @@ export async function startMcpServer(): Promise<void> {
   enableProductionMode();
   const configPath = getConfigPath();
   const store = await createStore({
-    dbPath: getDefaultDbPath(),
+    dbPath: options.dbPath ?? getDefaultDbPath(),
     ...(existsSync(configPath) ? { configPath } : {}),
   });
   const server = await createMcpServer(store);
@@ -569,14 +573,17 @@ export type HttpServerHandle = {
  * Start MCP server over Streamable HTTP (JSON responses, no SSE).
  * Binds to localhost only. Returns a handle for shutdown and port discovery.
  */
-export async function startMcpHttpServer(port: number, options?: { quiet?: boolean }): Promise<HttpServerHandle> {
+export async function startMcpHttpServer(
+  port: number,
+  options: ({ quiet?: boolean } & McpStartupOptions) = {},
+): Promise<HttpServerHandle> {
   // See startMcpServer() for the rationale — flip production mode here so the
   // HTTP transport resolves the real database path, without leaking state into
   // callers that only import this module for its exports (e.g. tests).
   enableProductionMode();
   const configPath = getConfigPath();
   const store = await createStore({
-    dbPath: getDefaultDbPath(),
+    dbPath: options.dbPath ?? getDefaultDbPath(),
     ...(existsSync(configPath) ? { configPath } : {}),
   });
 
diff --git a/test/cli.test.ts b/test/cli.test.ts
index 5748676..40c14c9 100644
--- a/test/cli.test.ts
+++ b/test/cli.test.ts
@@ -1403,13 +1403,17 @@ describe("mcp http daemon", () => {
   }
 
   /** Spawn a foreground HTTP server (non-blocking) and return the process */
-  function spawnHttpServer(port: number): import("child_process").ChildProcess {
-    const proc = spawn(tsxBin, [qmdScript, "mcp", "--http", "--port", String(port)], {
+  function spawnHttpServer(
+    port: number,
+    options: { args?: string[]; env?: Record<string, string> } = {},
+  ): import("child_process").ChildProcess {
+    const proc = spawn(tsxBin, [qmdScript, ...(options.args ?? []), "mcp", "--http", "--port", String(port)], {
       cwd: fixturesDir,
       env: {
         ...process.env,
         INDEX_PATH: daemonDbPath,
         QMD_CONFIG_DIR: daemonConfigDir,
+        ...options.env,
       },
       stdio: ["ignore", "pipe", "pipe"],
     });
@@ -1481,11 +1485,62 @@ describe("mcp http daemon", () => {
       const body = await res.json();
       expect(body.status).toBe("ok");
     } finally {
+      const closed = new Promise(r => proc.once("close", r));
       proc.kill("SIGTERM");
-      await new Promise(r => proc.on("close", r));
+      await closed;
     }
   });
 
+  test("foreground HTTP server honors --index when selecting the store", async () => {
+    const customIndex = "mcp-alt-index";
+    const customCacheDir = join(daemonTestDir, `cache-index-${Date.now()}-${Math.random().toString(16).slice(2)}`);
+    const customConfigDir = join(daemonTestDir, `config-index-${Date.now()}-${Math.random().toString(16).slice(2)}`);
+    await mkdir(customCacheDir, { recursive: true });
+    await mkdir(customConfigDir, { recursive: true });
+
+    const addResult = await runQmd(
+      ["--index", customIndex, "collection", "add", fixturesDir, "--name", "mcp-fixtures"],
+      {
+        dbPath: daemonDbPath,
+        configDir: customConfigDir,
+        env: {
+          INDEX_PATH: "", // NOTE(review): presumably blanked so --index, not INDEX_PATH, selects the database — confirm
+          XDG_CACHE_HOME: customCacheDir,
+        },
+      },
+    );
+    expect(addResult.exitCode).toBe(0);
+
+    const port = randomPort();
+    const proc = spawnHttpServer(port, {
+      args: ["--index", customIndex],
+      env: {
+        INDEX_PATH: "",
+        XDG_CACHE_HOME: customCacheDir,
+        QMD_CONFIG_DIR: customConfigDir,
+      },
+    });
+
+    try {
+      const ready = await waitForServer(port);
+      expect(ready).toBe(true);
+
+      const res = await fetch(`http://localhost:${port}/query`, {
+        method: "POST",
+        headers: { "Content-Type": "application/json" },
+        body: JSON.stringify({ searches: [{ type: "lex", query: "authentication" }], limit: 5 }),
+      });
+      expect(res.status).toBe(200);
+      const body = await res.json();
+      const files = body.results.map((r: { file: string }) => r.file);
+      expect(files.some((file: string) => file.includes("mcp-fixtures/notes/meeting.md"))).toBe(true); // collection added above via --index
+    } finally {
+      const closed = new Promise(r => proc.once("close", r)); // subscribe before kill() so "close" cannot be missed
+      proc.kill("SIGTERM");
+      await closed;
+    }
+  }, 10000);
+
   // -------------------------------------------------------------------------
   // Daemon lifecycle
   // -------------------------------------------------------------------------

From ddc969a5f48b78c8d451ef1127e1815025030b0c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tobi=20L=C3=BCtke?= <tobi@lutke.com>
Date: Sat, 9 May 2026 18:08:56 +0000
Subject: [PATCH 12/17] fix embed model and qmd home resolution

---
 CHANGELOG.md                    |   2 +
 src/cli/qmd.ts                  |   8 ++-
 src/collections.ts              |   4 +-
 src/paths.ts                    |   5 ++
 src/store.ts                    |  23 ++++---
 test/cli.test.ts                |  27 +++++++-
 test/collections-config.test.ts |  15 +++-
 test/store.test.ts              | 118 +++++++++++++++++++++++++++++++-
 8 files changed, 184 insertions(+), 18 deletions(-)
 create mode 100644 src/paths.ts

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 4cedf35..b2757c8 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -14,6 +14,8 @@
 - CLI: remove CommonJS `require()` calls from ESM index path normalization so `qmd --index <path>` no longer crashes with `ERR_AMBIGUOUS_MODULE_SYNTAX` on Node 22+. #634
 - Windows CUDA: serialize llama.cpp embedding/reranking contexts by default to avoid intermittent `ggml-cuda.cu:98` crashes in `qmd query`; set `QMD_EMBED_PARALLELISM` to opt back into parallel contexts if your driver is stable. #519
 - MCP: make `qmd mcp --index <name>` use the selected index for both foreground and daemon HTTP servers instead of falling back to the default store. #343
+- Embedding: respect `QMD_EMBED_MODEL` consistently for vector indexing and vector-backed search, with default-model fallback when unset.
+- Config: use one home-directory resolver for YAML config and the default SQLite cache path, avoiding Windows CLI/MCP split-brain when `HOME` is unset.
 - GPU: respect explicit `QMD_LLAMA_GPU=metal|vulkan|cuda` backend overrides instead of always using auto GPU selection. #529
 - Fix: preserve original filename case in `handelize()`. The previous
   `.toLowerCase()` call made indexed paths unreachable on case-sensitive
diff --git a/src/cli/qmd.ts b/src/cli/qmd.ts
index bbef459..f576cde 100755
--- a/src/cli/qmd.ts
+++ b/src/cli/qmd.ts
@@ -1679,8 +1679,12 @@ function parseChunkStrategy(value: unknown): ChunkStrategy | undefined {
   throw new Error(`--chunk-strategy must be "auto" or "regex" (got "${s}")`);
 }
 
+export function resolveEmbedModelForCli(): string { // QMD_EMBED_MODEL if set, else the default embed model URI
+  return process.env.QMD_EMBED_MODEL?.trim() || DEFAULT_EMBED_MODEL_URI; // blank/whitespace counts as unset
+}
+
 async function vectorIndex(
-  model: string = DEFAULT_EMBED_MODEL_URI,
+  model: string = resolveEmbedModelForCli(),
   force: boolean = false,
   batchOptions?: { maxDocsPerBatch?: number; maxBatchBytes?: number; chunkStrategy?: ChunkStrategy; collection?: string },
 ): Promise<void> {
@@ -3125,7 +3129,7 @@ if (isMain) {
         // embed operates on a single collection; only the first value is used.
         const embedValidatedCollections = resolveCollectionFilter(cli.opts.collection, false);
         const embedCollection = embedValidatedCollections[0];
-        await vectorIndex(DEFAULT_EMBED_MODEL_URI, !!cli.values.force, {
+        await vectorIndex(resolveEmbedModelForCli(), !!cli.values.force, {
           maxDocsPerBatch,
           maxBatchBytes: maxBatchMb === undefined ? undefined : maxBatchMb * 1024 * 1024,
           chunkStrategy: embedChunkStrategy,
diff --git a/src/collections.ts b/src/collections.ts
index a295de7..70185c6 100644
--- a/src/collections.ts
+++ b/src/collections.ts
@@ -7,7 +7,7 @@
 
 import { existsSync, mkdirSync, readFileSync, writeFileSync } from "fs";
 import { join, dirname, resolve } from "path";
-import { homedir } from "os";
+import { qmdHomedir } from "./paths.js";
 import YAML from "yaml";
 
 // ============================================================================
@@ -118,7 +118,7 @@ function getConfigDir(): string {
   if (process.env.XDG_CONFIG_HOME) {
     return join(process.env.XDG_CONFIG_HOME, "qmd");
   }
-  return join(homedir(), ".config", "qmd");
+  return join(qmdHomedir(), ".config", "qmd");
 }
 
 function getConfigFilePath(): string {
diff --git a/src/paths.ts b/src/paths.ts
new file mode 100644
index 0000000..07c51d3
--- /dev/null
+++ b/src/paths.ts
@@ -0,0 +1,5 @@
+import { homedir as osHomedir } from "node:os";
+
+export function qmdHomedir(): string { // os.homedir() can throw when no home is resolvable; keep "/tmp" reachable
+  try { return process.env.HOME || process.env.USERPROFILE || osHomedir() || "/tmp"; } catch { return "/tmp"; }
+}
diff --git a/src/store.ts b/src/store.ts
index 6ec8ac1..52dd334 100644
--- a/src/store.ts
+++ b/src/store.ts
@@ -18,6 +18,7 @@ import { createHash } from "crypto";
 import { readFileSync, realpathSync, statSync, mkdirSync } from "node:fs";
 // Note: node:path resolve is not imported — we export our own cross-platform resolve()
 import fastGlob from "fast-glob";
+import { qmdHomedir } from "./paths.js";
 import {
   LlamaCpp,
   getDefaultLlamaCpp,
@@ -38,7 +39,6 @@ import type {
 // Configuration
 // =============================================================================
 
-const HOME = process.env.HOME || process.env.USERPROFILE || "/tmp";
 export const DEFAULT_EMBED_MODEL = "embeddinggemma";
 export const DEFAULT_RERANK_MODEL = "ExpedientFalcon/qwen3-reranker:0.6b-q8_0";
 export const DEFAULT_QUERY_MODEL = "Qwen/Qwen3-1.7B";
@@ -334,7 +334,7 @@ export type ExpandedQuery = {
 // =============================================================================
 
 export function homedir(): string {
-  return HOME;
+  return qmdHomedir();
 }
 
 /**
@@ -1492,7 +1492,8 @@ export async function generateEmbeddings(
   options?: EmbedOptions
 ): Promise<EmbedResult> {
   const db = store.db;
-  const model = options?.model ?? DEFAULT_EMBED_MODEL;
+  const llm = getLlm(store);
+  const model = options?.model ?? llm.embedModelName ?? DEFAULT_EMBED_MODEL;
   const now = new Date().toISOString();
   const { maxDocsPerBatch, maxBatchBytes } = resolveEmbedOptions(options);
   const encoder = new TextEncoder();
@@ -1511,8 +1512,7 @@ export async function generateEmbeddings(
   const startTime = Date.now();
 
   // Use store's LlamaCpp or global singleton, wrapped in a session
-  const llm = getLlm(store);
-  const embedModelUri = llm.embedModelName;
+  const embedModelUri = model;
 
   // Create a session manager for this llm instance
   const result = await withLLMSessionForLlm(llm, async (session) => {
@@ -4276,7 +4276,8 @@ export async function hybridQuery(
 
     // Batch embed all vector queries in a single call
     const llm = getLlm(store);
-    const textsToEmbed = vecQueries.map(q => formatQueryForEmbedding(q.text, llm.embedModelName));
+    const embedModel = llm.embedModelName;
+    const textsToEmbed = vecQueries.map(q => formatQueryForEmbedding(q.text, embedModel));
     hooks?.onEmbedStart?.(textsToEmbed.length);
     const embedStart = Date.now();
     const embeddings = await llm.embedBatch(textsToEmbed);
@@ -4288,7 +4289,7 @@ export async function hybridQuery(
       if (!embedding) continue;
 
       const vecResults = await store.searchVec(
-        vecQueries[i]!.text, DEFAULT_EMBED_MODEL, 20, collection,
+        vecQueries[i]!.text, embedModel, 20, collection,
         undefined, embedding
       );
       if (vecResults.length > 0) {
@@ -4519,10 +4520,11 @@ export async function vectorSearchQuery(
   options?.hooks?.onExpand?.(query, vecExpanded, Date.now() - expandStart);
 
   // Run original + vec/hyde expanded through vector, sequentially — concurrent embed() hangs
+  const embedModel = getLlm(store).embedModelName;
   const queryTexts = [query, ...vecExpanded.map(q => q.query)];
   const allResults = new Map<string, VectorSearchResult>();
   for (const q of queryTexts) {
-    const vecResults = await store.searchVec(q, DEFAULT_EMBED_MODEL, limit, collection);
+    const vecResults = await store.searchVec(q, embedModel, limit, collection);
     for (const r of vecResults) {
       const existing = allResults.get(r.filepath);
       if (!existing || r.score > existing.score) {
@@ -4660,7 +4662,8 @@ export async function structuredSearch(
     );
     if (vecSearches.length > 0) {
       const llm = getLlm(store);
-      const textsToEmbed = vecSearches.map(s => formatQueryForEmbedding(s.query, llm.embedModelName));
+      const embedModel = llm.embedModelName;
+      const textsToEmbed = vecSearches.map(s => formatQueryForEmbedding(s.query, embedModel));
       hooks?.onEmbedStart?.(textsToEmbed.length);
       const embedStart = Date.now();
       const embeddings = await llm.embedBatch(textsToEmbed);
@@ -4672,7 +4675,7 @@ export async function structuredSearch(
 
         for (const coll of collectionList) {
           const vecResults = await store.searchVec(
-            vecSearches[i]!.query, DEFAULT_EMBED_MODEL, 20, coll,
+            vecSearches[i]!.query, embedModel, 20, coll,
             undefined, embedding
           );
           if (vecResults.length > 0) {
diff --git a/test/cli.test.ts b/test/cli.test.ts
index 40c14c9..d239347 100644
--- a/test/cli.test.ts
+++ b/test/cli.test.ts
@@ -13,7 +13,8 @@ import { join, dirname } from "path";
 import { fileURLToPath } from "url";
 import { spawn } from "child_process";
 import { setTimeout as sleep } from "timers/promises";
-import { buildEditorUri, termLink } from "../src/cli/qmd.ts";
+import { buildEditorUri, termLink, resolveEmbedModelForCli } from "../src/cli/qmd.ts";
+import { DEFAULT_EMBED_MODEL_URI } from "../src/llm.ts";
 
 // Test fixtures directory and database path
 let testDir: string;
@@ -243,6 +244,30 @@ describe("CLI Help", () => {
 });
 
 describe("CLI Embed", () => {
+  test("prefers QMD_EMBED_MODEL for qmd embed", () => { // env override wins for the CLI embed model
+    const prev = process.env.QMD_EMBED_MODEL; // snapshot so the finally block can restore it
+    process.env.QMD_EMBED_MODEL = "hf:env/embed-model.gguf";
+
+    try {
+      expect(resolveEmbedModelForCli()).toBe("hf:env/embed-model.gguf"); // returned exactly as set above
+    } finally {
+      if (prev === undefined) delete process.env.QMD_EMBED_MODEL; // delete, not assign: Node stringifies env values ("undefined")
+      else process.env.QMD_EMBED_MODEL = prev;
+    }
+  });
+
+  test("falls back to the default embed model when QMD_EMBED_MODEL is unset", () => { // no override -> default URI
+    const prev = process.env.QMD_EMBED_MODEL; // snapshot so the finally block can restore it
+    delete process.env.QMD_EMBED_MODEL; // make sure no override is present for this assertion
+
+    try {
+      expect(resolveEmbedModelForCli()).toBe(DEFAULT_EMBED_MODEL_URI); // constant imported from ../src/llm.ts
+    } finally {
+      if (prev === undefined) delete process.env.QMD_EMBED_MODEL; // delete, not assign: Node stringifies env values ("undefined")
+      else process.env.QMD_EMBED_MODEL = prev;
+    }
+  });
+
   test("rejects invalid --max-docs-per-batch", async () => {
     const { stderr, exitCode } = await runQmd(["embed", "--max-docs-per-batch", "0"]);
     expect(exitCode).toBe(1);
diff --git a/test/collections-config.test.ts b/test/collections-config.test.ts
index b6b15fe..3dd926b 100644
--- a/test/collections-config.test.ts
+++ b/test/collections-config.test.ts
@@ -7,7 +7,7 @@
 
 import { describe, test, expect, beforeEach, afterEach } from "vitest";
 import { join } from "path";
-import { homedir } from "os";
+import { qmdHomedir } from "../src/paths.js";
 import { getConfigPath, setConfigIndexName } from "../src/collections.js";
 
 // Save/restore env vars around each test
@@ -15,6 +15,8 @@ let savedEnv: Record<string, string | undefined>;
 
 beforeEach(() => {
   savedEnv = {
+    HOME: process.env.HOME,
+    USERPROFILE: process.env.USERPROFILE,
     QMD_CONFIG_DIR: process.env.QMD_CONFIG_DIR,
     XDG_CONFIG_HOME: process.env.XDG_CONFIG_HOME,
   };
@@ -38,7 +40,16 @@ describe("getConfigDir via getConfigPath", () => {
   test("defaults to ~/.config/qmd when no env vars are set", () => {
     delete process.env.QMD_CONFIG_DIR;
     delete process.env.XDG_CONFIG_HOME;
-    expect(getConfigPath()).toBe(join(homedir(), ".config", "qmd", "index.yml"));
+    expect(getConfigPath()).toBe(join(qmdHomedir(), ".config", "qmd", "index.yml"));
+  });
+
+  test("uses the same USERPROFILE fallback as default DB path when HOME is unset", () => { // HOME/USERPROFILE are captured in savedEnv by beforeEach
+    delete process.env.HOME; // with HOME gone, the expectation below needs USERPROFILE to supply the home dir
+    delete process.env.QMD_CONFIG_DIR; // clear both config-dir overrides so the home-based default applies
+    delete process.env.XDG_CONFIG_HOME;
+    process.env.USERPROFILE = "/Users/windows-user";
+
+    expect(getConfigPath()).toBe(join("/Users/windows-user", ".config", "qmd", "index.yml")); // path is rooted at USERPROFILE
   });
 
   test("QMD_CONFIG_DIR takes highest priority", () => {
diff --git a/test/store.test.ts b/test/store.test.ts
index 9f82624..8bfaae9 100644
--- a/test/store.test.ts
+++ b/test/store.test.ts
@@ -53,6 +53,10 @@ import {
   insertDocument,
   generateEmbeddings,
   getHybridRrfWeights,
+  _resetProductionModeForTesting,
+  hybridQuery,
+  structuredSearch,
+  vectorSearchQuery,
   type Store,
   type DocumentResult,
   type SearchResult,
@@ -282,7 +286,9 @@ afterAll(async () => {
 
 describe("Store Creation", () => {
   test("createStore throws without explicit path in test mode", () => {
-    // In test mode, createStore without path should throw to prevent accidental writes
+    // In test mode, createStore without path should throw to prevent accidental writes.
+    // Other tests may enable production mode in the same Bun process, so reset first.
+    _resetProductionModeForTesting();
     const originalIndexPath = process.env.INDEX_PATH;
     delete process.env.INDEX_PATH;
 
@@ -3021,6 +3027,116 @@ describe("Embedding batching", () => {
     }
   });
 
+  test("generateEmbeddings uses the active llm embed model when no explicit model is passed", async () => {
+    const store = await createTestStore();
+    const db = store.db;
+    const fakeLlm = createFakeEmbedLlm();
+    const model = "hf:env/embed-model.gguf";
+
+    setDefaultLlamaCpp(createFakeTokenizer() as any); // cleared again in the finally block
+    store.llm = { ...fakeLlm, embedModelName: model } as any; // store-level LLM that reports the custom model name
+
+    try {
+      await insertTestDocument(db, "docs", { name: "one", body: "# One\n\nAlpha" });
+
+      const result = await generateEmbeddings(store); // no options: the model must come from store.llm
+
+      expect(result.chunksEmbedded).toBe(1);
+      expect(fakeLlm.embedCalls[0]?.options?.model).toBe(model); // first embed call was given the custom model
+      expect(fakeLlm.embedBatchModelCalls).toEqual([{ model }]); // exactly one batch call, same model
+      expect(db.prepare(`SELECT DISTINCT model FROM content_vectors`).all()).toEqual([{ model }]); // every stored row uses it
+    } finally {
+      setDefaultLlamaCpp(null); // clear the default installed above
+      await cleanupTestDb(store);
+    }
+  });
+
+  test("vectorSearchQuery uses the active llm embed model for vector lookups", async () => {
+    const store = await createTestStore();
+    const model = "hf:Qwen/Qwen3-Embedding-0.6B-GGUF/Qwen3-Embedding-0.6B-Q8_0.gguf";
+    const searchVecSpy = vi.fn(async () => [] as SearchResult[]) as any; // records arguments, returns no hits
+
+    store.db.exec(`CREATE TABLE vectors_vec (hash_seq TEXT PRIMARY KEY, embedding BLOB)`); // NOTE(review): presumably satisfies a vectors_vec existence check - confirm
+    store.llm = { embedModelName: model } as any; // only the model name is stubbed here
+    store.searchVec = searchVecSpy as any;
+    store.expandQuery = vi.fn(async () => []) as any; // no expansions, so only the original query should be searched
+
+    try {
+      await vectorSearchQuery(store, "custom query", { limit: 7, minScore: 0 });
+
+      expect(searchVecSpy).toHaveBeenCalledTimes(1);
+      expect(searchVecSpy.mock.calls[0]?.[0]).toBe("custom query"); // arg 0: query text
+      expect(searchVecSpy.mock.calls[0]?.[1]).toBe(model); // arg 1: model name taken from store.llm
+      expect(searchVecSpy.mock.calls[0]?.[2]).toBe(7); // arg 2: the limit option passed above
+    } finally {
+      await cleanupTestDb(store);
+    }
+  });
+
+  test("hybridQuery uses the active llm embed model for precomputed vector lookups", async () => {
+    const store = await createTestStore();
+    const model = "hf:Qwen/Qwen3-Embedding-0.6B-GGUF/Qwen3-Embedding-0.6B-Q8_0.gguf";
+    const embedBatchSpy = vi.fn(async (texts: string[]) => texts.map(() => ({ // one fixed embedding per input text
+      embedding: [1, 2, 3],
+      model,
+    })));
+    const searchVecSpy = vi.fn(async () => [] as SearchResult[]) as any; // records arguments, returns no hits
+
+    store.db.exec(`CREATE TABLE vectors_vec (hash_seq TEXT PRIMARY KEY, embedding BLOB)`); // NOTE(review): presumably satisfies a vectors_vec existence check - confirm
+    store.llm = {
+      embedModelName: model,
+      embedBatch: embedBatchSpy,
+    } as any;
+    store.searchVec = searchVecSpy as any;
+    store.searchFTS = vi.fn(() => []) as any; // full-text search stubbed to return nothing
+    store.expandQuery = vi.fn(async () => []) as any; // query expansion stubbed to return nothing
+
+    try {
+      await hybridQuery(store, "hybrid query", { limit: 5, minScore: 0, skipRerank: true });
+
+      expect(embedBatchSpy).toHaveBeenCalledTimes(1);
+      expect(searchVecSpy).toHaveBeenCalledTimes(1);
+      expect(searchVecSpy.mock.calls[0]?.[0]).toBe("hybrid query"); // arg 0: query text
+      expect(searchVecSpy.mock.calls[0]?.[1]).toBe(model); // arg 1: model name taken from store.llm
+      expect(searchVecSpy.mock.calls[0]?.[5]).toEqual([1, 2, 3]); // arg 5: the precomputed embedding from embedBatch
+    } finally {
+      await cleanupTestDb(store);
+    }
+  });
+
+  test("structuredSearch uses the active llm embed model for precomputed vector lookups", async () => {
+    const store = await createTestStore();
+    const model = "hf:Qwen/Qwen3-Embedding-0.6B-GGUF/Qwen3-Embedding-0.6B-Q8_0.gguf";
+    const embedBatchSpy = vi.fn(async (texts: string[]) => texts.map(() => ({ // one fixed embedding per input text
+      embedding: [1, 2, 3],
+      model,
+    })));
+    const searchVecSpy = vi.fn(async () => [] as SearchResult[]) as any; // records arguments, returns no hits
+
+    store.db.exec(`CREATE TABLE vectors_vec (hash_seq TEXT PRIMARY KEY, embedding BLOB)`); // NOTE(review): presumably satisfies a vectors_vec existence check - confirm
+    store.llm = {
+      embedModelName: model,
+      embedBatch: embedBatchSpy,
+    } as any;
+    store.searchVec = searchVecSpy as any;
+
+    try {
+      await structuredSearch(store, [{ type: "vec", query: "structured query" }], { // a single vec search, no lex searches
+        limit: 5,
+        minScore: 0,
+        skipRerank: true,
+      });
+
+      expect(embedBatchSpy).toHaveBeenCalledTimes(1);
+      expect(searchVecSpy).toHaveBeenCalledTimes(1);
+      expect(searchVecSpy.mock.calls[0]?.[0]).toBe("structured query"); // arg 0: query text
+      expect(searchVecSpy.mock.calls[0]?.[1]).toBe(model); // arg 1: model name taken from store.llm
+      expect(searchVecSpy.mock.calls[0]?.[5]).toEqual([1, 2, 3]); // arg 5: the precomputed embedding from embedBatch
+    } finally {
+      await cleanupTestDb(store);
+    }
+  });
+
   test("generateEmbeddings rejects invalid batch limits", async () => {
     const store = await createTestStore();
 

From e627ca7de66136f4c4ab46ef35fb9a5e1c5f6aba Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tobi=20L=C3=BCtke?= <tobi@lutke.com>
Date: Sat, 9 May 2026 18:20:26 +0000
Subject: [PATCH 13/17] test: allow slow CPU rerank fixture

---
 test/llm.test.ts | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test/llm.test.ts b/test/llm.test.ts
index 3678bad..ff22c0c 100644
--- a/test/llm.test.ts
+++ b/test/llm.test.ts
@@ -694,7 +694,7 @@ describe.skipIf(!!process.env.CI)("LlamaCpp Integration", () => {
       for (const doc of result.results) {
         console.log(`  ${doc.file}: ${doc.score.toFixed(4)}`);
       }
-    });
+    }, 30000);
   });
 
   describe("expandQuery", () => {

From 4505d8132e441f90475e0f6ef926a3e44334a5a0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tobi=20L=C3=BCtke?= <tobi@lutke.com>
Date: Sat, 9 May 2026 18:31:26 +0000
Subject: [PATCH 14/17] ci(nix): update node module hashes

Refresh fixed-output hashes after moving AST grammar packages into runtime dependencies so Nix CI builds the current locked dependency graph.
---
 flake.nix | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/flake.nix b/flake.nix
index 3645013..7fd8014 100644
--- a/flake.nix
+++ b/flake.nix
@@ -44,8 +44,8 @@
         });
 
         nodeModulesHashes = {
-          x86_64-linux = "sha256-D0ezO4vqq4iswcAMU2DCql9ZAQvh3me6N9aDB5roq4w=";
-          aarch64-darwin = "sha256-qU+9KdR/nTocelyANS09I/4yaQ+7s1LvJNqB27IOK/c=";
+          x86_64-linux = "sha256-zee2c7LS+JxpZOpdWG2qyUKlS7EJq2PL/wSo+AewJ9g=";
+          aarch64-darwin = "sha256-qL80cpCrl3BbEWqmYStRuTDJlIIAFW1Y71YbJOeu/f0=";
 
           # Populate these on first build for additional hosts if/when needed.
           aarch64-linux = pkgs.lib.fakeHash;

From b32ee4e66099a63b324b652e0aa8a8d1054dfb71 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tobi=20L=C3=BCtke?= <tobi@lutke.com>
Date: Sat, 9 May 2026 18:45:56 +0000
Subject: [PATCH 15/17] test: make CI fixture invocations portable

---
 test/cli.test.ts                  | 1 +
 test/esm-ambiguous-module.test.ts | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/test/cli.test.ts b/test/cli.test.ts
index d239347..070113b 100644
--- a/test/cli.test.ts
+++ b/test/cli.test.ts
@@ -1438,6 +1438,7 @@ describe("mcp http daemon", () => {
         ...process.env,
         INDEX_PATH: daemonDbPath,
         QMD_CONFIG_DIR: daemonConfigDir,
+        PWD: fixturesDir,
         ...options.env,
       },
       stdio: ["ignore", "pipe", "pipe"],
diff --git a/test/esm-ambiguous-module.test.ts b/test/esm-ambiguous-module.test.ts
index 80e61b7..d4602af 100644
--- a/test/esm-ambiguous-module.test.ts
+++ b/test/esm-ambiguous-module.test.ts
@@ -9,7 +9,7 @@ const repoRoot = resolve(dirname(fileURLToPath(import.meta.url)), "..");
 
 describe("Node ESM entrypoints", () => {
   test("CLI --index path normalizes via setIndexName/setConfigIndexName under Node 22+", () => {
-    execFileSync("bun", ["run", "build"], {
+    execFileSync("npm", ["run", "build"], {
       cwd: repoRoot,
       encoding: "utf-8",
       stdio: "pipe",

From 669e234d1e4c11fdfde9f6774d7683c6b1fc196b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tobi=20L=C3=BCtke?= <tobi@lutke.com>
Date: Sat, 9 May 2026 18:56:06 +0000
Subject: [PATCH 16/17] test: index MCP HTTP fixture before query

---
 test/cli.test.ts | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/test/cli.test.ts b/test/cli.test.ts
index 070113b..b5758c5 100644
--- a/test/cli.test.ts
+++ b/test/cli.test.ts
@@ -1537,6 +1537,19 @@ describe("mcp http daemon", () => {
     );
     expect(addResult.exitCode).toBe(0);
 
+    const updateResult = await runQmd(
+      ["--index", customIndex, "update"],
+      {
+        dbPath: daemonDbPath,
+        configDir: customConfigDir,
+        env: {
+          INDEX_PATH: "",
+          XDG_CACHE_HOME: customCacheDir,
+        },
+      },
+    );
+    expect(updateResult.exitCode).toBe(0);
+
     const port = randomPort();
     const proc = spawnHttpServer(port, {
       args: ["--index", customIndex],

From e36ab96567553df07ff1ff42cc933b381f166a35 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tobi=20L=C3=BCtke?= <tobi@lutke.com>
Date: Sat, 9 May 2026 19:03:17 +0000
Subject: [PATCH 17/17] fix: allow HTTP query rerank control

---
 src/mcp/server.ts | 1 +
 test/cli.test.ts  | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/mcp/server.ts b/src/mcp/server.ts
index a3016e2..2f5482f 100644
--- a/src/mcp/server.ts
+++ b/src/mcp/server.ts
@@ -693,6 +693,7 @@ export async function startMcpHttpServer(
           limit: params.limit ?? 10,
           minScore: params.minScore ?? 0,
           intent: params.intent,
+          rerank: params.rerank,
         });
 
         // Use first lex or vec query for snippet extraction
diff --git a/test/cli.test.ts b/test/cli.test.ts
index b5758c5..9c575f8 100644
--- a/test/cli.test.ts
+++ b/test/cli.test.ts
@@ -1567,7 +1567,7 @@ describe("mcp http daemon", () => {
       const res = await fetch(`http://localhost:${port}/query`, {
         method: "POST",
         headers: { "Content-Type": "application/json" },
-        body: JSON.stringify({ searches: [{ type: "lex", query: "authentication" }], limit: 5 }),
+        body: JSON.stringify({ searches: [{ type: "lex", query: "authentication" }], limit: 5, rerank: false }),
       });
       expect(res.status).toBe(200);
       const body = await res.json();