Merge pull request #636 from tobi/stack/qmd-kanban-fixes-2026-05-09

Integrate QMD fix stack
2026-05-09 16:22:07 -04:00 · 2026-05-09 16:22:07 -04:00 · 746beedb48
commit 746beedb48
parent d58fedf4b5 e36ab96567
24 changed files with 1198 additions and 131 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@ -4,6 +4,18 @@

 ### Fixes

+- Embedding: `qmd embed -c <collection>` now scopes pending-doc selection
+  to the requested collection instead of embedding global pending work.
+  Scoped `--force` clears only collection-owned vectors, preserves shared
+  hashes referenced by sibling collections, and drops `vectors_vec` only
+  when the scoped clear empties all vectors.
+- Hybrid search: weight RRF lists by query type so original FTS and original vector evidence get the intended 2x boost, instead of accidentally boosting the first lexical expansion. #591
+- MCP: seed llama.cpp/GGML quiet env vars before launching `qmd mcp` so native logs cannot pollute stdio JSON-RPC framing. #593
+- CLI: remove CommonJS `require()` calls from ESM index path normalization so `qmd --index <path>` no longer crashes with `ERR_AMBIGUOUS_MODULE_SYNTAX` on Node 22+. #634
+- Windows CUDA: serialize llama.cpp embedding/reranking contexts by default to avoid intermittent `ggml-cuda.cu:98` crashes in `qmd query`; set `QMD_EMBED_PARALLELISM` to opt back into parallel contexts if your driver is stable. #519
+- MCP: make `qmd mcp --index <name>` use the selected index for both foreground and daemon HTTP servers instead of falling back to the default store. #343
+- Embedding: respect `QMD_EMBED_MODEL` consistently for vector indexing and vector-backed search, with default-model fallback when unset.
+- Config: use one home-directory resolver for YAML config and the default SQLite cache path, avoiding Windows CLI/MCP split-brain when `HOME` is unset.
 - GPU: respect explicit `QMD_LLAMA_GPU=metal|vulkan|cuda` backend overrides instead of always using auto GPU selection. #529
 - Fix: preserve original filename case in `handelize()`. The previous
  `.toLowerCase()` call made indexed paths unreachable on case-sensitive
@ -12,6 +24,15 @@
 - CLI: make `qmd status` skip native `node-llama-cpp` device probing by
  default so status stays safe on machines with broken or unsupported GPU
  drivers. Set `QMD_STATUS_DEVICE_PROBE=1` to opt in.
+- CLI: lazy-load `node-llama-cpp` so lightweight commands such as
+  `qmd status` do not import native ML dependencies or trigger llama.cpp
+  builds on ARM/no-GPU machines. #491
+- Store: keep content rows referenced by inactive documents during orphan
+  cleanup so `qmd update` preserves soft-deleted tombstones for removed
+  files. #585
+- Packaging: install AST grammar WASM packages as required dependencies so
+  Bun global installs include TypeScript/TSX/JavaScript grammars, and add a
+  `smoke:package-grammars` verification command. #595

 ## [2.1.0] - 2026-04-05

--- a/README.md
+++ b/README.md
@ -797,6 +797,8 @@ llm_cache       -- Cached LLM responses (query expansion, rerank scores)
 | Variable | Default | Description |
 |----------|---------|-------------|
 | `XDG_CACHE_HOME` | `~/.cache` | Cache directory location |
+| `QMD_LLAMA_GPU` | `auto` | Force llama.cpp GPU backend (`metal`, `vulkan`, `cuda`) or disable GPU with `false` |
+| `QMD_EMBED_PARALLELISM` | automatic | Override embedding/reranking context parallelism (1-8). Windows CUDA defaults to `1` because parallel CUDA contexts can crash with `ggml-cuda.cu:98`; use Vulkan or raise this only if your driver is stable. |

 ## How It Works

--- a/bin/qmd
+++ b/bin/qmd
@ -15,6 +15,16 @@ done
 # to avoid native module ABI mismatches (e.g., better-sqlite3 compiled for bun vs node)
 DIR="$(cd -P "$(dirname "$SOURCE")/.." && pwd)"

+# MCP stdio reserves stdout exclusively for JSON-RPC frames. node-llama-cpp
+# / llama.cpp / ggml can write native logs directly to stdout before JS-level
+# log handlers are attached, so seed the native quiet env before Node/Bun imports
+# the CLI and its LLM modules. Preserve explicit user values when provided.
+if [ "$1" = "mcp" ]; then  # NOTE(review): only matches when "mcp" is the first argument; an invocation with a leading global flag (e.g. --index <name> mcp) would skip this seeding - confirm that is intended
+  export LLAMA_LOG_LEVEL="${LLAMA_LOG_LEVEL:-error}"  # the :- expansion keeps any non-empty value the user already exported
+  export GGML_LOG_LEVEL="${GGML_LOG_LEVEL:-error}"
+  export GGML_BACKEND_SILENT="${GGML_BACKEND_SILENT:-1}"
+fi
+
 # Detect the package manager that installed dependencies by checking lockfiles.
 # $BUN_INSTALL is intentionally NOT checked — it only indicates that bun exists
 # on the system, not that it was used to install this package (see #361).
--- a/bun.lock
+++ b/bun.lock
@ -11,6 +11,10 @@
        "node-llama-cpp": "3.18.1",
        "picomatch": "4.0.4",
        "sqlite-vec": "0.1.9",
+        "tree-sitter-go": "0.23.4",
+        "tree-sitter-python": "0.23.4",
+        "tree-sitter-rust": "0.24.0",
+        "tree-sitter-typescript": "0.23.2",
        "web-tree-sitter": "0.26.7",
        "yaml": "2.8.3",
        "zod": "4.2.1",
@ -26,10 +30,6 @@
        "sqlite-vec-linux-arm64": "0.1.9",
        "sqlite-vec-linux-x64": "0.1.9",
        "sqlite-vec-windows-x64": "0.1.9",
-        "tree-sitter-go": "0.23.4",
-        "tree-sitter-python": "0.23.4",
-        "tree-sitter-rust": "0.24.0",
-        "tree-sitter-typescript": "0.23.2",
      },
      "peerDependencies": {
        "typescript": "^5.9.3",
@ -509,7 +509,7 @@

    "node-abi": ["node-abi@3.87.0", "", { "dependencies": { "semver": "^7.3.5" } }, "sha512-+CGM1L1CgmtheLcBuleyYOn7NWPVu0s0EJH2C4puxgEZb9h8QpR9G2dBfZJOAUhi7VQxuBPMd0hiISWcTyiYyQ=="],

-    "node-addon-api": ["node-addon-api@8.5.0", "", {}, "sha512-/bRZty2mXUIFY/xU5HLvveNHlswNJej+RnxBjOMkidWfwZzgTbPG1E3K5TOxRLOR+5hX7bSofy8yf1hZevMS8A=="],
+    "node-addon-api": ["node-addon-api@8.7.0", "", {}, "sha512-9MdFxmkKaOYVTV+XVRG8ArDwwQ77XIgIPyKASB1k3JPq3M8fGQQQE3YpMOrKm6g//Ktx8ivZr8xo1Qmtqub+GA=="],

    "node-api-headers": ["node-api-headers@1.8.0", "", {}, "sha512-jfnmiKWjRAGbdD1yQS28bknFM1tbHC1oucyuMPjmkEs+kpiu76aRs40WlTmBmyEgzDM76ge1DQ7XJ3R5deiVjQ=="],

@ -773,8 +773,6 @@

    "micromatch/picomatch": ["picomatch@2.3.1", "", {}, "sha512-JU3teHTNjmE2VCGFzuY8EXzCDVwEqB2a8fsIvwaStHhAWJEeVd1o1QD80CU6+ZdEXXSLbSsuLwJjkCBWqRQUVA=="],

-    "node-llama-cpp/node-addon-api": ["node-addon-api@8.7.0", "", {}, "sha512-9MdFxmkKaOYVTV+XVRG8ArDwwQ77XIgIPyKASB1k3JPq3M8fGQQQE3YpMOrKm6g//Ktx8ivZr8xo1Qmtqub+GA=="],
-
    "ora/cli-spinners": ["cli-spinners@3.4.0", "", {}, "sha512-bXfOC4QcT1tKXGorxL3wbJm6XJPDqEnij2gQ2m7ESQuE+/z9YFIWnl/5RpTiKWbMq3EVKR4fRLJGn6DVfu0mpw=="],

    "postcss/nanoid": ["nanoid@3.3.11", "", { "bin": { "nanoid": "bin/nanoid.cjs" } }, "sha512-N8SpfPUnUp1bK+PMYW8qSWdl9U+wwNWI4QKxOYDy9JAro3WMX7p2OeVRF9v+347pnakNevPmiHhNmZ2HbFA76w=="],
@ -793,6 +791,16 @@

    "tinyglobby/picomatch": ["picomatch@4.0.3", "", {}, "sha512-5gTmgEY/sqK6gFXLIsQNH19lWb4ebPDLA4SdLP7dsWkIXHWlG66oPuVvXSGFPppYZz8ZDZq0dYYrbHfBCVUb1Q=="],

+    "tree-sitter-go/node-addon-api": ["node-addon-api@8.5.0", "", {}, "sha512-/bRZty2mXUIFY/xU5HLvveNHlswNJej+RnxBjOMkidWfwZzgTbPG1E3K5TOxRLOR+5hX7bSofy8yf1hZevMS8A=="],
+
+    "tree-sitter-javascript/node-addon-api": ["node-addon-api@8.5.0", "", {}, "sha512-/bRZty2mXUIFY/xU5HLvveNHlswNJej+RnxBjOMkidWfwZzgTbPG1E3K5TOxRLOR+5hX7bSofy8yf1hZevMS8A=="],
+
+    "tree-sitter-python/node-addon-api": ["node-addon-api@8.5.0", "", {}, "sha512-/bRZty2mXUIFY/xU5HLvveNHlswNJej+RnxBjOMkidWfwZzgTbPG1E3K5TOxRLOR+5hX7bSofy8yf1hZevMS8A=="],
+
+    "tree-sitter-rust/node-addon-api": ["node-addon-api@8.5.0", "", {}, "sha512-/bRZty2mXUIFY/xU5HLvveNHlswNJej+RnxBjOMkidWfwZzgTbPG1E3K5TOxRLOR+5hX7bSofy8yf1hZevMS8A=="],
+
+    "tree-sitter-typescript/node-addon-api": ["node-addon-api@8.5.0", "", {}, "sha512-/bRZty2mXUIFY/xU5HLvveNHlswNJej+RnxBjOMkidWfwZzgTbPG1E3K5TOxRLOR+5hX7bSofy8yf1hZevMS8A=="],
+
    "vite/picomatch": ["picomatch@4.0.3", "", {}, "sha512-5gTmgEY/sqK6gFXLIsQNH19lWb4ebPDLA4SdLP7dsWkIXHWlG66oPuVvXSGFPppYZz8ZDZq0dYYrbHfBCVUb1Q=="],

    "vitest/picomatch": ["picomatch@4.0.3", "", {}, "sha512-5gTmgEY/sqK6gFXLIsQNH19lWb4ebPDLA4SdLP7dsWkIXHWlG66oPuVvXSGFPppYZz8ZDZq0dYYrbHfBCVUb1Q=="],
--- a/flake.nix
+++ b/flake.nix
@ -44,8 +44,8 @@
        });

        nodeModulesHashes = {
-          x86_64-linux = "sha256-D0ezO4vqq4iswcAMU2DCql9ZAQvh3me6N9aDB5roq4w=";
-          aarch64-darwin = "sha256-qU+9KdR/nTocelyANS09I/4yaQ+7s1LvJNqB27IOK/c=";
+          x86_64-linux = "sha256-zee2c7LS+JxpZOpdWG2qyUKlS7EJq2PL/wSo+AewJ9g=";
+          aarch64-darwin = "sha256-qL80cpCrl3BbEWqmYStRuTDJlIIAFW1Y71YbJOeu/f0=";

          # Populate these on first build for additional hosts if/when needed.
          aarch64-linux = pkgs.lib.fakeHash;
--- a/package.json
+++ b/package.json
@ -17,6 +17,7 @@
  "files": [
    "bin/",
    "dist/",
+    "scripts/check-package-grammars.mjs",
    "LICENSE",
    "CHANGELOG.md"
  ],
@ -31,7 +32,8 @@
    "vsearch": "tsx src/cli/qmd.ts vsearch",
    "rerank": "tsx src/cli/qmd.ts rerank",
    "inspector": "npx @modelcontextprotocol/inspector tsx src/cli/qmd.ts mcp",
-    "release": "./scripts/release.sh"
+    "release": "./scripts/release.sh",
+    "smoke:package-grammars": "node scripts/check-package-grammars.mjs"
  },
  "publishConfig": {
    "access": "public"
@ -53,18 +55,18 @@
    "sqlite-vec": "0.1.9",
    "web-tree-sitter": "0.26.7",
    "yaml": "2.8.3",
-    "zod": "4.2.1"
+    "zod": "4.2.1",
+    "tree-sitter-go": "0.23.4",
+    "tree-sitter-python": "0.23.4",
+    "tree-sitter-rust": "0.24.0",
+    "tree-sitter-typescript": "0.23.2"
  },
  "optionalDependencies": {
    "sqlite-vec-darwin-arm64": "0.1.9",
    "sqlite-vec-darwin-x64": "0.1.9",
    "sqlite-vec-linux-arm64": "0.1.9",
    "sqlite-vec-linux-x64": "0.1.9",
-    "sqlite-vec-windows-x64": "0.1.9",
-    "tree-sitter-go": "0.23.4",
-    "tree-sitter-python": "0.23.4",
-    "tree-sitter-rust": "0.24.0",
-    "tree-sitter-typescript": "0.23.2"
+    "sqlite-vec-windows-x64": "0.1.9"
  },
  "devDependencies": {
    "@types/better-sqlite3": "7.6.13",
--- a/scripts/check-package-grammars.mjs
+++ b/scripts/check-package-grammars.mjs
@ -0,0 +1,29 @@
+#!/usr/bin/env node
+import { createRequire } from "node:module";
+
+const require = createRequire(import.meta.url); // ES modules have no ambient require(); build one anchored at this script's location
+
+const grammars = [ // package-relative specifiers of the grammar .wasm files this smoke check looks for
+  "tree-sitter-typescript/tree-sitter-typescript.wasm",
+  "tree-sitter-typescript/tree-sitter-tsx.wasm",
+  "tree-sitter-python/tree-sitter-python.wasm",
+  "tree-sitter-go/tree-sitter-go.wasm",
+  "tree-sitter-rust/tree-sitter-rust.wasm",
+];
+
+let ok = true; // flipped to false as soon as any grammar fails to resolve
+for (const grammar of grammars) {
+  try {
+    const resolved = require.resolve(grammar); // resolves the path only; the .wasm is not loaded or validated
+    console.log(`ok ${grammar} -> ${resolved}`);
+  } catch (err) {
+    ok = false; // keep looping so every missing grammar is reported in a single run
+    console.error(`missing ${grammar}`);
+    console.error(err instanceof Error ? err.message : String(err));
+  }
+}
+
+if (!ok) { // print repair guidance and exit non-zero so the smoke check fails
+  console.error("\nAST grammar package smoke check failed. Run `bun install` locally or repair a broken global install with the matching `bun add tree-sitter-...@<version>` command shown by `qmd status`.");
+  process.exit(1);
+}
--- a/src/ast.ts
+++ b/src/ast.ts
@ -63,15 +63,22 @@ export function detectLanguage(filepath: string): SupportedLanguage | null {
 /**
 * Maps language to the npm package and wasm filename for the grammar.
 */
-const GRAMMAR_MAP: Record<SupportedLanguage, { pkg: string; wasm: string }> = {
-  typescript: { pkg: "tree-sitter-typescript", wasm: "tree-sitter-typescript.wasm" },
-  tsx:        { pkg: "tree-sitter-typescript", wasm: "tree-sitter-tsx.wasm" },
-  javascript: { pkg: "tree-sitter-typescript", wasm: "tree-sitter-typescript.wasm" },
-  python:     { pkg: "tree-sitter-python",     wasm: "tree-sitter-python.wasm" },
-  go:         { pkg: "tree-sitter-go",         wasm: "tree-sitter-go.wasm" },
-  rust:       { pkg: "tree-sitter-rust",        wasm: "tree-sitter-rust.wasm" },
+const GRAMMAR_MAP: Record<SupportedLanguage, { pkg: string; wasm: string; version: string }> = { // version is interpolated into the "bun add pkg@version" repair hint
+  typescript: { pkg: "tree-sitter-typescript", wasm: "tree-sitter-typescript.wasm", version: "0.23.2" },
+  tsx:        { pkg: "tree-sitter-typescript", wasm: "tree-sitter-tsx.wasm",        version: "0.23.2" },
+  javascript: { pkg: "tree-sitter-typescript", wasm: "tree-sitter-typescript.wasm", version: "0.23.2" }, // JavaScript is mapped to the TypeScript grammar file
+  python:     { pkg: "tree-sitter-python",     wasm: "tree-sitter-python.wasm",     version: "0.23.4" },
+  go:         { pkg: "tree-sitter-go",         wasm: "tree-sitter-go.wasm",         version: "0.23.4" },
+  rust:       { pkg: "tree-sitter-rust",       wasm: "tree-sitter-rust.wasm",       version: "0.24.0" }, // NOTE(review): these versions duplicate the pins in package.json - keep them in sync
 };

+export function formatGrammarLoadError(language: SupportedLanguage, err: unknown): string { // Build the one-line message used when a grammar fails to load: what failed, why, and how to repair it
+  const grammar = GRAMMAR_MAP[language];
+  const detail = err instanceof Error ? err.message : String(err); // non-Error throwables are stringified
+  return `${grammar.pkg}/${grammar.wasm} failed to load (${detail}); falling back to regex chunking. ` +
+    `Repair a broken global install with: bun add ${grammar.pkg}@${grammar.version}`; // package name and version both come from GRAMMAR_MAP
+}
+
 // =============================================================================
 // Per-Language Query Definitions
 // =============================================================================
@ -176,6 +183,9 @@ let initPromise: Promise<void> | null = null;
 /** Languages that have already failed to load — warn only once per process. */
 const failedLanguages = new Set<string>();

+/** Last grammar load error by language, for status output. */
+const grammarLoadErrors = new Map<SupportedLanguage, string>();
+
 /** Cached grammar load promises. */
 const grammarCache = new Map<string, Promise<LanguageType>>();

@ -228,7 +238,9 @@ async function loadGrammar(language: SupportedLanguage): Promise<LanguageType |
  } catch (err) {
    failedLanguages.add(language);
    grammarCache.delete(wasmKey);
-    console.warn(`[qmd] Failed to load tree-sitter grammar for ${language}: ${err}`);
+    const message = formatGrammarLoadError(language, err);
+    grammarLoadErrors.set(language, message);
+    console.warn(`[qmd] AST grammar unavailable for ${language}: ${message}`);
    return null;
  }
 }
@ -345,7 +357,7 @@ export async function getASTStatus(): Promise<{
        getQuery(lang, grammar);
        languages.push({ language: lang, available: true });
      } else {
-        languages.push({ language: lang, available: false, error: "grammar failed to load" });
+        languages.push({ language: lang, available: false, error: grammarLoadErrors.get(lang) ?? "grammar failed to load" });
      }
    } catch (err) {
      languages.push({
--- a/src/cli/qmd.ts
+++ b/src/cli/qmd.ts
@ -3,7 +3,7 @@ import type { Database } from "../db.js";
 import fastGlob from "fast-glob";
 import { execSync, spawn as nodeSpawn } from "child_process";
 import { fileURLToPath } from "url";
-import { dirname, join as pathJoin, relative as relativePath } from "path";
+import { dirname, join as pathJoin, relative as relativePath, resolve as pathResolve } from "path";
 import { parseArgs } from "util";
 import { readFileSync, realpathSync, statSync, existsSync, unlinkSync, writeFileSync, openSync, closeSync, mkdirSync, lstatSync, rmSync, symlinkSync, readlinkSync } from "fs";
 import { createInterface } from "readline/promises";
@ -173,9 +173,7 @@ function setIndexName(name: string | null): void {
  let normalizedName = name;
  // Normalize relative paths to prevent malformed database paths
  if (name && name.includes('/')) {
-    const { resolve } = require('path');
-    const { cwd } = require('process');
-    const absolutePath = resolve(cwd(), name);
+    const absolutePath = pathResolve(process.cwd(), name);
    // Replace path separators with underscores to create a valid filename
    normalizedName = absolutePath.replace(/\//g, '_').replace(/^_/, '');
  }
@ -1681,10 +1679,14 @@ function parseChunkStrategy(value: unknown): ChunkStrategy | undefined {
  throw new Error(`--chunk-strategy must be "auto" or "regex" (got "${s}")`);
 }

+export function resolveEmbedModelForCli(): string { // Embedding model for CLI commands: QMD_EMBED_MODEL when set, otherwise DEFAULT_EMBED_MODEL_URI
+  return process.env.QMD_EMBED_MODEL ?? DEFAULT_EMBED_MODEL_URI; // ?? falls back only when the variable is unset; an empty string is returned as-is
+}
+
 async function vectorIndex(
-  model: string = DEFAULT_EMBED_MODEL_URI,
+  model: string = resolveEmbedModelForCli(),
  force: boolean = false,
-  batchOptions?: { maxDocsPerBatch?: number; maxBatchBytes?: number; chunkStrategy?: ChunkStrategy },
+  batchOptions?: { maxDocsPerBatch?: number; maxBatchBytes?: number; chunkStrategy?: ChunkStrategy; collection?: string },
 ): Promise<void> {
  const storeInstance = getStore();
  const db = storeInstance.db;
@ -1694,7 +1696,7 @@ async function vectorIndex(
  }

  // Check if there's work to do before starting
-  const hashesToEmbed = getHashesNeedingEmbedding(db);
+  const hashesToEmbed = getHashesNeedingEmbedding(db, batchOptions?.collection);
  if (hashesToEmbed === 0 && !force) {
    console.log(`${c.green}✓ All content hashes already have embeddings.${c.reset}`);
    closeDb();
@ -1715,6 +1717,7 @@ async function vectorIndex(
  const result = await generateEmbeddings(storeInstance, {
    force,
    model,
+    collection: batchOptions?.collection,
    maxDocsPerBatch: batchOptions?.maxDocsPerBatch,
    maxBatchBytes: batchOptions?.maxBatchBytes,
    chunkStrategy: batchOptions?.chunkStrategy,
@ -2727,7 +2730,7 @@ function showHelp(): void {
  console.log("Maintenance:");
  console.log("  qmd status                    - View index + collection health");
  console.log("  qmd update [--pull]           - Re-index collections (optionally git pull first)");
-  console.log("  qmd embed [-f]                - Generate/refresh vector embeddings");
+  console.log("  qmd embed [-f] [-c <name>]    - Generate/refresh vector embeddings");
  console.log("    --max-docs-per-batch <n>    - Cap docs loaded into memory per embedding batch");
  console.log("    --max-batch-mb <n>          - Cap UTF-8 MB loaded into memory per embedding batch");
  console.log("  qmd cleanup                   - Clear caches, vacuum DB");
@ -3120,10 +3123,17 @@ if (isMain) {
        const maxDocsPerBatch = parseEmbedBatchOption("maxDocsPerBatch", cli.values["max-docs-per-batch"]);
        const maxBatchMb = parseEmbedBatchOption("maxBatchBytes", cli.values["max-batch-mb"]);
        const embedChunkStrategy = parseChunkStrategy(cli.values["chunk-strategy"]);
-        await vectorIndex(DEFAULT_EMBED_MODEL_URI, !!cli.values.force, {
+        // Validate -c against configured collections before dispatching, so a
+        // typo errors with "Collection not found: X" instead of silently
+        // reporting success because no pending docs match a nonexistent name.
+        // embed operates on a single collection; only the first value is used.
+        const embedValidatedCollections = resolveCollectionFilter(cli.opts.collection, false);
+        const embedCollection = embedValidatedCollections[0];
+        await vectorIndex(resolveEmbedModelForCli(), !!cli.values.force, {
          maxDocsPerBatch,
          maxBatchBytes: maxBatchMb === undefined ? undefined : maxBatchMb * 1024 * 1024,
          chunkStrategy: embedChunkStrategy,
+          collection: embedCollection,
        });
      } catch (error) {
        console.error(error instanceof Error ? error.message : String(error));
@ -3247,9 +3257,10 @@ if (isMain) {
          const logPath = resolve(cacheDir, "mcp.log");
          const logFd = openSync(logPath, "w"); // truncate — fresh log per daemon run
          const selfPath = fileURLToPath(import.meta.url);
+          const indexArgs = cli.values.index ? ["--index", String(cli.values.index)] : [];
          const spawnArgs = selfPath.endsWith(".ts")
-            ? ["--import", pathJoin(dirname(selfPath), "..", "..", "node_modules", "tsx", "dist", "esm", "index.mjs"), selfPath, "mcp", "--http", "--port", String(port)]
-            : [selfPath, "mcp", "--http", "--port", String(port)];
+            ? ["--import", pathJoin(dirname(selfPath), "..", "..", "node_modules", "tsx", "dist", "esm", "index.mjs"), selfPath, ...indexArgs, "mcp", "--http", "--port", String(port)]
+            : [selfPath, ...indexArgs, "mcp", "--http", "--port", String(port)];
          const child = nodeSpawn(process.execPath, spawnArgs, {
            stdio: ["ignore", logFd, logFd],
            detached: true,
@ -3269,7 +3280,7 @@ if (isMain) {
        process.removeAllListeners("SIGINT");
        const { startMcpHttpServer } = await import("../mcp/server.js");
        try {
-          await startMcpHttpServer(port);
+          await startMcpHttpServer(port, { dbPath: getDbPath() });
        } catch (e: any) {
          if (e?.code === "EADDRINUSE") {
            console.error(`Port ${port} already in use. Try a different port with --port.`);
@ -3280,7 +3291,7 @@ if (isMain) {
      } else {
        // Default: stdio transport
        const { startMcpServer } = await import("../mcp/server.js");
-        await startMcpServer();
+        await startMcpServer({ dbPath: getDbPath() });
      }
      break;
    }
--- a/src/collections.ts
+++ b/src/collections.ts
@ -6,8 +6,8 @@
 */

 import { existsSync, mkdirSync, readFileSync, writeFileSync } from "fs";
-import { join, dirname } from "path";
-import { homedir } from "os";
+import { join, dirname, resolve } from "path";
+import { qmdHomedir } from "./paths.js";
 import YAML from "yaml";

 // ============================================================================
@ -101,9 +101,7 @@ export function setConfigSource(source?: { configPath?: string; config?: Collect
 export function setConfigIndexName(name: string): void {
  // Resolve relative paths to absolute paths and sanitize for use as filename
  if (name.includes('/')) {
-    const { resolve } = require('path');
-    const { cwd } = require('process');
-    const absolutePath = resolve(cwd(), name);
+    const absolutePath = resolve(process.cwd(), name);
    // Replace path separators with underscores to create a valid filename
    currentIndexName = absolutePath.replace(/\//g, '_').replace(/^_/, '');
  } else {
@ -120,7 +118,7 @@ function getConfigDir(): string {
  if (process.env.XDG_CONFIG_HOME) {
    return join(process.env.XDG_CONFIG_HOME, "qmd");
  }
-  return join(homedir(), ".config", "qmd");
+  return join(qmdHomedir(), ".config", "qmd");
 }

 function getConfigFilePath(): string {
--- a/src/index.ts
+++ b/src/index.ts
@ -290,6 +290,8 @@ export interface QMDStore {
  embed(options?: {
    force?: boolean;
    model?: string;
+    /** Restrict embedding to documents in one collection. */
+    collection?: string;
    maxDocsPerBatch?: number;
    maxBatchBytes?: number;
    chunkStrategy?: ChunkStrategy;
@ -516,6 +518,7 @@ export async function createStore(options: StoreOptions): Promise<QMDStore> {
      return generateEmbeddings(internal, {
        force: embedOpts?.force,
        model: embedOpts?.model,
+        collection: embedOpts?.collection,
        maxDocsPerBatch: embedOpts?.maxDocsPerBatch,
        maxBatchBytes: embedOpts?.maxBatchBytes,
        chunkStrategy: embedOpts?.chunkStrategy,
--- a/src/llm.ts
+++ b/src/llm.ts
@ -4,16 +4,28 @@
 * Provides embeddings, text generation, and reranking using local GGUF models.
 */

-import {
-  getLlama,
-  resolveModelFile,
-  LlamaChatSession,
-  LlamaLogLevel,
-  type Llama,
-  type LlamaModel,
-  type LlamaEmbeddingContext,
-  type Token as LlamaToken,
+import type {
+  Llama,
+  LlamaModel,
+  LlamaEmbeddingContext,
+  Token as LlamaToken,
 } from "node-llama-cpp";
+
+type NodeLlamaCppModule = {
+  getLlama: (options: Record<string, unknown>) => Promise<Llama>;
+  resolveModelFile: (model: string, cacheDir: string) => Promise<string>;
+  LlamaChatSession: new (options: { contextSequence: unknown }) => {
+    prompt: (prompt: string, options?: Record<string, unknown>) => Promise<string>;
+  };
+  LlamaLogLevel: { error: unknown };
+};
+
+let nodeLlamaCppImport: Promise<NodeLlamaCppModule> | null = null; // memoized import promise; stays null until loadNodeLlamaCpp() is first called
+async function loadNodeLlamaCpp(): Promise<NodeLlamaCppModule> { // Import node-llama-cpp on first use and hand back the same promise afterwards
+  nodeLlamaCppImport ??= import("node-llama-cpp") as Promise<NodeLlamaCppModule>; // the cast narrows the module to the members declared in NodeLlamaCppModule
+  return nodeLlamaCppImport; // a rejected import stays cached, so later callers observe the same failure
+}
+
 import { homedir } from "os";
 import { join } from "path";
 import { existsSync, mkdirSync, statSync, unlinkSync, readdirSync, readFileSync, writeFileSync, openSync, readSync, closeSync } from "fs";
@ -344,6 +356,7 @@ export async function pullModels(
      }
    }

+    const { resolveModelFile } = await loadNodeLlamaCpp();
    const path = await resolveModelFile(model, cacheDir);
    validateGgufFile(path, model);
    const sizeBytes = existsSync(path) ? statSync(path).size : 0;
@ -438,7 +451,41 @@ export type LlamaCppConfig = {
 const DEFAULT_INACTIVITY_TIMEOUT_MS = 5 * 60 * 1000;
 const DEFAULT_EXPAND_CONTEXT_SIZE = 2048;

-type LlamaGpuMode = "auto" | "metal" | "vulkan" | "cuda" | false;
+export type LlamaGpuMode = "auto" | "metal" | "vulkan" | "cuda" | false;
+
+type ParallelismOptions = { // inputs to resolveSafeParallelism()
+  gpu: string | false; // active backend; only the value "cuda" is special-cased
+  platform?: NodeJS.Platform; // falls back to process.platform when omitted
+  computed: number; // automatically derived context count, used when nothing overrides it
+  envValue?: string; // raw override text forwarded to resolveParallelismOverride()
+};
+
+export function resolveParallelismOverride(envValue = process.env.QMD_EMBED_PARALLELISM): number | undefined { // Parse the parallelism override; undefined means there is no usable override
+  const normalized = envValue?.trim() ?? "";
+  if (!normalized) return undefined; // unset or whitespace-only: no override and no warning
+
+  const parsed = Number(normalized);
+  if (!Number.isInteger(parsed) || parsed < 1) { // rejects non-numeric text, fractions, zero and negatives
+    process.stderr.write(`QMD Warning: invalid QMD_EMBED_PARALLELISM="${envValue}", using automatic parallelism.\n`);
+    return undefined;
+  }
+
+  return Math.min(8, parsed); // values above 8 are clamped to 8 rather than rejected
+}
+
+export function resolveSafeParallelism(options: ParallelismOptions): number { // Choose the context count: explicit override first, then the Windows+CUDA cap, then options.computed
+  const override = resolveParallelismOverride(options.envValue); // an undefined envValue lets the callee's default read QMD_EMBED_PARALLELISM
+  if (override !== undefined) return override; // an explicit override also bypasses the Windows CUDA cap below
+
+  // node-llama-cpp/llama.cpp CUDA on Windows is unstable with multiple
+  // simultaneous contexts (ggml-cuda.cu:98 in #519). Vulkan and CPU do not
+  // show the same failure mode, so only serialize Windows CUDA by default.
+  if ((options.platform ?? process.platform) === "win32" && options.gpu === "cuda") {
+    return 1;
+  }
+
+  return Math.max(1, options.computed); // floor at one context
+}

 export function resolveLlamaGpuMode(envValue = process.env.QMD_LLAMA_GPU): LlamaGpuMode {
  const normalized = envValue?.trim().toLowerCase() ?? "";
@ -619,6 +666,7 @@ export class LlamaCpp implements LLM {
    if (!this.llama) {
      const gpuMode = resolveLlamaGpuMode();

+      const { getLlama, LlamaLogLevel } = await loadNodeLlamaCpp();
      const loadLlama = async (gpu: LlamaGpuMode) =>
        await getLlama({
          build: allowBuild ? "autoAttempt" : "never",
@ -661,6 +709,7 @@ export class LlamaCpp implements LLM {
  private async resolveModel(modelUri: string): Promise<string> {
    this.ensureModelCacheDir();
    // resolveModelFile handles HF URIs and downloads to the cache dir
+    const { resolveModelFile } = await loadNodeLlamaCpp();
    const modelPath = await resolveModelFile(modelUri, this.modelCacheDir);
    validateGgufFile(modelPath, modelUri);
    return modelPath;
@ -711,16 +760,18 @@ export class LlamaCpp implements LLM {
        const vram = await llama.getVramState();
        const freeMB = vram.free / (1024 * 1024);
        const maxByVram = Math.floor((freeMB * 0.25) / perContextMB);
-        return Math.max(1, Math.min(8, maxByVram));
+        const computed = Math.max(1, Math.min(8, maxByVram));
+        return resolveSafeParallelism({ gpu: llama.gpu, computed });
      } catch {
-        return 2;
+        return resolveSafeParallelism({ gpu: llama.gpu, computed: 2 });
      }
    }

    // CPU: split cores across contexts. At least 4 threads per context.
    const cores = llama.cpuMathCores || 4;
    const maxContexts = Math.floor(cores / 4);
-    return Math.max(1, Math.min(4, maxContexts));
+    const computed = Math.max(1, Math.min(4, maxContexts));
+    return resolveSafeParallelism({ gpu: false, computed });
  }

  /**
@ -1079,6 +1130,7 @@ export class LlamaCpp implements LLM {
    // Create fresh context -> sequence -> session for each call
    const context = await this.generateModel!.createContext();
    const sequence = context.getSequence();
+    const { LlamaChatSession } = await loadNodeLlamaCpp();
    const session = new LlamaChatSession({ contextSequence: sequence });

    const maxTokens = options.maxTokens ?? 150;
@ -1158,6 +1210,7 @@ export class LlamaCpp implements LLM {
      contextSize: this.expandContextSize,
    });
    const sequence = genContext.getSequence();
+    const { LlamaChatSession } = await loadNodeLlamaCpp();
    const session = new LlamaChatSession({ contextSequence: sequence });

    try {
--- a/src/mcp/server.ts
+++ b/src/mcp/server.ts
@ -538,7 +538,11 @@ Intent-aware lex (C++ performance, not sports):
 // Transport: stdio (default)
 // =============================================================================

-export async function startMcpServer(): Promise<void> {
+export type McpStartupOptions = { // startup options accepted by startMcpServer() and startMcpHttpServer()
+  dbPath?: string; // database path passed to createStore(); when omitted the servers use getDefaultDbPath()
+};
+
+export async function startMcpServer(options: McpStartupOptions = {}): Promise<void> {
  // Opt into production mode when the MCP server is actually started, not
  // when this module is merely imported for its exports. Importing the module
  // at the top level flipped the global production flag and broke test
@ -547,7 +551,7 @@ export async function startMcpServer(): Promise<void> {
  enableProductionMode();
  const configPath = getConfigPath();
  const store = await createStore({
-    dbPath: getDefaultDbPath(),
+    dbPath: options.dbPath ?? getDefaultDbPath(),
    ...(existsSync(configPath) ? { configPath } : {}),
  });
  const server = await createMcpServer(store);
@ -569,14 +573,17 @@ export type HttpServerHandle = {
 * Start MCP server over Streamable HTTP (JSON responses, no SSE).
 * Binds to localhost only. Returns a handle for shutdown and port discovery.
 */
-export async function startMcpHttpServer(port: number, options?: { quiet?: boolean }): Promise<HttpServerHandle> {
+export async function startMcpHttpServer(
+  port: number,
+  options: ({ quiet?: boolean } & McpStartupOptions) = {},
+): Promise<HttpServerHandle> {
  // See startMcpServer() for the rationale — flip production mode here so the
  // HTTP transport resolves the real database path, without leaking state into
  // callers that only import this module for its exports (e.g. tests).
  enableProductionMode();
  const configPath = getConfigPath();
  const store = await createStore({
-    dbPath: getDefaultDbPath(),
+    dbPath: options.dbPath ?? getDefaultDbPath(),
    ...(existsSync(configPath) ? { configPath } : {}),
  });

@ -686,6 +693,7 @@ export async function startMcpHttpServer(port: number, options?: { quiet?: boole
          limit: params.limit ?? 10,
          minScore: params.minScore ?? 0,
          intent: params.intent,
+          rerank: params.rerank,
        });

        // Use first lex or vec query for snippet extraction
--- a/src/paths.ts
+++ b/src/paths.ts
@ -0,0 +1,5 @@
+import { homedir as osHomedir } from "node:os";
+
+export function qmdHomedir(): string { // Resolve the home directory: first non-empty of HOME, USERPROFILE, os.homedir(), then "/tmp"
+  return process.env.HOME || process.env.USERPROFILE || osHomedir() || "/tmp"; // || (not ??) so empty-string values also fall through to the next candidate
+}
--- a/src/store.ts
+++ b/src/store.ts
@ -18,6 +18,7 @@ import { createHash } from "crypto";
 import { readFileSync, realpathSync, statSync, mkdirSync } from "node:fs";
 // Note: node:path resolve is not imported — we export our own cross-platform resolve()
 import fastGlob from "fast-glob";
+import { qmdHomedir } from "./paths.js";
 import {
  LlamaCpp,
  getDefaultLlamaCpp,
@ -38,7 +39,6 @@ import type {
 // Configuration
 // =============================================================================

-const HOME = process.env.HOME || process.env.USERPROFILE || "/tmp";
 export const DEFAULT_EMBED_MODEL = "embeddinggemma";
 export const DEFAULT_RERANK_MODEL = "ExpedientFalcon/qwen3-reranker:0.6b-q8_0";
 export const DEFAULT_QUERY_MODEL = "Qwen/Qwen3-1.7B";
@ -334,7 +334,7 @@ export type ExpandedQuery = {
 // =============================================================================

 export function homedir(): string {
-  return HOME;
+  return qmdHomedir();
 }

 /**
@ -733,6 +733,73 @@ export function verifySqliteVecLoaded(db: Database): void {

 let _sqliteVecAvailable: boolean | null = null;

+// Matches one character from the Han, Hiragana, Katakana, or Hangul scripts.
+// Deliberately has no `g` flag, so `.test()` keeps no lastIndex state between calls.
+const CJK_CHAR_PATTERN = /[\p{Script=Han}\p{Script=Hiragana}\p{Script=Katakana}\p{Script=Hangul}]/u;
+// Matches every maximal run of consecutive characters from those same scripts.
+const CJK_RUN_PATTERN = /[\p{Script=Han}\p{Script=Hiragana}\p{Script=Katakana}\p{Script=Hangul}]+/gu;
+// Stored in store_config under 'fts_cjk_normalized_version'. The FTS index is
+// rebuilt whenever the stored value is not exactly this string.
+const FTS_CJK_NORMALIZED_VERSION = "1";
+
+/**
+ * Space out every CJK character in `text` so FTS5 sees each one as a token.
+ *
+ * The unicode61 tokenizer does not split CJK text into searchable words. Each
+ * run of Han/Hiragana/Katakana/Hangul characters is therefore replaced by the
+ * same characters separated by single spaces, with one extra space on either
+ * side of the run. Text outside those runs is returned unchanged, so Latin
+ * text still goes through the default tokenizer as before.
+ *
+ * @param text Raw text destined for (or queried against) the FTS index.
+ * @returns `text` with every CJK run expanded character by character.
+ */
+export function normalizeCjkForFTS(text: string): string {
+  return text.replace(CJK_RUN_PATTERN, (cjkRun) => {
+    // Array.from iterates by code point, so astral characters stay intact.
+    const spaced = Array.from(cjkRun).join(' ');
+    return ` ${spaced} `;
+  });
+}
+
+/** Report whether `text` holds at least one Han, Hiragana, Katakana, or Hangul character. */
+function containsCjk(text: string): boolean {
+  return text.search(CJK_CHAR_PATTERN) !== -1;
+}
+
+/**
+ * Build the inner text of an FTS5 phrase from a user-supplied phrase.
+ *
+ * CJK runs are spaced out first, then the text is split on whitespace and
+ * each piece goes through sanitizeFTS5Term(). Pieces that sanitize to an
+ * empty value are dropped; the survivors are joined with single spaces.
+ *
+ * @returns The sanitized phrase body, or "" when nothing usable remains.
+ */
+function sanitizeFTS5Phrase(phrase: string): string {
+  const cleaned: string[] = [];
+  for (const piece of normalizeCjkForFTS(phrase).split(/\s+/)) {
+    const term = sanitizeFTS5Term(piece);
+    if (term) cleaned.push(term);
+  }
+  return cleaned.join(' ');
+}
+
+/**
+ * One-time migration that refills documents_fts with CJK-normalized text.
+ *
+ * Does nothing when store_config already records the current
+ * FTS_CJK_NORMALIZED_VERSION. Otherwise it empties documents_fts (recreating
+ * the virtual table if the bulk delete fails), re-inserts one normalized row
+ * per active document, and then records the version.
+ *
+ * The version row is written last, after the refill has returned. If any
+ * earlier step throws, the stamp is not written and the next call repeats
+ * the whole rebuild.
+ */
+function rebuildFTSForCjkNormalization(db: Database): void {
+  // Skip the rebuild when the stored marker already equals the current version.
+  const version = db.prepare(`SELECT value FROM store_config WHERE key = 'fts_cjk_normalized_version'`).get() as { value?: string } | undefined;
+  if (version?.value === FTS_CJK_NORMALIZED_VERSION) return;
+
+  try {
+    db.exec(`DELETE FROM documents_fts WHERE rowid >= 0`);
+  } catch {
+    // Some older/corrupt FTS5 shadow-table states can reject bulk deletes even
+    // though reads still work. Recreate the virtual table; documents_fts is a
+    // derived index, so rebuilding it from documents/content is safe.
+    db.exec(`DROP TABLE IF EXISTS documents_fts`);
+    db.exec(`
+      CREATE VIRTUAL TABLE documents_fts USING fts5(
+        filepath, title, body,
+        tokenize='porter unicode61'
+      )
+    `);
+  }
+  // Load every active document together with its body. Rows are fully
+  // materialized here, before the insert transaction starts.
+  const rows = db.prepare(`
+    SELECT d.id, d.collection, d.path, d.title, content.doc as body
+    FROM documents d
+    JOIN content ON content.hash = d.hash
+    WHERE d.active = 1
+  `).all() as { id: number; collection: string; path: string; title: string; body: string }[];
+  const insert = db.prepare(`INSERT INTO documents_fts(rowid, filepath, title, body) VALUES (?, ?, ?, ?)`);
+  // All inserts share one transaction. The FTS rowid is the document id and
+  // filepath is "<collection>/<path>"; every column is CJK-normalized.
+  const rebuild = db.transaction(() => {
+    for (const row of rows) {
+      insert.run(
+        row.id,
+        normalizeCjkForFTS(`${row.collection}/${row.path}`),
+        normalizeCjkForFTS(row.title),
+        normalizeCjkForFTS(row.body)
+      );
+    }
+  });
+  rebuild();
+  // Record completion so later calls return early at the version check above.
+  db.prepare(`
+    INSERT OR REPLACE INTO store_config(key, value)
+    VALUES ('fts_cjk_normalized_version', ?)
+  `).run(FTS_CJK_NORMALIZED_VERSION);
+}
+
 function initializeDatabase(db: Database): void {
  try {
    loadSqliteVec(db);
@ -838,9 +905,12 @@ function initializeDatabase(db: Database): void {
    )
  `);

-  // Triggers to keep FTS in sync
+  // Triggers keep FTS in sync for callers that write directly to documents.
+  // Production indexing paths rebuild entries in TypeScript so CJK text can be
+  // normalized before it reaches the unicode61 tokenizer.
+  db.exec(`DROP TRIGGER IF EXISTS documents_ai`);
  db.exec(`
-    CREATE TRIGGER IF NOT EXISTS documents_ai AFTER INSERT ON documents
+    CREATE TRIGGER documents_ai AFTER INSERT ON documents
    WHEN new.active = 1
    BEGIN
      INSERT INTO documents_fts(rowid, filepath, title, body)
@ -853,14 +923,16 @@ function initializeDatabase(db: Database): void {
    END
  `);

+  db.exec(`DROP TRIGGER IF EXISTS documents_ad`);
  db.exec(`
-    CREATE TRIGGER IF NOT EXISTS documents_ad AFTER DELETE ON documents BEGIN
+    CREATE TRIGGER documents_ad AFTER DELETE ON documents BEGIN
      DELETE FROM documents_fts WHERE rowid = old.id;
    END
  `);

+  db.exec(`DROP TRIGGER IF EXISTS documents_au`);
  db.exec(`
-    CREATE TRIGGER IF NOT EXISTS documents_au AFTER UPDATE ON documents
+    CREATE TRIGGER documents_au AFTER UPDATE ON documents
    BEGIN
      -- Delete from FTS if no longer active
      DELETE FROM documents_fts WHERE rowid = old.id AND new.active = 0;
@ -875,6 +947,8 @@ function initializeDatabase(db: Database): void {
      WHERE new.active = 1;
    END
  `);
+
+  rebuildFTSForCjkNormalization(db);
 }

 // =============================================================================
@ -1300,6 +1374,11 @@ export type EmbedResult = {
 export type EmbedOptions = {
  force?: boolean;
  model?: string;
+  /**
+   * Restrict embedding to documents in a single collection.
+   * When omitted, all pending documents across every collection are embedded.
+   */
+  collection?: string;
  maxDocsPerBatch?: number;
  maxBatchBytes?: number;
  chunkStrategy?: ChunkStrategy;
@ -1341,16 +1420,18 @@ function resolveEmbedOptions(options?: EmbedOptions): Required<Pick<EmbedOptions
  };
 }

-function getPendingEmbeddingDocs(db: Database): PendingEmbeddingDoc[] {
-  return db.prepare(`
+/**
+ * List content hashes that still need embeddings.
+ *
+ * A hash is pending when an active document references it and content_vectors
+ * has no row for it with seq = 0. Each hash appears once, with the smallest
+ * path among the matching documents and the byte length of its content, and
+ * results are ordered by that path.
+ *
+ * @param db Open database handle.
+ * @param collection When truthy, only documents in this collection are
+ *   considered. An omitted or empty value applies no collection filter.
+ */
+function getPendingEmbeddingDocs(db: Database, collection?: string): PendingEmbeddingDoc[] {
+  // The optional filter is spliced into the SQL as a fixed fragment; the
+  // collection name itself is always passed as a bound parameter.
+  const collectionFilter = collection ? `AND d.collection = ?` : ``;
+  const stmt = db.prepare(`
    SELECT d.hash, MIN(d.path) as path, length(CAST(c.doc AS BLOB)) as bytes
    FROM documents d
    JOIN content c ON d.hash = c.hash
    LEFT JOIN content_vectors v ON d.hash = v.hash AND v.seq = 0
-    WHERE d.active = 1 AND v.hash IS NULL
+    WHERE d.active = 1 AND v.hash IS NULL ${collectionFilter}
    GROUP BY d.hash
    ORDER BY MIN(d.path)
-  `).all() as PendingEmbeddingDoc[];
+  `);
+  // Bind the parameter only when the filter fragment (and its `?`) is present.
+  return (collection ? stmt.all(collection) : stmt.all()) as PendingEmbeddingDoc[];
 }

 function buildEmbeddingBatches(
@ -1411,16 +1492,17 @@ export async function generateEmbeddings(
  options?: EmbedOptions
 ): Promise<EmbedResult> {
  const db = store.db;
-  const model = options?.model ?? DEFAULT_EMBED_MODEL;
+  const llm = getLlm(store);
+  const model = options?.model ?? llm.embedModelName ?? DEFAULT_EMBED_MODEL;
  const now = new Date().toISOString();
  const { maxDocsPerBatch, maxBatchBytes } = resolveEmbedOptions(options);
  const encoder = new TextEncoder();

  if (options?.force) {
-    clearAllEmbeddings(db);
+    clearAllEmbeddings(db, options?.collection);
  }

-  const docsToEmbed = getPendingEmbeddingDocs(db);
+  const docsToEmbed = getPendingEmbeddingDocs(db, options?.collection);

  if (docsToEmbed.length === 0) {
    return { docsProcessed: 0, chunksEmbedded: 0, errors: 0, durationMs: 0 };
@ -1430,8 +1512,7 @@ export async function generateEmbeddings(
  const startTime = Date.now();

  // Use store's LlamaCpp or global singleton, wrapped in a session
-  const llm = getLlm(store);
-  const embedModelUri = llm.embedModelName;
+  const embedModelUri = model;

  // Create a session manager for this llm instance
  const result = await withLLMSessionForLlm(llm, async (session) => {
@ -1868,13 +1949,15 @@ export type IndexStatus = {
 // Index health
 // =============================================================================

-export function getHashesNeedingEmbedding(db: Database): number {
-  const result = db.prepare(`
+/**
+ * Count distinct content hashes that still need embeddings.
+ *
+ * A hash counts when an active document references it and content_vectors has
+ * no row for it with seq = 0.
+ *
+ * @param db Open database handle.
+ * @param collection When truthy, only documents in this collection are
+ *   counted. An omitted or empty value counts across all collections.
+ * @returns The number of distinct pending hashes.
+ */
+export function getHashesNeedingEmbedding(db: Database, collection?: string): number {
+  // Fixed SQL fragment only; the collection name is passed as a bound parameter.
+  const collectionFilter = collection ? `AND d.collection = ?` : ``;
+  const stmt = db.prepare(`
    SELECT COUNT(DISTINCT d.hash) as count
    FROM documents d
    LEFT JOIN content_vectors v ON d.hash = v.hash AND v.seq = 0
-    WHERE d.active = 1 AND v.hash IS NULL
-  `).get() as { count: number };
+    WHERE d.active = 1 AND v.hash IS NULL ${collectionFilter}
+  `);
+  // Bind the parameter only when the filter fragment (and its `?`) is present.
+  const result = (collection ? stmt.get(collection) : stmt.get()) as { count: number };
  return result.count;
 }

@ -1949,13 +2032,15 @@ export function deleteInactiveDocuments(db: Database): number {
 }

 /**
- * Remove orphaned content hashes that are not referenced by any active document.
+ * Remove orphaned content hashes that are not referenced by any document.
+ * Inactive documents are soft-deleted tombstones, so their content rows must
+ * remain referenced until deleteInactiveDocuments() hard-deletes them.
 * Returns the number of orphaned content hashes deleted.
 */
 export function cleanupOrphanedContent(db: Database): number {
+  // NOTE(review): `NOT IN` matches nothing if the subquery yields a NULL hash;
+  // presumably documents.hash is declared NOT NULL — confirm against the schema.
  const result = db.prepare(`
    DELETE FROM content
-    WHERE hash NOT IN (SELECT DISTINCT hash FROM documents WHERE active = 1)
+    WHERE hash NOT IN (SELECT DISTINCT hash FROM documents)
  `).run();
  return result.changes;
 }
@ -2077,6 +2162,28 @@ export function insertContent(db: Database, hash: string, content: string, creat
    .run(hash, content, createdAt);
 }

+/**
+ * Replace the FTS row of one document with a CJK-normalized copy.
+ *
+ * Any documents_fts row with rowid = `documentId` is always deleted. A new row
+ * is inserted only when the document exists, is active, and has a matching
+ * content row; otherwise the document ends up absent from the index.
+ *
+ * @param db Open database handle.
+ * @param documentId Primary key of the document (also used as the FTS rowid).
+ */
+function rebuildDocumentFTS(db: Database, documentId: number): void {
+  type FtsSource = { id: number; collection: string; path: string; title: string; body: string };
+
+  // Read the source data before touching the index.
+  const source = db.prepare(`
+    SELECT d.id, d.collection, d.path, d.title, content.doc as body
+    FROM documents d
+    JOIN content ON content.hash = d.hash
+    WHERE d.id = ? AND d.active = 1
+  `).get(documentId) as FtsSource | undefined;
+
+  // Drop the stale entry unconditionally, even if nothing will replace it.
+  db.prepare(`DELETE FROM documents_fts WHERE rowid = ?`).run(documentId);
+  if (source === undefined) return;
+
+  const reinsert = db.prepare(`
+    INSERT INTO documents_fts(rowid, filepath, title, body)
+    VALUES (?, ?, ?, ?)
+  `);
+  const filepath = normalizeCjkForFTS(`${source.collection}/${source.path}`);
+  const title = normalizeCjkForFTS(source.title);
+  const body = normalizeCjkForFTS(source.body);
+  reinsert.run(source.id, filepath, title, body);
+}
+
 /**
 * Insert a new document into the documents table.
 */
@ -2098,6 +2205,9 @@ export function insertDocument(
      modified_at = excluded.modified_at,
      active = 1
  `).run(collectionName, path, title, hash, createdAt, modifiedAt);
+
+  const row = db.prepare(`SELECT id FROM documents WHERE collection = ? AND path = ?`).get(collectionName, path) as { id: number } | undefined;
+  if (row) rebuildDocumentFTS(db, row.id);
 }

 /**
@ -2116,8 +2226,8 @@ export function findActiveDocument(
 }

 /**
- * Find an active document, falling back to a legacy lowercase path.
- * If found under the legacy path, renames it in-place and rebuilds the
+ * Find an active document, falling back to a case-insensitive path match.
+ * If found under a different casing, renames it in-place and rebuilds the
 * FTS entry. Embeddings are keyed by content hash, so the rename is
 * safe — no re-embedding required.
 *
@ -2132,10 +2242,12 @@ export function findOrMigrateLegacyDocument(
  const existing = findActiveDocument(db, collectionName, path);
  if (existing) return existing;

-  const legacyPath = path.toLowerCase();
-  if (legacyPath === path) return null;
-
-  const legacy = findActiveDocument(db, collectionName, legacyPath);
+  const legacy = db.prepare(`
+    SELECT id, hash, title FROM documents
+    WHERE collection = ? AND path COLLATE NOCASE = ? AND active = 1
+    ORDER BY id
+    LIMIT 1
+  `).get(collectionName, path) as { id: number; hash: string; title: string } | undefined;
  if (!legacy) return null;

  // Wrap rename + FTS rebuild in a transaction for atomicity.
@ -2148,15 +2260,7 @@ export function findOrMigrateLegacyDocument(

    if (result.changes === 0) return false;

-    // FTS5 does not reliably update via the documents_au trigger's
-    // INSERT OR REPLACE. Manually rebuild the FTS entry.
-    db.prepare(`DELETE FROM documents_fts WHERE rowid = ?`).run(legacy.id);
-    db.prepare(`
-      INSERT INTO documents_fts(rowid, filepath, title, body)
-      SELECT id, collection || '/' || path, title,
-             (SELECT doc FROM content WHERE hash = documents.hash)
-      FROM documents WHERE id = ?
-    `).run(legacy.id);
+    rebuildDocumentFTS(db, legacy.id);

    return true;
  });
@ -2177,6 +2281,7 @@ export function updateDocumentTitle(
 ): void {
  db.prepare(`UPDATE documents SET title = ?, modified_at = ? WHERE id = ?`)
    .run(title, modifiedAt, documentId);
+  rebuildDocumentFTS(db, documentId);
 }

 /**
@ -2192,6 +2297,7 @@ export function updateDocument(
 ): void {
  db.prepare(`UPDATE documents SET title = ?, hash = ?, modified_at = ? WHERE id = ?`)
    .run(title, hash, modifiedAt, documentId);
+  rebuildDocumentFTS(db, documentId);
 }

 /**
@ -2940,7 +3046,7 @@ function buildFTS5Query(query: string): string | null {
      const phrase = s.slice(start, i).trim();
      i++; // skip closing quote
      if (phrase.length > 0) {
-        const sanitized = phrase.split(/\s+/).map(t => sanitizeFTS5Term(t)).filter(t => t).join(' ');
+        const sanitized = sanitizeFTS5Phrase(phrase);
        if (sanitized) {
          const ftsPhrase = `"${sanitized}"`;  // Exact phrase, no prefix match
          if (negated) {
@ -2968,6 +3074,16 @@ function buildFTS5Query(query: string): string | null {
            positive.push(ftsPhrase);
          }
        }
+      } else if (containsCjk(term)) {
+        const sanitized = sanitizeFTS5Phrase(term);
+        if (sanitized) {
+          const ftsPhrase = `"${sanitized}"`;  // CJK phrase over character tokens
+          if (negated) {
+            negative.push(ftsPhrase);
+          } else {
+            positive.push(ftsPhrase);
+          }
+        }
      } else {
        const sanitized = sanitizeFTS5Term(term);
        if (sanitized) {
@ -3212,12 +3328,68 @@ export function getHashesForEmbedding(db: Database): { hash: string; body: strin
 }

 /**
- * Clear all embeddings from the database (force re-index).
- * Deletes all rows from content_vectors and drops the vectors_vec table.
+ * Clear embeddings for the whole index, or just for one collection.
+ *
+ * When `collection` is omitted the entire content_vectors table is emptied and
+ * the vectors_vec virtual table is dropped (it is recreated with the right
+ * dimensions on the next embed run).
+ *
+ * When `collection` is provided, only vectors whose hash is referenced
+ * exclusively by active documents in that collection are removed. Hashes
+ * shared with active documents in other collections are left in place so
+ * vector search keeps working there (content_vectors is keyed globally by
+ * content hash; identical document bodies across collections share a row).
+ * vectors_vec is preserved so other collections keep working unless the scoped
+ * clear empties content_vectors entirely, in which case it is dropped so the
+ * next embed can recreate the table with the current dimensions.
+ *
+ * @param db Open database handle.
+ * @param collection Collection to scope the clear to. An omitted or empty
+ *   value clears every embedding.
 */
-export function clearAllEmbeddings(db: Database): void {
-  db.exec(`DELETE FROM content_vectors`);
-  db.exec(`DROP TABLE IF EXISTS vectors_vec`);
+export function clearAllEmbeddings(db: Database, collection?: string): void {
+  // Unscoped path: wipe everything. A falsy collection ("" included) lands here.
+  if (!collection) {
+    db.exec(`DELETE FROM content_vectors`);
+    db.exec(`DROP TABLE IF EXISTS vectors_vec`);
+    return;
+  }
+
+  // Hashes used by an active document in `collection` and by no active
+  // document in any other collection. The fragment is embedded in both
+  // statements below, and each of them binds `collection` exactly once.
+  const exclusiveHashesQuery = `
+    SELECT DISTINCT d.hash
+    FROM documents d
+    WHERE d.collection = ? AND d.active = 1
+      AND NOT EXISTS (
+        SELECT 1 FROM documents d2
+        WHERE d2.hash = d.hash
+          AND d2.active = 1
+          AND d2.collection != d.collection
+      )
+  `;
+
+  // Only touch vectors_vec when the table currently exists.
+  const vecTableExists = db
+    .prepare(`SELECT 1 FROM sqlite_master WHERE type='table' AND name='vectors_vec'`)
+    .get();
+
+  if (vecTableExists) {
+    // vectors_vec rows are addressed by "<hash>_<seq>", and those pairs are
+    // read from content_vectors, so this must run before the
+    // content_vectors delete further down.
+    const hashSeqRows = db.prepare(`
+      SELECT cv.hash, cv.seq
+      FROM content_vectors cv
+      WHERE cv.hash IN (${exclusiveHashesQuery})
+    `).all(collection) as { hash: string; seq: number }[];
+
+    const delVec = db.prepare(`DELETE FROM vectors_vec WHERE hash_seq = ?`);
+    for (const row of hashSeqRows) {
+      delVec.run(`${row.hash}_${row.seq}`);
+    }
+  }
+
+  db.prepare(`
+    DELETE FROM content_vectors
+    WHERE hash IN (${exclusiveHashesQuery})
+  `).run(collection);
+
+  // If the scoped clear left no vectors at all, drop vectors_vec as well.
+  const remaining = db
+    .prepare(`SELECT COUNT(*) AS n FROM content_vectors`)
+    .get() as { n: number };
+  if (remaining.n === 0) {
+    db.exec(`DROP TABLE IF EXISTS vectors_vec`);
+  }
 }

 /**
@ -3988,6 +4160,21 @@ export type RankedListMeta = {
  query: string;
 };

+/**
+ * Compute per-list RRF weights for hybridQuery.
+ *
+ * Lists built from the user's original query (original FTS and original
+ * vector search) are the primary evidence and weigh 2x. Every list derived
+ * from an expansion (lex/vec/hyde) weighs 1x. The weight depends only on each
+ * list's `queryType`, never on its position, so an expansion list inserted
+ * ahead of the original vector list cannot take over its boost.
+ *
+ * @param rankedListMeta Metadata for each ranked list, in list order.
+ * @returns One weight per entry, in the same order as `rankedListMeta`.
+ */
+export function getHybridRrfWeights(rankedListMeta: RankedListMeta[]): number[] {
+  const weights: number[] = [];
+  for (const meta of rankedListMeta) {
+    const isOriginal = meta.queryType === "original";
+    weights.push(isOriginal ? 2.0 : 1.0);
+  }
+  return weights;
+}
+
 /**
 * Hybrid search: BM25 + vector + query expansion + RRF + chunked reranking.
 *
@ -4089,7 +4276,8 @@ export async function hybridQuery(

    // Batch embed all vector queries in a single call
    const llm = getLlm(store);
-    const textsToEmbed = vecQueries.map(q => formatQueryForEmbedding(q.text, llm.embedModelName));
+    const embedModel = llm.embedModelName;
+    const textsToEmbed = vecQueries.map(q => formatQueryForEmbedding(q.text, embedModel));
    hooks?.onEmbedStart?.(textsToEmbed.length);
    const embedStart = Date.now();
    const embeddings = await llm.embedBatch(textsToEmbed);
@ -4101,7 +4289,7 @@ export async function hybridQuery(
      if (!embedding) continue;

      const vecResults = await store.searchVec(
-        vecQueries[i]!.text, DEFAULT_EMBED_MODEL, 20, collection,
+        vecQueries[i]!.text, embedModel, 20, collection,
        undefined, embedding
      );
      if (vecResults.length > 0) {
@ -4119,8 +4307,9 @@ export async function hybridQuery(
    }
  }

-  // Step 4: RRF fusion — first 2 lists (original FTS + first vec) get 2x weight
-  const weights = rankedLists.map((_, i) => i < 2 ? 2.0 : 1.0);
+  // Step 4: RRF fusion — original-query FTS and vector lists get 2x weight;
+  // expansion-derived lists stay at 1x independent of insertion order.
+  const weights = getHybridRrfWeights(rankedListMeta);
  const fused = reciprocalRankFusion(rankedLists, weights);
  const rrfTraceByFile = explain ? buildRrfTrace(rankedLists, weights, rankedListMeta) : null;
  const candidates = fused.slice(0, candidateLimit);
@ -4331,10 +4520,11 @@ export async function vectorSearchQuery(
  options?.hooks?.onExpand?.(query, vecExpanded, Date.now() - expandStart);

  // Run original + vec/hyde expanded through vector, sequentially — concurrent embed() hangs
+  const embedModel = getLlm(store).embedModelName;
  const queryTexts = [query, ...vecExpanded.map(q => q.query)];
  const allResults = new Map<string, VectorSearchResult>();
  for (const q of queryTexts) {
-    const vecResults = await store.searchVec(q, DEFAULT_EMBED_MODEL, limit, collection);
+    const vecResults = await store.searchVec(q, embedModel, limit, collection);
    for (const r of vecResults) {
      const existing = allResults.get(r.filepath);
      if (!existing || r.score > existing.score) {
@ -4472,7 +4662,8 @@ export async function structuredSearch(
    );
    if (vecSearches.length > 0) {
      const llm = getLlm(store);
-      const textsToEmbed = vecSearches.map(s => formatQueryForEmbedding(s.query, llm.embedModelName));
+      const embedModel = llm.embedModelName;
+      const textsToEmbed = vecSearches.map(s => formatQueryForEmbedding(s.query, embedModel));
      hooks?.onEmbedStart?.(textsToEmbed.length);
      const embedStart = Date.now();
      const embeddings = await llm.embedBatch(textsToEmbed);
@ -4484,7 +4675,7 @@ export async function structuredSearch(

        for (const coll of collectionList) {
          const vecResults = await store.searchVec(
-            vecSearches[i]!.query, DEFAULT_EMBED_MODEL, 20, coll,
+            vecSearches[i]!.query, embedModel, 20, coll,
            undefined, embedding
          );
          if (vecResults.length > 0) {
--- a/test/ast.test.ts
+++ b/test/ast.test.ts
@ -6,7 +6,7 @@
 */

 import { describe, test, expect } from "vitest";
-import { detectLanguage, getASTBreakPoints, extractSymbols } from "../src/ast.js";
+import { detectLanguage, getASTBreakPoints, extractSymbols, formatGrammarLoadError } from "../src/ast.js";
 import type { SupportedLanguage } from "../src/ast.js";

 // =============================================================================
@ -315,6 +315,16 @@ describe("getASTBreakPoints - error handling", () => {
    // Should either return some partial break points or empty array — not throw
    expect(Array.isArray(points)).toBe(true);
  });
+
+  // The formatted message must name the grammar package, include an install
+  // command for it, and mention the regex fallback.
+  test("explains missing grammar packages with a repair command", () => {
+    const msg = formatGrammarLoadError(
+      "typescript",
+      new Error("Cannot find module 'tree-sitter-typescript/tree-sitter-typescript.wasm'"),
+    );
+    expect(msg).toContain("tree-sitter-typescript");
+    expect(msg).toContain("bun add tree-sitter-typescript@0.23.2");
+    expect(msg).toContain("falling back to regex");
+  });
 });

 // =============================================================================
--- a/test/cli-lazy-llm-import.test.ts
+++ b/test/cli-lazy-llm-import.test.ts
@ -0,0 +1,20 @@
+import { describe, expect, test } from "vitest";
+import { readFileSync } from "fs";
+import { join } from "path";
+
+// Guards against src/llm.ts pulling in node-llama-cpp through a static import.
+describe("LLM module loading", () => {
+  test("node-llama-cpp is only dynamically imported by LLM operations", () => {
+    // This is a textual check on the source file, not a runtime import check.
+    const source = readFileSync(join(process.cwd(), "src", "llm.ts"), "utf-8");
+
+    // Reject a static `import … from "node-llama-cpp"` unless it is `import type`.
+    // NOTE(review): `[\s\S]*?` is not confined to one statement, so an earlier
+    // non-type import followed later by `import type … from "node-llama-cpp"`
+    // would also match and fail this assertion.
+    expect(source).not.toMatch(/import\s+(?!type\b)[\s\S]*?from\s+["']node-llama-cpp["']/);
+    // The dynamic import must appear with exactly this spelling.
+    expect(source).toContain('import("node-llama-cpp")');
+  });
+
+  test("importing the CLI for lightweight commands succeeds", async () => {
+    // Importing the CLI module must resolve and expose these two helpers.
+    const mod = await import("../src/cli/qmd.ts");
+    expect(mod).toMatchObject({
+      buildEditorUri: expect.any(Function),
+      termLink: expect.any(Function),
+    });
+  });
+});
--- a/test/cli.test.ts
+++ b/test/cli.test.ts
@ -6,14 +6,15 @@
 */

 import { describe, test, expect, beforeAll, afterAll, beforeEach } from "vitest";
-import { mkdtemp, rm, writeFile, mkdir } from "fs/promises";
+import { chmod, copyFile, mkdtemp, rm, writeFile, mkdir } from "fs/promises";
 import { existsSync, lstatSync, readFileSync, symlinkSync, writeFileSync, unlinkSync } from "fs";
 import { tmpdir } from "os";
 import { join, dirname } from "path";
 import { fileURLToPath } from "url";
 import { spawn } from "child_process";
 import { setTimeout as sleep } from "timers/promises";
-import { buildEditorUri, termLink } from "../src/cli/qmd.ts";
+import { buildEditorUri, termLink, resolveEmbedModelForCli } from "../src/cli/qmd.ts";
+import { DEFAULT_EMBED_MODEL_URI } from "../src/llm.ts";

 // Test fixtures directory and database path
 let testDir: string;
@ -243,6 +244,30 @@ describe("CLI Help", () => {
 });

 describe("CLI Embed", () => {
+  // With QMD_EMBED_MODEL set, the resolver must return that value verbatim.
+  test("prefers QMD_EMBED_MODEL for qmd embed", () => {
+    const prev = process.env.QMD_EMBED_MODEL;
+    process.env.QMD_EMBED_MODEL = "hf:env/embed-model.gguf";
+
+    try {
+      expect(resolveEmbedModelForCli()).toBe("hf:env/embed-model.gguf");
+    } finally {
+      // Restore the caller's environment, including the "was unset" case.
+      if (prev === undefined) delete process.env.QMD_EMBED_MODEL;
+      else process.env.QMD_EMBED_MODEL = prev;
+    }
+  });
+
+  // With QMD_EMBED_MODEL removed, the resolver must return DEFAULT_EMBED_MODEL_URI.
+  test("falls back to the default embed model when QMD_EMBED_MODEL is unset", () => {
+    const prev = process.env.QMD_EMBED_MODEL;
+    delete process.env.QMD_EMBED_MODEL;
+
+    try {
+      expect(resolveEmbedModelForCli()).toBe(DEFAULT_EMBED_MODEL_URI);
+    } finally {
+      // Restore the caller's environment, including the "was unset" case.
+      if (prev === undefined) delete process.env.QMD_EMBED_MODEL;
+      else process.env.QMD_EMBED_MODEL = prev;
+    }
+  });
+
  test("rejects invalid --max-docs-per-batch", async () => {
    const { stderr, exitCode } = await runQmd(["embed", "--max-docs-per-batch", "0"]);
    expect(exitCode).toBe(1);
@ -1403,13 +1428,18 @@ describe("mcp http daemon", () => {
  }

  /** Spawn a foreground HTTP server (non-blocking) and return the process */
-  function spawnHttpServer(port: number): import("child_process").ChildProcess {
-    const proc = spawn(tsxBin, [qmdScript, "mcp", "--http", "--port", String(port)], {
+  function spawnHttpServer(
+    port: number,
+    options: { args?: string[]; env?: Record<string, string> } = {},
+  ): import("child_process").ChildProcess {
+    const proc = spawn(tsxBin, [qmdScript, ...(options.args ?? []), "mcp", "--http", "--port", String(port)], {
      cwd: fixturesDir,
      env: {
        ...process.env,
        INDEX_PATH: daemonDbPath,
        QMD_CONFIG_DIR: daemonConfigDir,
+        PWD: fixturesDir,
+        ...options.env,
      },
      stdio: ["ignore", "pipe", "pipe"],
    });
@ -1481,11 +1511,75 @@ describe("mcp http daemon", () => {
      const body = await res.json();
      expect(body.status).toBe("ok");
    } finally {
+      const closed = new Promise(r => proc.once("close", r));
      proc.kill("SIGTERM");
-      await new Promise(r => proc.on("close", r));
+      await closed;
    }
  });

+  // End-to-end check: populate a named index through the CLI, start the
+  // foreground HTTP server with the same --index, and confirm that a query
+  // returns a document that was indexed into that named index.
+  test("foreground HTTP server honors --index when selecting the store", async () => {
+    const customIndex = "mcp-alt-index";
+    // Unique per-run cache/config directories under the daemon test directory.
+    const customCacheDir = join(daemonTestDir, `cache-index-${Date.now()}-${Math.random().toString(16).slice(2)}`);
+    const customConfigDir = join(daemonTestDir, `config-index-${Date.now()}-${Math.random().toString(16).slice(2)}`);
+    await mkdir(customCacheDir, { recursive: true });
+    await mkdir(customConfigDir, { recursive: true });
+
+    // INDEX_PATH is blanked in every invocation below, presumably so that the
+    // --index name (resolved under XDG_CACHE_HOME) decides which database is
+    // used rather than the env override — confirm against the CLI's resolution.
+    const addResult = await runQmd(
+      ["--index", customIndex, "collection", "add", fixturesDir, "--name", "mcp-fixtures"],
+      {
+        dbPath: daemonDbPath,
+        configDir: customConfigDir,
+        env: {
+          INDEX_PATH: "",
+          XDG_CACHE_HOME: customCacheDir,
+        },
+      },
+    );
+    expect(addResult.exitCode).toBe(0);
+
+    const updateResult = await runQmd(
+      ["--index", customIndex, "update"],
+      {
+        dbPath: daemonDbPath,
+        configDir: customConfigDir,
+        env: {
+          INDEX_PATH: "",
+          XDG_CACHE_HOME: customCacheDir,
+        },
+      },
+    );
+    expect(updateResult.exitCode).toBe(0);
+
+    // Start the server with the same --index and the same cache/config dirs.
+    const port = randomPort();
+    const proc = spawnHttpServer(port, {
+      args: ["--index", customIndex],
+      env: {
+        INDEX_PATH: "",
+        XDG_CACHE_HOME: customCacheDir,
+        QMD_CONFIG_DIR: customConfigDir,
+      },
+    });
+
+    try {
+      const ready = await waitForServer(port);
+      expect(ready).toBe(true);
+
+      // Lexical-only query with reranking turned off in the request body.
+      const res = await fetch(`http://localhost:${port}/query`, {
+        method: "POST",
+        headers: { "Content-Type": "application/json" },
+        body: JSON.stringify({ searches: [{ type: "lex", query: "authentication" }], limit: 5, rerank: false }),
+      });
+      expect(res.status).toBe(200);
+      const body = await res.json();
+      const files = body.results.map((r: { file: string }) => r.file);
+      // A hit under the "mcp-fixtures" collection shows the named index was used.
+      expect(files.some((file: string) => file.includes("mcp-fixtures/notes/meeting.md"))).toBe(true);
+    } finally {
+      // Subscribe to "close" before sending SIGTERM so the event cannot be missed.
+      const closed = new Promise(r => proc.once("close", r));
+      proc.kill("SIGTERM");
+      await closed;
+    }
+  }, 10000);
+
  // -------------------------------------------------------------------------
  // Daemon lifecycle
  // -------------------------------------------------------------------------
@ -1601,3 +1695,67 @@ describe("mcp http daemon", () => {
    try { unlinkSync(pidPath()); } catch {}
  });
 });
+
+// =============================================================================
+// MCP stdio stdout hygiene
+// =============================================================================
+
+// Runs a copy of the bin/qmd wrapper against a fake `node` to check that the
+// wrapper exports the quiet env var before it launches the JS entry point.
+// NOTE(review): the fake node is a /bin/sh script and PATH is joined with ':',
+// so this test presumably only works on POSIX hosts — confirm it is skipped
+// or excluded on Windows.
+describe("mcp stdio launcher", () => {
+  test("sets native llama/ggml quiet env before Node starts so stdout stays JSON-RPC only", async () => {
+    const tempPackage = await mkdtemp(join(tmpdir(), "qmd-bin-mcp-"));
+    try {
+      // Minimal package layout for the copied wrapper to run in.
+      await mkdir(join(tempPackage, "bin"), { recursive: true });
+      await mkdir(join(tempPackage, "dist", "cli"), { recursive: true });
+      await mkdir(join(tempPackage, "fake-bin"), { recursive: true });
+
+      const qmdBin = join(tempPackage, "bin", "qmd");
+      await copyFile(join(projectRoot, "bin", "qmd"), qmdBin);
+      await chmod(qmdBin, 0o755);
+
+      // Force the wrapper down the Node branch, then put our fake `node` first
+      // in PATH. The fake node behaves like the native llama/ggml layer: it
+      // writes a non-JSON stdout line unless qmd pre-seeded the documented
+      // quiet env vars before launching JS.
+      await writeFile(join(tempPackage, "package-lock.json"), "{}\n");
+      const fakeNode = join(tempPackage, "fake-bin", "node");
+      await writeFile(fakeNode, `#!/bin/sh
+if [ "\${GGML_BACKEND_SILENT:-}" != "1" ]; then
+  printf 'llama.cpp native log on stdout\\n'
+fi
+printf '{"jsonrpc":"2.0","id":1,"result":{"ok":true}}\\n'
+`);
+      await chmod(fakeNode, 0o755);
+
+      // Blank the quiet variables in the child env so a passing run shows that
+      // the wrapper set GGML_BACKEND_SILENT itself.
+      const proc = spawn(qmdBin, ["mcp"], {
+        cwd: tempPackage,
+        env: {
+          ...process.env,
+          PATH: `${join(tempPackage, "fake-bin")}:${process.env.PATH}`,
+          LLAMA_LOG_LEVEL: "",
+          GGML_LOG_LEVEL: "",
+          GGML_BACKEND_SILENT: "",
+        },
+        stdio: ["ignore", "pipe", "pipe"],
+      });
+
+      let stdout = "";
+      let stderr = "";
+      proc.stdout?.on("data", (chunk: Buffer) => { stdout += chunk.toString(); });
+      proc.stderr?.on("data", (chunk: Buffer) => { stderr += chunk.toString(); });
+      // A null exit code (process ended by a signal) is reported as 1.
+      const exitCode = await new Promise<number>((resolve, reject) => {
+        proc.once("error", reject);
+        proc.on("close", (code) => resolve(code ?? 1));
+      });
+
+      expect(exitCode).toBe(0);
+      expect(stderr).toBe("");
+      // Every non-empty stdout line must parse as JSON.
+      const lines = stdout.trim().split("\n").filter(Boolean);
+      expect(lines.length).toBeGreaterThan(0);
+      for (const line of lines) {
+        expect(() => JSON.parse(line)).not.toThrow();
+      }
+    } finally {
+      await rm(tempPackage, { recursive: true, force: true });
+    }
+  });
+});
--- a/test/collections-config.test.ts
+++ b/test/collections-config.test.ts
@ -7,7 +7,7 @@

 import { describe, test, expect, beforeEach, afterEach } from "vitest";
 import { join } from "path";
-import { homedir } from "os";
+import { qmdHomedir } from "../src/paths.js";
 import { getConfigPath, setConfigIndexName } from "../src/collections.js";

 // Save/restore env vars around each test
@ -15,6 +15,8 @@ let savedEnv: Record<string, string | undefined>;

 beforeEach(() => {
  savedEnv = {
+    HOME: process.env.HOME,
+    USERPROFILE: process.env.USERPROFILE,
    QMD_CONFIG_DIR: process.env.QMD_CONFIG_DIR,
    XDG_CONFIG_HOME: process.env.XDG_CONFIG_HOME,
  };
@ -38,7 +40,16 @@ describe("getConfigDir via getConfigPath", () => {
  test("defaults to ~/.config/qmd when no env vars are set", () => {
    delete process.env.QMD_CONFIG_DIR;
    delete process.env.XDG_CONFIG_HOME;
-    expect(getConfigPath()).toBe(join(homedir(), ".config", "qmd", "index.yml"));
+    expect(getConfigPath()).toBe(join(qmdHomedir(), ".config", "qmd", "index.yml"));
+  });
+
+  // With HOME and both config-dir overrides removed, the config path must be
+  // built from USERPROFILE.
+  test("uses the same USERPROFILE fallback as default DB path when HOME is unset", () => {
+    delete process.env.HOME;
+    delete process.env.QMD_CONFIG_DIR;
+    delete process.env.XDG_CONFIG_HOME;
+    process.env.USERPROFILE = "/Users/windows-user";
+
+    expect(getConfigPath()).toBe(join("/Users/windows-user", ".config", "qmd", "index.yml"));
  });

  test("QMD_CONFIG_DIR takes highest priority", () => {
--- a/test/esm-ambiguous-module.test.ts
+++ b/test/esm-ambiguous-module.test.ts
@ -0,0 +1,27 @@
+import { describe, expect, test } from "vitest";
+import { execFileSync } from "child_process";
+import { mkdtempSync } from "fs";
+import { tmpdir } from "os";
+import { dirname, join, resolve } from "path";
+import { fileURLToPath } from "url";
+
+const repoRoot = resolve(dirname(fileURLToPath(import.meta.url)), "..");
+
+// Runs the built CLI under plain `node` to exercise the --index path handling.
+describe("Node ESM entrypoints", () => {
+  test("CLI --index path normalizes via setIndexName/setConfigIndexName under Node 22+", () => {
+    // Build first: the test executes dist/cli/qmd.js, not the TypeScript source.
+    execFileSync("npm", ["run", "build"], {
+      cwd: repoRoot,
+      encoding: "utf-8",
+      stdio: "pipe",
+    });
+
+    // Path-style index value in a fresh temp directory; "nested/idx" is not created here.
+    // NOTE(review): the directory made by mkdtempSync is never removed.
+    const indexPath = join(mkdtempSync(join(tmpdir(), "qmd-index-")), "nested", "idx");
+    // execFileSync throws on a non-zero exit, so a crash fails the test here.
+    const output = execFileSync("node", ["dist/cli/qmd.js", "--index", indexPath, "--version"], {
+      cwd: repoRoot,
+      encoding: "utf-8",
+      stdio: "pipe",
+    });
+
+    expect(output).toContain("qmd ");
+  }, 120_000);
+});
--- a/test/llm.test.ts
+++ b/test/llm.test.ts
@ -13,6 +13,8 @@ import {
  getDefaultLlamaCpp,
  disposeDefaultLlamaCpp,
  resolveLlamaGpuMode,
+  resolveParallelismOverride,
+  resolveSafeParallelism,
  withLLMSession,
  canUnloadLLM,
  SessionReleasedError,
@ -88,6 +90,44 @@ describe("QMD_LLAMA_GPU resolution", () => {
  });
 });

+describe("LLM context parallelism safety", () => { // calls the two parallelism resolvers directly with literal inputs
+  test("defaults Windows CUDA to one context to avoid ggml-cuda.cu:98 crashes", () => {
+    expect(resolveSafeParallelism({
+      gpu: "cuda",
+      platform: "win32",
+      computed: 8,
+      envValue: undefined, // explicitly no override value
+    })).toBe(1);
+  });
+
+  test("keeps non-Windows and non-CUDA backends on computed parallelism", () => {
+    expect(resolveSafeParallelism({ gpu: "cuda", platform: "linux", computed: 8 })).toBe(8); // CUDA off Windows keeps the computed value
+    expect(resolveSafeParallelism({ gpu: "vulkan", platform: "win32", computed: 8 })).toBe(8); // Windows with a non-CUDA backend keeps it too
+    expect(resolveSafeParallelism({ gpu: false, platform: "win32", computed: 4 })).toBe(4); // gpu: false on Windows keeps it too
+  });
+
+  test("QMD_EMBED_PARALLELISM overrides the Windows CUDA safety default", () => {
+    expect(resolveSafeParallelism({
+      gpu: "cuda",
+      platform: "win32",
+      computed: 8,
+      envValue: "2", // string override; expected to win over the default of 1
+    })).toBe(2);
+  });
+
+  test("QMD_EMBED_PARALLELISM clamps invalid values and warns", () => {
+    const stderrSpy = vi.spyOn(process.stderr, "write").mockReturnValue(true); // capture stderr writes without printing them
+    try {
+      expect(resolveParallelismOverride("0")).toBeUndefined(); // "0" yields no override
+      expect(resolveParallelismOverride("bad")).toBeUndefined(); // non-numeric text yields no override
+      expect(stderrSpy).toHaveBeenCalledTimes(2); // one stderr write per rejected value
+      expect(String(stderrSpy.mock.calls[0]?.[0] || "")).toContain("QMD_EMBED_PARALLELISM"); // first write names the variable
+    } finally {
+      stderrSpy.mockRestore(); // put the real process.stderr.write back even if an assertion failed
+    }
+  });
+});
+
 describe("LlamaCpp expand context size config", () => {
  const defaultExpandContextSize = 2048;

@ -654,7 +694,7 @@ describe.skipIf(!!process.env.CI)("LlamaCpp Integration", () => {
      for (const doc of result.results) {
        console.log(`  ${doc.file}: ${doc.score.toFixed(4)}`);
      }
-    });
+    }, 30000);
  });

  describe("expandQuery", () => {
--- a/test/package.test.ts
+++ b/test/package.test.ts
@ -0,0 +1,27 @@
+import { describe, expect, test } from "vitest";
+import { readFileSync } from "node:fs";
+import { join } from "node:path";
+
+const root = new URL("..", import.meta.url);
+const pkg = JSON.parse(readFileSync(new URL("package.json", root), "utf8"));
+
+describe("package grammar distribution", () => { // static checks against package.json and the smoke-check script; nothing is installed or executed
+  test("installs AST grammar wasm packages as required runtime dependencies", () => {
+    for (const dep of ["tree-sitter-typescript", "tree-sitter-python", "tree-sitter-go", "tree-sitter-rust"]) {
+      expect(pkg.dependencies, `${dep} should be a required dependency`).toHaveProperty(dep);
+      expect(pkg.optionalDependencies ?? {}, `${dep} should not be optional`).not.toHaveProperty(dep);
+    }
+  });
+
+  test("documents a packaging smoke check for grammar wasm availability", () => {
+    expect(pkg.scripts, "package.json scripts").toHaveProperty("smoke:package-grammars");
+    expect(String(pkg.scripts["smoke:package-grammars"])).toContain("check-package-grammars");
+
+    expect(pkg.files, "published package files").toContain("scripts/check-package-grammars.mjs");
+    // Resolve through URL, not root.pathname: pathname stays percent-encoded and has a leading "/" before Windows drive letters.
+    const scriptUrl = new URL("scripts/check-package-grammars.mjs", root);
+    const script = readFileSync(scriptUrl, "utf8");
+    expect(script).toContain("tree-sitter-typescript/tree-sitter-typescript.wasm");
+    expect(script).toContain("tree-sitter-typescript/tree-sitter-tsx.wasm");
+  });
+});
--- a/test/sdk.test.ts
+++ b/test/sdk.test.ts
@ -982,6 +982,92 @@ describe("embed", () => {
    }
  });

+  test("store.embed scopes pending documents to the requested collection", async () => { // embedding one collection must leave the sibling collection without vectors
+    const store = await createStore({
+      dbPath: freshDbPath(),
+      config: {
+        collections: {
+          docs: { path: docsDir, pattern: "**/*.md" },
+          notes: { path: notesDir, pattern: "**/*.md" },
+        },
+      },
+    });
+
+    const fakeLlm = createFakeEmbedLlm();
+    setDefaultLlamaCpp(createFakeTokenizer() as any);
+    store.internal.llm = fakeLlm as any; // swap in the fake embedder for this store
+
+    try {
+      await store.update();
+      const result = await store.embed({ collection: "docs" }); // embed request limited to "docs"
+
+      const vectorCounts = store.internal.db.prepare(`
+        SELECT d.collection, COUNT(DISTINCT v.hash) AS count
+        FROM documents d
+        LEFT JOIN content_vectors v ON v.hash = d.hash AND v.seq = 0
+        WHERE d.active = 1
+        GROUP BY d.collection
+        ORDER BY d.collection
+      `).all() as Array<{ collection: string; count: number }>; // per-collection count of distinct hashes with a seq-0 vector row; LEFT JOIN keeps collections at zero
+
+      expect(result.docsProcessed).toBe(3);
+      expect(result.chunksEmbedded).toBe(3);
+      expect(vectorCounts).toEqual([
+        { collection: "docs", count: 3 },
+        { collection: "notes", count: 0 }, // the unrequested collection stays unembedded
+      ]);
+    } finally {
+      setDefaultLlamaCpp(null); // clear the default set above
+      await store.close();
+    }
+  });
+
+  test("store.embed with force only clears the requested collection", async () => { // a forced, scoped re-embed must leave the sibling collection's vectors in place
+    const store = await createStore({
+      dbPath: freshDbPath(),
+      config: {
+        collections: {
+          docs: { path: docsDir, pattern: "**/*.md" },
+          notes: { path: notesDir, pattern: "**/*.md" },
+        },
+      },
+    });
+
+    const fakeLlm = createFakeEmbedLlm();
+    setDefaultLlamaCpp(createFakeTokenizer() as any);
+    store.internal.llm = fakeLlm as any; // swap in the fake embedder for this store
+
+    const vectorCounts = () => store.internal.db.prepare(`
+      SELECT d.collection, COUNT(DISTINCT v.hash) AS count
+      FROM documents d
+      LEFT JOIN content_vectors v ON v.hash = d.hash AND v.seq = 0
+      WHERE d.active = 1
+      GROUP BY d.collection
+      ORDER BY d.collection
+    `).all() as Array<{ collection: string; count: number }>; // re-runs the per-collection vector count on every call
+
+    try {
+      await store.update();
+      await store.embed(); // unscoped embed: both collections get vectors first
+      expect(vectorCounts()).toEqual([
+        { collection: "docs", count: 3 },
+        { collection: "notes", count: 3 },
+      ]);
+
+      const result = await store.embed({ force: true, collection: "docs" }); // forced re-embed limited to "docs"
+
+      expect(result.docsProcessed).toBe(3); // only the three "docs" documents were reprocessed
+      expect(result.chunksEmbedded).toBe(3);
+      expect(vectorCounts()).toEqual([
+        { collection: "docs", count: 3 },
+        { collection: "notes", count: 3 }, // sibling vectors are still present afterwards
+      ]);
+    } finally {
+      setDefaultLlamaCpp(null); // clear the default set above
+      await store.close();
+    }
+  });
+
  test("store.embed rejects invalid batch limits", async () => {
    const store = await createStore({
      dbPath: freshDbPath(),
--- a/test/store.test.ts
+++ b/test/store.test.ts
@ -9,7 +9,7 @@
 import { describe, test, expect, beforeAll, afterAll, beforeEach, afterEach, vi } from "vitest";
 import { openDatabase, loadSqliteVec } from "../src/db.js";
 import type { Database } from "../src/db.js";
-import { unlink, mkdtemp, rmdir, writeFile } from "node:fs/promises";
+import { unlink, mkdtemp, rmdir, writeFile, rm, mkdir, rename } from "node:fs/promises";
 import { tmpdir } from "node:os";
 import { join } from "node:path";
 import YAML from "yaml";
@ -46,13 +46,22 @@ import {
  normalizeDocid,
  isDocid,
  syncConfigToDb,
+  reindexCollection,
  STRONG_SIGNAL_MIN_SCORE,
  STRONG_SIGNAL_MIN_GAP,
+  insertContent,
+  insertDocument,
  generateEmbeddings,
+  getHybridRrfWeights,
+  _resetProductionModeForTesting,
+  hybridQuery,
+  structuredSearch,
+  vectorSearchQuery,
  type Store,
  type DocumentResult,
  type SearchResult,
  type RankedResult,
+  type RankedListMeta,
 } from "../src/store.js";
 import type { CollectionConfig } from "../src/collections.js";

@ -156,18 +165,18 @@ async function insertTestDocument(
  const hash = opts.hash || await hashContent(body);

  // Insert content (with OR IGNORE for deduplication)
-  db.prepare(`
-    INSERT OR IGNORE INTO content (hash, doc, created_at)
-    VALUES (?, ?, ?)
-  `).run(hash, body, now);
+  insertContent(db, hash, body, now);

-  // Insert document
-  const result = db.prepare(`
-    INSERT INTO documents (collection, path, title, hash, created_at, modified_at, active)
-    VALUES (?, ?, ?, ?, ?, ?, ?)
-  `).run(collectionName, path, title, hash, now, now, active);
+  insertDocument(db, collectionName, path, title, hash, now, now);
+  const row = db.prepare(`
+    SELECT id FROM documents WHERE collection = ? AND path = ?
+  `).get(collectionName, path) as { id: number } | undefined;

-  return Number(result.lastInsertRowid);
+  if (active === 0 && row) {
+    db.prepare(`UPDATE documents SET active = 0 WHERE id = ?`).run(row.id);
+  }
+
+  return row?.id ?? 0;
 }

 /** Sync YAML config file to SQLite store_collections in the current test store */
@ -277,7 +286,9 @@ afterAll(async () => {

 describe("Store Creation", () => {
  test("createStore throws without explicit path in test mode", () => {
-    // In test mode, createStore without path should throw to prevent accidental writes
+    // In test mode, createStore without path should throw to prevent accidental writes.
+    // Other tests may enable production mode in the same Bun process, so reset first.
+    _resetProductionModeForTesting();
    const originalIndexPath = process.env.INDEX_PATH;
    delete process.env.INDEX_PATH;

@ -1250,6 +1261,61 @@ describe("FTS Search", () => {
    await cleanupTestDb(store);
  });

+  test("searchFTS finds CJK documents by exact and mixed queries", async () => { // one Chinese, one Japanese and one Korean document, each queried in its own script
+    const store = await createTestStore();
+    const collectionName = await createTestCollection();
+
+    await insertTestDocument(store.db, collectionName, {
+      name: "zh",
+      title: "中文检索说明",
+      body: "这里介绍 vector 数据库和关键词检索。",
+      displayPath: "cjk/zh.md",
+    });
+    await insertTestDocument(store.db, collectionName, {
+      name: "ja",
+      title: "日本語検索メモ",
+      body: "この文書は検索品質とトークン化について説明します。",
+      displayPath: "cjk/ja.md",
+    });
+    await insertTestDocument(store.db, collectionName, {
+      name: "ko",
+      title: "한국어 검색 노트",
+      body: "이 문서는 검색 품질과 토큰화 문제를 설명합니다.",
+      displayPath: "cjk/ko.md",
+    });
+
+    expect(store.searchFTS("关键词检索", 10).map(r => r.displayPath)).toContain(`${collectionName}/cjk/zh.md`); // Chinese-only query
+    expect(store.searchFTS("検索品質", 10).map(r => r.displayPath)).toContain(`${collectionName}/cjk/ja.md`); // Japanese-only query
+    expect(store.searchFTS("검색 품질", 10).map(r => r.displayPath)).toContain(`${collectionName}/cjk/ko.md`); // Korean query with a space between terms
+    expect(store.searchFTS("vector 关键词", 10).map(r => r.displayPath)).toContain(`${collectionName}/cjk/zh.md`); // mixed Latin + Chinese query
+
+    await cleanupTestDb(store);
+  });
+
+  test("searchFTS keeps English behavior while indexing CJK text", async () => { // an English query must match the English document and not the Chinese one
+    const store = await createTestStore();
+    const collectionName = await createTestCollection();
+
+    await insertTestDocument(store.db, collectionName, {
+      name: "english",
+      title: "Vector Search Notes",
+      body: "The quick brown fox explains vector search and BM25 ranking.",
+      displayPath: "english.md",
+    });
+    await insertTestDocument(store.db, collectionName, {
+      name: "zh",
+      title: "中文检索说明",
+      body: "这里介绍向量数据库和关键词检索。",
+      displayPath: "zh.md",
+    });
+
+    const foxResults = store.searchFTS("quick fox", 10);
+    expect(foxResults.map(r => r.displayPath)).toContain(`${collectionName}/english.md`);
+    expect(foxResults.map(r => r.displayPath)).not.toContain(`${collectionName}/zh.md`); // the CJK-only document must not appear
+
+    await cleanupTestDb(store);
+  });
+
  test("searchFTS handles special characters in query", async () => {
    const store = await createTestStore();
    const collectionName = await createTestCollection();
@ -1988,6 +2054,38 @@ describe("Reciprocal Rank Fusion", () => {
    expect(fused[0]!.file).toBe("doc1");
  });

+  test("hybrid RRF weights boost original vector evidence over expansion-only hits", () => { // contrasts position-based weights with weights derived from list metadata
+    const originalFtsOnly = makeResult("original-fts-only.md", 0.95);
+    const expansionOnly = makeResult("lex-expansion-only.md", 0.95);
+    const originalVector = makeResult("original-vector.md", 0.95);
+
+    // Mirrors hybridQuery's common list order when a lex expansion exists:
+    // original FTS, lex expansion FTS, original vector.
+    const rankedLists = [
+      [originalFtsOnly],
+      [expansionOnly],
+      [originalVector],
+    ];
+    const rankedListMeta: RankedListMeta[] = [ // one metadata entry per ranked list, in the same order
+      { source: "fts", queryType: "original", query: "user query" },
+      { source: "fts", queryType: "lex", query: "lex expansion" },
+      { source: "vec", queryType: "original", query: "user query" },
+    ];
+
+    const positionBasedWeights = rankedLists.map((_, i) => i < 2 ? 2.0 : 1.0); // first two lists get 2.0 regardless of what they contain
+    const buggyOrder = reciprocalRankFusion(rankedLists, positionBasedWeights);
+
+    expect(buggyOrder.findIndex(r => r.file === "lex-expansion-only.md"))
+      .toBeLessThan(buggyOrder.findIndex(r => r.file === "original-vector.md")); // position-based weights rank the expansion hit above the original vector hit
+
+    const semanticWeights = getHybridRrfWeights(rankedListMeta);
+    const fixedOrder = reciprocalRankFusion(rankedLists, semanticWeights);
+
+    expect(semanticWeights).toEqual([2.0, 1.0, 2.0]); // both "original" lists weighted 2.0, the lex expansion 1.0
+    expect(fixedOrder.findIndex(r => r.file === "original-vector.md"))
+      .toBeLessThan(fixedOrder.findIndex(r => r.file === "lex-expansion-only.md")); // metadata-based weights reverse that order
+  });
+
  test("RRF adds top-rank bonus", () => {
    // doc1 is #1 in list1, doc2 is #2 in list1
    const list1 = [makeResult("doc1", 0.9), makeResult("doc2", 0.8)];
@ -2020,6 +2118,65 @@ describe("Reciprocal Rank Fusion", () => {
  });
 });

+// =============================================================================
+// Reindex Collection Tests
+// =============================================================================
+
+describe("Reindex Collection", () => { // runs reindexCollection against real files created under testDir
+  test("preserves document id and embeddings when file path changes only by case", async () => {
+    const store = await createTestStore();
+    const collectionName = "docs";
+    const collectionPath = join(testDir, `case-rename-${Date.now()}-${Math.random().toString(36).slice(2)}`); // per-run directory name
+    await mkdir(collectionPath, { recursive: true });
+
+    const originalPath = join(collectionPath, "README.md");
+    const renamedPath = join(collectionPath, "readme.md");
+    const body = "# Case Rename\n\nContent that should keep the same embedding.";
+    await writeFile(originalPath, body);
+
+    const firstResult = await reindexCollection(store, collectionPath, "**/*.md", collectionName);
+    expect(firstResult.indexed).toBe(1);
+
+    const before = store.db.prepare(`
+      SELECT id, path, hash FROM documents
+      WHERE collection = ? AND active = 1
+    `).get(collectionName) as { id: number; path: string; hash: string }; // snapshot of the row before the rename
+    expect(before.path).toBe("README.md");
+
+    store.db.prepare(`
+      INSERT INTO content_vectors (hash, seq, pos, model, embedded_at)
+      VALUES (?, 0, 0, 'test-model', ?)
+    `).run(before.hash, new Date().toISOString()); // hand-written vector metadata row for the document's hash
+
+    await rename(originalPath, renamedPath); // same content; the name differs only in letter case
+
+    const secondResult = await reindexCollection(store, collectionPath, "**/*.md", collectionName);
+    expect(secondResult.indexed).toBe(0); // not counted as a new document
+    expect(secondResult.unchanged).toBe(1);
+    expect(secondResult.removed).toBe(0); // and not counted as a removal
+
+    const afterRows = store.db.prepare(`
+      SELECT id, path, hash, active FROM documents
+      WHERE collection = ?
+      ORDER BY id
+    `).all(collectionName) as { id: number; path: string; hash: string; active: number }[]; // includes inactive rows, so a stale duplicate would show up here
+    expect(afterRows).toHaveLength(1);
+    expect(afterRows[0]).toMatchObject({ id: before.id, path: "readme.md", hash: before.hash, active: 1 }); // same id and hash, new path casing
+
+    const vectorCount = store.db.prepare(`
+      SELECT COUNT(*) AS count FROM content_vectors WHERE hash = ?
+    `).get(before.hash) as { count: number };
+    expect(vectorCount.count).toBe(1); // the hand-written vector row is still there
+
+    const ftsRows = store.db.prepare(`
+      SELECT rowid, filepath FROM documents_fts WHERE rowid = ?
+    `).all(before.id) as { rowid: number; filepath: string }[];
+    expect(ftsRows).toEqual([{ rowid: before.id, filepath: "docs/readme.md" }]); // exactly one FTS row, carrying the new casing
+
+    await cleanupTestDb(store);
+  });
+});
+
 // =============================================================================
 // Index Status Tests
 // =============================================================================
@ -2256,6 +2413,33 @@ describe("Vector Table", () => {

    await cleanupTestDb(store);
  });
+
+  test("insertEmbedding is idempotent for an existing vec0 hash_seq (#598)", async () => { // inserting an embedding whose hash_seq already exists must not throw or duplicate rows
+    const store = await createTestStore();
+    store.ensureVecTable(2); // 2 matches the length of the vectors used below
+
+    const hash = "existinghashseq";
+    const first = new Float32Array([0.1, 0.2]);
+    const second = new Float32Array([0.3, 0.4]);
+    const now = new Date().toISOString();
+
+    store.db.prepare(`INSERT INTO vectors_vec (hash_seq, embedding) VALUES (?, ?)`).run(`${hash}_0`, first); // pre-existing row under key "<hash>_0"
+
+    // Reproduces sqlite-vec's broken conflict handling: vec0 does not honor OR REPLACE.
+    expect(() => {
+      store.db.prepare(`INSERT OR REPLACE INTO vectors_vec (hash_seq, embedding) VALUES (?, ?)`).run(`${hash}_0`, second);
+    }).toThrow(/UNIQUE constraint failed/i);
+
+    // QMD must therefore use DELETE + INSERT when upserting the vector row.
+    expect(() => store.insertEmbedding(hash, 0, 0, second, "test-model", now)).not.toThrow();
+
+    const vectorCount = store.db.prepare(`SELECT COUNT(*) AS count FROM vectors_vec WHERE hash_seq = ?`).get(`${hash}_0`) as { count: number };
+    const metadataCount = store.db.prepare(`SELECT COUNT(*) AS count FROM content_vectors WHERE hash = ? AND seq = 0`).get(hash) as { count: number };
+    expect(vectorCount.count).toBe(1); // still a single vector row for the key
+    expect(metadataCount.count).toBe(1); // and a single metadata row for (hash, seq 0)
+
+    await cleanupTestDb(store);
+  });
 });

 // =============================================================================
@ -2263,6 +2447,47 @@ describe("Vector Table", () => {
 // =============================================================================

 describe("Integration", () => {
+  test("reindexCollection soft-deletes removed files and preserves inactive content (#585)", async () => { // files deleted from disk become inactive rows; their content rows are kept
+    const store = await createTestStore();
+    const collectionDir = await mkdtemp(join(testDir, "orphan-regression-"));
+    const collectionName = "orphan-regression";
+
+    try {
+      for (let i = 1; i <= 5; i++) {
+        await writeFile(join(collectionDir, `doc-${i}.md`), `# Doc ${i}\n\nUnique body ${i}`); // five files with distinct bodies
+      }
+
+      await createTestCollection({ pwd: collectionDir, glob: "**/*.md", name: collectionName });
+
+      const initial = await reindexCollection(store, collectionDir, "**/*.md", collectionName);
+      expect(initial.indexed).toBe(5);
+      expect(initial.removed).toBe(0);
+
+      await rm(join(collectionDir, "doc-3.md")); // delete three of the five files from disk
+      await rm(join(collectionDir, "doc-4.md"));
+      await rm(join(collectionDir, "doc-5.md"));
+
+      const afterDelete = await reindexCollection(store, collectionDir, "**/*.md", collectionName);
+      expect(afterDelete.removed).toBe(3);
+
+      const counts = store.db.prepare(`
+        SELECT
+          SUM(CASE WHEN active = 1 THEN 1 ELSE 0 END) AS active,
+          SUM(CASE WHEN active = 0 THEN 1 ELSE 0 END) AS inactive,
+          COUNT(*) AS total
+        FROM documents
+        WHERE collection = ?
+      `).get(collectionName) as { active: number; inactive: number; total: number }; // active / inactive / total document rows for this collection
+      const contentCount = store.db.prepare(`SELECT COUNT(*) AS count FROM content`).get() as { count: number }; // counts every row in content, not only this collection's
+
+      expect(counts).toEqual({ active: 2, inactive: 3, total: 5 }); // rows are flagged inactive rather than deleted
+      expect(contentCount.count).toBe(5); // content for the removed files is retained
+    } finally {
+      await rm(collectionDir, { recursive: true, force: true }); // remove the scratch collection even on failure
+      await cleanupTestDb(store);
+    }
+  });
+
  test("full document lifecycle: create, search, retrieve", async () => {
    const store = await createTestStore();
    const collectionName = await createTestCollection({ pwd: "/test/notes", glob: "**/*.md" });
@ -2802,6 +3027,116 @@ describe("Embedding batching", () => {
    }
  });

+  test("generateEmbeddings uses the active llm embed model when no explicit model is passed", async () => { // the model name set on store.llm must reach the embed calls and the stored rows
+    const store = await createTestStore();
+    const db = store.db;
+    const fakeLlm = createFakeEmbedLlm();
+    const model = "hf:env/embed-model.gguf";
+
+    setDefaultLlamaCpp(createFakeTokenizer() as any);
+    store.llm = { ...fakeLlm, embedModelName: model } as any; // fake embedder plus the model name under test
+
+    try {
+      await insertTestDocument(db, "docs", { name: "one", body: "# One\n\nAlpha" });
+
+      const result = await generateEmbeddings(store); // called without a model option
+
+      expect(result.chunksEmbedded).toBe(1);
+      expect(fakeLlm.embedCalls[0]?.options?.model).toBe(model); // first recorded embed call used the model
+      expect(fakeLlm.embedBatchModelCalls).toEqual([{ model }]); // exactly one recorded batch call, with the same model
+      expect(db.prepare(`SELECT DISTINCT model FROM content_vectors`).all()).toEqual([{ model }]); // stored vector metadata records only that model
+    } finally {
+      setDefaultLlamaCpp(null); // clear the default set above
+      await cleanupTestDb(store);
+    }
+  });
+
+  test("vectorSearchQuery uses the active llm embed model for vector lookups", async () => { // checks the arguments forwarded to store.searchVec
+    const store = await createTestStore();
+    const model = "hf:Qwen/Qwen3-Embedding-0.6B-GGUF/Qwen3-Embedding-0.6B-Q8_0.gguf";
+    const searchVecSpy = vi.fn(async () => [] as SearchResult[]) as any; // stub that always reports no hits
+
+    store.db.exec(`CREATE TABLE vectors_vec (hash_seq TEXT PRIMARY KEY, embedding BLOB)`); // ordinary table named vectors_vec; presumably only needs to exist for the search path to proceed
+    store.llm = { embedModelName: model } as any;
+    store.searchVec = searchVecSpy as any;
+    store.expandQuery = vi.fn(async () => []) as any; // no query expansions
+
+    try {
+      await vectorSearchQuery(store, "custom query", { limit: 7, minScore: 0 });
+
+      expect(searchVecSpy).toHaveBeenCalledTimes(1);
+      expect(searchVecSpy.mock.calls[0]?.[0]).toBe("custom query"); // 1st argument: the query text
+      expect(searchVecSpy.mock.calls[0]?.[1]).toBe(model); // 2nd argument: the model name from store.llm
+      expect(searchVecSpy.mock.calls[0]?.[2]).toBe(7); // 3rd argument: equals the limit option passed above
+    } finally {
+      await cleanupTestDb(store);
+    }
+  });
+
+  test("hybridQuery uses the active llm embed model for precomputed vector lookups", async () => { // checks the arguments forwarded to store.searchVec
+    const store = await createTestStore();
+    const model = "hf:Qwen/Qwen3-Embedding-0.6B-GGUF/Qwen3-Embedding-0.6B-Q8_0.gguf";
+    const embedBatchSpy = vi.fn(async (texts: string[]) => texts.map(() => ({ // returns the same fixed embedding for every input text
+      embedding: [1, 2, 3],
+      model,
+    })));
+    const searchVecSpy = vi.fn(async () => [] as SearchResult[]) as any; // stub that always reports no hits
+
+    store.db.exec(`CREATE TABLE vectors_vec (hash_seq TEXT PRIMARY KEY, embedding BLOB)`); // ordinary table named vectors_vec; presumably only needs to exist for the vector path to run
+    store.llm = {
+      embedModelName: model,
+      embedBatch: embedBatchSpy,
+    } as any;
+    store.searchVec = searchVecSpy as any;
+    store.searchFTS = vi.fn(() => []) as any; // lexical search stubbed to return nothing
+    store.expandQuery = vi.fn(async () => []) as any; // no query expansions
+
+    try {
+      await hybridQuery(store, "hybrid query", { limit: 5, minScore: 0, skipRerank: true });
+
+      expect(embedBatchSpy).toHaveBeenCalledTimes(1);
+      expect(searchVecSpy).toHaveBeenCalledTimes(1);
+      expect(searchVecSpy.mock.calls[0]?.[0]).toBe("hybrid query"); // 1st argument: the query text
+      expect(searchVecSpy.mock.calls[0]?.[1]).toBe(model); // 2nd argument: the model name from store.llm
+      expect(searchVecSpy.mock.calls[0]?.[5]).toEqual([1, 2, 3]); // 6th argument: the embedding produced by embedBatch
+    } finally {
+      await cleanupTestDb(store);
+    }
+  });
+
+  test("structuredSearch uses the active llm embed model for precomputed vector lookups", async () => { // checks the arguments forwarded to store.searchVec
+    const store = await createTestStore();
+    const model = "hf:Qwen/Qwen3-Embedding-0.6B-GGUF/Qwen3-Embedding-0.6B-Q8_0.gguf";
+    const embedBatchSpy = vi.fn(async (texts: string[]) => texts.map(() => ({ // returns the same fixed embedding for every input text
+      embedding: [1, 2, 3],
+      model,
+    })));
+    const searchVecSpy = vi.fn(async () => [] as SearchResult[]) as any; // stub that always reports no hits
+
+    store.db.exec(`CREATE TABLE vectors_vec (hash_seq TEXT PRIMARY KEY, embedding BLOB)`); // ordinary table named vectors_vec; presumably only needs to exist for the vector path to run
+    store.llm = {
+      embedModelName: model,
+      embedBatch: embedBatchSpy,
+    } as any;
+    store.searchVec = searchVecSpy as any;
+
+    try {
+      await structuredSearch(store, [{ type: "vec", query: "structured query" }], { // a single vec-typed sub-query
+        limit: 5,
+        minScore: 0,
+        skipRerank: true,
+      });
+
+      expect(embedBatchSpy).toHaveBeenCalledTimes(1);
+      expect(searchVecSpy).toHaveBeenCalledTimes(1);
+      expect(searchVecSpy.mock.calls[0]?.[0]).toBe("structured query"); // 1st argument: the query text
+      expect(searchVecSpy.mock.calls[0]?.[1]).toBe(model); // 2nd argument: the model name from store.llm
+      expect(searchVecSpy.mock.calls[0]?.[5]).toEqual([1, 2, 3]); // 6th argument: the embedding produced by embedBatch
+    } finally {
+      await cleanupTestDb(store);
+    }
+  });
+
  test("generateEmbeddings rejects invalid batch limits", async () => {
    const store = await createTestStore();