Merge pull request #636 from tobi/stack/qmd-kanban-fixes-2026-05-09

Integrate QMD fix stack
This commit is contained in:
Tobias Lütke 2026-05-09 16:22:07 -04:00 committed by GitHub
commit 746beedb48
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
24 changed files with 1198 additions and 131 deletions

View File

@ -4,6 +4,18 @@
### Fixes
- Embedding: `qmd embed -c <collection>` now scopes pending-doc selection
to the requested collection instead of embedding global pending work.
Scoped `--force` clears only collection-owned vectors, preserves shared
hashes referenced by sibling collections, and drops `vectors_vec` only
when the scoped clear empties all vectors.
- Hybrid search: weight RRF lists by query type so original FTS and original vector evidence get the intended 2x boost, instead of accidentally boosting the first lexical expansion. #591
- MCP: seed llama.cpp/GGML quiet env vars before launching `qmd mcp` so native logs cannot pollute stdio JSON-RPC framing. #593
- CLI: remove CommonJS `require()` calls from ESM index path normalization so `qmd --index <path>` no longer crashes with `ERR_AMBIGUOUS_MODULE_SYNTAX` on Node 22+. #634
- Windows CUDA: serialize llama.cpp embedding/reranking contexts by default to avoid intermittent `ggml-cuda.cu:98` crashes in `qmd query`; set `QMD_EMBED_PARALLELISM` to opt back into parallel contexts if your driver is stable. #519
- MCP: make `qmd mcp --index <name>` use the selected index for both foreground and daemon HTTP servers instead of falling back to the default store. #343
- Embedding: respect `QMD_EMBED_MODEL` consistently for vector indexing and vector-backed search, with default-model fallback when unset.
- Config: use one home-directory resolver for YAML config and the default SQLite cache path, avoiding Windows CLI/MCP split-brain when `HOME` is unset.
- GPU: respect explicit `QMD_LLAMA_GPU=metal|vulkan|cuda` backend overrides instead of always using auto GPU selection. #529
- Fix: preserve original filename case in `handelize()`. The previous
`.toLowerCase()` call made indexed paths unreachable on case-sensitive
@ -12,6 +24,15 @@
- CLI: make `qmd status` skip native `node-llama-cpp` device probing by
default so status stays safe on machines with broken or unsupported GPU
drivers. Set `QMD_STATUS_DEVICE_PROBE=1` to opt in.
- CLI: lazy-load `node-llama-cpp` so lightweight commands such as
`qmd status` do not import native ML dependencies or trigger llama.cpp
builds on ARM/no-GPU machines. #491
- Store: keep content rows referenced by inactive documents during orphan
cleanup so `qmd update` preserves soft-deleted tombstones for removed
files. #585
- Packaging: install AST grammar WASM packages as required dependencies so
Bun global installs include TypeScript/TSX/JavaScript grammars, and add a
`smoke:package-grammars` verification command. #595
## [2.1.0] - 2026-04-05

View File

@ -797,6 +797,8 @@ llm_cache -- Cached LLM responses (query expansion, rerank scores)
| Variable | Default | Description |
|----------|---------|-------------|
| `XDG_CACHE_HOME` | `~/.cache` | Cache directory location |
| `QMD_LLAMA_GPU` | `auto` | Force llama.cpp GPU backend (`metal`, `vulkan`, `cuda`) or disable GPU with `false` |
| `QMD_EMBED_PARALLELISM` | automatic | Override embedding/reranking context parallelism (1-8). Windows CUDA defaults to `1` because parallel CUDA contexts can crash with `ggml-cuda.cu:98`; use Vulkan or raise this only if your driver is stable. |
## How It Works

10
bin/qmd
View File

@ -15,6 +15,16 @@ done
# to avoid native module ABI mismatches (e.g., better-sqlite3 compiled for bun vs node)
DIR="$(cd -P "$(dirname "$SOURCE")/.." && pwd)"
# MCP stdio reserves stdout exclusively for JSON-RPC frames. node-llama-cpp
# / llama.cpp / ggml can write native logs directly to stdout before JS-level
# log handlers are attached, so seed the native quiet env before Node/Bun imports
# the CLI and its LLM modules. Preserve explicit user values when provided.
# Decide whether this invocation starts the MCP server. The global --index
# option may precede the subcommand (`qmd --index <name> mcp` or
# `qmd --index=<name> mcp`), so testing "$1" alone would skip the quiet env
# for those invocations and let native logs reach stdout.
qmd_is_mcp=0
case "${1-}" in
  mcp)
    qmd_is_mcp=1
    ;;
  --index)
    # `--index <name> mcp`: the subcommand is the third argument.
    if [ "${3-}" = "mcp" ]; then qmd_is_mcp=1; fi
    ;;
  --index=*)
    # `--index=<name> mcp`: the subcommand is the second argument.
    if [ "${2-}" = "mcp" ]; then qmd_is_mcp=1; fi
    ;;
esac
if [ "$qmd_is_mcp" = 1 ]; then
  # ${VAR:-default} keeps any value the user already exported.
  export LLAMA_LOG_LEVEL="${LLAMA_LOG_LEVEL:-error}"
  export GGML_LOG_LEVEL="${GGML_LOG_LEVEL:-error}"
  export GGML_BACKEND_SILENT="${GGML_BACKEND_SILENT:-1}"
fi
# Detect the package manager that installed dependencies by checking lockfiles.
# $BUN_INSTALL is intentionally NOT checked — it only indicates that bun exists
# on the system, not that it was used to install this package (see #361).

View File

@ -11,6 +11,10 @@
"node-llama-cpp": "3.18.1",
"picomatch": "4.0.4",
"sqlite-vec": "0.1.9",
"tree-sitter-go": "0.23.4",
"tree-sitter-python": "0.23.4",
"tree-sitter-rust": "0.24.0",
"tree-sitter-typescript": "0.23.2",
"web-tree-sitter": "0.26.7",
"yaml": "2.8.3",
"zod": "4.2.1",
@ -26,10 +30,6 @@
"sqlite-vec-linux-arm64": "0.1.9",
"sqlite-vec-linux-x64": "0.1.9",
"sqlite-vec-windows-x64": "0.1.9",
"tree-sitter-go": "0.23.4",
"tree-sitter-python": "0.23.4",
"tree-sitter-rust": "0.24.0",
"tree-sitter-typescript": "0.23.2",
},
"peerDependencies": {
"typescript": "^5.9.3",
@ -509,7 +509,7 @@
"node-abi": ["node-abi@3.87.0", "", { "dependencies": { "semver": "^7.3.5" } }, "sha512-+CGM1L1CgmtheLcBuleyYOn7NWPVu0s0EJH2C4puxgEZb9h8QpR9G2dBfZJOAUhi7VQxuBPMd0hiISWcTyiYyQ=="],
"node-addon-api": ["node-addon-api@8.5.0", "", {}, "sha512-/bRZty2mXUIFY/xU5HLvveNHlswNJej+RnxBjOMkidWfwZzgTbPG1E3K5TOxRLOR+5hX7bSofy8yf1hZevMS8A=="],
"node-addon-api": ["node-addon-api@8.7.0", "", {}, "sha512-9MdFxmkKaOYVTV+XVRG8ArDwwQ77XIgIPyKASB1k3JPq3M8fGQQQE3YpMOrKm6g//Ktx8ivZr8xo1Qmtqub+GA=="],
"node-api-headers": ["node-api-headers@1.8.0", "", {}, "sha512-jfnmiKWjRAGbdD1yQS28bknFM1tbHC1oucyuMPjmkEs+kpiu76aRs40WlTmBmyEgzDM76ge1DQ7XJ3R5deiVjQ=="],
@ -773,8 +773,6 @@
"micromatch/picomatch": ["picomatch@2.3.1", "", {}, "sha512-JU3teHTNjmE2VCGFzuY8EXzCDVwEqB2a8fsIvwaStHhAWJEeVd1o1QD80CU6+ZdEXXSLbSsuLwJjkCBWqRQUVA=="],
"node-llama-cpp/node-addon-api": ["node-addon-api@8.7.0", "", {}, "sha512-9MdFxmkKaOYVTV+XVRG8ArDwwQ77XIgIPyKASB1k3JPq3M8fGQQQE3YpMOrKm6g//Ktx8ivZr8xo1Qmtqub+GA=="],
"ora/cli-spinners": ["cli-spinners@3.4.0", "", {}, "sha512-bXfOC4QcT1tKXGorxL3wbJm6XJPDqEnij2gQ2m7ESQuE+/z9YFIWnl/5RpTiKWbMq3EVKR4fRLJGn6DVfu0mpw=="],
"postcss/nanoid": ["nanoid@3.3.11", "", { "bin": { "nanoid": "bin/nanoid.cjs" } }, "sha512-N8SpfPUnUp1bK+PMYW8qSWdl9U+wwNWI4QKxOYDy9JAro3WMX7p2OeVRF9v+347pnakNevPmiHhNmZ2HbFA76w=="],
@ -793,6 +791,16 @@
"tinyglobby/picomatch": ["picomatch@4.0.3", "", {}, "sha512-5gTmgEY/sqK6gFXLIsQNH19lWb4ebPDLA4SdLP7dsWkIXHWlG66oPuVvXSGFPppYZz8ZDZq0dYYrbHfBCVUb1Q=="],
"tree-sitter-go/node-addon-api": ["node-addon-api@8.5.0", "", {}, "sha512-/bRZty2mXUIFY/xU5HLvveNHlswNJej+RnxBjOMkidWfwZzgTbPG1E3K5TOxRLOR+5hX7bSofy8yf1hZevMS8A=="],
"tree-sitter-javascript/node-addon-api": ["node-addon-api@8.5.0", "", {}, "sha512-/bRZty2mXUIFY/xU5HLvveNHlswNJej+RnxBjOMkidWfwZzgTbPG1E3K5TOxRLOR+5hX7bSofy8yf1hZevMS8A=="],
"tree-sitter-python/node-addon-api": ["node-addon-api@8.5.0", "", {}, "sha512-/bRZty2mXUIFY/xU5HLvveNHlswNJej+RnxBjOMkidWfwZzgTbPG1E3K5TOxRLOR+5hX7bSofy8yf1hZevMS8A=="],
"tree-sitter-rust/node-addon-api": ["node-addon-api@8.5.0", "", {}, "sha512-/bRZty2mXUIFY/xU5HLvveNHlswNJej+RnxBjOMkidWfwZzgTbPG1E3K5TOxRLOR+5hX7bSofy8yf1hZevMS8A=="],
"tree-sitter-typescript/node-addon-api": ["node-addon-api@8.5.0", "", {}, "sha512-/bRZty2mXUIFY/xU5HLvveNHlswNJej+RnxBjOMkidWfwZzgTbPG1E3K5TOxRLOR+5hX7bSofy8yf1hZevMS8A=="],
"vite/picomatch": ["picomatch@4.0.3", "", {}, "sha512-5gTmgEY/sqK6gFXLIsQNH19lWb4ebPDLA4SdLP7dsWkIXHWlG66oPuVvXSGFPppYZz8ZDZq0dYYrbHfBCVUb1Q=="],
"vitest/picomatch": ["picomatch@4.0.3", "", {}, "sha512-5gTmgEY/sqK6gFXLIsQNH19lWb4ebPDLA4SdLP7dsWkIXHWlG66oPuVvXSGFPppYZz8ZDZq0dYYrbHfBCVUb1Q=="],

View File

@ -44,8 +44,8 @@
});
nodeModulesHashes = {
x86_64-linux = "sha256-D0ezO4vqq4iswcAMU2DCql9ZAQvh3me6N9aDB5roq4w=";
aarch64-darwin = "sha256-qU+9KdR/nTocelyANS09I/4yaQ+7s1LvJNqB27IOK/c=";
x86_64-linux = "sha256-zee2c7LS+JxpZOpdWG2qyUKlS7EJq2PL/wSo+AewJ9g=";
aarch64-darwin = "sha256-qL80cpCrl3BbEWqmYStRuTDJlIIAFW1Y71YbJOeu/f0=";
# Populate these on first build for additional hosts if/when needed.
aarch64-linux = pkgs.lib.fakeHash;

View File

@ -17,6 +17,7 @@
"files": [
"bin/",
"dist/",
"scripts/check-package-grammars.mjs",
"LICENSE",
"CHANGELOG.md"
],
@ -31,7 +32,8 @@
"vsearch": "tsx src/cli/qmd.ts vsearch",
"rerank": "tsx src/cli/qmd.ts rerank",
"inspector": "npx @modelcontextprotocol/inspector tsx src/cli/qmd.ts mcp",
"release": "./scripts/release.sh"
"release": "./scripts/release.sh",
"smoke:package-grammars": "node scripts/check-package-grammars.mjs"
},
"publishConfig": {
"access": "public"
@ -53,18 +55,18 @@
"sqlite-vec": "0.1.9",
"web-tree-sitter": "0.26.7",
"yaml": "2.8.3",
"zod": "4.2.1"
"zod": "4.2.1",
"tree-sitter-go": "0.23.4",
"tree-sitter-python": "0.23.4",
"tree-sitter-rust": "0.24.0",
"tree-sitter-typescript": "0.23.2"
},
"optionalDependencies": {
"sqlite-vec-darwin-arm64": "0.1.9",
"sqlite-vec-darwin-x64": "0.1.9",
"sqlite-vec-linux-arm64": "0.1.9",
"sqlite-vec-linux-x64": "0.1.9",
"sqlite-vec-windows-x64": "0.1.9",
"tree-sitter-go": "0.23.4",
"tree-sitter-python": "0.23.4",
"tree-sitter-rust": "0.24.0",
"tree-sitter-typescript": "0.23.2"
"sqlite-vec-windows-x64": "0.1.9"
},
"devDependencies": {
"@types/better-sqlite3": "7.6.13",

View File

@ -0,0 +1,29 @@
#!/usr/bin/env node
/**
 * Packaging smoke check: confirm that every tree-sitter grammar `.wasm` file
 * listed below can be resolved from this package. Prints one line per grammar
 * and exits with status 1 if any of them cannot be resolved.
 */
import { createRequire } from "node:module";

// ESM has no ambient `require`; build one anchored at this file so
// `require.resolve` searches relative to the package.
const require = createRequire(import.meta.url);

const wasmSpecifiers = [
  "tree-sitter-typescript/tree-sitter-typescript.wasm",
  "tree-sitter-typescript/tree-sitter-tsx.wasm",
  "tree-sitter-python/tree-sitter-python.wasm",
  "tree-sitter-go/tree-sitter-go.wasm",
  "tree-sitter-rust/tree-sitter-rust.wasm",
];

/** Resolve one grammar file, report the outcome, and return whether it was found. */
function reportGrammar(specifier) {
  try {
    const location = require.resolve(specifier);
    console.log(`ok ${specifier} -> ${location}`);
    return true;
  } catch (err) {
    console.error(`missing ${specifier}`);
    console.error(err instanceof Error ? err.message : String(err));
    return false;
  }
}

// Check every grammar (no short-circuit) so all missing files are reported.
const outcomes = wasmSpecifiers.map((specifier) => reportGrammar(specifier));

if (outcomes.includes(false)) {
  console.error("\nAST grammar package smoke check failed. Run `bun install` locally or repair a broken global install with the matching `bun add tree-sitter-...@<version>` command shown by `qmd status`.");
  process.exit(1);
}

View File

@ -63,15 +63,22 @@ export function detectLanguage(filepath: string): SupportedLanguage | null {
/**
* Maps language to the npm package and wasm filename for the grammar.
*/
const GRAMMAR_MAP: Record<SupportedLanguage, { pkg: string; wasm: string }> = {
typescript: { pkg: "tree-sitter-typescript", wasm: "tree-sitter-typescript.wasm" },
tsx: { pkg: "tree-sitter-typescript", wasm: "tree-sitter-tsx.wasm" },
javascript: { pkg: "tree-sitter-typescript", wasm: "tree-sitter-typescript.wasm" },
python: { pkg: "tree-sitter-python", wasm: "tree-sitter-python.wasm" },
go: { pkg: "tree-sitter-go", wasm: "tree-sitter-go.wasm" },
rust: { pkg: "tree-sitter-rust", wasm: "tree-sitter-rust.wasm" },
const GRAMMAR_MAP: Record<SupportedLanguage, { pkg: string; wasm: string; version: string }> = {
typescript: { pkg: "tree-sitter-typescript", wasm: "tree-sitter-typescript.wasm", version: "0.23.2" },
tsx: { pkg: "tree-sitter-typescript", wasm: "tree-sitter-tsx.wasm", version: "0.23.2" },
javascript: { pkg: "tree-sitter-typescript", wasm: "tree-sitter-typescript.wasm", version: "0.23.2" },
python: { pkg: "tree-sitter-python", wasm: "tree-sitter-python.wasm", version: "0.23.4" },
go: { pkg: "tree-sitter-go", wasm: "tree-sitter-go.wasm", version: "0.23.4" },
rust: { pkg: "tree-sitter-rust", wasm: "tree-sitter-rust.wasm", version: "0.24.0" },
};
/**
 * Build the human-readable message reported when a grammar fails to load.
 *
 * @param language - Language whose grammar entry in GRAMMAR_MAP failed.
 * @param err - The thrown value; its `message` is used when it is an Error,
 *   otherwise it is stringified.
 * @returns A message naming the package/wasm file, the failure detail, the
 *   regex-chunking fallback, and a pinned `bun add` repair command.
 */
export function formatGrammarLoadError(language: SupportedLanguage, err: unknown): string {
  const { pkg, wasm, version } = GRAMMAR_MAP[language];
  const reason = err instanceof Error ? err.message : String(err);
  const failure = `${pkg}/${wasm} failed to load (${reason}); falling back to regex chunking. `;
  const remedy = `Repair a broken global install with: bun add ${pkg}@${version}`;
  return failure + remedy;
}
// =============================================================================
// Per-Language Query Definitions
// =============================================================================
@ -176,6 +183,9 @@ let initPromise: Promise<void> | null = null;
/** Languages that have already failed to load — warn only once per process. */
const failedLanguages = new Set<string>();
/** Last grammar load error by language, for status output. */
const grammarLoadErrors = new Map<SupportedLanguage, string>();
/** Cached grammar load promises. */
const grammarCache = new Map<string, Promise<LanguageType>>();
@ -228,7 +238,9 @@ async function loadGrammar(language: SupportedLanguage): Promise<LanguageType |
} catch (err) {
failedLanguages.add(language);
grammarCache.delete(wasmKey);
console.warn(`[qmd] Failed to load tree-sitter grammar for ${language}: ${err}`);
const message = formatGrammarLoadError(language, err);
grammarLoadErrors.set(language, message);
console.warn(`[qmd] AST grammar unavailable for ${language}: ${message}`);
return null;
}
}
@ -345,7 +357,7 @@ export async function getASTStatus(): Promise<{
getQuery(lang, grammar);
languages.push({ language: lang, available: true });
} else {
languages.push({ language: lang, available: false, error: "grammar failed to load" });
languages.push({ language: lang, available: false, error: grammarLoadErrors.get(lang) ?? "grammar failed to load" });
}
} catch (err) {
languages.push({

View File

@ -3,7 +3,7 @@ import type { Database } from "../db.js";
import fastGlob from "fast-glob";
import { execSync, spawn as nodeSpawn } from "child_process";
import { fileURLToPath } from "url";
import { dirname, join as pathJoin, relative as relativePath } from "path";
import { dirname, join as pathJoin, relative as relativePath, resolve as pathResolve } from "path";
import { parseArgs } from "util";
import { readFileSync, realpathSync, statSync, existsSync, unlinkSync, writeFileSync, openSync, closeSync, mkdirSync, lstatSync, rmSync, symlinkSync, readlinkSync } from "fs";
import { createInterface } from "readline/promises";
@ -173,9 +173,7 @@ function setIndexName(name: string | null): void {
let normalizedName = name;
// Normalize relative paths to prevent malformed database paths
if (name && name.includes('/')) {
const { resolve } = require('path');
const { cwd } = require('process');
const absolutePath = resolve(cwd(), name);
const absolutePath = pathResolve(process.cwd(), name);
// Replace path separators with underscores to create a valid filename
normalizedName = absolutePath.replace(/\//g, '_').replace(/^_/, '');
}
@ -1681,10 +1679,14 @@ function parseChunkStrategy(value: unknown): ChunkStrategy | undefined {
throw new Error(`--chunk-strategy must be "auto" or "regex" (got "${s}")`);
}
/**
 * Pick the embedding model for CLI commands.
 *
 * @returns The value of `QMD_EMBED_MODEL` when the variable is set,
 *   otherwise `DEFAULT_EMBED_MODEL_URI`.
 */
export function resolveEmbedModelForCli(): string {
  const configured = process.env.QMD_EMBED_MODEL;
  if (configured == null) {
    return DEFAULT_EMBED_MODEL_URI;
  }
  return configured;
}
async function vectorIndex(
model: string = DEFAULT_EMBED_MODEL_URI,
model: string = resolveEmbedModelForCli(),
force: boolean = false,
batchOptions?: { maxDocsPerBatch?: number; maxBatchBytes?: number; chunkStrategy?: ChunkStrategy },
batchOptions?: { maxDocsPerBatch?: number; maxBatchBytes?: number; chunkStrategy?: ChunkStrategy; collection?: string },
): Promise<void> {
const storeInstance = getStore();
const db = storeInstance.db;
@ -1694,7 +1696,7 @@ async function vectorIndex(
}
// Check if there's work to do before starting
const hashesToEmbed = getHashesNeedingEmbedding(db);
const hashesToEmbed = getHashesNeedingEmbedding(db, batchOptions?.collection);
if (hashesToEmbed === 0 && !force) {
console.log(`${c.green}✓ All content hashes already have embeddings.${c.reset}`);
closeDb();
@ -1715,6 +1717,7 @@ async function vectorIndex(
const result = await generateEmbeddings(storeInstance, {
force,
model,
collection: batchOptions?.collection,
maxDocsPerBatch: batchOptions?.maxDocsPerBatch,
maxBatchBytes: batchOptions?.maxBatchBytes,
chunkStrategy: batchOptions?.chunkStrategy,
@ -2727,7 +2730,7 @@ function showHelp(): void {
console.log("Maintenance:");
console.log(" qmd status - View index + collection health");
console.log(" qmd update [--pull] - Re-index collections (optionally git pull first)");
console.log(" qmd embed [-f] - Generate/refresh vector embeddings");
console.log(" qmd embed [-f] [-c <name>] - Generate/refresh vector embeddings");
console.log(" --max-docs-per-batch <n> - Cap docs loaded into memory per embedding batch");
console.log(" --max-batch-mb <n> - Cap UTF-8 MB loaded into memory per embedding batch");
console.log(" qmd cleanup - Clear caches, vacuum DB");
@ -3120,10 +3123,17 @@ if (isMain) {
const maxDocsPerBatch = parseEmbedBatchOption("maxDocsPerBatch", cli.values["max-docs-per-batch"]);
const maxBatchMb = parseEmbedBatchOption("maxBatchBytes", cli.values["max-batch-mb"]);
const embedChunkStrategy = parseChunkStrategy(cli.values["chunk-strategy"]);
await vectorIndex(DEFAULT_EMBED_MODEL_URI, !!cli.values.force, {
// Validate -c against configured collections before dispatching, so a
// typo errors with "Collection not found: X" instead of silently
// reporting success because no pending docs match a nonexistent name.
// embed operates on a single collection; only the first value is used.
const embedValidatedCollections = resolveCollectionFilter(cli.opts.collection, false);
const embedCollection = embedValidatedCollections[0];
await vectorIndex(resolveEmbedModelForCli(), !!cli.values.force, {
maxDocsPerBatch,
maxBatchBytes: maxBatchMb === undefined ? undefined : maxBatchMb * 1024 * 1024,
chunkStrategy: embedChunkStrategy,
collection: embedCollection,
});
} catch (error) {
console.error(error instanceof Error ? error.message : String(error));
@ -3247,9 +3257,10 @@ if (isMain) {
const logPath = resolve(cacheDir, "mcp.log");
const logFd = openSync(logPath, "w"); // truncate — fresh log per daemon run
const selfPath = fileURLToPath(import.meta.url);
const indexArgs = cli.values.index ? ["--index", String(cli.values.index)] : [];
const spawnArgs = selfPath.endsWith(".ts")
? ["--import", pathJoin(dirname(selfPath), "..", "..", "node_modules", "tsx", "dist", "esm", "index.mjs"), selfPath, "mcp", "--http", "--port", String(port)]
: [selfPath, "mcp", "--http", "--port", String(port)];
? ["--import", pathJoin(dirname(selfPath), "..", "..", "node_modules", "tsx", "dist", "esm", "index.mjs"), selfPath, ...indexArgs, "mcp", "--http", "--port", String(port)]
: [selfPath, ...indexArgs, "mcp", "--http", "--port", String(port)];
const child = nodeSpawn(process.execPath, spawnArgs, {
stdio: ["ignore", logFd, logFd],
detached: true,
@ -3269,7 +3280,7 @@ if (isMain) {
process.removeAllListeners("SIGINT");
const { startMcpHttpServer } = await import("../mcp/server.js");
try {
await startMcpHttpServer(port);
await startMcpHttpServer(port, { dbPath: getDbPath() });
} catch (e: any) {
if (e?.code === "EADDRINUSE") {
console.error(`Port ${port} already in use. Try a different port with --port.`);
@ -3280,7 +3291,7 @@ if (isMain) {
} else {
// Default: stdio transport
const { startMcpServer } = await import("../mcp/server.js");
await startMcpServer();
await startMcpServer({ dbPath: getDbPath() });
}
break;
}

View File

@ -6,8 +6,8 @@
*/
import { existsSync, mkdirSync, readFileSync, writeFileSync } from "fs";
import { join, dirname } from "path";
import { homedir } from "os";
import { join, dirname, resolve } from "path";
import { qmdHomedir } from "./paths.js";
import YAML from "yaml";
// ============================================================================
@ -101,9 +101,7 @@ export function setConfigSource(source?: { configPath?: string; config?: Collect
export function setConfigIndexName(name: string): void {
// Resolve relative paths to absolute paths and sanitize for use as filename
if (name.includes('/')) {
const { resolve } = require('path');
const { cwd } = require('process');
const absolutePath = resolve(cwd(), name);
const absolutePath = resolve(process.cwd(), name);
// Replace path separators with underscores to create a valid filename
currentIndexName = absolutePath.replace(/\//g, '_').replace(/^_/, '');
} else {
@ -120,7 +118,7 @@ function getConfigDir(): string {
if (process.env.XDG_CONFIG_HOME) {
return join(process.env.XDG_CONFIG_HOME, "qmd");
}
return join(homedir(), ".config", "qmd");
return join(qmdHomedir(), ".config", "qmd");
}
function getConfigFilePath(): string {

View File

@ -290,6 +290,8 @@ export interface QMDStore {
embed(options?: {
force?: boolean;
model?: string;
/** Restrict embedding to documents in one collection. */
collection?: string;
maxDocsPerBatch?: number;
maxBatchBytes?: number;
chunkStrategy?: ChunkStrategy;
@ -516,6 +518,7 @@ export async function createStore(options: StoreOptions): Promise<QMDStore> {
return generateEmbeddings(internal, {
force: embedOpts?.force,
model: embedOpts?.model,
collection: embedOpts?.collection,
maxDocsPerBatch: embedOpts?.maxDocsPerBatch,
maxBatchBytes: embedOpts?.maxBatchBytes,
chunkStrategy: embedOpts?.chunkStrategy,

View File

@ -4,16 +4,28 @@
* Provides embeddings, text generation, and reranking using local GGUF models.
*/
import {
getLlama,
resolveModelFile,
LlamaChatSession,
LlamaLogLevel,
type Llama,
type LlamaModel,
type LlamaEmbeddingContext,
type Token as LlamaToken,
import type {
Llama,
LlamaModel,
LlamaEmbeddingContext,
Token as LlamaToken,
} from "node-llama-cpp";
/**
 * Structural description of the members this file reads from the dynamically
 * imported "node-llama-cpp" module (see loadNodeLlamaCpp). The import result
 * is cast to this shape rather than relying on a static value import.
 */
type NodeLlamaCppModule = {
// Called with an options object; resolves to a Llama instance.
getLlama: (options: Record<string, unknown>) => Promise<Llama>;
// Called with a model identifier and a cache directory; resolves to a string
// that callers in this file treat as the model file path.
resolveModelFile: (model: string, cacheDir: string) => Promise<string>;
// Constructed with a context sequence; only `prompt` is described here.
LlamaChatSession: new (options: { contextSequence: unknown }) => {
prompt: (prompt: string, options?: Record<string, unknown>) => Promise<string>;
};
// Only the `error` level is described here.
LlamaLogLevel: { error: unknown };
};
let nodeLlamaCppImport: Promise<NodeLlamaCppModule> | null = null;
/**
 * Lazily import "node-llama-cpp" and memoize the import promise in
 * `nodeLlamaCppImport`, so the dynamic import is started at most once and
 * every caller awaits the same promise.
 */
async function loadNodeLlamaCpp(): Promise<NodeLlamaCppModule> {
  if (nodeLlamaCppImport == null) {
    nodeLlamaCppImport = import("node-llama-cpp") as Promise<NodeLlamaCppModule>;
  }
  return nodeLlamaCppImport;
}
import { homedir } from "os";
import { join } from "path";
import { existsSync, mkdirSync, statSync, unlinkSync, readdirSync, readFileSync, writeFileSync, openSync, readSync, closeSync } from "fs";
@ -344,6 +356,7 @@ export async function pullModels(
}
}
const { resolveModelFile } = await loadNodeLlamaCpp();
const path = await resolveModelFile(model, cacheDir);
validateGgufFile(path, model);
const sizeBytes = existsSync(path) ? statSync(path).size : 0;
@ -438,7 +451,41 @@ export type LlamaCppConfig = {
const DEFAULT_INACTIVITY_TIMEOUT_MS = 5 * 60 * 1000;
const DEFAULT_EXPAND_CONTEXT_SIZE = 2048;
type LlamaGpuMode = "auto" | "metal" | "vulkan" | "cuda" | false;
export type LlamaGpuMode = "auto" | "metal" | "vulkan" | "cuda" | false;
/** Inputs to resolveSafeParallelism(). */
type ParallelismOptions = {
// Active GPU backend name, or false for CPU; compared against "cuda".
gpu: string | false;
// Platform to evaluate; resolveSafeParallelism falls back to process.platform.
platform?: NodeJS.Platform;
// Parallelism computed by the caller; used when no override or safety cap applies.
computed: number;
// Raw override string forwarded to resolveParallelismOverride(); when omitted,
// that function's own default (the QMD_EMBED_PARALLELISM env var) applies.
envValue?: string;
};
/**
 * Parse an explicit parallelism override.
 *
 * @param envValue - Raw override text; defaults to `QMD_EMBED_PARALLELISM`.
 * @returns `undefined` when the value is missing/blank or is not an integer
 *   of at least 1 (a warning is written to stderr in the latter case);
 *   otherwise the parsed integer, capped at 8.
 */
export function resolveParallelismOverride(envValue = process.env.QMD_EMBED_PARALLELISM): number | undefined {
  const raw = (envValue ?? "").trim();
  if (raw.length === 0) {
    return undefined;
  }

  const requested = Number(raw);
  const isPositiveInteger = Number.isInteger(requested) && requested >= 1;
  if (isPositiveInteger) {
    // Cap at the maximum supported context count.
    return requested > 8 ? 8 : requested;
  }

  process.stderr.write(`QMD Warning: invalid QMD_EMBED_PARALLELISM="${envValue}", using automatic parallelism.\n`);
  return undefined;
}
/**
 * Choose the number of parallel contexts to use.
 *
 * Precedence: an explicit override (see resolveParallelismOverride) wins;
 * otherwise Windows + CUDA is forced to 1; otherwise the caller's computed
 * value is used, with a floor of 1.
 *
 * @param options - Backend, platform, computed parallelism and optional raw override.
 * @returns The parallelism to use.
 */
export function resolveSafeParallelism(options: ParallelismOptions): number {
  const { gpu, computed, envValue } = options;

  const forced = resolveParallelismOverride(envValue);
  if (forced !== undefined) {
    return forced;
  }

  // node-llama-cpp/llama.cpp CUDA on Windows is unstable with multiple
  // simultaneous contexts (ggml-cuda.cu:98 in #519). Vulkan and CPU do not
  // show the same failure mode, so only Windows CUDA is serialized by default.
  const platform = options.platform ?? process.platform;
  const isWindowsCuda = platform === "win32" && gpu === "cuda";
  return isWindowsCuda ? 1 : Math.max(1, computed);
}
export function resolveLlamaGpuMode(envValue = process.env.QMD_LLAMA_GPU): LlamaGpuMode {
const normalized = envValue?.trim().toLowerCase() ?? "";
@ -619,6 +666,7 @@ export class LlamaCpp implements LLM {
if (!this.llama) {
const gpuMode = resolveLlamaGpuMode();
const { getLlama, LlamaLogLevel } = await loadNodeLlamaCpp();
const loadLlama = async (gpu: LlamaGpuMode) =>
await getLlama({
build: allowBuild ? "autoAttempt" : "never",
@ -661,6 +709,7 @@ export class LlamaCpp implements LLM {
private async resolveModel(modelUri: string): Promise<string> {
this.ensureModelCacheDir();
// resolveModelFile handles HF URIs and downloads to the cache dir
const { resolveModelFile } = await loadNodeLlamaCpp();
const modelPath = await resolveModelFile(modelUri, this.modelCacheDir);
validateGgufFile(modelPath, modelUri);
return modelPath;
@ -711,16 +760,18 @@ export class LlamaCpp implements LLM {
const vram = await llama.getVramState();
const freeMB = vram.free / (1024 * 1024);
const maxByVram = Math.floor((freeMB * 0.25) / perContextMB);
return Math.max(1, Math.min(8, maxByVram));
const computed = Math.max(1, Math.min(8, maxByVram));
return resolveSafeParallelism({ gpu: llama.gpu, computed });
} catch {
return 2;
return resolveSafeParallelism({ gpu: llama.gpu, computed: 2 });
}
}
// CPU: split cores across contexts. At least 4 threads per context.
const cores = llama.cpuMathCores || 4;
const maxContexts = Math.floor(cores / 4);
return Math.max(1, Math.min(4, maxContexts));
const computed = Math.max(1, Math.min(4, maxContexts));
return resolveSafeParallelism({ gpu: false, computed });
}
/**
@ -1079,6 +1130,7 @@ export class LlamaCpp implements LLM {
// Create fresh context -> sequence -> session for each call
const context = await this.generateModel!.createContext();
const sequence = context.getSequence();
const { LlamaChatSession } = await loadNodeLlamaCpp();
const session = new LlamaChatSession({ contextSequence: sequence });
const maxTokens = options.maxTokens ?? 150;
@ -1158,6 +1210,7 @@ export class LlamaCpp implements LLM {
contextSize: this.expandContextSize,
});
const sequence = genContext.getSequence();
const { LlamaChatSession } = await loadNodeLlamaCpp();
const session = new LlamaChatSession({ contextSequence: sequence });
try {

View File

@ -538,7 +538,11 @@ Intent-aware lex (C++ performance, not sports):
// Transport: stdio (default)
// =============================================================================
export async function startMcpServer(): Promise<void> {
/** Startup options shared by the stdio and HTTP MCP server entry points. */
export type McpStartupOptions = {
// Database path handed to createStore(); when omitted the servers fall back
// to getDefaultDbPath().
dbPath?: string;
};
export async function startMcpServer(options: McpStartupOptions = {}): Promise<void> {
// Opt into production mode when the MCP server is actually started, not
// when this module is merely imported for its exports. Importing the module
// at the top level flipped the global production flag and broke test
@ -547,7 +551,7 @@ export async function startMcpServer(): Promise<void> {
enableProductionMode();
const configPath = getConfigPath();
const store = await createStore({
dbPath: getDefaultDbPath(),
dbPath: options.dbPath ?? getDefaultDbPath(),
...(existsSync(configPath) ? { configPath } : {}),
});
const server = await createMcpServer(store);
@ -569,14 +573,17 @@ export type HttpServerHandle = {
* Start MCP server over Streamable HTTP (JSON responses, no SSE).
* Binds to localhost only. Returns a handle for shutdown and port discovery.
*/
export async function startMcpHttpServer(port: number, options?: { quiet?: boolean }): Promise<HttpServerHandle> {
export async function startMcpHttpServer(
port: number,
options: ({ quiet?: boolean } & McpStartupOptions) = {},
): Promise<HttpServerHandle> {
// See startMcpServer() for the rationale — flip production mode here so the
// HTTP transport resolves the real database path, without leaking state into
// callers that only import this module for its exports (e.g. tests).
enableProductionMode();
const configPath = getConfigPath();
const store = await createStore({
dbPath: getDefaultDbPath(),
dbPath: options.dbPath ?? getDefaultDbPath(),
...(existsSync(configPath) ? { configPath } : {}),
});
@ -686,6 +693,7 @@ export async function startMcpHttpServer(port: number, options?: { quiet?: boole
limit: params.limit ?? 10,
minScore: params.minScore ?? 0,
intent: params.intent,
rerank: params.rerank,
});
// Use first lex or vec query for snippet extraction

5
src/paths.ts Normal file
View File

@ -0,0 +1,5 @@
import { homedir as osHomedir } from "node:os";
/**
 * Resolve the home directory used for qmd paths.
 *
 * @returns The first non-empty value among `HOME`, `USERPROFILE`, and
 *   `os.homedir()`; `"/tmp"` if all of them are empty.
 */
export function qmdHomedir(): string {
  const { HOME, USERPROFILE } = process.env;
  if (HOME) {
    return HOME;
  }
  if (USERPROFILE) {
    return USERPROFILE;
  }
  return osHomedir() || "/tmp";
}

View File

@ -18,6 +18,7 @@ import { createHash } from "crypto";
import { readFileSync, realpathSync, statSync, mkdirSync } from "node:fs";
// Note: node:path resolve is not imported — we export our own cross-platform resolve()
import fastGlob from "fast-glob";
import { qmdHomedir } from "./paths.js";
import {
LlamaCpp,
getDefaultLlamaCpp,
@ -38,7 +39,6 @@ import type {
// Configuration
// =============================================================================
const HOME = process.env.HOME || process.env.USERPROFILE || "/tmp";
export const DEFAULT_EMBED_MODEL = "embeddinggemma";
export const DEFAULT_RERANK_MODEL = "ExpedientFalcon/qwen3-reranker:0.6b-q8_0";
export const DEFAULT_QUERY_MODEL = "Qwen/Qwen3-1.7B";
@ -334,7 +334,7 @@ export type ExpandedQuery = {
// =============================================================================
export function homedir(): string {
return HOME;
return qmdHomedir();
}
/**
@ -733,6 +733,73 @@ export function verifySqliteVecLoaded(db: Database): void {
let _sqliteVecAvailable: boolean | null = null;
// Matches a single Han, Hiragana, Katakana or Hangul character.
const CJK_CHAR_PATTERN = /[\p{Script=Han}\p{Script=Hiragana}\p{Script=Katakana}\p{Script=Hangul}]/u;
// Matches every maximal run of such characters.
const CJK_RUN_PATTERN = /[\p{Script=Han}\p{Script=Hiragana}\p{Script=Katakana}\p{Script=Hangul}]+/gu;
// Value stored under 'fts_cjk_normalized_version' once the FTS index has been rebuilt.
const FTS_CJK_NORMALIZED_VERSION = "1";

/**
 * Space out CJK text so FTS5 can index it per character.
 *
 * The unicode61 tokenizer does not split CJK text into searchable words, so
 * each run of CJK characters is replaced by its characters separated (and
 * surrounded) by single spaces. Text outside CJK runs is left untouched.
 *
 * @param text - Text to normalize.
 * @returns The text with every CJK run expanded to space-separated characters.
 */
export function normalizeCjkForFTS(text: string): string {
  const spaceOut = (run: string): string => {
    // Spread iterates by code point, keeping astral characters intact.
    const chars = [...run];
    return " " + chars.join(" ") + " ";
  };
  return text.replace(CJK_RUN_PATTERN, spaceOut);
}

/** Report whether the text contains at least one CJK character. */
function containsCjk(text: string): boolean {
  return text.search(CJK_CHAR_PATTERN) !== -1;
}
/**
 * Sanitize a phrase for FTS5: space out CJK runs, split on whitespace,
 * sanitize each token with sanitizeFTS5Term, drop tokens that come back
 * empty, and join the remainder with single spaces.
 */
function sanitizeFTS5Phrase(phrase: string): string {
  const terms: string[] = [];
  for (const token of normalizeCjkForFTS(phrase).split(/\s+/)) {
    const cleaned = sanitizeFTS5Term(token);
    if (cleaned) {
      terms.push(cleaned);
    }
  }
  return terms.join(' ');
}
/**
 * One-time migration: repopulate documents_fts with CJK-normalized text.
 *
 * Does nothing when store_config already records the current
 * FTS_CJK_NORMALIZED_VERSION. Otherwise it empties documents_fts (recreating
 * the virtual table if the delete fails), re-inserts one row per active
 * document with filepath, title and body passed through normalizeCjkForFTS,
 * and finally stores the version marker so later calls return early.
 *
 * @param db - Database holding the documents, content, documents_fts and
 *   store_config tables.
 */
function rebuildFTSForCjkNormalization(db: Database): void {
// Skip the rebuild when the stored marker matches the current version.
const version = db.prepare(`SELECT value FROM store_config WHERE key = 'fts_cjk_normalized_version'`).get() as { value?: string } | undefined;
if (version?.value === FTS_CJK_NORMALIZED_VERSION) return;
// Clear the existing index contents before re-inserting.
try {
db.exec(`DELETE FROM documents_fts WHERE rowid >= 0`);
} catch {
// Some older/corrupt FTS5 shadow-table states can reject bulk deletes even
// though reads still work. Recreate the virtual table; documents_fts is a
// derived index, so rebuilding it from documents/content is safe.
db.exec(`DROP TABLE IF EXISTS documents_fts`);
db.exec(`
CREATE VIRTUAL TABLE documents_fts USING fts5(
filepath, title, body,
tokenize='porter unicode61'
)
`);
}
// Load every active document together with its content body. All rows are
// materialized in memory before any insert runs.
const rows = db.prepare(`
SELECT d.id, d.collection, d.path, d.title, content.doc as body
FROM documents d
JOIN content ON content.hash = d.hash
WHERE d.active = 1
`).all() as { id: number; collection: string; path: string; title: string; body: string }[];
const insert = db.prepare(`INSERT INTO documents_fts(rowid, filepath, title, body) VALUES (?, ?, ?, ?)`);
// Perform all inserts through db.transaction; the FTS rowid is the document
// id and the indexed filepath is "<collection>/<path>".
const rebuild = db.transaction(() => {
for (const row of rows) {
insert.run(
row.id,
normalizeCjkForFTS(`${row.collection}/${row.path}`),
normalizeCjkForFTS(row.title),
normalizeCjkForFTS(row.body)
);
}
});
rebuild();
// Record the marker only after the inserts have run, so a failed rebuild is
// retried on the next call.
db.prepare(`
INSERT OR REPLACE INTO store_config(key, value)
VALUES ('fts_cjk_normalized_version', ?)
`).run(FTS_CJK_NORMALIZED_VERSION);
}
function initializeDatabase(db: Database): void {
try {
loadSqliteVec(db);
@ -838,9 +905,12 @@ function initializeDatabase(db: Database): void {
)
`);
// Triggers to keep FTS in sync
// Triggers keep FTS in sync for callers that write directly to documents.
// Production indexing paths rebuild entries in TypeScript so CJK text can be
// normalized before it reaches the unicode61 tokenizer.
db.exec(`DROP TRIGGER IF EXISTS documents_ai`);
db.exec(`
CREATE TRIGGER IF NOT EXISTS documents_ai AFTER INSERT ON documents
CREATE TRIGGER documents_ai AFTER INSERT ON documents
WHEN new.active = 1
BEGIN
INSERT INTO documents_fts(rowid, filepath, title, body)
@ -853,14 +923,16 @@ function initializeDatabase(db: Database): void {
END
`);
db.exec(`DROP TRIGGER IF EXISTS documents_ad`);
db.exec(`
CREATE TRIGGER IF NOT EXISTS documents_ad AFTER DELETE ON documents BEGIN
CREATE TRIGGER documents_ad AFTER DELETE ON documents BEGIN
DELETE FROM documents_fts WHERE rowid = old.id;
END
`);
db.exec(`DROP TRIGGER IF EXISTS documents_au`);
db.exec(`
CREATE TRIGGER IF NOT EXISTS documents_au AFTER UPDATE ON documents
CREATE TRIGGER documents_au AFTER UPDATE ON documents
BEGIN
-- Delete from FTS if no longer active
DELETE FROM documents_fts WHERE rowid = old.id AND new.active = 0;
@ -875,6 +947,8 @@ function initializeDatabase(db: Database): void {
WHERE new.active = 1;
END
`);
rebuildFTSForCjkNormalization(db);
}
// =============================================================================
@ -1300,6 +1374,11 @@ export type EmbedResult = {
export type EmbedOptions = {
force?: boolean;
model?: string;
/**
* Restrict embedding to documents in a single collection.
* When omitted, all pending documents across every collection are embedded.
*/
collection?: string;
maxDocsPerBatch?: number;
maxBatchBytes?: number;
chunkStrategy?: ChunkStrategy;
@ -1341,16 +1420,18 @@ function resolveEmbedOptions(options?: EmbedOptions): Required<Pick<EmbedOptions
};
}
function getPendingEmbeddingDocs(db: Database): PendingEmbeddingDoc[] {
return db.prepare(`
function getPendingEmbeddingDocs(db: Database, collection?: string): PendingEmbeddingDoc[] {
const collectionFilter = collection ? `AND d.collection = ?` : ``;
const stmt = db.prepare(`
SELECT d.hash, MIN(d.path) as path, length(CAST(c.doc AS BLOB)) as bytes
FROM documents d
JOIN content c ON d.hash = c.hash
LEFT JOIN content_vectors v ON d.hash = v.hash AND v.seq = 0
WHERE d.active = 1 AND v.hash IS NULL
WHERE d.active = 1 AND v.hash IS NULL ${collectionFilter}
GROUP BY d.hash
ORDER BY MIN(d.path)
`).all() as PendingEmbeddingDoc[];
`);
return (collection ? stmt.all(collection) : stmt.all()) as PendingEmbeddingDoc[];
}
function buildEmbeddingBatches(
@ -1411,16 +1492,17 @@ export async function generateEmbeddings(
options?: EmbedOptions
): Promise<EmbedResult> {
const db = store.db;
const model = options?.model ?? DEFAULT_EMBED_MODEL;
const llm = getLlm(store);
const model = options?.model ?? llm.embedModelName ?? DEFAULT_EMBED_MODEL;
const now = new Date().toISOString();
const { maxDocsPerBatch, maxBatchBytes } = resolveEmbedOptions(options);
const encoder = new TextEncoder();
if (options?.force) {
clearAllEmbeddings(db);
clearAllEmbeddings(db, options?.collection);
}
const docsToEmbed = getPendingEmbeddingDocs(db);
const docsToEmbed = getPendingEmbeddingDocs(db, options?.collection);
if (docsToEmbed.length === 0) {
return { docsProcessed: 0, chunksEmbedded: 0, errors: 0, durationMs: 0 };
@ -1430,8 +1512,7 @@ export async function generateEmbeddings(
const startTime = Date.now();
// Use store's LlamaCpp or global singleton, wrapped in a session
const llm = getLlm(store);
const embedModelUri = llm.embedModelName;
const embedModelUri = model;
// Create a session manager for this llm instance
const result = await withLLMSessionForLlm(llm, async (session) => {
@ -1868,13 +1949,15 @@ export type IndexStatus = {
// Index health
// =============================================================================
export function getHashesNeedingEmbedding(db: Database): number {
const result = db.prepare(`
export function getHashesNeedingEmbedding(db: Database, collection?: string): number {
const collectionFilter = collection ? `AND d.collection = ?` : ``;
const stmt = db.prepare(`
SELECT COUNT(DISTINCT d.hash) as count
FROM documents d
LEFT JOIN content_vectors v ON d.hash = v.hash AND v.seq = 0
WHERE d.active = 1 AND v.hash IS NULL
`).get() as { count: number };
WHERE d.active = 1 AND v.hash IS NULL ${collectionFilter}
`);
const result = (collection ? stmt.get(collection) : stmt.get()) as { count: number };
return result.count;
}
@ -1949,13 +2032,15 @@ export function deleteInactiveDocuments(db: Database): number {
}
/**
* Remove orphaned content hashes that are not referenced by any active document.
* Remove orphaned content hashes that are not referenced by any document.
* Inactive documents are soft-deleted tombstones, so their content rows must
* remain referenced until deleteInactiveDocuments() hard-deletes them.
* Returns the number of orphaned content hashes deleted.
*/
export function cleanupOrphanedContent(db: Database): number {
const result = db.prepare(`
DELETE FROM content
WHERE hash NOT IN (SELECT DISTINCT hash FROM documents WHERE active = 1)
WHERE hash NOT IN (SELECT DISTINCT hash FROM documents)
`).run();
return result.changes;
}
@ -2077,6 +2162,28 @@ export function insertContent(db: Database, hash: string, content: string, creat
.run(hash, content, createdAt);
}
/**
 * Refresh the documents_fts entry for a single document.
 *
 * Any existing FTS row with rowid `documentId` is always removed. A new row is
 * written only when the document exists, is active, and has a matching
 * content row; its filepath ("<collection>/<path>"), title and body are passed
 * through normalizeCjkForFTS first.
 *
 * @param db Database handle.
 * @param documentId documents.id of the document to re-index.
 */
function rebuildDocumentFTS(db: Database, documentId: number): void {
  type ActiveDocumentRow = { id: number; collection: string; path: string; title: string; body: string };

  // Look the document up first; an inactive or missing document yields undefined.
  const lookup = db.prepare(`
SELECT d.id, d.collection, d.path, d.title, content.doc as body
FROM documents d
JOIN content ON content.hash = d.hash
WHERE d.id = ? AND d.active = 1
`);
  const current = lookup.get(documentId) as ActiveDocumentRow | undefined;

  // Drop the stale entry regardless of whether a replacement will be written.
  const removeStale = db.prepare(`DELETE FROM documents_fts WHERE rowid = ?`);
  removeStale.run(documentId);

  if (current) {
    const addEntry = db.prepare(`
INSERT INTO documents_fts(rowid, filepath, title, body)
VALUES (?, ?, ?, ?)
`);
    const filepath = normalizeCjkForFTS(`${current.collection}/${current.path}`);
    const title = normalizeCjkForFTS(current.title);
    const body = normalizeCjkForFTS(current.body);
    addEntry.run(current.id, filepath, title, body);
  }
}
/**
* Insert a new document into the documents table.
*/
@ -2098,6 +2205,9 @@ export function insertDocument(
modified_at = excluded.modified_at,
active = 1
`).run(collectionName, path, title, hash, createdAt, modifiedAt);
const row = db.prepare(`SELECT id FROM documents WHERE collection = ? AND path = ?`).get(collectionName, path) as { id: number } | undefined;
if (row) rebuildDocumentFTS(db, row.id);
}
/**
@ -2116,8 +2226,8 @@ export function findActiveDocument(
}
/**
* Find an active document, falling back to a legacy lowercase path.
* If found under the legacy path, renames it in-place and rebuilds the
* Find an active document, falling back to a case-insensitive path match.
* If found under a different casing, renames it in-place and rebuilds the
* FTS entry. Embeddings are keyed by content hash, so the rename is
* safe — no re-embedding required.
*
@ -2132,10 +2242,12 @@ export function findOrMigrateLegacyDocument(
const existing = findActiveDocument(db, collectionName, path);
if (existing) return existing;
const legacyPath = path.toLowerCase();
if (legacyPath === path) return null;
const legacy = findActiveDocument(db, collectionName, legacyPath);
const legacy = db.prepare(`
SELECT id, hash, title FROM documents
WHERE collection = ? AND path COLLATE NOCASE = ? AND active = 1
ORDER BY id
LIMIT 1
`).get(collectionName, path) as { id: number; hash: string; title: string } | undefined;
if (!legacy) return null;
// Wrap rename + FTS rebuild in a transaction for atomicity.
@ -2148,15 +2260,7 @@ export function findOrMigrateLegacyDocument(
if (result.changes === 0) return false;
// FTS5 does not reliably update via the documents_au trigger's
// INSERT OR REPLACE. Manually rebuild the FTS entry.
db.prepare(`DELETE FROM documents_fts WHERE rowid = ?`).run(legacy.id);
db.prepare(`
INSERT INTO documents_fts(rowid, filepath, title, body)
SELECT id, collection || '/' || path, title,
(SELECT doc FROM content WHERE hash = documents.hash)
FROM documents WHERE id = ?
`).run(legacy.id);
rebuildDocumentFTS(db, legacy.id);
return true;
});
@ -2177,6 +2281,7 @@ export function updateDocumentTitle(
): void {
db.prepare(`UPDATE documents SET title = ?, modified_at = ? WHERE id = ?`)
.run(title, modifiedAt, documentId);
rebuildDocumentFTS(db, documentId);
}
/**
@ -2192,6 +2297,7 @@ export function updateDocument(
): void {
db.prepare(`UPDATE documents SET title = ?, hash = ?, modified_at = ? WHERE id = ?`)
.run(title, hash, modifiedAt, documentId);
rebuildDocumentFTS(db, documentId);
}
/**
@ -2940,7 +3046,7 @@ function buildFTS5Query(query: string): string | null {
const phrase = s.slice(start, i).trim();
i++; // skip closing quote
if (phrase.length > 0) {
const sanitized = phrase.split(/\s+/).map(t => sanitizeFTS5Term(t)).filter(t => t).join(' ');
const sanitized = sanitizeFTS5Phrase(phrase);
if (sanitized) {
const ftsPhrase = `"${sanitized}"`; // Exact phrase, no prefix match
if (negated) {
@ -2968,6 +3074,16 @@ function buildFTS5Query(query: string): string | null {
positive.push(ftsPhrase);
}
}
} else if (containsCjk(term)) {
const sanitized = sanitizeFTS5Phrase(term);
if (sanitized) {
const ftsPhrase = `"${sanitized}"`; // CJK phrase over character tokens
if (negated) {
negative.push(ftsPhrase);
} else {
positive.push(ftsPhrase);
}
}
} else {
const sanitized = sanitizeFTS5Term(term);
if (sanitized) {
@ -3212,12 +3328,68 @@ export function getHashesForEmbedding(db: Database): { hash: string; body: strin
}
/**
* Clear all embeddings from the database (force re-index).
* Deletes all rows from content_vectors and drops the vectors_vec table.
* Clear embeddings for the whole index, or just for one collection.
*
* When `collection` is omitted the entire content_vectors table is emptied and
* the vectors_vec virtual table is dropped (it is recreated with the right
* dimensions on the next embed run).
*
* When `collection` is provided, only vectors whose hash is referenced
* exclusively by active documents in that collection are removed. Hashes
* shared with active documents in other collections are left in place so
* vector search keeps working there (content_vectors is keyed globally by
* content hash; identical document bodies across collections share a row).
* vectors_vec is preserved so other collections keep working unless the scoped
* clear empties content_vectors entirely, in which case it is dropped so the
* next embed can recreate the table with the current dimensions.
*/
export function clearAllEmbeddings(db: Database): void {
db.exec(`DELETE FROM content_vectors`);
db.exec(`DROP TABLE IF EXISTS vectors_vec`);
/**
 * Clear stored embeddings for the whole index, or for one collection only.
 *
 * Unscoped (`collection` omitted or empty): every row of content_vectors is
 * deleted and the vectors_vec table is dropped.
 *
 * Scoped: only hashes that belong to an active document of `collection` and
 * are not used by an active document of any other collection are cleared,
 * from vectors_vec (when that table exists) and from content_vectors. Both
 * deletes run inside one transaction so a failure part-way cannot leave
 * content_vectors rows whose vectors are already gone. vectors_vec is dropped
 * afterwards only when content_vectors ends up completely empty.
 *
 * @param db Database handle.
 * @param collection Optional collection name restricting what is cleared.
 */
export function clearAllEmbeddings(db: Database, collection?: string): void {
  if (!collection) {
    db.exec(`DELETE FROM content_vectors`);
    db.exec(`DROP TABLE IF EXISTS vectors_vec`);
    return;
  }

  // Bind to a const so the narrowed (non-empty string) type is kept inside the closure below.
  const targetCollection = collection;

  // Hashes used by active documents of this collection and by no active
  // document of any other collection.
  const exclusiveHashesQuery = `
SELECT DISTINCT d.hash
FROM documents d
WHERE d.collection = ? AND d.active = 1
AND NOT EXISTS (
SELECT 1 FROM documents d2
WHERE d2.hash = d.hash
AND d2.active = 1
AND d2.collection != d.collection
)
`;

  const clearScopedVectors = db.transaction(() => {
    const vecTableExists = db
      .prepare(`SELECT 1 FROM sqlite_master WHERE type='table' AND name='vectors_vec'`)
      .get();

    if (vecTableExists) {
      // The (hash, seq) pairs must be read before the content_vectors rows are
      // deleted; vectors_vec rows are keyed as "<hash>_<seq>".
      const hashSeqRows = db.prepare(`
SELECT cv.hash, cv.seq
FROM content_vectors cv
WHERE cv.hash IN (${exclusiveHashesQuery})
`).all(targetCollection) as { hash: string; seq: number }[];
      const delVec = db.prepare(`DELETE FROM vectors_vec WHERE hash_seq = ?`);
      for (const row of hashSeqRows) {
        delVec.run(`${row.hash}_${row.seq}`);
      }
    }

    db.prepare(`
DELETE FROM content_vectors
WHERE hash IN (${exclusiveHashesQuery})
`).run(targetCollection);
  });
  clearScopedVectors();

  // With no vectors left at all, drop vectors_vec so it can be recreated later.
  const remaining = db
    .prepare(`SELECT COUNT(*) AS n FROM content_vectors`)
    .get() as { n: number };
  if (remaining.n === 0) {
    db.exec(`DROP TABLE IF EXISTS vectors_vec`);
  }
}
/**
@ -3988,6 +4160,21 @@ export type RankedListMeta = {
query: string;
};
/**
 * Compute the RRF weight of each ranked list used by hybridQuery.
 *
 * The weight depends only on a list's `queryType`, never on its position:
 * - lists produced from the original query (original FTS, original vector
 *   search) are the primary evidence and get 2.0;
 * - expansion-derived lists (lex/vec/hyde) get 1.0.
 *
 * Because position is ignored, a lex expansion inserted ahead of the original
 * vector list cannot take over that list's boost.
 *
 * @param rankedListMeta Metadata describing each ranked list, in fusion order.
 * @returns One weight per entry, in the same order as the input.
 */
export function getHybridRrfWeights(rankedListMeta: RankedListMeta[]): number[] {
  const weightFor = (meta: RankedListMeta): number => {
    if (meta.queryType === "original") {
      return 2.0;
    }
    return 1.0;
  };
  return rankedListMeta.map(weightFor);
}
/**
* Hybrid search: BM25 + vector + query expansion + RRF + chunked reranking.
*
@ -4089,7 +4276,8 @@ export async function hybridQuery(
// Batch embed all vector queries in a single call
const llm = getLlm(store);
const textsToEmbed = vecQueries.map(q => formatQueryForEmbedding(q.text, llm.embedModelName));
const embedModel = llm.embedModelName;
const textsToEmbed = vecQueries.map(q => formatQueryForEmbedding(q.text, embedModel));
hooks?.onEmbedStart?.(textsToEmbed.length);
const embedStart = Date.now();
const embeddings = await llm.embedBatch(textsToEmbed);
@ -4101,7 +4289,7 @@ export async function hybridQuery(
if (!embedding) continue;
const vecResults = await store.searchVec(
vecQueries[i]!.text, DEFAULT_EMBED_MODEL, 20, collection,
vecQueries[i]!.text, embedModel, 20, collection,
undefined, embedding
);
if (vecResults.length > 0) {
@ -4119,8 +4307,9 @@ export async function hybridQuery(
}
}
// Step 4: RRF fusion — first 2 lists (original FTS + first vec) get 2x weight
const weights = rankedLists.map((_, i) => i < 2 ? 2.0 : 1.0);
// Step 4: RRF fusion — original-query FTS and vector lists get 2x weight;
// expansion-derived lists stay at 1x independent of insertion order.
const weights = getHybridRrfWeights(rankedListMeta);
const fused = reciprocalRankFusion(rankedLists, weights);
const rrfTraceByFile = explain ? buildRrfTrace(rankedLists, weights, rankedListMeta) : null;
const candidates = fused.slice(0, candidateLimit);
@ -4331,10 +4520,11 @@ export async function vectorSearchQuery(
options?.hooks?.onExpand?.(query, vecExpanded, Date.now() - expandStart);
// Run original + vec/hyde expanded through vector, sequentially — concurrent embed() hangs
const embedModel = getLlm(store).embedModelName;
const queryTexts = [query, ...vecExpanded.map(q => q.query)];
const allResults = new Map<string, VectorSearchResult>();
for (const q of queryTexts) {
const vecResults = await store.searchVec(q, DEFAULT_EMBED_MODEL, limit, collection);
const vecResults = await store.searchVec(q, embedModel, limit, collection);
for (const r of vecResults) {
const existing = allResults.get(r.filepath);
if (!existing || r.score > existing.score) {
@ -4472,7 +4662,8 @@ export async function structuredSearch(
);
if (vecSearches.length > 0) {
const llm = getLlm(store);
const textsToEmbed = vecSearches.map(s => formatQueryForEmbedding(s.query, llm.embedModelName));
const embedModel = llm.embedModelName;
const textsToEmbed = vecSearches.map(s => formatQueryForEmbedding(s.query, embedModel));
hooks?.onEmbedStart?.(textsToEmbed.length);
const embedStart = Date.now();
const embeddings = await llm.embedBatch(textsToEmbed);
@ -4484,7 +4675,7 @@ export async function structuredSearch(
for (const coll of collectionList) {
const vecResults = await store.searchVec(
vecSearches[i]!.query, DEFAULT_EMBED_MODEL, 20, coll,
vecSearches[i]!.query, embedModel, 20, coll,
undefined, embedding
);
if (vecResults.length > 0) {

View File

@ -6,7 +6,7 @@
*/
import { describe, test, expect } from "vitest";
import { detectLanguage, getASTBreakPoints, extractSymbols } from "../src/ast.js";
import { detectLanguage, getASTBreakPoints, extractSymbols, formatGrammarLoadError } from "../src/ast.js";
import type { SupportedLanguage } from "../src/ast.js";
// =============================================================================
@ -315,6 +315,16 @@ describe("getASTBreakPoints - error handling", () => {
// Should either return some partial break points or empty array — not throw
expect(Array.isArray(points)).toBe(true);
});
test("explains missing grammar packages with a repair command", () => {
  // Simulate the module-resolution error reported when the grammar wasm is absent.
  const missingWasm = new Error("Cannot find module 'tree-sitter-typescript/tree-sitter-typescript.wasm'");
  const message = formatGrammarLoadError("typescript", missingWasm);

  // The message must name the package, give the repair command, and state the fallback.
  const expectedFragments = [
    "tree-sitter-typescript",
    "bun add tree-sitter-typescript@0.23.2",
    "falling back to regex",
  ];
  for (const fragment of expectedFragments) {
    expect(message).toContain(fragment);
  }
});
});
// =============================================================================

View File

@ -0,0 +1,20 @@
import { describe, expect, test } from "vitest";
import { readFileSync } from "fs";
import { join } from "path";
// Checks that src/llm.ts loads node-llama-cpp only through a dynamic import()
// and that the CLI module can be imported on its own.
describe("LLM module loading", () => {
  test("node-llama-cpp is only dynamically imported by LLM operations", () => {
    const llmSourcePath = join(process.cwd(), "src", "llm.ts");
    const llmSource = readFileSync(llmSourcePath, "utf-8");

    // A static import from node-llama-cpp that is not an `import type`.
    const staticValueImport = /import\s+(?!type\b)[\s\S]*?from\s+["']node-llama-cpp["']/;
    expect(llmSource).not.toMatch(staticValueImport);
    expect(llmSource).toContain('import("node-llama-cpp")');
  });

  test("importing the CLI for lightweight commands succeeds", async () => {
    const cliModule = await import("../src/cli/qmd.ts");
    const expectedExports = {
      buildEditorUri: expect.any(Function),
      termLink: expect.any(Function),
    };
    expect(cliModule).toMatchObject(expectedExports);
  });
});

View File

@ -6,14 +6,15 @@
*/
import { describe, test, expect, beforeAll, afterAll, beforeEach } from "vitest";
import { mkdtemp, rm, writeFile, mkdir } from "fs/promises";
import { chmod, copyFile, mkdtemp, rm, writeFile, mkdir } from "fs/promises";
import { existsSync, lstatSync, readFileSync, symlinkSync, writeFileSync, unlinkSync } from "fs";
import { tmpdir } from "os";
import { join, dirname } from "path";
import { fileURLToPath } from "url";
import { spawn } from "child_process";
import { setTimeout as sleep } from "timers/promises";
import { buildEditorUri, termLink } from "../src/cli/qmd.ts";
import { buildEditorUri, termLink, resolveEmbedModelForCli } from "../src/cli/qmd.ts";
import { DEFAULT_EMBED_MODEL_URI } from "../src/llm.ts";
// Test fixtures directory and database path
let testDir: string;
@ -243,6 +244,30 @@ describe("CLI Help", () => {
});
describe("CLI Embed", () => {
test("prefers QMD_EMBED_MODEL for qmd embed", () => {
  // Remember the ambient value so the override cannot leak into other tests.
  const ambientValue = process.env.QMD_EMBED_MODEL;
  process.env.QMD_EMBED_MODEL = "hf:env/embed-model.gguf";
  try {
    const resolved = resolveEmbedModelForCli();
    expect(resolved).toBe("hf:env/embed-model.gguf");
  } finally {
    if (ambientValue !== undefined) {
      process.env.QMD_EMBED_MODEL = ambientValue;
    } else {
      delete process.env.QMD_EMBED_MODEL;
    }
  }
});
test("falls back to the default embed model when QMD_EMBED_MODEL is unset", () => {
  // Remove the variable for the duration of the test, then put back whatever was there.
  const ambientValue = process.env.QMD_EMBED_MODEL;
  delete process.env.QMD_EMBED_MODEL;
  try {
    const resolved = resolveEmbedModelForCli();
    expect(resolved).toBe(DEFAULT_EMBED_MODEL_URI);
  } finally {
    if (ambientValue !== undefined) {
      process.env.QMD_EMBED_MODEL = ambientValue;
    } else {
      delete process.env.QMD_EMBED_MODEL;
    }
  }
});
test("rejects invalid --max-docs-per-batch", async () => {
const { stderr, exitCode } = await runQmd(["embed", "--max-docs-per-batch", "0"]);
expect(exitCode).toBe(1);
@ -1403,13 +1428,18 @@ describe("mcp http daemon", () => {
}
/** Spawn a foreground HTTP server (non-blocking) and return the process */
function spawnHttpServer(port: number): import("child_process").ChildProcess {
const proc = spawn(tsxBin, [qmdScript, "mcp", "--http", "--port", String(port)], {
function spawnHttpServer(
port: number,
options: { args?: string[]; env?: Record<string, string> } = {},
): import("child_process").ChildProcess {
const proc = spawn(tsxBin, [qmdScript, ...(options.args ?? []), "mcp", "--http", "--port", String(port)], {
cwd: fixturesDir,
env: {
...process.env,
INDEX_PATH: daemonDbPath,
QMD_CONFIG_DIR: daemonConfigDir,
PWD: fixturesDir,
...options.env,
},
stdio: ["ignore", "pipe", "pipe"],
});
@ -1481,11 +1511,75 @@ describe("mcp http daemon", () => {
const body = await res.json();
expect(body.status).toBe("ok");
} finally {
const closed = new Promise(r => proc.once("close", r));
proc.kill("SIGTERM");
await new Promise(r => proc.on("close", r));
await closed;
}
});
test("foreground HTTP server honors --index when selecting the store", async () => {
  const customIndex = "mcp-alt-index";
  const uniqueSuffix = (): string => `${Date.now()}-${Math.random().toString(16).slice(2)}`;
  const customCacheDir = join(daemonTestDir, `cache-index-${uniqueSuffix()}`);
  const customConfigDir = join(daemonTestDir, `config-index-${uniqueSuffix()}`);
  await mkdir(customCacheDir, { recursive: true });
  await mkdir(customConfigDir, { recursive: true });

  // INDEX_PATH is blanked and the cache home redirected for every qmd process
  // started by this test, so the index is chosen by the --index argument.
  const indexEnv = {
    INDEX_PATH: "",
    XDG_CACHE_HOME: customCacheDir,
  };
  const runOnCustomIndex = (args: string[]) =>
    runQmd(["--index", customIndex, ...args], {
      dbPath: daemonDbPath,
      configDir: customConfigDir,
      env: indexEnv,
    });

  // Populate the named index with the fixtures collection.
  const addResult = await runOnCustomIndex(["collection", "add", fixturesDir, "--name", "mcp-fixtures"]);
  expect(addResult.exitCode).toBe(0);

  const updateResult = await runOnCustomIndex(["update"]);
  expect(updateResult.exitCode).toBe(0);

  // Start the foreground HTTP server against the same named index.
  const port = randomPort();
  const proc = spawnHttpServer(port, {
    args: ["--index", customIndex],
    env: {
      ...indexEnv,
      QMD_CONFIG_DIR: customConfigDir,
    },
  });

  try {
    const ready = await waitForServer(port);
    expect(ready).toBe(true);

    const payload = { searches: [{ type: "lex", query: "authentication" }], limit: 5, rerank: false };
    const res = await fetch(`http://localhost:${port}/query`, {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify(payload),
    });
    expect(res.status).toBe(200);

    // A hit from the collection added above shows the server opened the named index.
    const body = await res.json();
    const foundFixture = body.results.some(
      (r: { file: string }) => r.file.includes("mcp-fixtures/notes/meeting.md"),
    );
    expect(foundFixture).toBe(true);
  } finally {
    // Subscribe to "close" before sending the signal so the event cannot be missed.
    const closed = new Promise(r => proc.once("close", r));
    proc.kill("SIGTERM");
    await closed;
  }
}, 10000);
// -------------------------------------------------------------------------
// Daemon lifecycle
// -------------------------------------------------------------------------
@ -1601,3 +1695,67 @@ describe("mcp http daemon", () => {
try { unlinkSync(pidPath()); } catch {}
});
});
// =============================================================================
// MCP stdio stdout hygiene
// =============================================================================
// Runs a copy of the bin/qmd wrapper against a fake `node` executable and
// checks that every line written to stdout by `qmd mcp` parses as JSON.
describe("mcp stdio launcher", () => {
test("sets native llama/ggml quiet env before Node starts so stdout stays JSON-RPC only", async () => {
// Throwaway package directory; removed in the finally block below.
const tempPackage = await mkdtemp(join(tmpdir(), "qmd-bin-mcp-"));
try {
await mkdir(join(tempPackage, "bin"), { recursive: true });
await mkdir(join(tempPackage, "dist", "cli"), { recursive: true });
await mkdir(join(tempPackage, "fake-bin"), { recursive: true });
// Copy the project's bin/qmd wrapper into the temp package and make it executable.
const qmdBin = join(tempPackage, "bin", "qmd");
await copyFile(join(projectRoot, "bin", "qmd"), qmdBin);
await chmod(qmdBin, 0o755);
// Force the wrapper down the Node branch, then put our fake `node` first
// in PATH. The fake node behaves like the native llama/ggml layer: it
// writes a non-JSON stdout line unless qmd pre-seeded the documented
// quiet env vars before launching JS.
await writeFile(join(tempPackage, "package-lock.json"), "{}\n");
const fakeNode = join(tempPackage, "fake-bin", "node");
await writeFile(fakeNode, `#!/bin/sh
if [ "\${GGML_BACKEND_SILENT:-}" != "1" ]; then
printf 'llama.cpp native log on stdout\\n'
fi
printf '{"jsonrpc":"2.0","id":1,"result":{"ok":true}}\\n'
`);
await chmod(fakeNode, 0o755);
// Launch with the quiet variables blank, so GGML_BACKEND_SILENT can only be
// "1" inside the fake node if the wrapper itself set it.
const proc = spawn(qmdBin, ["mcp"], {
cwd: tempPackage,
env: {
...process.env,
PATH: `${join(tempPackage, "fake-bin")}:${process.env.PATH}`,
LLAMA_LOG_LEVEL: "",
GGML_LOG_LEVEL: "",
GGML_BACKEND_SILENT: "",
},
stdio: ["ignore", "pipe", "pipe"],
});
// Collect everything the child writes to stdout and stderr.
let stdout = "";
let stderr = "";
proc.stdout?.on("data", (chunk: Buffer) => { stdout += chunk.toString(); });
proc.stderr?.on("data", (chunk: Buffer) => { stderr += chunk.toString(); });
// Resolve with the exit code once the child closes; a null code is reported as 1.
const exitCode = await new Promise<number>((resolve, reject) => {
proc.once("error", reject);
proc.on("close", (code) => resolve(code ?? 1));
});
expect(exitCode).toBe(0);
expect(stderr).toBe("");
// There must be at least one stdout line, and every non-empty line must be valid JSON.
const lines = stdout.trim().split("\n").filter(Boolean);
expect(lines.length).toBeGreaterThan(0);
for (const line of lines) {
expect(() => JSON.parse(line)).not.toThrow();
}
} finally {
await rm(tempPackage, { recursive: true, force: true });
}
});
});

View File

@ -7,7 +7,7 @@
import { describe, test, expect, beforeEach, afterEach } from "vitest";
import { join } from "path";
import { homedir } from "os";
import { qmdHomedir } from "../src/paths.js";
import { getConfigPath, setConfigIndexName } from "../src/collections.js";
// Save/restore env vars around each test
@ -15,6 +15,8 @@ let savedEnv: Record<string, string | undefined>;
beforeEach(() => {
savedEnv = {
HOME: process.env.HOME,
USERPROFILE: process.env.USERPROFILE,
QMD_CONFIG_DIR: process.env.QMD_CONFIG_DIR,
XDG_CONFIG_HOME: process.env.XDG_CONFIG_HOME,
};
@ -38,7 +40,16 @@ describe("getConfigDir via getConfigPath", () => {
test("defaults to ~/.config/qmd when no env vars are set", () => {
delete process.env.QMD_CONFIG_DIR;
delete process.env.XDG_CONFIG_HOME;
expect(getConfigPath()).toBe(join(homedir(), ".config", "qmd", "index.yml"));
expect(getConfigPath()).toBe(join(qmdHomedir(), ".config", "qmd", "index.yml"));
});
test("uses the same USERPROFILE fallback as default DB path when HOME is unset", () => {
  // Remove HOME and both config-dir variables so USERPROFILE is the one left to use.
  for (const name of ["HOME", "QMD_CONFIG_DIR", "XDG_CONFIG_HOME"]) {
    delete process.env[name];
  }
  process.env.USERPROFILE = "/Users/windows-user";

  const resolvedPath = getConfigPath();
  const expectedPath = join("/Users/windows-user", ".config", "qmd", "index.yml");
  expect(resolvedPath).toBe(expectedPath);
});
test("QMD_CONFIG_DIR takes highest priority", () => {

View File

@ -0,0 +1,27 @@
import { describe, expect, test } from "vitest";
import { execFileSync } from "child_process";
import { mkdtempSync } from "fs";
import { tmpdir } from "os";
import { dirname, join, resolve } from "path";
import { fileURLToPath } from "url";
// Repository root: the parent of the directory that contains this test file.
const repoRoot = resolve(dirname(fileURLToPath(import.meta.url)), "..");

// Builds the project and runs the compiled CLI under plain `node`.
describe("Node ESM entrypoints", () => {
  test("CLI --index path normalizes via setIndexName/setConfigIndexName under Node 22+", () => {
    // Both child processes run from the repo root and return their output as a string.
    const runOptions = { cwd: repoRoot, encoding: "utf-8", stdio: "pipe" } as const;

    execFileSync("npm", ["run", "build"], runOptions);

    // A nested path inside a fresh temp directory is passed as the index.
    const tempRoot = mkdtempSync(join(tmpdir(), "qmd-index-"));
    const indexPath = join(tempRoot, "nested", "idx");
    const cliArgs = ["dist/cli/qmd.js", "--index", indexPath, "--version"];
    const output = execFileSync("node", cliArgs, runOptions);

    expect(output).toContain("qmd ");
  }, 120_000);
});

View File

@ -13,6 +13,8 @@ import {
getDefaultLlamaCpp,
disposeDefaultLlamaCpp,
resolveLlamaGpuMode,
resolveParallelismOverride,
resolveSafeParallelism,
withLLMSession,
canUnloadLLM,
SessionReleasedError,
@ -88,6 +90,44 @@ describe("QMD_LLAMA_GPU resolution", () => {
});
});
// Covers resolveSafeParallelism and resolveParallelismOverride: the Windows
// CUDA default, the untouched cases, and the QMD_EMBED_PARALLELISM override.
describe("LLM context parallelism safety", () => {
  test("defaults Windows CUDA to one context to avoid ggml-cuda.cu:98 crashes", () => {
    const parallelism = resolveSafeParallelism({
      gpu: "cuda",
      platform: "win32",
      computed: 8,
      envValue: undefined,
    });
    expect(parallelism).toBe(1);
  });

  test("keeps non-Windows and non-CUDA backends on computed parallelism", () => {
    const linuxCuda = resolveSafeParallelism({ gpu: "cuda", platform: "linux", computed: 8 });
    expect(linuxCuda).toBe(8);

    const windowsVulkan = resolveSafeParallelism({ gpu: "vulkan", platform: "win32", computed: 8 });
    expect(windowsVulkan).toBe(8);

    const windowsCpu = resolveSafeParallelism({ gpu: false, platform: "win32", computed: 4 });
    expect(windowsCpu).toBe(4);
  });

  test("QMD_EMBED_PARALLELISM overrides the Windows CUDA safety default", () => {
    const parallelism = resolveSafeParallelism({
      gpu: "cuda",
      platform: "win32",
      computed: 8,
      envValue: "2",
    });
    expect(parallelism).toBe(2);
  });

  test("QMD_EMBED_PARALLELISM clamps invalid values and warns", () => {
    // Replace stderr writes with a spy so the warnings can be counted.
    const stderrSpy = vi.spyOn(process.stderr, "write").mockReturnValue(true);
    try {
      for (const invalidValue of ["0", "bad"]) {
        expect(resolveParallelismOverride(invalidValue)).toBeUndefined();
      }
      // One stderr write per rejected value.
      expect(stderrSpy).toHaveBeenCalledTimes(2);
      const firstWarning = String(stderrSpy.mock.calls[0]?.[0] || "");
      expect(firstWarning).toContain("QMD_EMBED_PARALLELISM");
    } finally {
      stderrSpy.mockRestore();
    }
  });
});
describe("LlamaCpp expand context size config", () => {
const defaultExpandContextSize = 2048;
@ -654,7 +694,7 @@ describe.skipIf(!!process.env.CI)("LlamaCpp Integration", () => {
for (const doc of result.results) {
console.log(` ${doc.file}: ${doc.score.toFixed(4)}`);
}
});
}, 30000);
});
describe("expandQuery", () => {

27
test/package.test.ts Normal file
View File

@ -0,0 +1,27 @@
import { describe, expect, test } from "vitest";
import { readFileSync } from "node:fs";
import { join } from "node:path";
const root = new URL("..", import.meta.url);
const pkg = JSON.parse(readFileSync(new URL("package.json", root), "utf8"));
// Packaging checks: the AST grammar packages must be hard dependencies, and the
// published package must ship a smoke script that checks the grammar wasm files.
describe("package grammar distribution", () => {
  test("installs AST grammar wasm packages as required runtime dependencies", () => {
    for (const dep of ["tree-sitter-typescript", "tree-sitter-python", "tree-sitter-go", "tree-sitter-rust"]) {
      expect(pkg.dependencies, `${dep} should be a required dependency`).toHaveProperty(dep);
      expect(pkg.optionalDependencies ?? {}, `${dep} should not be optional`).not.toHaveProperty(dep);
    }
  });

  test("documents a packaging smoke check for grammar wasm availability", () => {
    expect(pkg.scripts, "package.json scripts").toHaveProperty("smoke:package-grammars");
    expect(String(pkg.scripts["smoke:package-grammars"])).toContain("check-package-grammars");
    expect(pkg.files, "published package files").toContain("scripts/check-package-grammars.mjs");

    // Read the script through a file: URL, the same way package.json is read
    // above. URL.pathname is percent-encoded (a space becomes %20) and keeps a
    // leading slash before Windows drive letters, so it is not a usable
    // filesystem path on every checkout.
    const scriptUrl = new URL("scripts/check-package-grammars.mjs", root);
    const script = readFileSync(scriptUrl, "utf8");
    expect(script).toContain("tree-sitter-typescript/tree-sitter-typescript.wasm");
    expect(script).toContain("tree-sitter-typescript/tree-sitter-tsx.wasm");
  });
});

View File

@ -982,6 +982,92 @@ describe("embed", () => {
}
});
test("store.embed scopes pending documents to the requested collection", async () => {
  const collections = {
    docs: { path: docsDir, pattern: "**/*.md" },
    notes: { path: notesDir, pattern: "**/*.md" },
  };
  const store = await createStore({ dbPath: freshDbPath(), config: { collections } });

  // Swap in fakes so no real model is loaded.
  const fakeLlm = createFakeEmbedLlm();
  setDefaultLlamaCpp(createFakeTokenizer() as any);
  store.internal.llm = fakeLlm as any;

  // Per collection: number of distinct hashes that have a seq-0 vector row.
  const perCollectionSql = `
SELECT d.collection, COUNT(DISTINCT v.hash) AS count
FROM documents d
LEFT JOIN content_vectors v ON v.hash = d.hash AND v.seq = 0
WHERE d.active = 1
GROUP BY d.collection
ORDER BY d.collection
`;

  try {
    await store.update();
    const embedResult = await store.embed({ collection: "docs" });
    const counts = store.internal.db
      .prepare(perCollectionSql)
      .all() as Array<{ collection: string; count: number }>;

    expect(embedResult.docsProcessed).toBe(3);
    expect(embedResult.chunksEmbedded).toBe(3);
    // Only "docs" gained vectors; "notes" is still entirely pending.
    expect(counts).toEqual([
      { collection: "docs", count: 3 },
      { collection: "notes", count: 0 },
    ]);
  } finally {
    setDefaultLlamaCpp(null);
    await store.close();
  }
});
test("store.embed with force only clears the requested collection", async () => {
  type CollectionVectorCount = { collection: string; count: number };

  const collections = {
    docs: { path: docsDir, pattern: "**/*.md" },
    notes: { path: notesDir, pattern: "**/*.md" },
  };
  const store = await createStore({ dbPath: freshDbPath(), config: { collections } });

  // Swap in fakes so no real model is loaded.
  const fakeLlm = createFakeEmbedLlm();
  setDefaultLlamaCpp(createFakeTokenizer() as any);
  store.internal.llm = fakeLlm as any;

  // Per collection: number of distinct hashes that have a seq-0 vector row.
  function countVectorsPerCollection(): CollectionVectorCount[] {
    return store.internal.db.prepare(`
SELECT d.collection, COUNT(DISTINCT v.hash) AS count
FROM documents d
LEFT JOIN content_vectors v ON v.hash = d.hash AND v.seq = 0
WHERE d.active = 1
GROUP BY d.collection
ORDER BY d.collection
`).all() as CollectionVectorCount[];
  }

  const everythingEmbedded: CollectionVectorCount[] = [
    { collection: "docs", count: 3 },
    { collection: "notes", count: 3 },
  ];

  try {
    // Embed both collections first.
    await store.update();
    await store.embed();
    expect(countVectorsPerCollection()).toEqual(everythingEmbedded);

    // A forced re-embed of "docs" reprocesses its three documents...
    const forced = await store.embed({ force: true, collection: "docs" });
    expect(forced.docsProcessed).toBe(3);
    expect(forced.chunksEmbedded).toBe(3);
    // ...and "notes" still has all of its vectors afterwards.
    expect(countVectorsPerCollection()).toEqual(everythingEmbedded);
  } finally {
    setDefaultLlamaCpp(null);
    await store.close();
  }
});
test("store.embed rejects invalid batch limits", async () => {
const store = await createStore({
dbPath: freshDbPath(),

View File

@ -9,7 +9,7 @@
import { describe, test, expect, beforeAll, afterAll, beforeEach, afterEach, vi } from "vitest";
import { openDatabase, loadSqliteVec } from "../src/db.js";
import type { Database } from "../src/db.js";
import { unlink, mkdtemp, rmdir, writeFile } from "node:fs/promises";
import { unlink, mkdtemp, rmdir, writeFile, rm, mkdir, rename } from "node:fs/promises";
import { tmpdir } from "node:os";
import { join } from "node:path";
import YAML from "yaml";
@ -46,13 +46,22 @@ import {
normalizeDocid,
isDocid,
syncConfigToDb,
reindexCollection,
STRONG_SIGNAL_MIN_SCORE,
STRONG_SIGNAL_MIN_GAP,
insertContent,
insertDocument,
generateEmbeddings,
getHybridRrfWeights,
_resetProductionModeForTesting,
hybridQuery,
structuredSearch,
vectorSearchQuery,
type Store,
type DocumentResult,
type SearchResult,
type RankedResult,
type RankedListMeta,
} from "../src/store.js";
import type { CollectionConfig } from "../src/collections.js";
@ -156,18 +165,18 @@ async function insertTestDocument(
const hash = opts.hash || await hashContent(body);
// Insert content (with OR IGNORE for deduplication)
db.prepare(`
INSERT OR IGNORE INTO content (hash, doc, created_at)
VALUES (?, ?, ?)
`).run(hash, body, now);
insertContent(db, hash, body, now);
// Insert document
const result = db.prepare(`
INSERT INTO documents (collection, path, title, hash, created_at, modified_at, active)
VALUES (?, ?, ?, ?, ?, ?, ?)
`).run(collectionName, path, title, hash, now, now, active);
insertDocument(db, collectionName, path, title, hash, now, now);
const row = db.prepare(`
SELECT id FROM documents WHERE collection = ? AND path = ?
`).get(collectionName, path) as { id: number } | undefined;
return Number(result.lastInsertRowid);
if (active === 0 && row) {
db.prepare(`UPDATE documents SET active = 0 WHERE id = ?`).run(row.id);
}
return row?.id ?? 0;
}
/** Sync YAML config file to SQLite store_collections in the current test store */
@ -277,7 +286,9 @@ afterAll(async () => {
describe("Store Creation", () => {
test("createStore throws without explicit path in test mode", () => {
// In test mode, createStore without path should throw to prevent accidental writes
// In test mode, createStore without path should throw to prevent accidental writes.
// Other tests may enable production mode in the same Bun process, so reset first.
_resetProductionModeForTesting();
const originalIndexPath = process.env.INDEX_PATH;
delete process.env.INDEX_PATH;
@ -1250,6 +1261,61 @@ describe("FTS Search", () => {
await cleanupTestDb(store);
});
test("searchFTS finds CJK documents by exact and mixed queries", async () => {
  const store = await createTestStore();
  // try/finally so a failing assertion still releases the test database.
  try {
    const collectionName = await createTestCollection();

    // One document per script: Chinese, Japanese, Korean.
    await insertTestDocument(store.db, collectionName, {
      name: "zh",
      title: "中文检索说明",
      body: "这里介绍 vector 数据库和关键词检索。",
      displayPath: "cjk/zh.md",
    });
    await insertTestDocument(store.db, collectionName, {
      name: "ja",
      title: "日本語検索メモ",
      body: "この文書は検索品質とトークン化について説明します。",
      displayPath: "cjk/ja.md",
    });
    await insertTestDocument(store.db, collectionName, {
      name: "ko",
      title: "한국어 검색 노트",
      body: "이 문서는 검색 품질과 토큰화 문제를 설명합니다.",
      displayPath: "cjk/ko.md",
    });

    // Pure-CJK queries must each reach the document written in that script.
    expect(store.searchFTS("关键词检索", 10).map(r => r.displayPath)).toContain(`${collectionName}/cjk/zh.md`);
    expect(store.searchFTS("検索品質", 10).map(r => r.displayPath)).toContain(`${collectionName}/cjk/ja.md`);
    expect(store.searchFTS("검색 품질", 10).map(r => r.displayPath)).toContain(`${collectionName}/cjk/ko.md`);
    // A mixed Latin + CJK query must still reach the Chinese document.
    expect(store.searchFTS("vector 关键词", 10).map(r => r.displayPath)).toContain(`${collectionName}/cjk/zh.md`);
  } finally {
    await cleanupTestDb(store);
  }
});
test("searchFTS keeps English behavior while indexing CJK text", async () => {
  const store = await createTestStore();
  // try/finally so a failing assertion still releases the test database.
  try {
    const collectionName = await createTestCollection();

    await insertTestDocument(store.db, collectionName, {
      name: "english",
      title: "Vector Search Notes",
      body: "The quick brown fox explains vector search and BM25 ranking.",
      displayPath: "english.md",
    });
    await insertTestDocument(store.db, collectionName, {
      name: "zh",
      title: "中文检索说明",
      body: "这里介绍向量数据库和关键词检索。",
      displayPath: "zh.md",
    });

    // An English query must hit the English document and must not pull in
    // the CJK-only document.
    const foxPaths = store.searchFTS("quick fox", 10).map(r => r.displayPath);
    expect(foxPaths).toContain(`${collectionName}/english.md`);
    expect(foxPaths).not.toContain(`${collectionName}/zh.md`);
  } finally {
    await cleanupTestDb(store);
  }
});
test("searchFTS handles special characters in query", async () => {
const store = await createTestStore();
const collectionName = await createTestCollection();
@ -1988,6 +2054,38 @@ describe("Reciprocal Rank Fusion", () => {
expect(fused[0]!.file).toBe("doc1");
});
test("hybrid RRF weights boost original vector evidence over expansion-only hits", () => {
  const expansionFile = "lex-expansion-only.md";
  const vectorFile = "original-vector.md";

  // Mirrors hybridQuery's common list order when a lex expansion exists:
  // original FTS, lex expansion FTS, original vector. Each list holds a single
  // hit with the same score, so only the list weights can separate them.
  const rankedLists = [
    [makeResult("original-fts-only.md", 0.95)],
    [makeResult(expansionFile, 0.95)],
    [makeResult(vectorFile, 0.95)],
  ];
  const rankedListMeta: RankedListMeta[] = [
    { source: "fts", queryType: "original", query: "user query" },
    { source: "fts", queryType: "lex", query: "lex expansion" },
    { source: "vec", queryType: "original", query: "user query" },
  ];

  /** Index of `file` within a fused ranking, or -1 when it is absent. */
  const rankOf = (fused: ReadonlyArray<{ file: string }>, file: string): number =>
    fused.findIndex(entry => entry.file === file);

  // Position-based weighting (the first two lists get 2x) favours the lexical
  // expansion over the original vector hit.
  const positionBasedWeights = rankedLists.map((_list, index) => (index >= 2 ? 1.0 : 2.0));
  const buggyOrder = reciprocalRankFusion(rankedLists, positionBasedWeights);
  expect(rankOf(buggyOrder, expansionFile)).toBeLessThan(rankOf(buggyOrder, vectorFile));

  // Weighting by query type gives both original-query lists the 2x boost.
  const semanticWeights = getHybridRrfWeights(rankedListMeta);
  const fixedOrder = reciprocalRankFusion(rankedLists, semanticWeights);
  expect(semanticWeights).toEqual([2.0, 1.0, 2.0]);
  expect(rankOf(fixedOrder, vectorFile)).toBeLessThan(rankOf(fixedOrder, expansionFile));
});
test("RRF adds top-rank bonus", () => {
// doc1 is #1 in list1, doc2 is #2 in list1
const list1 = [makeResult("doc1", 0.9), makeResult("doc2", 0.8)];
@ -2020,6 +2118,65 @@ describe("Reciprocal Rank Fusion", () => {
});
});
// =============================================================================
// Reindex Collection Tests
// =============================================================================
describe("Reindex Collection", () => {
  test("preserves document id and embeddings when file path changes only by case", async () => {
    const store = await createTestStore();
    const collectionName = "docs";
    const collectionPath = join(testDir, `case-rename-${Date.now()}-${Math.random().toString(36).slice(2)}`);

    // try/finally so a failing assertion still removes the on-disk collection
    // and releases the test database.
    try {
      await mkdir(collectionPath, { recursive: true });

      const originalPath = join(collectionPath, "README.md");
      const renamedPath = join(collectionPath, "readme.md");
      const body = "# Case Rename\n\nContent that should keep the same embedding.";
      await writeFile(originalPath, body);

      const firstResult = await reindexCollection(store, collectionPath, "**/*.md", collectionName);
      expect(firstResult.indexed).toBe(1);

      const before = store.db.prepare(`
SELECT id, path, hash FROM documents
WHERE collection = ? AND active = 1
`).get(collectionName) as { id: number; path: string; hash: string };
      expect(before.path).toBe("README.md");

      // Attach a vector metadata row to the content hash so the test can check
      // that it is still there after the rename.
      store.db.prepare(`
INSERT INTO content_vectors (hash, seq, pos, model, embedded_at)
VALUES (?, 0, 0, 'test-model', ?)
`).run(before.hash, new Date().toISOString());

      // Same file, same content: only the letter case of the name changes.
      await rename(originalPath, renamedPath);

      const secondResult = await reindexCollection(store, collectionPath, "**/*.md", collectionName);
      expect(secondResult.indexed).toBe(0);
      expect(secondResult.unchanged).toBe(1);
      expect(secondResult.removed).toBe(0);

      // Still exactly one row: same id and hash, new path, still active.
      const afterRows = store.db.prepare(`
SELECT id, path, hash, active FROM documents
WHERE collection = ?
ORDER BY id
`).all(collectionName) as { id: number; path: string; hash: string; active: number }[];
      expect(afterRows).toHaveLength(1);
      expect(afterRows[0]).toMatchObject({ id: before.id, path: "readme.md", hash: before.hash, active: 1 });

      const vectorCount = store.db.prepare(`
SELECT COUNT(*) AS count FROM content_vectors WHERE hash = ?
`).get(before.hash) as { count: number };
      expect(vectorCount.count).toBe(1);

      // The FTS row for the same rowid must carry the renamed path.
      const ftsRows = store.db.prepare(`
SELECT rowid, filepath FROM documents_fts WHERE rowid = ?
`).all(before.id) as { rowid: number; filepath: string }[];
      expect(ftsRows).toEqual([{ rowid: before.id, filepath: "docs/readme.md" }]);
    } finally {
      await rm(collectionPath, { recursive: true, force: true });
      await cleanupTestDb(store);
    }
  });
});
// =============================================================================
// Index Status Tests
// =============================================================================
@ -2256,6 +2413,33 @@ describe("Vector Table", () => {
await cleanupTestDb(store);
});
test("insertEmbedding is idempotent for an existing vec0 hash_seq (#598)", async () => {
  const store = await createTestStore();
  // try/finally so a failing assertion still releases the test database.
  try {
    store.ensureVecTable(2);

    const hash = "existinghashseq";
    const first = new Float32Array([0.1, 0.2]);
    const second = new Float32Array([0.3, 0.4]);
    const now = new Date().toISOString();

    // Seed the vector row that the later upsert will collide with.
    store.db.prepare(`INSERT INTO vectors_vec (hash_seq, embedding) VALUES (?, ?)`).run(`${hash}_0`, first);

    // Reproduces sqlite-vec's broken conflict handling: vec0 does not honor OR REPLACE.
    expect(() => {
      store.db.prepare(`INSERT OR REPLACE INTO vectors_vec (hash_seq, embedding) VALUES (?, ?)`).run(`${hash}_0`, second);
    }).toThrow(/UNIQUE constraint failed/i);

    // QMD must therefore use DELETE + INSERT when upserting the vector row.
    expect(() => store.insertEmbedding(hash, 0, 0, second, "test-model", now)).not.toThrow();

    // Exactly one vector row and one metadata row remain for (hash, seq 0).
    const vectorCount = store.db.prepare(`SELECT COUNT(*) AS count FROM vectors_vec WHERE hash_seq = ?`).get(`${hash}_0`) as { count: number };
    const metadataCount = store.db.prepare(`SELECT COUNT(*) AS count FROM content_vectors WHERE hash = ? AND seq = 0`).get(hash) as { count: number };
    expect(vectorCount.count).toBe(1);
    expect(metadataCount.count).toBe(1);
  } finally {
    await cleanupTestDb(store);
  }
});
});
// =============================================================================
@ -2263,6 +2447,47 @@ describe("Vector Table", () => {
// =============================================================================
describe("Integration", () => {
test("reindexCollection soft-deletes removed files and preserves inactive content (#585)", async () => {
  const store = await createTestStore();
  const collectionDir = await mkdtemp(join(testDir, "orphan-regression-"));
  const collectionName = "orphan-regression";
  const pattern = "**/*.md";

  try {
    // Five files with distinct bodies.
    for (const n of [1, 2, 3, 4, 5]) {
      await writeFile(join(collectionDir, `doc-${n}.md`), `# Doc ${n}\n\nUnique body ${n}`);
    }
    await createTestCollection({ pwd: collectionDir, glob: pattern, name: collectionName });

    const firstPass = await reindexCollection(store, collectionDir, pattern, collectionName);
    expect(firstPass.indexed).toBe(5);
    expect(firstPass.removed).toBe(0);

    // Remove three of the five files from disk, then reindex.
    for (const removedFile of ["doc-3.md", "doc-4.md", "doc-5.md"]) {
      await rm(join(collectionDir, removedFile));
    }
    const secondPass = await reindexCollection(store, collectionDir, pattern, collectionName);
    expect(secondPass.removed).toBe(3);

    type ActivityCounts = { active: number; inactive: number; total: number };
    const documentCounts = store.db.prepare(`
SELECT
SUM(CASE WHEN active = 1 THEN 1 ELSE 0 END) AS active,
SUM(CASE WHEN active = 0 THEN 1 ELSE 0 END) AS inactive,
COUNT(*) AS total
FROM documents
WHERE collection = ?
`).get(collectionName) as ActivityCounts;
    const contentRows = store.db.prepare(`SELECT COUNT(*) AS count FROM content`).get() as { count: number };

    // The removed files stay as inactive document rows, and all five content
    // rows are still present.
    expect(documentCounts).toEqual({ active: 2, inactive: 3, total: 5 });
    expect(contentRows.count).toBe(5);
  } finally {
    await rm(collectionDir, { recursive: true, force: true });
    await cleanupTestDb(store);
  }
});
test("full document lifecycle: create, search, retrieve", async () => {
const store = await createTestStore();
const collectionName = await createTestCollection({ pwd: "/test/notes", glob: "**/*.md" });
@ -2802,6 +3027,116 @@ describe("Embedding batching", () => {
}
});
test("generateEmbeddings uses the active llm embed model when no explicit model is passed", async () => {
  const store = await createTestStore();
  const db = store.db;
  const embedLlm = createFakeEmbedLlm();
  const activeModel = "hf:env/embed-model.gguf";

  setDefaultLlamaCpp(createFakeTokenizer() as any);
  // The store's llm is the fake embedder plus an explicit embedModelName.
  store.llm = { ...embedLlm, embedModelName: activeModel } as any;

  try {
    await insertTestDocument(db, "docs", { name: "one", body: "# One\n\nAlpha" });

    // No model option is passed to generateEmbeddings here.
    const { chunksEmbedded } = await generateEmbeddings(store);
    expect(chunksEmbedded).toBe(1);

    // The active model must show up in the embed call, the batch call, and
    // the stored vector metadata.
    const firstEmbedCall = embedLlm.embedCalls[0];
    expect(firstEmbedCall?.options?.model).toBe(activeModel);
    expect(embedLlm.embedBatchModelCalls).toEqual([{ model: activeModel }]);

    const storedModels = db.prepare(`SELECT DISTINCT model FROM content_vectors`).all();
    expect(storedModels).toEqual([{ model: activeModel }]);
  } finally {
    setDefaultLlamaCpp(null);
    await cleanupTestDb(store);
  }
});
test("vectorSearchQuery uses the active llm embed model for vector lookups", async () => {
  const store = await createTestStore();
  const activeModel = "hf:Qwen/Qwen3-Embedding-0.6B-GGUF/Qwen3-Embedding-0.6B-Q8_0.gguf";
  const searchVecMock = vi.fn(async () => [] as SearchResult[]) as any;

  // A plain table named vectors_vec stands in for the real vector table
  // (presumably so the query path sees it as present; confirm against vectorSearchQuery).
  store.db.exec(`CREATE TABLE vectors_vec (hash_seq TEXT PRIMARY KEY, embedding BLOB)`);
  store.llm = { embedModelName: activeModel } as any;
  store.searchVec = searchVecMock as any;
  store.expandQuery = vi.fn(async () => []) as any;

  try {
    await vectorSearchQuery(store, "custom query", { limit: 7, minScore: 0 });

    expect(searchVecMock).toHaveBeenCalledTimes(1);
    // Arguments checked by position: query text, model name, limit.
    const firstCallArgs = searchVecMock.mock.calls[0];
    expect(firstCallArgs?.[0]).toBe("custom query");
    expect(firstCallArgs?.[1]).toBe(activeModel);
    expect(firstCallArgs?.[2]).toBe(7);
  } finally {
    await cleanupTestDb(store);
  }
});
test("hybridQuery uses the active llm embed model for precomputed vector lookups", async () => {
  const store = await createTestStore();
  const activeModel = "hf:Qwen/Qwen3-Embedding-0.6B-GGUF/Qwen3-Embedding-0.6B-Q8_0.gguf";

  // Every input text gets the same fixed embedding, tagged with the active model.
  const embedBatchMock = vi.fn(async (texts: string[]) =>
    texts.map(() => ({ embedding: [1, 2, 3], model: activeModel })),
  );
  const searchVecMock = vi.fn(async () => [] as SearchResult[]) as any;

  store.db.exec(`CREATE TABLE vectors_vec (hash_seq TEXT PRIMARY KEY, embedding BLOB)`);
  store.llm = {
    embedModelName: activeModel,
    embedBatch: embedBatchMock,
  } as any;
  store.searchVec = searchVecMock as any;
  // FTS and query expansion return nothing, leaving only the vector lookup.
  store.searchFTS = vi.fn(() => []) as any;
  store.expandQuery = vi.fn(async () => []) as any;

  try {
    await hybridQuery(store, "hybrid query", { limit: 5, minScore: 0, skipRerank: true });

    expect(embedBatchMock).toHaveBeenCalledTimes(1);
    expect(searchVecMock).toHaveBeenCalledTimes(1);
    // Arguments checked by position: query text, model name, and the
    // precomputed embedding at index 5.
    const firstCallArgs = searchVecMock.mock.calls[0];
    expect(firstCallArgs?.[0]).toBe("hybrid query");
    expect(firstCallArgs?.[1]).toBe(activeModel);
    expect(firstCallArgs?.[5]).toEqual([1, 2, 3]);
  } finally {
    await cleanupTestDb(store);
  }
});
test("structuredSearch uses the active llm embed model for precomputed vector lookups", async () => {
  const store = await createTestStore();
  const activeModel = "hf:Qwen/Qwen3-Embedding-0.6B-GGUF/Qwen3-Embedding-0.6B-Q8_0.gguf";

  // Every input text gets the same fixed embedding, tagged with the active model.
  const embedBatchMock = vi.fn(async (texts: string[]) =>
    texts.map(() => ({ embedding: [1, 2, 3], model: activeModel })),
  );
  const searchVecMock = vi.fn(async () => [] as SearchResult[]) as any;

  store.db.exec(`CREATE TABLE vectors_vec (hash_seq TEXT PRIMARY KEY, embedding BLOB)`);
  store.llm = {
    embedModelName: activeModel,
    embedBatch: embedBatchMock,
  } as any;
  store.searchVec = searchVecMock as any;

  try {
    // A single structured "vec" query.
    const searches = [{ type: "vec", query: "structured query" }];
    await structuredSearch(store, searches, {
      limit: 5,
      minScore: 0,
      skipRerank: true,
    });

    expect(embedBatchMock).toHaveBeenCalledTimes(1);
    expect(searchVecMock).toHaveBeenCalledTimes(1);
    // Arguments checked by position: query text, model name, and the
    // precomputed embedding at index 5.
    const firstCallArgs = searchVecMock.mock.calls[0];
    expect(firstCallArgs?.[0]).toBe("structured query");
    expect(firstCallArgs?.[1]).toBe(activeModel);
    expect(firstCallArgs?.[5]).toEqual([1, 2, 3]);
  } finally {
    await cleanupTestDb(store);
  }
});
test("generateEmbeddings rejects invalid batch limits", async () => {
const store = await createTestStore();