Merge remote-tracking branch 'origin/main' into feat/local-qmd-index-bench

# Conflicts:
#	src/cli/qmd.ts
This commit is contained in:
Tobi Lütke 2026-05-16 18:27:49 +00:00
commit b2550d273a
No known key found for this signature in database
12 changed files with 859 additions and 70 deletions

View File

@ -4,6 +4,23 @@
### Fixes
- GPU: add `QMD_FORCE_CPU=1` / `--no-gpu` to bypass CUDA/Vulkan/Metal probing entirely, and route native llama.cpp stdout noise to stderr so JSON output stays parseable during search/query commands.
- Snippet line numbers: `qmd_query` (MCP), HTTP `/query`, and `qmd query`
(CLI JSON output and snippet headers) now return absolute source-file
line numbers instead of chunk-local ones, so the `line` field can be
passed back to `qmd_get` as `fromLine` without a separate lookup.
Snippet selection remains scoped to the best matching chunk
(preserves #149).
- CLI: `qmd query --full` now emits the full document body in all output
formats (json, csv, md, xml), restoring the documented behavior of the
flag. Previously it returned only the best matching chunk (~3.6KB max
per result). Output payload for `--full` queries is now proportional
to total document size.
- macOS Metal: `qmd query --json` now flushes successful JSON output and uses a safe immediate-exit path on Darwin to avoid ggml Metal finalizer aborts; other commands still dispose LLM contexts/models before the llama runtime. #368
- Embedding: require complete chunk coverage before treating a document as
embedded, remove partial vectors when chunk/session failures leave a
document incomplete, and keep `qmd status` pending counts honest after
interrupted long embed runs. #637 #378
- Embedding: `qmd embed -c <collection>` now scopes pending-doc selection
to the requested collection instead of embedding global pending work.
Scoped `--force` clears only collection-owned vectors, preserves shared
@ -33,6 +50,9 @@
- Packaging: install AST grammar WASM packages as required dependencies so
Bun global installs include TypeScript/TSX/JavaScript grammars, and add a
`smoke:package-grammars` verification command. #595
- Launcher: add wrapper smoke coverage for scoped package, npm/npx,
Homebrew/Linuxbrew, Bun global symlink layouts, and `$BUN_INSTALL`
false-positive runtime selection regressions. #351 #353 #354 #356 #358 #359
## [2.1.0] - 2026-04-05

View File

@ -798,6 +798,7 @@ llm_cache -- Cached LLM responses (query expansion, rerank scores)
|----------|---------|-------------|
| `XDG_CACHE_HOME` | `~/.cache` | Cache directory location |
| `QMD_LLAMA_GPU` | `auto` | Force llama.cpp GPU backend (`metal`, `vulkan`, `cuda`) or disable GPU with `false` |
| `QMD_FORCE_CPU` | unset | Set to `1`/`true` to force CPU mode before any CUDA/Vulkan/Metal probing. Equivalent CLI flag: `--no-gpu`. |
| `QMD_EMBED_PARALLELISM` | automatic | Override embedding/reranking context parallelism (1-8). Windows CUDA defaults to `1` because parallel CUDA contexts can crash with `ggml-cuda.cu:98`; use Vulkan or raise this only if your driver is stable. |
## How It Works

View File

@ -212,6 +212,76 @@ const cursor = {
show() { process.stderr.write('\x1b[?25h'); },
};
/**
 * Minimal writable-stream shape needed by the CLI shutdown helpers: a single
 * `write` method that accepts an optional completion callback. Declaring only
 * this surface lets callers substitute lightweight stand-ins for
 * `process.stdout` / `process.stderr`.
 */
type CliLifecycleWritable = {
write(chunk: string | Uint8Array, callback?: (error?: Error | null) => void): boolean;
};
/**
 * Options for `finishSuccessfulCliCommand`. Only `command` is required; every
 * other field is an override whose default is the real process-level
 * implementation.
 */
type FinishSuccessfulCliCommandOptions = {
// Name of the CLI command that just completed; compared against "query" when deciding on the safe-exit path.
command: string;
// Output format of the command; compared against "json" when deciding on the safe-exit path.
format?: OutputFormat;
// Teardown run before a normal exit. Defaults to `disposeDefaultLlamaCpp`.
cleanup?: () => Promise<void>;
// Normal exit hook. Defaults to `process.exit`.
exit?: (code: number) => void;
// Exit hook used when native cleanup is bypassed. Defaults to `immediateProcessExit`.
immediateExit?: (code: number) => void;
// Stream flushed first, before any teardown. Defaults to `process.stdout`.
stdout?: CliLifecycleWritable;
// Stream that receives the cleanup warning and is flushed before exiting. Defaults to `process.stderr`.
stderr?: CliLifecycleWritable;
// Platform used for the Darwin check. Defaults to `process.platform`.
platform?: NodeJS.Platform;
};
/**
 * Wait until `stream` invokes the completion callback of an empty write.
 *
 * The callback's error argument is deliberately ignored: the promise resolves
 * whether or not the stream reports a failure, so a broken pipe cannot turn a
 * successful command into a rejected shutdown.
 *
 * @param stream - Writable whose `write` accepts a completion callback.
 */
async function flushWritable(stream: CliLifecycleWritable): Promise<void> {
  const drained = new Promise<void>((done) => {
    const onWritten = (): void => {
      done();
    };
    stream.write("", onWritten);
  });
  await drained;
}
/**
 * Decide whether the command should skip native teardown and exit immediately.
 *
 * Returns true only when all of the following hold:
 *  - the effective platform (`options.platform`, else `process.platform`) is "darwin",
 *  - the command is "query",
 *  - the output format is "json",
 *  - the `QMD_DISABLE_DARWIN_QUERY_JSON_SAFE_EXIT` environment variable is not "1".
 */
function shouldBypassNativeCleanup(options: FinishSuccessfulCliCommandOptions): boolean {
  const effectivePlatform = options.platform ?? process.platform;
  if (effectivePlatform !== "darwin") return false;
  if (options.command !== "query") return false;
  if (options.format !== "json") return false;

  // Escape hatch: setting the variable to exactly "1" restores normal teardown.
  const optedOut = process.env.QMD_DISABLE_DARWIN_QUERY_JSON_SAFE_EXIT === "1";
  return !optedOut;
}
/**
 * Terminate the process with `code`, preferring `process.reallyExit` when the
 * runtime exposes it as a function and falling back to `process.exit`
 * otherwise.
 *
 * @param code - Exit status to terminate with.
 */
function immediateProcessExit(code: number): void {
  // `reallyExit` is not part of the public typings, so widen the type locally.
  const runtime = process as NodeJS.Process & { reallyExit?: (code?: number) => void };
  const hasReallyExit = typeof runtime.reallyExit === "function";
  if (hasReallyExit) {
    // Invoke as a method so the runtime receives `process` as the receiver.
    runtime.reallyExit!(code);
  } else {
    process.exit(code);
  }
}
/**
 * Wind down the process after a CLI command has already succeeded.
 *
 * Sequence:
 *  1. Flush stdout so the command's output is handed to the stream first.
 *  2. If `shouldBypassNativeCleanup` says so (macOS `query` with JSON output),
 *     flush stderr and leave through the immediate-exit hook without running
 *     native teardown: ggml Metal can abort from C++ finalizers after valid
 *     JSON has already been produced (#368).
 *  3. Otherwise run the cleanup hook. A cleanup failure is reported on stderr
 *     as a warning and does not change the exit status, because the command's
 *     output already completed. Then flush stderr and exit 0.
 *
 * This is only reached once the command itself completed, so real command
 * failures still leave through the normal error path before this runs.
 *
 * @param options - Command identity plus optional overrides for streams,
 *   cleanup, exit hooks and platform.
 */
export async function finishSuccessfulCliCommand(options: FinishSuccessfulCliCommandOptions): Promise<void> {
  const errStream = options.stderr ?? process.stderr;
  const normalExit = options.exit ?? ((code: number) => process.exit(code));
  const fastExit = options.immediateExit ?? immediateProcessExit;

  await flushWritable(options.stdout ?? process.stdout);

  if (!shouldBypassNativeCleanup(options)) {
    try {
      const runCleanup = options.cleanup ?? disposeDefaultLlamaCpp;
      await runCleanup();
    } catch (error) {
      const reason = error instanceof Error ? error.message : String(error);
      errStream.write(
        `QMD Warning: cleanup after successful output failed (${reason}); exiting 0 because command output completed.\n`
      );
    }
    await flushWritable(errStream);
    normalExit(0);
    return;
  }

  // Safe-exit path: no native teardown, just drain stderr and leave.
  await flushWritable(errStream);
  fastExit(0);
}
// Ensure cursor is restored on exit
process.on('SIGINT', () => { cursor.show(); process.exit(130); });
process.on('SIGTERM', () => { cursor.show(); process.exit(143); });
@ -849,6 +919,7 @@ function getDocument(filename: string, fromLine?: number, maxLines?: number, lin
inputPath = inputPath.slice(0, -colonMatch[0].length);
}
}
if (fromLine !== undefined) fromLine = Math.max(1, fromLine);
const parsedIndexPath = isVirtualPath(inputPath) ? parseVirtualPath(inputPath) : null;
if (parsedIndexPath?.indexName) {
@ -1740,7 +1811,7 @@ async function vectorIndex(
}
// Check if there's work to do before starting
const hashesToEmbed = getHashesNeedingEmbedding(db, batchOptions?.collection);
const hashesToEmbed = getHashesNeedingEmbedding(db, batchOptions?.collection, model);
if (hashesToEmbed === 0 && !force) {
console.log(`${c.green}✓ All content hashes already have embeddings.${c.reset}`);
closeDb();
@ -1930,6 +2001,7 @@ type OutputRow = {
score: number;
context?: string | null;
chunkPos?: number;
chunkLen?: number;
hash?: string;
docid?: string;
explain?: HybridQueryExplain;
@ -2012,9 +2084,9 @@ function outputResults(results: OutputRow[], query: string, opts: OutputOptions)
// JSON output for LLM consumption
const output = filtered.map(row => {
const docid = row.docid || (row.hash ? row.hash.slice(0, 6) : undefined);
const snippetInfo = extractSnippet(row.body, query, 300, row.chunkPos, row.chunkLen, opts.intent);
let body = opts.full ? row.body : undefined;
const snippetInfo = !opts.full ? extractSnippet(row.body, query, 300, row.chunkPos, undefined, opts.intent) : undefined;
let snippet = snippetInfo?.snippet;
let snippet = !opts.full ? snippetInfo.snippet : undefined;
if (opts.lineNumbers) {
if (body) body = addLineNumbers(body);
if (snippet) snippet = addLineNumbers(snippet);
@ -2023,7 +2095,7 @@ function outputResults(results: OutputRow[], query: string, opts: OutputOptions)
...(docid && { docid: `#${docid}` }),
score: Math.round(row.score * 100) / 100,
file: toQmdPath(row.displayPath),
...(snippetInfo && { line: snippetInfo.line }),
line: snippetInfo.line,
title: row.title,
...(row.context && { context: row.context }),
...(body && { body }),
@ -2046,7 +2118,7 @@ function outputResults(results: OutputRow[], query: string, opts: OutputOptions)
for (let i = 0; i < filtered.length; i++) {
const row = filtered[i];
if (!row) continue;
const { line, snippet } = extractSnippet(row.body, query, 500, row.chunkPos, undefined, opts.intent);
const { line, snippet } = extractSnippet(row.body, query, 500, row.chunkPos, row.chunkLen, opts.intent);
const docid = row.docid || (row.hash ? row.hash.slice(0, 6) : undefined);
// Line 1: filepath with docid
@ -2110,8 +2182,9 @@ function outputResults(results: OutputRow[], query: string, opts: OutputOptions)
console.log();
// Snippet with highlighting (diff-style header included)
let displaySnippet = opts.lineNumbers ? addLineNumbers(snippet, line) : snippet;
const highlighted = highlightTerms(displaySnippet, query);
const content = opts.full ? row.body : snippet;
const displayContent = opts.lineNumbers ? addLineNumbers(content, opts.full ? 1 : line) : content;
const highlighted = highlightTerms(displayContent, query);
console.log(highlighted);
// Double empty line between results
@ -2123,7 +2196,7 @@ function outputResults(results: OutputRow[], query: string, opts: OutputOptions)
if (!row) continue;
const heading = row.title || row.displayPath;
const docid = row.docid || (row.hash ? row.hash.slice(0, 6) : undefined);
let content = opts.full ? row.body : extractSnippet(row.body, query, 500, row.chunkPos, undefined, opts.intent).snippet;
let content = opts.full ? row.body : extractSnippet(row.body, query, 500, row.chunkPos, row.chunkLen, opts.intent).snippet;
if (opts.lineNumbers) {
content = addLineNumbers(content);
}
@ -2136,7 +2209,7 @@ function outputResults(results: OutputRow[], query: string, opts: OutputOptions)
const titleAttr = row.title ? ` title="${row.title.replace(/"/g, '&quot;')}"` : "";
const contextAttr = row.context ? ` context="${row.context.replace(/"/g, '&quot;')}"` : "";
const docid = row.docid || (row.hash ? row.hash.slice(0, 6) : "");
let content = opts.full ? row.body : extractSnippet(row.body, query, 500, row.chunkPos, undefined, opts.intent).snippet;
let content = opts.full ? row.body : extractSnippet(row.body, query, 500, row.chunkPos, row.chunkLen, opts.intent).snippet;
if (opts.lineNumbers) {
content = addLineNumbers(content);
}
@ -2146,10 +2219,10 @@ function outputResults(results: OutputRow[], query: string, opts: OutputOptions)
// CSV format
console.log("docid,score,file,title,context,line,snippet");
for (const row of filtered) {
const { line, snippet } = extractSnippet(row.body, query, 500, row.chunkPos, undefined, opts.intent);
const { line, snippet } = extractSnippet(row.body, query, 500, row.chunkPos, row.chunkLen, opts.intent);
let content = opts.full ? row.body : snippet;
if (opts.lineNumbers) {
content = addLineNumbers(content, line);
content = addLineNumbers(content, opts.full ? 1 : line);
}
const docid = row.docid || (row.hash ? row.hash.slice(0, 6) : "");
const snippetText = content || "";
@ -2505,13 +2578,13 @@ async function querySearch(query: string, opts: OutputOptions, _embedModel: stri
? (structuredQueries.find(s => s.type === 'lex')?.query || structuredQueries.find(s => s.type === 'vec')?.query || query)
: query;
// Map to CLI output format — pass the full body plus the best chunk's position/length so snippets stay scoped to that chunk
outputResults(results.map(r => ({
file: r.file,
displayPath: r.displayPath,
title: r.title,
body: r.bestChunk,
body: r.body,
chunkPos: r.bestChunkPos,
chunkLen: r.bestChunk.length,
score: r.score,
context: r.context,
docid: r.docid,
@ -2567,6 +2640,7 @@ function parseCLI() {
// Query options
"candidate-limit": { type: "string", short: "C" },
"no-rerank": { type: "boolean", default: false },
"no-gpu": { type: "boolean", default: false },
intent: { type: "string" },
// Chunking options
"chunk-strategy": { type: "string" }, // "regex" (default) or "auto" (AST for code files)
@ -2579,6 +2653,10 @@ function parseCLI() {
strict: false, // Allow unknown options to pass through
});
if (values["no-gpu"]) {
process.env.QMD_FORCE_CPU = "1";
}
// Select index name (default: "index"). If no explicit --index is supplied,
// a project-local .qmd/index.yaml overrides the global config/cache paths.
const indexName = values.index as string | undefined;
@ -2842,6 +2920,7 @@ function showHelp(): void {
console.log(" --full - Output full document instead of snippet");
console.log(" -C, --candidate-limit <n> - Max candidates to rerank (default 40, lower = faster)");
console.log(" --no-rerank - Skip LLM reranking (use RRF scores only, much faster on CPU)");
console.log(" --no-gpu - Force CPU mode for llama.cpp operations (same as QMD_FORCE_CPU=1)");
console.log(" --line-numbers - Include line numbers in output");
console.log(" --explain - Include retrieval score traces (query --json/CLI)");
console.log(" --files | --json | --csv | --md | --xml - Output format");
@ -3430,8 +3509,10 @@ if (isMain) {
}
if (cli.command !== "mcp") {
await disposeDefaultLlamaCpp();
process.exit(0);
await finishSuccessfulCliCommand({
command: cli.command,
format: cli.opts.format,
});
}
} // end if (main module)

View File

@ -22,10 +22,45 @@ type NodeLlamaCppModule = {
let nodeLlamaCppImport: Promise<NodeLlamaCppModule> | null = null;
async function loadNodeLlamaCpp(): Promise<NodeLlamaCppModule> {
nodeLlamaCppImport ??= import("node-llama-cpp") as Promise<NodeLlamaCppModule>;
nodeLlamaCppImport ??= withNativeStdoutRedirectedToStderr(
() => import("node-llama-cpp") as Promise<NodeLlamaCppModule>
);
return nodeLlamaCppImport;
}
/**
 * Test hook: replace (or reset) the cached node-llama-cpp module import.
 *
 * Passing a module caches an already-resolved promise for it; passing `null`
 * clears the cache. In both cases the record of GPU modes that previously
 * failed to initialize is cleared.
 *
 * @param module - Stand-in module to serve, or `null` to drop the cached import.
 */
export function setNodeLlamaCppModuleForTest(module: NodeLlamaCppModule | null): void {
  if (module) {
    nodeLlamaCppImport = Promise.resolve(module);
  } else {
    nodeLlamaCppImport = null;
  }
  failedGpuInitModes.clear();
}
type StdoutWrite = typeof process.stdout.write;

// Number of redirect scopes currently in flight; the patch is installed when
// this goes 0 -> 1 and removed when it returns to 0.
let nativeStdoutRedirectDepth = 0;
// The exact function that was installed as `process.stdout.write` when the
// outermost scope began, or null while no redirect is active.
let originalStdoutWrite: StdoutWrite | null = null;

/**
 * Some node-llama-cpp native build/probe paths write library noise to stdout.
 * JSON APIs must reserve stdout for machine-readable payloads, so route that
 * noise to stderr while native llama initialization is in progress.
 *
 * Scopes may nest or overlap: only the outermost scope installs and removes
 * the redirect. On exit the original `process.stdout.write` function is put
 * back by reference (not a bound copy), so identity comparisons and spies
 * that captured the function beforehand keep working and repeated calls do
 * not accumulate wrappers.
 *
 * @param fn - Async work to run while `process.stdout.write` forwards to stderr.
 * @returns Whatever `fn` resolves to; rejections from `fn` propagate after the
 *   redirect has been released.
 */
export async function withNativeStdoutRedirectedToStderr<T>(fn: () => Promise<T>): Promise<T> {
  if (nativeStdoutRedirectDepth === 0) {
    // Keep the untouched reference; it is only ever re-installed on
    // process.stdout, so it needs no binding to keep its receiver.
    originalStdoutWrite = process.stdout.write;
    process.stdout.write = ((...args: Parameters<StdoutWrite>) =>
      process.stderr.write(...args)) as StdoutWrite;
  }
  nativeStdoutRedirectDepth++;
  try {
    return await fn();
  } finally {
    nativeStdoutRedirectDepth--;
    if (nativeStdoutRedirectDepth === 0 && originalStdoutWrite) {
      process.stdout.write = originalStdoutWrite;
      originalStdoutWrite = null;
    }
  }
}
import { homedir } from "os";
import { join } from "path";
import { existsSync, mkdirSync, statSync, unlinkSync, readdirSync, readFileSync, writeFileSync, openSync, readSync, closeSync } from "fs";
@ -487,7 +522,15 @@ export function resolveSafeParallelism(options: ParallelismOptions): number {
return Math.max(1, options.computed);
}
export function resolveLlamaGpuMode(envValue = process.env.QMD_LLAMA_GPU): LlamaGpuMode {
export function resolveLlamaGpuMode(
envValue = process.env.QMD_LLAMA_GPU,
forceCpuValue = process.env.QMD_FORCE_CPU
): LlamaGpuMode {
const forceCpu = forceCpuValue?.trim().toLowerCase() ?? "";
if (forceCpu && !["false", "off", "none", "disable", "disabled", "0"].includes(forceCpu)) {
return false;
}
const normalized = envValue?.trim().toLowerCase() ?? "";
if (!normalized) return "auto";
if (["false", "off", "none", "disable", "disabled", "0"].includes(normalized)) return false;
@ -497,6 +540,23 @@ export function resolveLlamaGpuMode(envValue = process.env.QMD_LLAMA_GPU): Llama
return "auto";
}
/**
 * Run `dispose` but give up waiting after `timeoutMs`, never throwing.
 *
 * - If `dispose` settles first and succeeds, nothing is logged.
 * - If the timer wins, a "timed out" warning is written to stderr; the
 *   dispose call itself is not cancelled.
 * - If `dispose` throws or rejects, a "failed to dispose" warning is written
 *   to stderr.
 *
 * @param resourceName - Human-readable label used in warning messages.
 * @param dispose - Teardown callback to race against the timer.
 * @param timeoutMs - Maximum time to wait, in milliseconds (default 1000).
 */
async function disposeWithTimeout(resourceName: string, dispose: () => Promise<void>, timeoutMs = 1000): Promise<void> {
  const deadline = new Promise<"timeout">((expire) => {
    const timer = setTimeout(() => expire("timeout"), timeoutMs);
    // Unref so a pending timer never keeps the process alive on its own.
    timer.unref();
  });

  try {
    const outcome = await Promise.race([dispose(), deadline]);
    if (outcome !== "timeout") return;
    process.stderr.write(`QMD Warning: timed out disposing ${resourceName}; continuing shutdown.\n`);
  } catch (error) {
    const reason = error instanceof Error ? error.message : String(error);
    process.stderr.write(
      `QMD Warning: failed to dispose ${resourceName} (${reason}); continuing shutdown.\n`
    );
  }
}
function resolveExpandContextSize(configValue?: number): number {
if (configValue !== undefined) {
if (!Number.isInteger(configValue) || configValue <= 0) {
@ -518,6 +578,8 @@ function resolveExpandContextSize(configValue?: number): number {
return parsed;
}
const failedGpuInitModes = new Set<LlamaGpuMode>();
export class LlamaCpp implements LLM {
private readonly _ciMode = !!process.env.CI;
private llama: Llama | null = null;
@ -668,22 +730,29 @@ export class LlamaCpp implements LLM {
const { getLlama, LlamaLogLevel } = await loadNodeLlamaCpp();
const loadLlama = async (gpu: LlamaGpuMode) =>
await getLlama({
await withNativeStdoutRedirectedToStderr(() => getLlama({
build: allowBuild ? "autoAttempt" : "never",
logLevel: LlamaLogLevel.error,
gpu,
skipDownload: !allowBuild,
});
}));
let llama: Llama;
if (gpuMode === false) {
if (gpuMode === false || failedGpuInitModes.has(gpuMode)) {
if (gpuMode !== false && failedGpuInitModes.has(gpuMode)) {
process.stderr.write(
`QMD Warning: skipping previously failed GPU init${gpuMode === "auto" ? "" : ` for QMD_LLAMA_GPU=${gpuMode}`}, using CPU.\n`
);
}
llama = await loadLlama(false);
} else {
try {
llama = await loadLlama(gpuMode);
} catch (err) {
// GPU backend (e.g. Vulkan on headless/driverless machines) can throw at init.
// Fall back to CPU so qmd still works.
// GPU backend (e.g. Vulkan/CUDA on headless/driverless machines) can throw at init.
// Fall back to CPU so qmd still works, and cache the failure to avoid repeated
// expensive native build/probe attempts in this process.
failedGpuInitModes.add(gpuMode);
process.stderr.write(
`QMD Warning: GPU init failed${gpuMode === "auto" ? "" : ` for QMD_LLAMA_GPU=${gpuMode}`} (${err instanceof Error ? err.message : String(err)}), falling back to CPU.\n`
);
@ -1413,22 +1482,37 @@ export class LlamaCpp implements LLM {
this.inactivityTimer = null;
}
// Disposing llama cascades to models and contexts automatically
// See: https://node-llama-cpp.withcat.ai/guide/objects-lifecycle
// Note: llama.dispose() can hang indefinitely, so we use a timeout
if (this.llama) {
const disposePromise = this.llama.dispose();
const timeoutPromise = new Promise<void>((resolve) => setTimeout(resolve, 1000));
await Promise.race([disposePromise, timeoutPromise]);
// Explicitly dispose in dependency order: contexts first, then models, then llama.
// Relying only on llama.dispose() leaves Metal resource sets alive until process
// finalization on Apple Silicon, where ggml_metal_device_free can abort after
// otherwise-successful CLI output (#368).
for (const ctx of this.embedContexts) {
await disposeWithTimeout("embedding context", () => ctx.dispose());
}
this.embedContexts = [];
for (const ctx of this.rerankContexts) {
await disposeWithTimeout("rerank context", () => ctx.dispose());
}
this.rerankContexts = [];
if (this.embedModel) {
await disposeWithTimeout("embedding model", () => this.embedModel!.dispose());
this.embedModel = null;
}
if (this.generateModel) {
await disposeWithTimeout("generation model", () => this.generateModel!.dispose());
this.generateModel = null;
}
if (this.rerankModel) {
await disposeWithTimeout("rerank model", () => this.rerankModel!.dispose());
this.rerankModel = null;
}
// Clear references
this.embedContexts = [];
this.rerankContexts = [];
this.embedModel = null;
this.generateModel = null;
this.rerankModel = null;
this.llama = null;
if (this.llama) {
await disposeWithTimeout("llama runtime", () => this.llama!.dispose());
this.llama = null;
}
// Clear any in-flight load/create promises
this.embedModelLoadPromise = null;

View File

@ -42,6 +42,7 @@ type SearchResultItem = {
title: string;
score: number;
context: string | null;
line: number; // Absolute line in source markdown
snippet: string;
};
@ -239,6 +240,8 @@ async function createMcpServer(store: QMDStore): Promise<McpServer> {
title: "Query",
description: `Search the knowledge base using a query document — one or more typed sub-queries combined for best recall.
Each result includes a \`line\` field with the absolute 1-indexed line of the best match in the source markdown. To read more context around a hit, call \`get(file, fromLine = max(1, line - 20), maxLines = 80, lineNumbers = true)\`.
## Query Types
**lex** BM25 keyword search. Fast, exact, no LLM needed.
@ -339,13 +342,14 @@ Intent-aware lex (C++ performance, not sports):
|| searches[0]?.query || "";
const filtered: SearchResultItem[] = results.map(r => {
const { line, snippet } = extractSnippet(r.bestChunk, primaryQuery, 300, undefined, undefined, intent);
const { line, snippet } = extractSnippet(r.body, primaryQuery, 300, r.bestChunkPos, r.bestChunk.length, intent);
return {
docid: `#${r.docid}`,
file: r.displayPath,
title: r.title,
score: Math.round(r.score * 100) / 100,
context: r.context,
line,
snippet: addLineNumbers(snippet, line),
};
});
@ -383,6 +387,7 @@ Intent-aware lex (C++ performance, not sports):
parsedFromLine = parseInt(colonMatch[1], 10);
lookup = lookup.slice(0, -colonMatch[0].length);
}
if (parsedFromLine !== undefined) parsedFromLine = Math.max(1, parsedFromLine);
const result = await store.get(lookup, { includeBody: false });
@ -701,13 +706,14 @@ export async function startMcpHttpServer(
|| params.searches[0]?.query || "";
const formatted = results.map(r => {
const { line, snippet } = extractSnippet(r.bestChunk, primaryQuery, 300);
const { line, snippet } = extractSnippet(r.body, primaryQuery, 300, r.bestChunkPos, r.bestChunk.length, params.intent);
return {
docid: `#${r.docid}`,
file: r.displayPath,
title: r.title,
score: Math.round(r.score * 100) / 100,
context: r.context,
line,
snippet: addLineNumbers(snippet, line),
};
});

View File

@ -871,10 +871,15 @@ function initializeDatabase(db: Database): void {
seq INTEGER NOT NULL DEFAULT 0,
pos INTEGER NOT NULL DEFAULT 0,
model TEXT NOT NULL,
total_chunks INTEGER NOT NULL DEFAULT 1,
embedded_at TEXT NOT NULL,
PRIMARY KEY (hash, seq)
)
`);
const cvInfoAfterCreate = db.prepare(`PRAGMA table_info(content_vectors)`).all() as { name: string }[];
if (!cvInfoAfterCreate.some(col => col.name === 'total_chunks')) {
db.exec(`ALTER TABLE content_vectors ADD COLUMN total_chunks INTEGER NOT NULL DEFAULT 1`);
}
// Store collections — makes the DB self-contained (no external config needed)
db.exec(`
@ -1167,9 +1172,9 @@ export type Store = {
ensureVecTable: (dimensions: number) => void;
// Index health
getHashesNeedingEmbedding: () => number;
getIndexHealth: () => IndexHealthInfo;
getStatus: () => IndexStatus;
getHashesNeedingEmbedding: (model?: string) => number;
getIndexHealth: (model?: string) => IndexHealthInfo;
getStatus: (model?: string) => IndexStatus;
// Caching
getCacheKey: typeof getCacheKey;
@ -1229,7 +1234,7 @@ export type Store = {
// Vector/embedding operations
getHashesForEmbedding: () => { hash: string; body: string; path: string }[];
clearAllEmbeddings: () => void;
insertEmbedding: (hash: string, seq: number, pos: number, embedding: Float32Array, model: string, embeddedAt: string) => void;
insertEmbedding: (hash: string, seq: number, pos: number, embedding: Float32Array, model: string, embeddedAt: string, totalChunks?: number) => void;
};
// =============================================================================
@ -1420,18 +1425,31 @@ function resolveEmbedOptions(options?: EmbedOptions): Required<Pick<EmbedOptions
};
}
function getPendingEmbeddingDocs(db: Database, collection?: string): PendingEmbeddingDoc[] {
/**
 * Build the SQL expression that yields a hash's expected chunk count.
 *
 * Inspects the `content_vectors` columns via `PRAGMA table_info`: when a
 * `total_chunks` column exists the expression aggregates it, otherwise the
 * literal `1` is used.
 *
 * @param db - Database handle to inspect.
 * @returns `'MAX(total_chunks)'` or `'1'`, ready to be spliced into a grouped query.
 */
function contentVectorExpectedChunksExpr(db: Database): string {
  const tableInfo = db.prepare(`PRAGMA table_info(content_vectors)`).all() as { name: string }[];
  for (const column of tableInfo) {
    if (column.name === 'total_chunks') {
      return 'MAX(total_chunks)';
    }
  }
  return '1';
}
function getPendingEmbeddingDocs(db: Database, collection?: string, model: string = DEFAULT_EMBED_MODEL): PendingEmbeddingDoc[] {
const collectionFilter = collection ? `AND d.collection = ?` : ``;
const expectedChunksExpr = contentVectorExpectedChunksExpr(db);
const stmt = db.prepare(`
SELECT d.hash, MIN(d.path) as path, length(CAST(c.doc AS BLOB)) as bytes
FROM documents d
JOIN content c ON d.hash = c.hash
LEFT JOIN content_vectors v ON d.hash = v.hash AND v.seq = 0
WHERE d.active = 1 AND v.hash IS NULL ${collectionFilter}
LEFT JOIN (
SELECT hash, model, COUNT(*) AS chunk_count, ${expectedChunksExpr} AS expected_chunks
FROM content_vectors
WHERE model = ?
GROUP BY hash, model
) v ON d.hash = v.hash
WHERE d.active = 1
AND (v.hash IS NULL OR v.chunk_count < v.expected_chunks)
${collectionFilter}
GROUP BY d.hash
ORDER BY MIN(d.path)
`);
return (collection ? stmt.all(collection) : stmt.all()) as PendingEmbeddingDoc[];
return (collection ? stmt.all(model, collection) : stmt.all(model)) as PendingEmbeddingDoc[];
}
function buildEmbeddingBatches(
@ -1502,7 +1520,7 @@ export async function generateEmbeddings(
clearAllEmbeddings(db, options?.collection);
}
const docsToEmbed = getPendingEmbeddingDocs(db, options?.collection);
const docsToEmbed = getPendingEmbeddingDocs(db, options?.collection, model);
if (docsToEmbed.length === 0) {
return { docsProcessed: 0, chunksEmbedded: 0, errors: 0, durationMs: 0 };
@ -1533,6 +1551,7 @@ export async function generateEmbeddings(
const batchDocs = getEmbeddingDocsForBatch(db, batchMeta);
const batchChunks: ChunkItem[] = [];
const expectedChunksByHash = new Map<string, number>();
const batchBytes = batchMeta.reduce((sum, doc) => sum + Math.max(0, doc.bytes), 0);
for (const doc of batchDocs) {
@ -1558,6 +1577,7 @@ export async function generateEmbeddings(
bytes: encoder.encode(chunks[seq]!.text).length,
});
}
expectedChunksByHash.set(doc.hash, chunks.length);
}
totalChunks += batchChunks.length;
@ -1610,7 +1630,7 @@ export async function generateEmbeddings(
const chunk = chunkBatch[i]!;
const embedding = embeddings[i];
if (embedding) {
insertEmbedding(db, chunk.hash, chunk.seq, chunk.pos, new Float32Array(embedding.embedding), model, now);
insertEmbedding(db, chunk.hash, chunk.seq, chunk.pos, new Float32Array(embedding.embedding), model, now, expectedChunksByHash.get(chunk.hash) ?? 1);
chunksEmbedded++;
} else {
errors++;
@ -1629,7 +1649,7 @@ export async function generateEmbeddings(
const text = formatDocForEmbedding(chunk.text, chunk.title, embedModelUri);
const result = await session.embed(text, { model });
if (result) {
insertEmbedding(db, chunk.hash, chunk.seq, chunk.pos, new Float32Array(result.embedding), model, now);
insertEmbedding(db, chunk.hash, chunk.seq, chunk.pos, new Float32Array(result.embedding), model, now, expectedChunksByHash.get(chunk.hash) ?? 1);
chunksEmbedded++;
} else {
errors++;
@ -1654,6 +1674,11 @@ export async function generateEmbeddings(
});
}
const removedPartialChunks = removeIncompleteEmbeddings(db, expectedChunksByHash, model);
if (removedPartialChunks > 0) {
chunksEmbedded = Math.max(0, chunksEmbedded - removedPartialChunks);
}
bytesProcessed += batchBytes;
options?.onProgress?.({ chunksEmbedded, totalChunks, bytesProcessed, totalBytes, errors });
}
@ -1688,9 +1713,9 @@ export function createStore(dbPath?: string): Store {
ensureVecTable: (dimensions: number) => ensureVecTableInternal(db, dimensions),
// Index health
getHashesNeedingEmbedding: () => getHashesNeedingEmbedding(db),
getIndexHealth: () => getIndexHealth(db),
getStatus: () => getStatus(db),
getHashesNeedingEmbedding: (model?: string) => getHashesNeedingEmbedding(db, undefined, model ?? store.llm?.embedModelName ?? DEFAULT_EMBED_MODEL),
getIndexHealth: (model?: string) => getIndexHealth(db, model ?? store.llm?.embedModelName ?? DEFAULT_EMBED_MODEL),
getStatus: (model?: string) => getStatus(db, model ?? store.llm?.embedModelName ?? DEFAULT_EMBED_MODEL),
// Caching
getCacheKey,
@ -1750,7 +1775,7 @@ export function createStore(dbPath?: string): Store {
// Vector/embedding operations
getHashesForEmbedding: () => getHashesForEmbedding(db),
clearAllEmbeddings: () => clearAllEmbeddings(db),
insertEmbedding: (hash: string, seq: number, pos: number, embedding: Float32Array, model: string, embeddedAt: string) => insertEmbedding(db, hash, seq, pos, embedding, model, embeddedAt),
insertEmbedding: (hash: string, seq: number, pos: number, embedding: Float32Array, model: string, embeddedAt: string, totalChunks?: number) => insertEmbedding(db, hash, seq, pos, embedding, model, embeddedAt, totalChunks),
};
return store;
@ -1949,15 +1974,23 @@ export type IndexStatus = {
// Index health
// =============================================================================
export function getHashesNeedingEmbedding(db: Database, collection?: string): number {
export function getHashesNeedingEmbedding(db: Database, collection?: string, model: string = DEFAULT_EMBED_MODEL): number {
const collectionFilter = collection ? `AND d.collection = ?` : ``;
const expectedChunksExpr = contentVectorExpectedChunksExpr(db);
const stmt = db.prepare(`
SELECT COUNT(DISTINCT d.hash) as count
FROM documents d
LEFT JOIN content_vectors v ON d.hash = v.hash AND v.seq = 0
WHERE d.active = 1 AND v.hash IS NULL ${collectionFilter}
LEFT JOIN (
SELECT hash, model, COUNT(*) AS chunk_count, ${expectedChunksExpr} AS expected_chunks
FROM content_vectors
WHERE model = ?
GROUP BY hash, model
) v ON d.hash = v.hash
WHERE d.active = 1
AND (v.hash IS NULL OR v.chunk_count < v.expected_chunks)
${collectionFilter}
`);
const result = (collection ? stmt.get(collection) : stmt.get()) as { count: number };
const result = (collection ? stmt.get(model, collection) : stmt.get(model)) as { count: number };
return result.count;
}
@ -1967,8 +2000,8 @@ export type IndexHealthInfo = {
daysStale: number | null;
};
export function getIndexHealth(db: Database): IndexHealthInfo {
const needsEmbedding = getHashesNeedingEmbedding(db);
export function getIndexHealth(db: Database, model: string = DEFAULT_EMBED_MODEL): IndexHealthInfo {
const needsEmbedding = getHashesNeedingEmbedding(db, undefined, model);
const totalDocs = (db.prepare(`SELECT COUNT(*) as count FROM documents WHERE active = 1`).get() as { count: number }).count;
const mostRecent = db.prepare(`SELECT MAX(modified_at) as latest FROM documents WHERE active = 1`).get() as { latest: string | null };
@ -3316,15 +3349,22 @@ async function getEmbedding(text: string, model: string, isQuery: boolean, sessi
* Get all unique content hashes that need embeddings (from active documents).
* Returns hash, document body, and a sample path for display purposes.
*/
export function getHashesForEmbedding(db: Database): { hash: string; body: string; path: string }[] {
export function getHashesForEmbedding(db: Database, model: string = DEFAULT_EMBED_MODEL): { hash: string; body: string; path: string }[] {
const expectedChunksExpr = contentVectorExpectedChunksExpr(db);
return db.prepare(`
SELECT d.hash, c.doc as body, MIN(d.path) as path
FROM documents d
JOIN content c ON d.hash = c.hash
LEFT JOIN content_vectors v ON d.hash = v.hash AND v.seq = 0
WHERE d.active = 1 AND v.hash IS NULL
LEFT JOIN (
SELECT hash, model, COUNT(*) AS chunk_count, ${expectedChunksExpr} AS expected_chunks
FROM content_vectors
WHERE model = ?
GROUP BY hash, model
) v ON d.hash = v.hash
WHERE d.active = 1
AND (v.hash IS NULL OR v.chunk_count < v.expected_chunks)
GROUP BY d.hash
`).all() as { hash: string; body: string; path: string }[];
`).all(model) as { hash: string; body: string; path: string }[];
}
/**
@ -3409,13 +3449,14 @@ export function insertEmbedding(
pos: number,
embedding: Float32Array,
model: string,
embeddedAt: string
embeddedAt: string,
totalChunks: number = 1
): void {
const hashSeq = `${hash}_${seq}`;
// Insert content_vectors first — crash-safe ordering (see getHashesForEmbedding)
const insertContentVectorStmt = db.prepare(`INSERT OR REPLACE INTO content_vectors (hash, seq, pos, model, embedded_at) VALUES (?, ?, ?, ?, ?)`);
insertContentVectorStmt.run(hash, seq, pos, model, embeddedAt);
const insertContentVectorStmt = db.prepare(`INSERT OR REPLACE INTO content_vectors (hash, seq, pos, model, total_chunks, embedded_at) VALUES (?, ?, ?, ?, ?, ?)`);
insertContentVectorStmt.run(hash, seq, pos, model, totalChunks, embeddedAt);
// vec0 virtual tables don't support OR REPLACE — use DELETE + INSERT
const deleteVecStmt = db.prepare(`DELETE FROM vectors_vec WHERE hash_seq = ?`);
@ -3424,6 +3465,26 @@ export function insertEmbedding(
insertVecStmt.run(hashSeq, embedding);
}
/**
 * Drops the stored vectors of every hash whose chunk coverage for `model` is
 * incomplete.
 *
 * A hash is left alone when it has no content_vectors rows for `model`, or when
 * its row count equals the expected chunk count. For any other count, each
 * `${hash}_${seq}` entry is deleted from vectors_vec and then the hash's
 * content_vectors rows for `model` are deleted.
 *
 * @param db Database handle used to prepare the statements.
 * @param expectedChunksByHash Expected number of chunks, keyed by content hash.
 * @param model Embedding model whose rows are inspected and removed.
 * @returns Total number of content_vectors rows that belonged to removed hashes.
 */
function removeIncompleteEmbeddings(db: Database, expectedChunksByHash: Map<string, number>, model: string): number {
  const selectSeqs = db.prepare(`SELECT seq FROM content_vectors WHERE hash = ? AND model = ?`);
  const dropContentRows = db.prepare(`DELETE FROM content_vectors WHERE hash = ? AND model = ?`);
  const dropVector = db.prepare(`DELETE FROM vectors_vec WHERE hash_seq = ?`);

  let deletedRows = 0;
  expectedChunksByHash.forEach((expectedChunks, hash) => {
    const stored = selectSeqs.all(hash, model) as { seq: number }[];
    const isIncomplete = stored.length > 0 && stored.length !== expectedChunks;
    if (!isIncomplete) return;

    // Vector rows are keyed by "<hash>_<seq>", so remove them one by one
    // before the bookkeeping rows for this hash/model pair.
    stored.forEach(({ seq }) => {
      dropVector.run(`${hash}_${seq}`);
    });
    dropContentRows.run(hash, model);
    deletedRows += stored.length;
  });
  return deletedRows;
}
// =============================================================================
// Query expansion
// =============================================================================
@ -3800,7 +3861,7 @@ export function getDocumentBody(db: Database, doc: DocumentResult | { filepath:
let body = row.body;
if (fromLine !== undefined || maxLines !== undefined) {
const lines = body.split('\n');
const start = (fromLine || 1) - 1;
const start = Math.max(0, (fromLine || 1) - 1);
const end = maxLines !== undefined ? start + maxLines : lines.length;
body = lines.slice(start, end).join('\n');
}
@ -3922,7 +3983,7 @@ export function findDocuments(
// Status
// =============================================================================
export function getStatus(db: Database): IndexStatus {
export function getStatus(db: Database, model: string = DEFAULT_EMBED_MODEL): IndexStatus {
// DB is source of truth for collections — config provides supplementary metadata
const dbCollections = db.prepare(`
SELECT
@ -3957,7 +4018,7 @@ export function getStatus(db: Database): IndexStatus {
});
const totalDocs = (db.prepare(`SELECT COUNT(*) as c FROM documents WHERE active = 1`).get() as { c: number }).c;
const needsEmbedding = getHashesNeedingEmbedding(db);
const needsEmbedding = getHashesNeedingEmbedding(db, undefined, model);
const hasVectors = !!db.prepare(`SELECT name FROM sqlite_master WHERE type='table' AND name='vectors_vec'`).get();
return {
@ -4023,7 +4084,7 @@ export function extractSnippet(body: string, query: string, maxLen = 500, chunkP
let searchBody = body;
let lineOffset = 0;
if (chunkPos && chunkPos > 0) {
if (chunkPos !== undefined && chunkPos >= 0) {
// Search within the chunk region, with some padding for context
// Use provided chunkLen or fall back to max chunk size (covers variable-length chunks)
const searchLen = chunkLen || CHUNK_SIZE_CHARS;
@ -4055,6 +4116,23 @@ export function extractSnippet(body: string, query: string, maxLen = 500, chunkP
}
}
if (chunkPos !== undefined && chunkPos >= 0 && bestScore <= 0) {
if (chunkPos === 0) {
// chunkPos=0 may be the chunk selector's initialization default for queries
// where lexical chunk scoring found no winner (e.g. tokens filtered to empty
// by the length>2 guard). Retry with full body so the real match isn't missed.
return extractSnippet(body, query, maxLen, undefined, undefined, intent);
}
// For chunkPos > 0 the reranker actively picked this chunk. Tokens failing to
// match literally is most likely a tokenizer limitation (quoted phrases, FTS5
// syntax, HYDE passages, semantic hits), so anchor on the chunk start rather
// than disregarding the reranker's pick.
const contextStart = Math.max(0, chunkPos - 100);
bestLine = chunkPos > contextStart
? searchBody.slice(0, chunkPos - contextStart).split('\n').length - 1
: 0;
}
const start = Math.max(0, bestLine - 1);
const end = Math.min(lines.length, bestLine + 3);
const snippetLines = lines.slice(start, end);

164
test/bin-wrapper.test.ts Normal file
View File

@ -0,0 +1,164 @@
import { afterEach, describe, expect, test } from "vitest";
import { chmodSync, copyFileSync, mkdtempSync, mkdirSync, readFileSync, realpathSync, rmSync, symlinkSync, writeFileSync } from "node:fs";
import { tmpdir } from "node:os";
import { dirname, join, relative } from "node:path";
import { execFileSync } from "node:child_process";
import { fileURLToPath } from "node:url";
// Filesystem path of the directory one level above this test module's directory;
// makePackage copies bin/qmd from here into each fixture package.
const repoRoot = fileURLToPath(new URL("..", import.meta.url));
// Temp roots created by makeTempFixture; drained and deleted in afterEach.
const fixtures: string[] = [];
/**
 * Creates a throwaway fixture root containing stub `node` and `bun` executables.
 *
 * Each stub is a shell script that writes its own runtime name, its first
 * argument, and the remaining arguments (one per line) to the file named by
 * `$QMD_WRAPPER_CAPTURE`. The root is registered in `fixtures`.
 *
 * @returns The fixture root, the capture file path, and the stub runtime directory.
 */
function makeTempFixture() {
  const root = mkdtempSync(join(tmpdir(), "qmd-bin-wrapper-"));
  fixtures.push(root);

  const runtimeBin = join(root, "runtime-bin");
  mkdirSync(runtimeBin, { recursive: true });

  ["node", "bun"].forEach((runtime) => {
    const stubPath = join(runtimeBin, runtime);
    const stubScript = `#!/bin/sh\n{\n printf '%s\\n' '${runtime}'\n printf '%s\\n' "$1"\n shift\n printf '%s\\n' "$@"\n} > "$QMD_WRAPPER_CAPTURE"\n`;
    writeFileSync(stubPath, stubScript);
    chmodSync(stubPath, 0o755);
  });

  return { root, capturePath: join(root, "capture.txt"), runtimeBin };
}
/**
 * Lays out a minimal installed-package tree under `root`.
 *
 * The tree contains an executable copy of the repository's `bin/qmd`, a
 * placeholder `dist/cli/qmd.js`, and one empty file per requested lockfile.
 *
 * @param root Fixture root directory.
 * @param packagePath Package location relative to `root`.
 * @param lockfiles File names to create (empty) in the package root.
 * @returns Absolute path of the created package root.
 */
function makePackage(root: string, packagePath: string, lockfiles: string[] = []) {
  const packageRoot = join(root, packagePath);
  const binDir = join(packageRoot, "bin");
  const cliDir = join(packageRoot, "dist", "cli");
  mkdirSync(binDir, { recursive: true });
  mkdirSync(cliDir, { recursive: true });

  const wrapperCopy = join(binDir, "qmd");
  copyFileSync(join(repoRoot, "bin", "qmd"), wrapperCopy);
  chmodSync(wrapperCopy, 0o755);
  writeFileSync(join(cliDir, "qmd.js"), "// fixture\n");

  lockfiles.forEach((lockfile) => {
    writeFileSync(join(packageRoot, lockfile), "");
  });
  return packageRoot;
}
/**
 * Creates a symlink at `linkPath` that points to `target` through a path
 * relative to the link's own directory, creating that directory if needed.
 */
function symlinkRelative(target: string, linkPath: string) {
  const linkDir = dirname(linkPath);
  mkdirSync(linkDir, { recursive: true });

  const relativeTarget = relative(linkDir, target);
  symlinkSync(relativeTarget, linkPath);
}
/**
 * Runs `commandPath --version` with the stub runtime directory first on PATH
 * and returns what the invoked stub recorded in the capture file.
 *
 * @param commandPath Executable (or symlink to it) to launch.
 * @param runtimeBin Directory prepended to PATH.
 * @param capturePath File the stub writes to; removed before the run.
 * @param env Extra environment variables layered over `process.env`.
 * @returns The first captured line as `runtime`, the second as `scriptPath`,
 *          and all remaining lines as `args`.
 */
function runWrapper(commandPath: string, runtimeBin: string, capturePath: string, env: Record<string, string> = {}) {
  // Start from a clean slate so a stale capture can never be read back.
  rmSync(capturePath, { force: true });

  const childEnv = {
    ...process.env,
    ...env,
    PATH: `${runtimeBin}:${process.env.PATH ?? ""}`,
    QMD_WRAPPER_CAPTURE: capturePath,
  };
  execFileSync(commandPath, ["--version"], {
    env: childEnv,
    stdio: ["ignore", "pipe", "pipe"],
  });

  const capturedLines = readFileSync(capturePath, "utf8").trimEnd().split("\n");
  const [runtime, scriptPath, ...args] = capturedLines;
  return { runtime, scriptPath, args };
}
afterEach(() => {
  // Empty the registry up front, then delete every temp root that was in it.
  const pendingRoots = fixtures.splice(0, fixtures.length);
  pendingRoots.forEach((fixtureRoot) => {
    rmSync(fixtureRoot, { recursive: true, force: true });
  });
});
describe("bin/qmd package wrapper", () => {
  // Symlink-free path of the CLI entry point inside a fixture package.
  const cliEntry = (pkgRoot: string) => realpathSync(join(pkgRoot, "dist", "cli", "qmd.js"));

  test("direct package invocation resolves dist/cli/qmd.js from the package root", () => {
    const fixture = makeTempFixture();
    const pkgRoot = makePackage(fixture.root, "node_modules/@tobilu/qmd");

    const outcome = runWrapper(join(pkgRoot, "bin", "qmd"), fixture.runtimeBin, fixture.capturePath);

    expect(outcome.runtime).toBe("node");
    expect(outcome.scriptPath).toBe(cliEntry(pkgRoot));
    expect(outcome.args).toEqual(["--version"]);
  });

  test("npm/Homebrew global bin symlink resolves scoped package path", () => {
    const fixture = makeTempFixture();
    const pkgRoot = makePackage(fixture.root, "opt/homebrew/lib/node_modules/@tobilu/qmd");
    const linkPath = join(fixture.root, "opt", "homebrew", "bin", "qmd");
    symlinkRelative(join(pkgRoot, "bin", "qmd"), linkPath);

    const outcome = runWrapper(linkPath, fixture.runtimeBin, fixture.capturePath);

    expect(outcome.runtime).toBe("node");
    expect(outcome.scriptPath).toBe(cliEntry(pkgRoot));
  });

  test("multi-hop global bin symlink chain resolves to the real package root", () => {
    const fixture = makeTempFixture();
    const pkgRoot = makePackage(fixture.root, "opt/homebrew/lib/node_modules/@tobilu/qmd");
    const linkPath = join(fixture.root, "opt", "homebrew", "bin", "qmd");
    const intermediate = join(fixture.root, "opt", "homebrew", "Cellar", "qmd", "current", "bin", "qmd");
    // Chain: linkPath -> intermediate -> package bin/qmd.
    symlinkRelative(join(pkgRoot, "bin", "qmd"), intermediate);
    symlinkRelative(intermediate, linkPath);

    const outcome = runWrapper(linkPath, fixture.runtimeBin, fixture.capturePath);

    expect(outcome.runtime).toBe("node");
    expect(outcome.scriptPath).toBe(cliEntry(pkgRoot));
  });

  test("linuxbrew global bin symlink resolves lib/node_modules scoped package path", () => {
    const fixture = makeTempFixture();
    const pkgRoot = makePackage(fixture.root, "home/linuxbrew/.linuxbrew/lib/node_modules/@tobilu/qmd");
    const linkPath = join(fixture.root, "home", "linuxbrew", ".linuxbrew", "bin", "qmd");
    symlinkRelative(join(pkgRoot, "bin", "qmd"), linkPath);

    const outcome = runWrapper(linkPath, fixture.runtimeBin, fixture.capturePath);

    expect(outcome.runtime).toBe("node");
    expect(outcome.scriptPath).toBe(cliEntry(pkgRoot));
  });

  test("npx scoped package .bin symlink resolves @tobilu/qmd package path", () => {
    const fixture = makeTempFixture();
    const pkgRoot = makePackage(fixture.root, "npm/_npx/abc123/node_modules/@tobilu/qmd");
    const linkPath = join(fixture.root, "npm", "_npx", "abc123", "node_modules", ".bin", "qmd");
    symlinkRelative(join(pkgRoot, "bin", "qmd"), linkPath);

    const outcome = runWrapper(linkPath, fixture.runtimeBin, fixture.capturePath);

    expect(outcome.runtime).toBe("node");
    expect(outcome.scriptPath).toBe(cliEntry(pkgRoot));
  });

  test("bun global symlink uses bun when package-local bun lockfile exists", () => {
    const fixture = makeTempFixture();
    const pkgRoot = makePackage(fixture.root, "home/user/.bun/install/global/node_modules/@tobilu/qmd", ["bun.lock"]);
    const linkPath = join(fixture.root, "home", "user", ".bun", "bin", "qmd");
    symlinkRelative(join(pkgRoot, "bin", "qmd"), linkPath);

    const outcome = runWrapper(linkPath, fixture.runtimeBin, fixture.capturePath);

    expect(outcome.runtime).toBe("bun");
    expect(outcome.scriptPath).toBe(cliEntry(pkgRoot));
  });

  test("ambient BUN_INSTALL alone does not select bun for an npm-installed package", () => {
    const fixture = makeTempFixture();
    const pkgRoot = makePackage(fixture.root, "opt/homebrew/lib/node_modules/@tobilu/qmd");
    const linkPath = join(fixture.root, "opt", "homebrew", "bin", "qmd");
    symlinkRelative(join(pkgRoot, "bin", "qmd"), linkPath);

    const ambientEnv = { BUN_INSTALL: join(fixture.root, ".bun") };
    const outcome = runWrapper(linkPath, fixture.runtimeBin, fixture.capturePath, ambientEnv);

    expect(outcome.runtime).toBe("node");
    expect(outcome.scriptPath).toBe(cliEntry(pkgRoot));
  });

  test("package-lock.json takes priority over bun lockfiles", () => {
    const fixture = makeTempFixture();
    const pkgRoot = makePackage(fixture.root, "node_modules/@tobilu/qmd", ["package-lock.json", "bun.lock"]);

    const outcome = runWrapper(join(pkgRoot, "bin", "qmd"), fixture.runtimeBin, fixture.capturePath);

    expect(outcome.runtime).toBe("node");
    expect(outcome.scriptPath).toBe(cliEntry(pkgRoot));
  });
});

View File

@ -0,0 +1,82 @@
import { describe, expect, test } from "vitest";
import { finishSuccessfulCliCommand } from "../src/cli/qmd.ts";
import { LlamaCpp } from "../src/llm.ts";
describe("CLI successful-exit lifecycle", () => {
  type WriteCallback = (error?: Error | null) => void;

  // Minimal writable stand-in: hands each chunk to `record`, invokes the
  // completion callback when one is given, and reports success.
  const recordingStream = (record: (chunk: string | Uint8Array) => void) => ({
    write: (chunk: string | Uint8Array, cb?: WriteCallback) => {
      record(chunk);
      cb?.();
      return true;
    },
  });

  test("exits 0 after successful JSON output when post-output LLM cleanup fails", async () => {
    const exitCodes: number[] = [];
    const stdoutChunks: string[] = [];
    const stderrChunks: string[] = [];

    await finishSuccessfulCliCommand({
      command: "query",
      format: "json",
      platform: "linux",
      cleanup: async () => {
        throw new Error("ggml_metal_device_free abort simulation");
      },
      exit: (code) => {
        exitCodes.push(code);
      },
      stdout: recordingStream((chunk) => {
        stdoutChunks.push(String(chunk));
      }),
      stderr: recordingStream((chunk) => {
        stderrChunks.push(String(chunk));
      }),
    });

    expect(exitCodes).toEqual([0]);
    expect(stderrChunks.join("")).toContain("QMD Warning: cleanup after successful output failed");
    expect(stdoutChunks).toEqual([""]);
  });

  test("uses immediate exit for successful macOS JSON query after stdout flush", async () => {
    const events: string[] = [];
    const note = (event: string) => () => {
      events.push(event);
    };

    await finishSuccessfulCliCommand({
      command: "query",
      format: "json",
      platform: "darwin",
      cleanup: async () => {
        events.push("cleanup");
      },
      exit: (code) => {
        events.push(`exit:${code}`);
      },
      immediateExit: (code) => {
        events.push(`immediate-exit:${code}`);
      },
      stdout: recordingStream(note("stdout-flush")),
      stderr: recordingStream(note("stderr-flush")),
    });

    expect(events).toEqual(["stdout-flush", "stderr-flush", "immediate-exit:0"]);
  });

  test("disposes Llama resources in dependency order before CLI exit", async () => {
    const disposed: string[] = [];
    // Fake resource whose dispose() only records its own label.
    const fakeResource = (label: string) => ({
      dispose: async () => {
        disposed.push(label);
      },
    });

    const llm = new LlamaCpp({ inactivityTimeoutMs: 0 });
    Object.assign(llm as unknown as Record<string, unknown>, {
      embedContexts: [fakeResource("embed-context")],
      rerankContexts: [fakeResource("rerank-context")],
      embedModel: fakeResource("embed-model"),
      generateModel: fakeResource("generate-model"),
      rerankModel: fakeResource("rerank-model"),
      llama: fakeResource("llama"),
    });

    await llm.dispose();

    expect(disposed).toEqual([
      "embed-context",
      "rerank-context",
      "embed-model",
      "generate-model",
      "rerank-model",
      "llama",
    ]);
  });
});

View File

@ -233,6 +233,7 @@ describe("CLI Help", () => {
expect(stdout).toContain("Usage:");
expect(stdout).toContain("qmd collection add");
expect(stdout).toContain("qmd search");
expect(stdout).toContain("--no-gpu");
expect(stdout).toContain("qmd skill show/install");
});
@ -507,6 +508,16 @@ describe("CLI Search Command", () => {
// Error message goes to stderr
expect(stderr).toContain("Usage:");
});
test("--json --full includes line field for round-tripping to qmd get", async () => {
  const run = await runQmd(["search", "meeting", "--json", "--full", "-n", "1"]);
  expect(run.exitCode).toBe(0);

  const parsed = JSON.parse(run.stdout);
  expect(parsed.length).toBeGreaterThan(0);

  // The top hit must carry a positive numeric line alongside the body text.
  const topHit = parsed[0];
  expect(topHit.line).toBeTypeOf("number");
  expect(topHit.line).toBeGreaterThan(0);
  expect(topHit.body).toBeTypeOf("string");
});
});
describe("CLI Get Command", () => {
@ -532,6 +543,13 @@ describe("CLI Get Command", () => {
// Should indicate file not found
expect(exitCode).toBe(1);
});
test("clamps negative --from to top of file (no silent tail content)", async () => {
  // Reference output: the same document fetched without any --from.
  const { stdout: wholeFile } = await runQmd(["get", "README.md"]);
  const { exitCode, stdout: fromNegative } = await runQmd(["get", "README.md", "--from", "-19"]);

  expect(exitCode).toBe(0);
  expect(fromNegative).toBe(wholeFile);
});
});
describe("CLI Multi-Get Command", () => {

View File

@ -13,6 +13,8 @@ import {
getDefaultLlamaCpp,
disposeDefaultLlamaCpp,
resolveLlamaGpuMode,
setNodeLlamaCppModuleForTest,
withNativeStdoutRedirectedToStderr,
resolveParallelismOverride,
resolveSafeParallelism,
withLLMSession,
@ -78,6 +80,29 @@ describe("QMD_LLAMA_GPU resolution", () => {
expect(resolveLlamaGpuMode(" cuda ")).toBe("cuda");
});
test("QMD_FORCE_CPU disables GPU before QMD_LLAMA_GPU auto-detection", () => {
  const savedForceCpu = process.env.QMD_FORCE_CPU;
  process.env.QMD_FORCE_CPU = "1";
  try {
    // Both an unset and an explicit GPU request must resolve to "no GPU".
    for (const requested of [undefined, "cuda"]) {
      expect(resolveLlamaGpuMode(requested)).toBe(false);
    }
  } finally {
    if (savedForceCpu !== undefined) process.env.QMD_FORCE_CPU = savedForceCpu;
    else delete process.env.QMD_FORCE_CPU;
  }
});
test("QMD_FORCE_CPU ignores false-ish values", () => {
  const savedForceCpu = process.env.QMD_FORCE_CPU;
  process.env.QMD_FORCE_CPU = "0";
  try {
    const resolved = resolveLlamaGpuMode(undefined);
    expect(resolved).toBe("auto");
  } finally {
    if (savedForceCpu !== undefined) process.env.QMD_FORCE_CPU = savedForceCpu;
    else delete process.env.QMD_FORCE_CPU;
  }
});
test("warns and falls back to auto for unsupported values", () => {
const stderrSpy = vi.spyOn(process.stderr, "write").mockReturnValue(true);
try {
@ -90,6 +115,71 @@ describe("QMD_LLAMA_GPU resolution", () => {
});
});
describe("native llama stdout containment", () => {
  // Puts an environment variable back to a captured value, unsetting it when
  // it was absent at capture time.
  const restoreEnv = (name: string, previous: string | undefined) => {
    if (previous === undefined) delete process.env[name];
    else process.env[name] = previous;
  };

  test("redirects native stdout noise to stderr while JSON callers are initializing llama", async () => {
    const outSpy = vi.spyOn(process.stdout, "write").mockReturnValue(true);
    const errSpy = vi.spyOn(process.stderr, "write").mockReturnValue(true);
    try {
      await withNativeStdoutRedirectedToStderr(async () => {
        process.stdout.write("cmake build spam\n");
        return "ok";
      });

      expect(outSpy).not.toHaveBeenCalled();
      expect(errSpy).toHaveBeenCalledWith("cmake build spam\n", undefined, undefined);
    } finally {
      outSpy.mockRestore();
      errSpy.mockRestore();
    }
  });

  test("keeps native GPU failure noise off stdout and caches failed GPU init", async () => {
    const savedGpu = process.env.QMD_LLAMA_GPU;
    const savedForceCpu = process.env.QMD_FORCE_CPU;
    process.env.QMD_LLAMA_GPU = "cuda";
    delete process.env.QMD_FORCE_CPU;

    // Records the `gpu` option of every getLlama call, in order.
    const requestedGpuModes: unknown[] = [];
    const cpuOnlyLlama = { gpu: false, cpuMathCores: 4 };
    setNodeLlamaCppModuleForTest({
      LlamaLogLevel: { error: "error" },
      resolveModelFile: vi.fn(),
      LlamaChatSession: vi.fn() as any,
      getLlama: vi.fn(async (options: Record<string, unknown>) => {
        requestedGpuModes.push(options.gpu);
        if (options.gpu === "cuda") {
          // Simulate native build chatter on stdout followed by a GPU failure.
          process.stdout.write("cmake build spam\n");
          throw new Error("CUDA unavailable");
        }
        return cpuOnlyLlama as any;
      }),
    });

    const outSpy = vi.spyOn(process.stdout, "write").mockReturnValue(true);
    const errSpy = vi.spyOn(process.stderr, "write").mockReturnValue(true);
    try {
      const firstInstance = new LlamaCpp();
      const secondInstance = new LlamaCpp();
      await (firstInstance as any).ensureLlama();
      await (secondInstance as any).ensureLlama();

      expect(outSpy).not.toHaveBeenCalled();
      expect(errSpy).toHaveBeenCalledWith("cmake build spam\n", undefined, undefined);
      expect(requestedGpuModes).toEqual(["cuda", false, false]);

      const stderrText = String(errSpy.mock.calls.map(([firstArg]) => firstArg).join(""));
      expect(stderrText).toContain("skipping previously failed GPU init");
    } finally {
      outSpy.mockRestore();
      errSpy.mockRestore();
      setNodeLlamaCppModuleForTest(null);
      restoreEnv("QMD_LLAMA_GPU", savedGpu);
      restoreEnv("QMD_FORCE_CPU", savedForceCpu);
    }
  });
});
describe("LLM context parallelism safety", () => {
test("defaults Windows CUDA to one context to avoid ggml-cuda.cu:98 crashes", () => {
expect(resolveSafeParallelism({

View File

@ -913,6 +913,22 @@ describe.skipIf(!!process.env.CI)("MCP HTTP Transport", () => {
initTestDatabase(db);
seedTestData(db);
// Seed a document whose marker line sits behind 300 pad lines of 37 chars
// each (11100 chars), i.e. beyond the first chunk boundary at
// CHUNK_SIZE_CHARS = 3600. Twenty more pad lines follow the marker.
{
  const padLine = "Pad line for chunk boundary coverage\n";
  const fixtureBody = [
    padLine.repeat(300),
    "UNIQUE_KEYWORD_XYZ marker\n",
    padLine.repeat(20),
  ].join("");
  const fixtureHash = "hash-abslines";
  const timestamp = new Date().toISOString();

  const insertContent = db.prepare(`INSERT OR IGNORE INTO content (hash, doc, created_at) VALUES (?, ?, ?)`);
  insertContent.run(fixtureHash, fixtureBody, timestamp);

  const insertDocument = db.prepare(`INSERT INTO documents (collection, path, title, hash, created_at, modified_at, active) VALUES ('docs', ?, ?, ?, ?, ?, 1)`);
  insertDocument.run("absolute-line-fixture.md", "Absolute Line Fixture", fixtureHash, timestamp, timestamp);
}
// Sync config into SQLite
const httpTestConfig: CollectionConfig = {
collections: {
@ -1074,4 +1090,29 @@ describe.skipIf(!!process.env.CI)("MCP HTTP Transport", () => {
expect(json.result).toBeDefined();
expect(json.result.content.length).toBeGreaterThan(0);
});
test("POST /mcp tools/call query returns absolute source-file line numbers, not chunk-local", async () => {
  // Initialize the session before issuing the tool call.
  const initializeRequest = {
    jsonrpc: "2.0", id: 1, method: "initialize",
    params: { protocolVersion: "2025-03-26", capabilities: {}, clientInfo: { name: "test", version: "1.0" } },
  };
  await mcpRequest(initializeRequest);

  const queryRequest = {
    jsonrpc: "2.0", id: 5, method: "tools/call",
    params: {
      name: "query",
      arguments: {
        searches: [{ type: "lex", query: "UNIQUE_KEYWORD_XYZ" }],
        rerank: false,
      },
    },
  };
  const response = await mcpRequest(queryRequest);
  expect(response.status).toBe(200);

  const results = response.json.result.structuredContent.results;
  expect(results.length).toBeGreaterThan(0);

  const fixtureHit = results.find((r: any) => r.file === "docs/absolute-line-fixture.md");
  expect(fixtureHit).toBeDefined();
  // Line is counted from the start of the source file, and the snippet
  // header must report a 3xx line as well.
  expect(fixtureHit.line).toBe(301);
  expect(fixtureHit.snippet).toMatch(/^\d+: @@ -3\d\d,/);
});
});

View File

@ -1713,6 +1713,21 @@ describe("Document Retrieval", () => {
expect(body).toBeNull();
await cleanupTestDb(store);
});
test("getDocumentBody clamps negative fromLine to top of document", async () => {
  const store = await createTestStore();
  const collectionName = await createTestCollection({ pwd: "/path" });
  const fullText = ["Line 1", "Line 2", "Line 3", "Line 4", "Line 5"].join("\n");
  await insertTestDocument(store.db, collectionName, {
    name: "mydoc",
    displayPath: "mydoc.md",
    body: fullText,
  });

  // A negative start with a generous line budget must yield the whole document.
  const clamped = store.getDocumentBody({ filepath: "/path/mydoc.md" }, -19, 80);
  expect(clamped).toBe(fullText);

  await cleanupTestDb(store);
});
});
describe("findDocuments (multi-get)", () => {
@ -2001,6 +2016,33 @@ describe("Snippet Extraction", () => {
expect(line).toBe(51); // "Target keyword" is line 51
expect(linesBefore).toBeGreaterThan(40); // Many lines before
});
test("extractSnippet anchors on chunkPos when lexical scoring finds no match", () => {
  // A quoted-phrase query keeps its quote characters after tokenisation, so
  // none of its terms occur in the body and the lexical score stays at zero.
  // The chunk position was still supplied, so the reported line should land
  // near that chunk instead of at the top of the document.
  const filler = "Lorem ipsum dolor sit amet\n".repeat(100);
  const body = `${filler}chunk content here\nmore chunk content\n${filler}`;

  const snippet = extractSnippet(body, '"unrelated quoted phrase"', 200, filler.length);

  expect(snippet.line).toBeGreaterThan(50);
  expect(snippet.line).toBeLessThan(110);
});
test("extractSnippet with chunkPos=0 falls back to full-body scan when chunk has no match", () => {
  // A chunk position of 0 can be a default rather than a genuine first-chunk
  // hit, so a match located far beyond chunk 0 must still be found.
  const filler = "Lorem ipsum dolor sit amet\n".repeat(200);
  const body = `${filler}TARGET_KEYWORD line content\ntail line\n`;

  const snippet = extractSnippet(body, "TARGET_KEYWORD", 200, 0);

  expect(snippet.line).toBe(201);
});
});
// =============================================================================
@ -2239,6 +2281,26 @@ describe("Index Status", () => {
await cleanupTestDb(store);
});
test("embedding health is scoped to the active embed model", async () => {
  const store = await createTestStore();
  const collectionName = await createTestCollection();

  const models = {
    active: "hf:active/embed-model.gguf",
    stale: "hf:stale/embed-model.gguf",
  };
  const embeddedAt = new Date().toISOString();
  store.llm = { embedModelName: models.active } as any;
  store.ensureVecTable(3);

  // The only stored vector belongs to the stale model.
  await insertTestDocument(store.db, collectionName, { name: "doc1", hash: "hash1" });
  store.insertEmbedding("hash1", 0, 0, new Float32Array([1, 2, 3]), models.stale, embeddedAt, 1);

  // Under the active model the document still counts as pending ...
  expect(store.getHashesNeedingEmbedding()).toBe(1);
  expect(store.getStatus().needsEmbedding).toBe(1);
  expect(store.getIndexHealth().needsEmbedding).toBe(1);
  // ... while asking about the stale model reports nothing pending.
  expect(store.getHashesNeedingEmbedding(models.stale)).toBe(0);

  await cleanupTestDb(store);
});
test("getIndexHealth returns health info", async () => {
const store = await createTestStore();
const collectionName = await createTestCollection();
@ -3051,6 +3113,68 @@ describe("Embedding batching", () => {
}
});
test("generateEmbeddings does not mark a partially embedded multi-chunk document complete", async () => {
  const store = await createTestStore();
  const db = store.db;

  // Embedder whose batch call succeeds for the first text only and yields
  // null for every later one.
  const partialLlm = {
    async embed(_text: string, _options?: { model?: string }) {
      return { embedding: [0.1, 0.2, 0.3], model: "fake-embed" };
    },
    async embedBatch(texts: string[], _options?: { model?: string }) {
      return texts.map((_text, position) => {
        if (position > 0) return null;
        return { embedding: [1, 2, 3], model: "fake-embed" };
      });
    },
  };
  setDefaultLlamaCpp(createFakeTokenizer() as any);
  store.llm = partialLlm as any;

  const countRows = (table: "content_vectors" | "vectors_vec") =>
    table === "content_vectors"
      ? db.prepare(`SELECT COUNT(*) as count FROM content_vectors`).get()
      : db.prepare(`SELECT COUNT(*) as count FROM vectors_vec`).get();

  try {
    await insertTestDocument(db, "docs", {
      name: "long-doc",
      body: "# Long doc\n\n" + "partial embedding regression ".repeat(260),
    });

    const outcome = await generateEmbeddings(store);

    expect(outcome.errors).toBeGreaterThan(0);
    // No vectors may survive, and the document must still be pending.
    expect(countRows("content_vectors")).toEqual({ count: 0 });
    expect(countRows("vectors_vec")).toEqual({ count: 0 });
    expect(store.getHashesNeedingEmbedding()).toBe(1);
    expect(store.getStatus().needsEmbedding).toBe(1);
  } finally {
    setDefaultLlamaCpp(null);
    await cleanupTestDb(store);
  }
});
test("generateEmbeddings opens a long-lived LLM session for embed runs", async () => {
  const store = await createTestStore();
  const embedLlm = createFakeEmbedLlm();
  const withSessionSpy = vi.spyOn(llmModule, "withLLMSessionForLlm");
  setDefaultLlamaCpp(createFakeTokenizer() as any);
  store.llm = embedLlm as any;

  try {
    await insertTestDocument(store.db, "docs", { name: "one", body: "# One\n\nAlpha" });
    await generateEmbeddings(store);

    // The session must be opened on the store's LLM with a 30-minute budget.
    const expectedOptions = expect.objectContaining({
      maxDuration: 30 * 60 * 1000,
      name: "generateEmbeddings",
    });
    expect(withSessionSpy).toHaveBeenCalledWith(embedLlm, expect.any(Function), expectedOptions);
  } finally {
    withSessionSpy.mockRestore();
    setDefaultLlamaCpp(null);
    await cleanupTestDb(store);
  }
});
test("vectorSearchQuery uses the active llm embed model for vector lookups", async () => {
const store = await createTestStore();
const model = "hf:Qwen/Qwen3-Embedding-0.6B-GGUF/Qwen3-Embedding-0.6B-Q8_0.gguf";