Merge remote-tracking branch 'origin/main' into feat/local-qmd-index-bench
# Conflicts: # src/cli/qmd.ts
This commit is contained in:
commit
b2550d273a
20
CHANGELOG.md
20
CHANGELOG.md
@ -4,6 +4,23 @@
|
||||
|
||||
### Fixes
|
||||
|
||||
- GPU: add `QMD_FORCE_CPU=1` / `--no-gpu` to bypass CUDA/Vulkan/Metal probing entirely, and route native llama.cpp stdout noise to stderr so JSON output stays parseable during search/query commands.
|
||||
- Snippet line numbers: `qmd_query` (MCP), HTTP `/query`, and `qmd query`
|
||||
(CLI JSON output and snippet headers) now return absolute source-file
|
||||
line numbers instead of chunk-local ones, so the `line` field can be
|
||||
passed back to `qmd_get` as `fromLine` without a separate lookup.
|
||||
Snippet selection remains scoped to the best matching chunk
|
||||
(preserves #149).
|
||||
- CLI: `qmd query --full` now emits the full document body in all output
|
||||
formats (json, csv, md, xml), restoring the documented behavior of the
|
||||
flag. Previously it returned only the best matching chunk (~3.6KB max
|
||||
per result). Output payload for `--full` queries is now proportional
|
||||
to total document size.
|
||||
- macOS Metal: `qmd query --json` now flushes successful JSON output and uses a safe immediate-exit path on Darwin to avoid ggml Metal finalizer aborts; other commands still dispose LLM contexts/models before the llama runtime. #368
|
||||
- Embedding: require complete chunk coverage before treating a document as
|
||||
embedded, remove partial vectors when chunk/session failures leave a
|
||||
document incomplete, and keep `qmd status` pending counts honest after
|
||||
interrupted long embed runs. #637 #378
|
||||
- Embedding: `qmd embed -c <collection>` now scopes pending-doc selection
|
||||
to the requested collection instead of embedding global pending work.
|
||||
Scoped `--force` clears only collection-owned vectors, preserves shared
|
||||
@ -33,6 +50,9 @@
|
||||
- Packaging: install AST grammar WASM packages as required dependencies so
|
||||
Bun global installs include TypeScript/TSX/JavaScript grammars, and add a
|
||||
`smoke:package-grammars` verification command. #595
|
||||
- Launcher: add wrapper smoke coverage for scoped package, npm/npx,
|
||||
Homebrew/Linuxbrew, Bun global symlink layouts, and `$BUN_INSTALL`
|
||||
false-positive runtime selection regressions. #351 #353 #354 #356 #358 #359
|
||||
|
||||
## [2.1.0] - 2026-04-05
|
||||
|
||||
|
||||
@ -798,6 +798,7 @@ llm_cache -- Cached LLM responses (query expansion, rerank scores)
|
||||
|----------|---------|-------------|
|
||||
| `XDG_CACHE_HOME` | `~/.cache` | Cache directory location |
|
||||
| `QMD_LLAMA_GPU` | `auto` | Force llama.cpp GPU backend (`metal`, `vulkan`, `cuda`) or disable GPU with `false` |
|
||||
| `QMD_FORCE_CPU` | unset | Set to `1`/`true` to force CPU mode before any CUDA/Vulkan/Metal probing. Equivalent CLI flag: `--no-gpu`. |
|
||||
| `QMD_EMBED_PARALLELISM` | automatic | Override embedding/reranking context parallelism (1-8). Windows CUDA defaults to `1` because parallel CUDA contexts can crash with `ggml-cuda.cu:98`; use Vulkan or raise this only if your driver is stable. |
|
||||
|
||||
## How It Works
|
||||
|
||||
111
src/cli/qmd.ts
111
src/cli/qmd.ts
@ -212,6 +212,76 @@ const cursor = {
|
||||
show() { process.stderr.write('\x1b[?25h'); },
|
||||
};
|
||||
|
||||
/**
 * Minimal writable surface needed by the CLI shutdown helpers: a single
 * `write` that accepts an optional completion callback. The helpers fall back
 * to `process.stdout` / `process.stderr` when no stream of this shape is
 * supplied.
 */
type CliLifecycleWritable = {
  /**
   * Write `chunk`; `callback` (if given) is invoked once the write has been
   * handled, optionally with an error.
   */
  write(chunk: string | Uint8Array, callback?: (error?: Error | null) => void): boolean;
};
|
||||
|
||||
/**
 * Options for `finishSuccessfulCliCommand`. Only `command` is required; every
 * other field is an override for a process-level default.
 */
type FinishSuccessfulCliCommandOptions = {
  /** Name of the CLI command that just completed (compared against "query"). */
  command: string;
  /** Output format of the command (compared against "json"). */
  format?: OutputFormat;
  /** Teardown to run before a normal exit; defaults to `disposeDefaultLlamaCpp`. */
  cleanup?: () => Promise<void>;
  /** Normal exit hook; defaults to `process.exit`. */
  exit?: (code: number) => void;
  /** Exit hook used when native cleanup is bypassed; defaults to `immediateProcessExit`. */
  immediateExit?: (code: number) => void;
  /** Stream flushed first; defaults to `process.stdout`. */
  stdout?: CliLifecycleWritable;
  /** Stream used for warnings and flushed before exit; defaults to `process.stderr`. */
  stderr?: CliLifecycleWritable;
  /** Platform override; defaults to `process.platform`. */
  platform?: NodeJS.Platform;
};
|
||||
|
||||
/**
 * Wait until everything previously queued on `stream` has been handled.
 *
 * Works by queueing an empty write and resolving when its callback fires.
 * Any error passed to the callback is intentionally ignored: the promise
 * always resolves so shutdown can continue.
 *
 * @param stream Stream to flush.
 */
async function flushWritable(stream: CliLifecycleWritable): Promise<void> {
  await new Promise<void>((resolve) => {
    stream.write("", () => resolve());
  });
}
|
||||
|
||||
/**
 * Decide whether the normal cleanup step should be skipped in favour of an
 * immediate exit.
 *
 * True only when all of the following hold:
 *  - the platform (`options.platform`, else `process.platform`) is "darwin";
 *  - the command is "query";
 *  - the output format is "json";
 *  - the `QMD_DISABLE_DARWIN_QUERY_JSON_SAFE_EXIT` environment variable is
 *    not set to "1" (opt-out switch).
 */
function shouldBypassNativeCleanup(options: FinishSuccessfulCliCommandOptions): boolean {
  const platform = options.platform ?? process.platform;
  if (platform !== "darwin") return false;
  if (options.command !== "query") return false;
  if (options.format !== "json") return false;
  return process.env.QMD_DISABLE_DARWIN_QUERY_JSON_SAFE_EXIT !== "1";
}
|
||||
|
||||
/**
 * Terminate the process with `code`, preferring `process.reallyExit` when the
 * runtime exposes it and falling back to `process.exit` otherwise.
 *
 * NOTE(review): `reallyExit` is not part of the typed `process` surface, hence
 * the widened local type; it is presumably the runtime's low-level exit hook —
 * confirm its semantics on each supported runtime.
 *
 * @param code Exit status to report.
 */
function immediateProcessExit(code: number): void {
  const runtime = process as NodeJS.Process & { reallyExit?: (code?: number) => void };
  if (typeof runtime.reallyExit !== "function") {
    process.exit(code);
    return;
  }
  // Called as a method so `this` stays bound to `process`.
  runtime.reallyExit(code);
}
|
||||
|
||||
/**
 * Finish a successful CLI command: flush output, tear down, and exit 0.
 *
 * Sequence:
 *  1. Flush stdout.
 *  2. If `shouldBypassNativeCleanup(options)` is true (macOS `query --json`),
 *     flush stderr and leave through the immediate-exit hook without running
 *     cleanup: ggml Metal can abort from C++ finalizers after valid JSON has
 *     already been produced (#368).
 *  3. Otherwise run cleanup (default `disposeDefaultLlamaCpp`). A cleanup
 *     failure is reported on stderr as a warning but does not change the exit
 *     status, because the command's output already completed.
 *  4. Flush stderr and exit 0 through the normal exit hook.
 *
 * This wrapper is only reached after the command completed, so real query
 * failures still exit through the normal error path before this runs.
 *
 * @param options Command identity plus optional overrides for streams,
 *   cleanup, exit hooks and platform.
 */
export async function finishSuccessfulCliCommand(options: FinishSuccessfulCliCommandOptions): Promise<void> {
  const stderr = options.stderr ?? process.stderr;
  const exit = options.exit ?? ((code: number) => process.exit(code));
  const immediateExit = options.immediateExit ?? immediateProcessExit;

  await flushWritable(options.stdout ?? process.stdout);

  if (shouldBypassNativeCleanup(options)) {
    await flushWritable(stderr);
    immediateExit(0);
    return;
  }

  try {
    await (options.cleanup ?? disposeDefaultLlamaCpp)();
  } catch (error) {
    // Best effort: output is already out, so downgrade the failure to a warning.
    stderr.write(
      `QMD Warning: cleanup after successful output failed (${error instanceof Error ? error.message : String(error)}); exiting 0 because command output completed.\n`
    );
  }

  await flushWritable(stderr);
  exit(0);
}
|
||||
|
||||
// Ensure cursor is restored on exit
|
||||
process.on('SIGINT', () => { cursor.show(); process.exit(130); });
|
||||
process.on('SIGTERM', () => { cursor.show(); process.exit(143); });
|
||||
@ -849,6 +919,7 @@ function getDocument(filename: string, fromLine?: number, maxLines?: number, lin
|
||||
inputPath = inputPath.slice(0, -colonMatch[0].length);
|
||||
}
|
||||
}
|
||||
if (fromLine !== undefined) fromLine = Math.max(1, fromLine);
|
||||
|
||||
const parsedIndexPath = isVirtualPath(inputPath) ? parseVirtualPath(inputPath) : null;
|
||||
if (parsedIndexPath?.indexName) {
|
||||
@ -1740,7 +1811,7 @@ async function vectorIndex(
|
||||
}
|
||||
|
||||
// Check if there's work to do before starting
|
||||
const hashesToEmbed = getHashesNeedingEmbedding(db, batchOptions?.collection);
|
||||
const hashesToEmbed = getHashesNeedingEmbedding(db, batchOptions?.collection, model);
|
||||
if (hashesToEmbed === 0 && !force) {
|
||||
console.log(`${c.green}✓ All content hashes already have embeddings.${c.reset}`);
|
||||
closeDb();
|
||||
@ -1930,6 +2001,7 @@ type OutputRow = {
|
||||
score: number;
|
||||
context?: string | null;
|
||||
chunkPos?: number;
|
||||
chunkLen?: number;
|
||||
hash?: string;
|
||||
docid?: string;
|
||||
explain?: HybridQueryExplain;
|
||||
@ -2012,9 +2084,9 @@ function outputResults(results: OutputRow[], query: string, opts: OutputOptions)
|
||||
// JSON output for LLM consumption
|
||||
const output = filtered.map(row => {
|
||||
const docid = row.docid || (row.hash ? row.hash.slice(0, 6) : undefined);
|
||||
const snippetInfo = extractSnippet(row.body, query, 300, row.chunkPos, row.chunkLen, opts.intent);
|
||||
let body = opts.full ? row.body : undefined;
|
||||
const snippetInfo = !opts.full ? extractSnippet(row.body, query, 300, row.chunkPos, undefined, opts.intent) : undefined;
|
||||
let snippet = snippetInfo?.snippet;
|
||||
let snippet = !opts.full ? snippetInfo.snippet : undefined;
|
||||
if (opts.lineNumbers) {
|
||||
if (body) body = addLineNumbers(body);
|
||||
if (snippet) snippet = addLineNumbers(snippet);
|
||||
@ -2023,7 +2095,7 @@ function outputResults(results: OutputRow[], query: string, opts: OutputOptions)
|
||||
...(docid && { docid: `#${docid}` }),
|
||||
score: Math.round(row.score * 100) / 100,
|
||||
file: toQmdPath(row.displayPath),
|
||||
...(snippetInfo && { line: snippetInfo.line }),
|
||||
line: snippetInfo.line,
|
||||
title: row.title,
|
||||
...(row.context && { context: row.context }),
|
||||
...(body && { body }),
|
||||
@ -2046,7 +2118,7 @@ function outputResults(results: OutputRow[], query: string, opts: OutputOptions)
|
||||
for (let i = 0; i < filtered.length; i++) {
|
||||
const row = filtered[i];
|
||||
if (!row) continue;
|
||||
const { line, snippet } = extractSnippet(row.body, query, 500, row.chunkPos, undefined, opts.intent);
|
||||
const { line, snippet } = extractSnippet(row.body, query, 500, row.chunkPos, row.chunkLen, opts.intent);
|
||||
const docid = row.docid || (row.hash ? row.hash.slice(0, 6) : undefined);
|
||||
|
||||
// Line 1: filepath with docid
|
||||
@ -2110,8 +2182,9 @@ function outputResults(results: OutputRow[], query: string, opts: OutputOptions)
|
||||
console.log();
|
||||
|
||||
// Snippet with highlighting (diff-style header included)
|
||||
let displaySnippet = opts.lineNumbers ? addLineNumbers(snippet, line) : snippet;
|
||||
const highlighted = highlightTerms(displaySnippet, query);
|
||||
const content = opts.full ? row.body : snippet;
|
||||
const displayContent = opts.lineNumbers ? addLineNumbers(content, opts.full ? 1 : line) : content;
|
||||
const highlighted = highlightTerms(displayContent, query);
|
||||
console.log(highlighted);
|
||||
|
||||
// Double empty line between results
|
||||
@ -2123,7 +2196,7 @@ function outputResults(results: OutputRow[], query: string, opts: OutputOptions)
|
||||
if (!row) continue;
|
||||
const heading = row.title || row.displayPath;
|
||||
const docid = row.docid || (row.hash ? row.hash.slice(0, 6) : undefined);
|
||||
let content = opts.full ? row.body : extractSnippet(row.body, query, 500, row.chunkPos, undefined, opts.intent).snippet;
|
||||
let content = opts.full ? row.body : extractSnippet(row.body, query, 500, row.chunkPos, row.chunkLen, opts.intent).snippet;
|
||||
if (opts.lineNumbers) {
|
||||
content = addLineNumbers(content);
|
||||
}
|
||||
@ -2136,7 +2209,7 @@ function outputResults(results: OutputRow[], query: string, opts: OutputOptions)
|
||||
const titleAttr = row.title ? ` title="${row.title.replace(/"/g, '"')}"` : "";
|
||||
const contextAttr = row.context ? ` context="${row.context.replace(/"/g, '"')}"` : "";
|
||||
const docid = row.docid || (row.hash ? row.hash.slice(0, 6) : "");
|
||||
let content = opts.full ? row.body : extractSnippet(row.body, query, 500, row.chunkPos, undefined, opts.intent).snippet;
|
||||
let content = opts.full ? row.body : extractSnippet(row.body, query, 500, row.chunkPos, row.chunkLen, opts.intent).snippet;
|
||||
if (opts.lineNumbers) {
|
||||
content = addLineNumbers(content);
|
||||
}
|
||||
@ -2146,10 +2219,10 @@ function outputResults(results: OutputRow[], query: string, opts: OutputOptions)
|
||||
// CSV format
|
||||
console.log("docid,score,file,title,context,line,snippet");
|
||||
for (const row of filtered) {
|
||||
const { line, snippet } = extractSnippet(row.body, query, 500, row.chunkPos, undefined, opts.intent);
|
||||
const { line, snippet } = extractSnippet(row.body, query, 500, row.chunkPos, row.chunkLen, opts.intent);
|
||||
let content = opts.full ? row.body : snippet;
|
||||
if (opts.lineNumbers) {
|
||||
content = addLineNumbers(content, line);
|
||||
content = addLineNumbers(content, opts.full ? 1 : line);
|
||||
}
|
||||
const docid = row.docid || (row.hash ? row.hash.slice(0, 6) : "");
|
||||
const snippetText = content || "";
|
||||
@ -2505,13 +2578,13 @@ async function querySearch(query: string, opts: OutputOptions, _embedModel: stri
|
||||
? (structuredQueries.find(s => s.type === 'lex')?.query || structuredQueries.find(s => s.type === 'vec')?.query || query)
|
||||
: query;
|
||||
|
||||
// Map to CLI output format — use bestChunk for snippet display
|
||||
outputResults(results.map(r => ({
|
||||
file: r.file,
|
||||
displayPath: r.displayPath,
|
||||
title: r.title,
|
||||
body: r.bestChunk,
|
||||
body: r.body,
|
||||
chunkPos: r.bestChunkPos,
|
||||
chunkLen: r.bestChunk.length,
|
||||
score: r.score,
|
||||
context: r.context,
|
||||
docid: r.docid,
|
||||
@ -2567,6 +2640,7 @@ function parseCLI() {
|
||||
// Query options
|
||||
"candidate-limit": { type: "string", short: "C" },
|
||||
"no-rerank": { type: "boolean", default: false },
|
||||
"no-gpu": { type: "boolean", default: false },
|
||||
intent: { type: "string" },
|
||||
// Chunking options
|
||||
"chunk-strategy": { type: "string" }, // "regex" (default) or "auto" (AST for code files)
|
||||
@ -2579,6 +2653,10 @@ function parseCLI() {
|
||||
strict: false, // Allow unknown options to pass through
|
||||
});
|
||||
|
||||
if (values["no-gpu"]) {
|
||||
process.env.QMD_FORCE_CPU = "1";
|
||||
}
|
||||
|
||||
// Select index name (default: "index"). If no explicit --index is supplied,
|
||||
// a project-local .qmd/index.yaml overrides the global config/cache paths.
|
||||
const indexName = values.index as string | undefined;
|
||||
@ -2842,6 +2920,7 @@ function showHelp(): void {
|
||||
console.log(" --full - Output full document instead of snippet");
|
||||
console.log(" -C, --candidate-limit <n> - Max candidates to rerank (default 40, lower = faster)");
|
||||
console.log(" --no-rerank - Skip LLM reranking (use RRF scores only, much faster on CPU)");
|
||||
console.log(" --no-gpu - Force CPU mode for llama.cpp operations (same as QMD_FORCE_CPU=1)");
|
||||
console.log(" --line-numbers - Include line numbers in output");
|
||||
console.log(" --explain - Include retrieval score traces (query --json/CLI)");
|
||||
console.log(" --files | --json | --csv | --md | --xml - Output format");
|
||||
@ -3430,8 +3509,10 @@ if (isMain) {
|
||||
}
|
||||
|
||||
if (cli.command !== "mcp") {
|
||||
await disposeDefaultLlamaCpp();
|
||||
process.exit(0);
|
||||
await finishSuccessfulCliCommand({
|
||||
command: cli.command,
|
||||
format: cli.opts.format,
|
||||
});
|
||||
}
|
||||
|
||||
} // end if (main module)
|
||||
|
||||
126
src/llm.ts
126
src/llm.ts
@ -22,10 +22,45 @@ type NodeLlamaCppModule = {
|
||||
|
||||
let nodeLlamaCppImport: Promise<NodeLlamaCppModule> | null = null;
|
||||
async function loadNodeLlamaCpp(): Promise<NodeLlamaCppModule> {
|
||||
nodeLlamaCppImport ??= import("node-llama-cpp") as Promise<NodeLlamaCppModule>;
|
||||
nodeLlamaCppImport ??= withNativeStdoutRedirectedToStderr(
|
||||
() => import("node-llama-cpp") as Promise<NodeLlamaCppModule>
|
||||
);
|
||||
return nodeLlamaCppImport;
|
||||
}
|
||||
|
||||
export function setNodeLlamaCppModuleForTest(module: NodeLlamaCppModule | null): void {
|
||||
nodeLlamaCppImport = module ? Promise.resolve(module) : null;
|
||||
failedGpuInitModes.clear();
|
||||
}
|
||||
|
||||
type StdoutWrite = typeof process.stdout.write;

// Number of redirect scopes currently active; the patch is installed on the
// 0 -> 1 transition and removed on the 1 -> 0 transition.
let nativeStdoutRedirectDepth = 0;
// The (bound) stdout writer saved while the redirect is installed.
let originalStdoutWrite: StdoutWrite | null = null;

/**
 * Run `fn` with `process.stdout.write` temporarily forwarding to
 * `process.stderr.write`.
 *
 * Some node-llama-cpp native build/probe paths write library noise to stdout.
 * JSON APIs must reserve stdout for machine-readable payloads, so that noise
 * is routed to stderr while native llama initialization is in progress.
 *
 * Calls may nest or overlap: a depth counter ensures the writer is patched
 * once and restored only when the outermost scope finishes, whether `fn`
 * resolves or rejects.
 *
 * @param fn Async work to run while the redirect is active.
 * @returns Whatever `fn` resolves to.
 */
export async function withNativeStdoutRedirectedToStderr<T>(fn: () => Promise<T>): Promise<T> {
  if (nativeStdoutRedirectDepth === 0) {
    originalStdoutWrite = process.stdout.write.bind(process.stdout) as StdoutWrite;
    // eslint-disable-next-line @typescript-eslint/no-explicit-any -- write() is overloaded; arguments are forwarded verbatim.
    process.stdout.write = ((chunk: any, encoding?: any, cb?: any) => {
      return process.stderr.write(chunk, encoding, cb as any);
    }) as StdoutWrite;
  }
  nativeStdoutRedirectDepth++;

  try {
    return await fn();
  } finally {
    nativeStdoutRedirectDepth--;
    if (nativeStdoutRedirectDepth === 0 && originalStdoutWrite) {
      process.stdout.write = originalStdoutWrite;
      originalStdoutWrite = null;
    }
  }
}
|
||||
|
||||
import { homedir } from "os";
|
||||
import { join } from "path";
|
||||
import { existsSync, mkdirSync, statSync, unlinkSync, readdirSync, readFileSync, writeFileSync, openSync, readSync, closeSync } from "fs";
|
||||
@ -487,7 +522,15 @@ export function resolveSafeParallelism(options: ParallelismOptions): number {
|
||||
return Math.max(1, options.computed);
|
||||
}
|
||||
|
||||
export function resolveLlamaGpuMode(envValue = process.env.QMD_LLAMA_GPU): LlamaGpuMode {
|
||||
export function resolveLlamaGpuMode(
|
||||
envValue = process.env.QMD_LLAMA_GPU,
|
||||
forceCpuValue = process.env.QMD_FORCE_CPU
|
||||
): LlamaGpuMode {
|
||||
const forceCpu = forceCpuValue?.trim().toLowerCase() ?? "";
|
||||
if (forceCpu && !["false", "off", "none", "disable", "disabled", "0"].includes(forceCpu)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
const normalized = envValue?.trim().toLowerCase() ?? "";
|
||||
if (!normalized) return "auto";
|
||||
if (["false", "off", "none", "disable", "disabled", "0"].includes(normalized)) return false;
|
||||
@ -497,6 +540,23 @@ export function resolveLlamaGpuMode(envValue = process.env.QMD_LLAMA_GPU): Llama
|
||||
return "auto";
|
||||
}
|
||||
|
||||
/**
 * Run a dispose routine, but give up waiting after `timeoutMs`.
 *
 * Never rejects: a timeout or a dispose failure is reported on stderr as a
 * warning and shutdown continues. The timer is unref'd so it cannot keep the
 * process alive by itself, and it is cleared as soon as the race settles so
 * no timer stays pending after a fast dispose.
 *
 * @param resourceName Human-readable name used in warning messages.
 * @param dispose Routine that releases the resource.
 * @param timeoutMs Maximum time to wait for `dispose`, in milliseconds.
 */
async function disposeWithTimeout(resourceName: string, dispose: () => Promise<void>, timeoutMs = 1000): Promise<void> {
  // The promise executor runs synchronously, so `signalTimeout` is set before the timer is armed.
  let signalTimeout: (value: "timeout") => void = () => {};
  const timeoutPromise = new Promise<"timeout">((resolve) => {
    signalTimeout = resolve;
  });
  const timer = setTimeout(() => signalTimeout("timeout"), timeoutMs);
  timer.unref();

  try {
    const result = await Promise.race([dispose(), timeoutPromise]);
    if (result === "timeout") {
      process.stderr.write(`QMD Warning: timed out disposing ${resourceName}; continuing shutdown.\n`);
    }
  } catch (error) {
    process.stderr.write(
      `QMD Warning: failed to dispose ${resourceName} (${error instanceof Error ? error.message : String(error)}); continuing shutdown.\n`
    );
  } finally {
    clearTimeout(timer);
  }
}
|
||||
|
||||
function resolveExpandContextSize(configValue?: number): number {
|
||||
if (configValue !== undefined) {
|
||||
if (!Number.isInteger(configValue) || configValue <= 0) {
|
||||
@ -518,6 +578,8 @@ function resolveExpandContextSize(configValue?: number): number {
|
||||
return parsed;
|
||||
}
|
||||
|
||||
const failedGpuInitModes = new Set<LlamaGpuMode>();
|
||||
|
||||
export class LlamaCpp implements LLM {
|
||||
private readonly _ciMode = !!process.env.CI;
|
||||
private llama: Llama | null = null;
|
||||
@ -668,22 +730,29 @@ export class LlamaCpp implements LLM {
|
||||
|
||||
const { getLlama, LlamaLogLevel } = await loadNodeLlamaCpp();
|
||||
const loadLlama = async (gpu: LlamaGpuMode) =>
|
||||
await getLlama({
|
||||
await withNativeStdoutRedirectedToStderr(() => getLlama({
|
||||
build: allowBuild ? "autoAttempt" : "never",
|
||||
logLevel: LlamaLogLevel.error,
|
||||
gpu,
|
||||
skipDownload: !allowBuild,
|
||||
});
|
||||
}));
|
||||
|
||||
let llama: Llama;
|
||||
if (gpuMode === false) {
|
||||
if (gpuMode === false || failedGpuInitModes.has(gpuMode)) {
|
||||
if (gpuMode !== false && failedGpuInitModes.has(gpuMode)) {
|
||||
process.stderr.write(
|
||||
`QMD Warning: skipping previously failed GPU init${gpuMode === "auto" ? "" : ` for QMD_LLAMA_GPU=${gpuMode}`}, using CPU.\n`
|
||||
);
|
||||
}
|
||||
llama = await loadLlama(false);
|
||||
} else {
|
||||
try {
|
||||
llama = await loadLlama(gpuMode);
|
||||
} catch (err) {
|
||||
// GPU backend (e.g. Vulkan on headless/driverless machines) can throw at init.
|
||||
// Fall back to CPU so qmd still works.
|
||||
// GPU backend (e.g. Vulkan/CUDA on headless/driverless machines) can throw at init.
|
||||
// Fall back to CPU so qmd still works, and cache the failure to avoid repeated
|
||||
// expensive native build/probe attempts in this process.
|
||||
failedGpuInitModes.add(gpuMode);
|
||||
process.stderr.write(
|
||||
`QMD Warning: GPU init failed${gpuMode === "auto" ? "" : ` for QMD_LLAMA_GPU=${gpuMode}`} (${err instanceof Error ? err.message : String(err)}), falling back to CPU.\n`
|
||||
);
|
||||
@ -1413,22 +1482,37 @@ export class LlamaCpp implements LLM {
|
||||
this.inactivityTimer = null;
|
||||
}
|
||||
|
||||
// Disposing llama cascades to models and contexts automatically
|
||||
// See: https://node-llama-cpp.withcat.ai/guide/objects-lifecycle
|
||||
// Note: llama.dispose() can hang indefinitely, so we use a timeout
|
||||
if (this.llama) {
|
||||
const disposePromise = this.llama.dispose();
|
||||
const timeoutPromise = new Promise<void>((resolve) => setTimeout(resolve, 1000));
|
||||
await Promise.race([disposePromise, timeoutPromise]);
|
||||
// Explicitly dispose in dependency order: contexts first, then models, then llama.
|
||||
// Relying only on llama.dispose() leaves Metal resource sets alive until process
|
||||
// finalization on Apple Silicon, where ggml_metal_device_free can abort after
|
||||
// otherwise-successful CLI output (#368).
|
||||
for (const ctx of this.embedContexts) {
|
||||
await disposeWithTimeout("embedding context", () => ctx.dispose());
|
||||
}
|
||||
this.embedContexts = [];
|
||||
|
||||
for (const ctx of this.rerankContexts) {
|
||||
await disposeWithTimeout("rerank context", () => ctx.dispose());
|
||||
}
|
||||
this.rerankContexts = [];
|
||||
|
||||
if (this.embedModel) {
|
||||
await disposeWithTimeout("embedding model", () => this.embedModel!.dispose());
|
||||
this.embedModel = null;
|
||||
}
|
||||
if (this.generateModel) {
|
||||
await disposeWithTimeout("generation model", () => this.generateModel!.dispose());
|
||||
this.generateModel = null;
|
||||
}
|
||||
if (this.rerankModel) {
|
||||
await disposeWithTimeout("rerank model", () => this.rerankModel!.dispose());
|
||||
this.rerankModel = null;
|
||||
}
|
||||
|
||||
// Clear references
|
||||
this.embedContexts = [];
|
||||
this.rerankContexts = [];
|
||||
this.embedModel = null;
|
||||
this.generateModel = null;
|
||||
this.rerankModel = null;
|
||||
this.llama = null;
|
||||
if (this.llama) {
|
||||
await disposeWithTimeout("llama runtime", () => this.llama!.dispose());
|
||||
this.llama = null;
|
||||
}
|
||||
|
||||
// Clear any in-flight load/create promises
|
||||
this.embedModelLoadPromise = null;
|
||||
|
||||
@ -42,6 +42,7 @@ type SearchResultItem = {
|
||||
title: string;
|
||||
score: number;
|
||||
context: string | null;
|
||||
line: number; // Absolute line in source markdown
|
||||
snippet: string;
|
||||
};
|
||||
|
||||
@ -239,6 +240,8 @@ async function createMcpServer(store: QMDStore): Promise<McpServer> {
|
||||
title: "Query",
|
||||
description: `Search the knowledge base using a query document — one or more typed sub-queries combined for best recall.
|
||||
|
||||
Each result includes a \`line\` field with the absolute 1-indexed line of the best match in the source markdown. To read more context around a hit, call \`get(file, fromLine = max(1, line - 20), maxLines = 80, lineNumbers = true)\`.
|
||||
|
||||
## Query Types
|
||||
|
||||
**lex** — BM25 keyword search. Fast, exact, no LLM needed.
|
||||
@ -339,13 +342,14 @@ Intent-aware lex (C++ performance, not sports):
|
||||
|| searches[0]?.query || "";
|
||||
|
||||
const filtered: SearchResultItem[] = results.map(r => {
|
||||
const { line, snippet } = extractSnippet(r.bestChunk, primaryQuery, 300, undefined, undefined, intent);
|
||||
const { line, snippet } = extractSnippet(r.body, primaryQuery, 300, r.bestChunkPos, r.bestChunk.length, intent);
|
||||
return {
|
||||
docid: `#${r.docid}`,
|
||||
file: r.displayPath,
|
||||
title: r.title,
|
||||
score: Math.round(r.score * 100) / 100,
|
||||
context: r.context,
|
||||
line,
|
||||
snippet: addLineNumbers(snippet, line),
|
||||
};
|
||||
});
|
||||
@ -383,6 +387,7 @@ Intent-aware lex (C++ performance, not sports):
|
||||
parsedFromLine = parseInt(colonMatch[1], 10);
|
||||
lookup = lookup.slice(0, -colonMatch[0].length);
|
||||
}
|
||||
if (parsedFromLine !== undefined) parsedFromLine = Math.max(1, parsedFromLine);
|
||||
|
||||
const result = await store.get(lookup, { includeBody: false });
|
||||
|
||||
@ -701,13 +706,14 @@ export async function startMcpHttpServer(
|
||||
|| params.searches[0]?.query || "";
|
||||
|
||||
const formatted = results.map(r => {
|
||||
const { line, snippet } = extractSnippet(r.bestChunk, primaryQuery, 300);
|
||||
const { line, snippet } = extractSnippet(r.body, primaryQuery, 300, r.bestChunkPos, r.bestChunk.length, params.intent);
|
||||
return {
|
||||
docid: `#${r.docid}`,
|
||||
file: r.displayPath,
|
||||
title: r.title,
|
||||
score: Math.round(r.score * 100) / 100,
|
||||
context: r.context,
|
||||
line,
|
||||
snippet: addLineNumbers(snippet, line),
|
||||
};
|
||||
});
|
||||
|
||||
142
src/store.ts
142
src/store.ts
@ -871,10 +871,15 @@ function initializeDatabase(db: Database): void {
|
||||
seq INTEGER NOT NULL DEFAULT 0,
|
||||
pos INTEGER NOT NULL DEFAULT 0,
|
||||
model TEXT NOT NULL,
|
||||
total_chunks INTEGER NOT NULL DEFAULT 1,
|
||||
embedded_at TEXT NOT NULL,
|
||||
PRIMARY KEY (hash, seq)
|
||||
)
|
||||
`);
|
||||
const cvInfoAfterCreate = db.prepare(`PRAGMA table_info(content_vectors)`).all() as { name: string }[];
|
||||
if (!cvInfoAfterCreate.some(col => col.name === 'total_chunks')) {
|
||||
db.exec(`ALTER TABLE content_vectors ADD COLUMN total_chunks INTEGER NOT NULL DEFAULT 1`);
|
||||
}
|
||||
|
||||
// Store collections — makes the DB self-contained (no external config needed)
|
||||
db.exec(`
|
||||
@ -1167,9 +1172,9 @@ export type Store = {
|
||||
ensureVecTable: (dimensions: number) => void;
|
||||
|
||||
// Index health
|
||||
getHashesNeedingEmbedding: () => number;
|
||||
getIndexHealth: () => IndexHealthInfo;
|
||||
getStatus: () => IndexStatus;
|
||||
getHashesNeedingEmbedding: (model?: string) => number;
|
||||
getIndexHealth: (model?: string) => IndexHealthInfo;
|
||||
getStatus: (model?: string) => IndexStatus;
|
||||
|
||||
// Caching
|
||||
getCacheKey: typeof getCacheKey;
|
||||
@ -1229,7 +1234,7 @@ export type Store = {
|
||||
// Vector/embedding operations
|
||||
getHashesForEmbedding: () => { hash: string; body: string; path: string }[];
|
||||
clearAllEmbeddings: () => void;
|
||||
insertEmbedding: (hash: string, seq: number, pos: number, embedding: Float32Array, model: string, embeddedAt: string) => void;
|
||||
insertEmbedding: (hash: string, seq: number, pos: number, embedding: Float32Array, model: string, embeddedAt: string, totalChunks?: number) => void;
|
||||
};
|
||||
|
||||
// =============================================================================
|
||||
@ -1420,18 +1425,31 @@ function resolveEmbedOptions(options?: EmbedOptions): Required<Pick<EmbedOptions
|
||||
};
|
||||
}
|
||||
|
||||
function getPendingEmbeddingDocs(db: Database, collection?: string): PendingEmbeddingDoc[] {
|
||||
/**
 * Build the SQL expression for the expected chunk count of a group of
 * `content_vectors` rows.
 *
 * Inspects the table's columns via `PRAGMA table_info`: when a `total_chunks`
 * column exists the expression is `MAX(total_chunks)`; otherwise it is the
 * literal `1`.
 *
 * @param db Open database handle.
 * @returns SQL fragment to embed in an aggregate SELECT.
 */
function contentVectorExpectedChunksExpr(db: Database): string {
  const columns = db.prepare(`PRAGMA table_info(content_vectors)`).all() as { name: string }[];
  const hasTotalChunks = columns.some(col => col.name === 'total_chunks');
  return hasTotalChunks ? 'MAX(total_chunks)' : '1';
}
|
||||
|
||||
function getPendingEmbeddingDocs(db: Database, collection?: string, model: string = DEFAULT_EMBED_MODEL): PendingEmbeddingDoc[] {
|
||||
const collectionFilter = collection ? `AND d.collection = ?` : ``;
|
||||
const expectedChunksExpr = contentVectorExpectedChunksExpr(db);
|
||||
const stmt = db.prepare(`
|
||||
SELECT d.hash, MIN(d.path) as path, length(CAST(c.doc AS BLOB)) as bytes
|
||||
FROM documents d
|
||||
JOIN content c ON d.hash = c.hash
|
||||
LEFT JOIN content_vectors v ON d.hash = v.hash AND v.seq = 0
|
||||
WHERE d.active = 1 AND v.hash IS NULL ${collectionFilter}
|
||||
LEFT JOIN (
|
||||
SELECT hash, model, COUNT(*) AS chunk_count, ${expectedChunksExpr} AS expected_chunks
|
||||
FROM content_vectors
|
||||
WHERE model = ?
|
||||
GROUP BY hash, model
|
||||
) v ON d.hash = v.hash
|
||||
WHERE d.active = 1
|
||||
AND (v.hash IS NULL OR v.chunk_count < v.expected_chunks)
|
||||
${collectionFilter}
|
||||
GROUP BY d.hash
|
||||
ORDER BY MIN(d.path)
|
||||
`);
|
||||
return (collection ? stmt.all(collection) : stmt.all()) as PendingEmbeddingDoc[];
|
||||
return (collection ? stmt.all(model, collection) : stmt.all(model)) as PendingEmbeddingDoc[];
|
||||
}
|
||||
|
||||
function buildEmbeddingBatches(
|
||||
@ -1502,7 +1520,7 @@ export async function generateEmbeddings(
|
||||
clearAllEmbeddings(db, options?.collection);
|
||||
}
|
||||
|
||||
const docsToEmbed = getPendingEmbeddingDocs(db, options?.collection);
|
||||
const docsToEmbed = getPendingEmbeddingDocs(db, options?.collection, model);
|
||||
|
||||
if (docsToEmbed.length === 0) {
|
||||
return { docsProcessed: 0, chunksEmbedded: 0, errors: 0, durationMs: 0 };
|
||||
@ -1533,6 +1551,7 @@ export async function generateEmbeddings(
|
||||
|
||||
const batchDocs = getEmbeddingDocsForBatch(db, batchMeta);
|
||||
const batchChunks: ChunkItem[] = [];
|
||||
const expectedChunksByHash = new Map<string, number>();
|
||||
const batchBytes = batchMeta.reduce((sum, doc) => sum + Math.max(0, doc.bytes), 0);
|
||||
|
||||
for (const doc of batchDocs) {
|
||||
@ -1558,6 +1577,7 @@ export async function generateEmbeddings(
|
||||
bytes: encoder.encode(chunks[seq]!.text).length,
|
||||
});
|
||||
}
|
||||
expectedChunksByHash.set(doc.hash, chunks.length);
|
||||
}
|
||||
|
||||
totalChunks += batchChunks.length;
|
||||
@ -1610,7 +1630,7 @@ export async function generateEmbeddings(
|
||||
const chunk = chunkBatch[i]!;
|
||||
const embedding = embeddings[i];
|
||||
if (embedding) {
|
||||
insertEmbedding(db, chunk.hash, chunk.seq, chunk.pos, new Float32Array(embedding.embedding), model, now);
|
||||
insertEmbedding(db, chunk.hash, chunk.seq, chunk.pos, new Float32Array(embedding.embedding), model, now, expectedChunksByHash.get(chunk.hash) ?? 1);
|
||||
chunksEmbedded++;
|
||||
} else {
|
||||
errors++;
|
||||
@ -1629,7 +1649,7 @@ export async function generateEmbeddings(
|
||||
const text = formatDocForEmbedding(chunk.text, chunk.title, embedModelUri);
|
||||
const result = await session.embed(text, { model });
|
||||
if (result) {
|
||||
insertEmbedding(db, chunk.hash, chunk.seq, chunk.pos, new Float32Array(result.embedding), model, now);
|
||||
insertEmbedding(db, chunk.hash, chunk.seq, chunk.pos, new Float32Array(result.embedding), model, now, expectedChunksByHash.get(chunk.hash) ?? 1);
|
||||
chunksEmbedded++;
|
||||
} else {
|
||||
errors++;
|
||||
@ -1654,6 +1674,11 @@ export async function generateEmbeddings(
|
||||
});
|
||||
}
|
||||
|
||||
const removedPartialChunks = removeIncompleteEmbeddings(db, expectedChunksByHash, model);
|
||||
if (removedPartialChunks > 0) {
|
||||
chunksEmbedded = Math.max(0, chunksEmbedded - removedPartialChunks);
|
||||
}
|
||||
|
||||
bytesProcessed += batchBytes;
|
||||
options?.onProgress?.({ chunksEmbedded, totalChunks, bytesProcessed, totalBytes, errors });
|
||||
}
|
||||
@ -1688,9 +1713,9 @@ export function createStore(dbPath?: string): Store {
|
||||
ensureVecTable: (dimensions: number) => ensureVecTableInternal(db, dimensions),
|
||||
|
||||
// Index health
|
||||
getHashesNeedingEmbedding: () => getHashesNeedingEmbedding(db),
|
||||
getIndexHealth: () => getIndexHealth(db),
|
||||
getStatus: () => getStatus(db),
|
||||
getHashesNeedingEmbedding: (model?: string) => getHashesNeedingEmbedding(db, undefined, model ?? store.llm?.embedModelName ?? DEFAULT_EMBED_MODEL),
|
||||
getIndexHealth: (model?: string) => getIndexHealth(db, model ?? store.llm?.embedModelName ?? DEFAULT_EMBED_MODEL),
|
||||
getStatus: (model?: string) => getStatus(db, model ?? store.llm?.embedModelName ?? DEFAULT_EMBED_MODEL),
|
||||
|
||||
// Caching
|
||||
getCacheKey,
|
||||
@ -1750,7 +1775,7 @@ export function createStore(dbPath?: string): Store {
|
||||
// Vector/embedding operations
|
||||
getHashesForEmbedding: () => getHashesForEmbedding(db),
|
||||
clearAllEmbeddings: () => clearAllEmbeddings(db),
|
||||
insertEmbedding: (hash: string, seq: number, pos: number, embedding: Float32Array, model: string, embeddedAt: string) => insertEmbedding(db, hash, seq, pos, embedding, model, embeddedAt),
|
||||
insertEmbedding: (hash: string, seq: number, pos: number, embedding: Float32Array, model: string, embeddedAt: string, totalChunks?: number) => insertEmbedding(db, hash, seq, pos, embedding, model, embeddedAt, totalChunks),
|
||||
};
|
||||
|
||||
return store;
|
||||
@ -1949,15 +1974,23 @@ export type IndexStatus = {
|
||||
// Index health
|
||||
// =============================================================================
|
||||
|
||||
/**
 * Count distinct active document hashes that still need embedding for `model`.
 *
 * A hash is counted when it has no `content_vectors` rows for the model, or
 * when it has fewer rows for the model than its expected chunk count.
 *
 * @param db - Database handle to query.
 * @param collection - When given, only documents in this collection are counted.
 * @param model - Embedding model whose vectors are considered; defaults to
 *   `DEFAULT_EMBED_MODEL`.
 * @returns Number of distinct hashes with missing or incomplete embeddings.
 */
export function getHashesNeedingEmbedding(db: Database, collection?: string, model: string = DEFAULT_EMBED_MODEL): number {
  const collectionFilter = collection ? `AND d.collection = ?` : ``;
  // SQL expression yielding the expected chunk count per hash; built by a
  // helper defined elsewhere in this file.
  const expectedChunksExpr = contentVectorExpectedChunksExpr(db);
  const stmt = db.prepare(`
    SELECT COUNT(DISTINCT d.hash) as count
    FROM documents d
    LEFT JOIN (
      SELECT hash, model, COUNT(*) AS chunk_count, ${expectedChunksExpr} AS expected_chunks
      FROM content_vectors
      WHERE model = ?
      GROUP BY hash, model
    ) v ON d.hash = v.hash
    WHERE d.active = 1
      AND (v.hash IS NULL OR v.chunk_count < v.expected_chunks)
      ${collectionFilter}
  `);
  // Bind order follows placeholder order: the subquery's model first, then
  // the optional collection filter.
  const result = (collection ? stmt.get(model, collection) : stmt.get(model)) as { count: number };
  return result.count;
}
|
||||
|
||||
@ -1967,8 +2000,8 @@ export type IndexHealthInfo = {
|
||||
daysStale: number | null;
|
||||
};
|
||||
|
||||
export function getIndexHealth(db: Database): IndexHealthInfo {
|
||||
const needsEmbedding = getHashesNeedingEmbedding(db);
|
||||
export function getIndexHealth(db: Database, model: string = DEFAULT_EMBED_MODEL): IndexHealthInfo {
|
||||
const needsEmbedding = getHashesNeedingEmbedding(db, undefined, model);
|
||||
const totalDocs = (db.prepare(`SELECT COUNT(*) as count FROM documents WHERE active = 1`).get() as { count: number }).count;
|
||||
|
||||
const mostRecent = db.prepare(`SELECT MAX(modified_at) as latest FROM documents WHERE active = 1`).get() as { latest: string | null };
|
||||
@ -3316,15 +3349,22 @@ async function getEmbedding(text: string, model: string, isQuery: boolean, sessi
|
||||
* Get all unique content hashes that need embeddings (from active documents).
|
||||
* Returns hash, document body, and a sample path for display purposes.
|
||||
*/
|
||||
export function getHashesForEmbedding(db: Database, model: string = DEFAULT_EMBED_MODEL): { hash: string; body: string; path: string }[] {
  // SQL expression yielding the expected chunk count per hash; built by a
  // helper defined elsewhere in this file.
  const expectedChunksExpr = contentVectorExpectedChunksExpr(db);
  // A hash is pending when it has no content_vectors rows for `model`, or
  // fewer rows than expected. MIN(d.path) picks one path per hash.
  return db.prepare(`
    SELECT d.hash, c.doc as body, MIN(d.path) as path
    FROM documents d
    JOIN content c ON d.hash = c.hash
    LEFT JOIN (
      SELECT hash, model, COUNT(*) AS chunk_count, ${expectedChunksExpr} AS expected_chunks
      FROM content_vectors
      WHERE model = ?
      GROUP BY hash, model
    ) v ON d.hash = v.hash
    WHERE d.active = 1
      AND (v.hash IS NULL OR v.chunk_count < v.expected_chunks)
    GROUP BY d.hash
  `).all(model) as { hash: string; body: string; path: string }[];
}
|
||||
|
||||
/**
|
||||
@ -3409,13 +3449,14 @@ export function insertEmbedding(
|
||||
pos: number,
|
||||
embedding: Float32Array,
|
||||
model: string,
|
||||
embeddedAt: string
|
||||
embeddedAt: string,
|
||||
totalChunks: number = 1
|
||||
): void {
|
||||
const hashSeq = `${hash}_${seq}`;
|
||||
|
||||
// Insert content_vectors first — crash-safe ordering (see getHashesForEmbedding)
|
||||
const insertContentVectorStmt = db.prepare(`INSERT OR REPLACE INTO content_vectors (hash, seq, pos, model, embedded_at) VALUES (?, ?, ?, ?, ?)`);
|
||||
insertContentVectorStmt.run(hash, seq, pos, model, embeddedAt);
|
||||
const insertContentVectorStmt = db.prepare(`INSERT OR REPLACE INTO content_vectors (hash, seq, pos, model, total_chunks, embedded_at) VALUES (?, ?, ?, ?, ?, ?)`);
|
||||
insertContentVectorStmt.run(hash, seq, pos, model, totalChunks, embeddedAt);
|
||||
|
||||
// vec0 virtual tables don't support OR REPLACE — use DELETE + INSERT
|
||||
const deleteVecStmt = db.prepare(`DELETE FROM vectors_vec WHERE hash_seq = ?`);
|
||||
@ -3424,6 +3465,26 @@ export function insertEmbedding(
|
||||
insertVecStmt.run(hashSeq, embedding);
|
||||
}
|
||||
|
||||
/**
 * Delete the stored vectors of every hash whose chunk coverage for `model`
 * does not match the expected chunk count.
 *
 * For each `(hash, expectedChunks)` entry, the `content_vectors` rows for the
 * model are read. Hashes with no rows, or with exactly `expectedChunks` rows,
 * are left untouched. Otherwise each row's `vectors_vec` entry (keyed
 * `<hash>_<seq>`) is deleted, followed by the `content_vectors` rows.
 *
 * @param db - Database handle holding `content_vectors` and `vectors_vec`.
 * @param expectedChunksByHash - Expected number of chunks per content hash.
 * @param model - Embedding model whose rows are inspected and removed.
 * @returns Total number of `content_vectors` rows that were removed.
 */
function removeIncompleteEmbeddings(db: Database, expectedChunksByHash: Map<string, number>, model: string): number {
  let removed = 0;
  // Prepare once; the statements are reused for every hash.
  const rowsStmt = db.prepare(`SELECT seq FROM content_vectors WHERE hash = ? AND model = ?`);
  const deleteContentStmt = db.prepare(`DELETE FROM content_vectors WHERE hash = ? AND model = ?`);
  const deleteVecStmt = db.prepare(`DELETE FROM vectors_vec WHERE hash_seq = ?`);

  for (const [hash, expectedChunks] of expectedChunksByHash) {
    const rows = rowsStmt.all(hash, model) as { seq: number }[];
    // Nothing stored, or exactly the expected coverage: keep as is.
    if (rows.length === 0 || rows.length === expectedChunks) continue;

    for (const row of rows) {
      deleteVecStmt.run(`${hash}_${row.seq}`);
    }
    deleteContentStmt.run(hash, model);
    removed += rows.length;
  }

  return removed;
}
|
||||
|
||||
// =============================================================================
|
||||
// Query expansion
|
||||
// =============================================================================
|
||||
@ -3800,7 +3861,7 @@ export function getDocumentBody(db: Database, doc: DocumentResult | { filepath:
|
||||
let body = row.body;
|
||||
if (fromLine !== undefined || maxLines !== undefined) {
|
||||
const lines = body.split('\n');
|
||||
const start = (fromLine || 1) - 1;
|
||||
const start = Math.max(0, (fromLine || 1) - 1);
|
||||
const end = maxLines !== undefined ? start + maxLines : lines.length;
|
||||
body = lines.slice(start, end).join('\n');
|
||||
}
|
||||
@ -3922,7 +3983,7 @@ export function findDocuments(
|
||||
// Status
|
||||
// =============================================================================
|
||||
|
||||
export function getStatus(db: Database): IndexStatus {
|
||||
export function getStatus(db: Database, model: string = DEFAULT_EMBED_MODEL): IndexStatus {
|
||||
// DB is source of truth for collections — config provides supplementary metadata
|
||||
const dbCollections = db.prepare(`
|
||||
SELECT
|
||||
@ -3957,7 +4018,7 @@ export function getStatus(db: Database): IndexStatus {
|
||||
});
|
||||
|
||||
const totalDocs = (db.prepare(`SELECT COUNT(*) as c FROM documents WHERE active = 1`).get() as { c: number }).c;
|
||||
const needsEmbedding = getHashesNeedingEmbedding(db);
|
||||
const needsEmbedding = getHashesNeedingEmbedding(db, undefined, model);
|
||||
const hasVectors = !!db.prepare(`SELECT name FROM sqlite_master WHERE type='table' AND name='vectors_vec'`).get();
|
||||
|
||||
return {
|
||||
@ -4023,7 +4084,7 @@ export function extractSnippet(body: string, query: string, maxLen = 500, chunkP
|
||||
let searchBody = body;
|
||||
let lineOffset = 0;
|
||||
|
||||
if (chunkPos && chunkPos > 0) {
|
||||
if (chunkPos !== undefined && chunkPos >= 0) {
|
||||
// Search within the chunk region, with some padding for context
|
||||
// Use provided chunkLen or fall back to max chunk size (covers variable-length chunks)
|
||||
const searchLen = chunkLen || CHUNK_SIZE_CHARS;
|
||||
@ -4055,6 +4116,23 @@ export function extractSnippet(body: string, query: string, maxLen = 500, chunkP
|
||||
}
|
||||
}
|
||||
|
||||
if (chunkPos !== undefined && chunkPos >= 0 && bestScore <= 0) {
|
||||
if (chunkPos === 0) {
|
||||
// chunkPos=0 may be the chunk selector's initialization default for queries
|
||||
// where lexical chunk scoring found no winner (e.g. tokens filtered to empty
|
||||
// by the length>2 guard). Retry with full body so the real match isn't missed.
|
||||
return extractSnippet(body, query, maxLen, undefined, undefined, intent);
|
||||
}
|
||||
// For chunkPos > 0 the reranker actively picked this chunk. Tokens failing to
|
||||
// match literally is most likely a tokenizer limitation (quoted phrases, FTS5
|
||||
// syntax, HYDE passages, semantic hits), so anchor on the chunk start rather
|
||||
// than disregarding the reranker's pick.
|
||||
const contextStart = Math.max(0, chunkPos - 100);
|
||||
bestLine = chunkPos > contextStart
|
||||
? searchBody.slice(0, chunkPos - contextStart).split('\n').length - 1
|
||||
: 0;
|
||||
}
|
||||
|
||||
const start = Math.max(0, bestLine - 1);
|
||||
const end = Math.min(lines.length, bestLine + 3);
|
||||
const snippetLines = lines.slice(start, end);
|
||||
|
||||
164
test/bin-wrapper.test.ts
Normal file
164
test/bin-wrapper.test.ts
Normal file
@ -0,0 +1,164 @@
|
||||
import { afterEach, describe, expect, test } from "vitest";
|
||||
import { chmodSync, copyFileSync, mkdtempSync, mkdirSync, readFileSync, realpathSync, rmSync, symlinkSync, writeFileSync } from "node:fs";
|
||||
import { tmpdir } from "node:os";
|
||||
import { dirname, join, relative } from "node:path";
|
||||
import { execFileSync } from "node:child_process";
|
||||
import { fileURLToPath } from "node:url";
|
||||
|
||||
// Repository root: one directory up from this test file.
const repoRoot = fileURLToPath(new URL("..", import.meta.url));
// Temp fixture roots created by makeTempFixture; removed again in afterEach.
const fixtures: string[] = [];
|
||||
|
||||
/**
 * Create a throwaway fixture directory containing fake `node` and `bun`
 * executables.
 *
 * Each fake runtime is a shell script that writes its own name, its first
 * argument, and the remaining arguments (one per line) to the file named by
 * `$QMD_WRAPPER_CAPTURE`. The fixture root is recorded in `fixtures`.
 *
 * @returns The fixture root, the capture-file path, and the directory holding
 *   the fake runtimes.
 */
function makeTempFixture() {
  const root = mkdtempSync(join(tmpdir(), "qmd-bin-wrapper-"));
  fixtures.push(root);
  const capturePath = join(root, "capture.txt");
  const runtimeBin = join(root, "runtime-bin");
  mkdirSync(runtimeBin, { recursive: true });

  for (const runtime of ["node", "bun"]) {
    const runtimePath = join(runtimeBin, runtime);
    writeFileSync(
      runtimePath,
      `#!/bin/sh\n{\n printf '%s\\n' '${runtime}'\n printf '%s\\n' "$1"\n shift\n printf '%s\\n' "$@"\n} > "$QMD_WRAPPER_CAPTURE"\n`,
    );
    chmodSync(runtimePath, 0o755);
  }

  return { root, capturePath, runtimeBin };
}
|
||||
|
||||
/**
 * Lay out a minimal installed-package tree under `root/packagePath`.
 *
 * The tree holds an executable copy of the repository's `bin/qmd`, a stub
 * `dist/cli/qmd.js`, and one empty file per entry in `lockfiles`.
 *
 * @param root - Fixture root directory.
 * @param packagePath - Package location relative to `root`.
 * @param lockfiles - Names of empty lockfiles to create in the package root.
 * @returns Absolute path of the created package root.
 */
function makePackage(root: string, packagePath: string, lockfiles: string[] = []) {
  const packageRoot = join(root, packagePath);
  mkdirSync(join(packageRoot, "bin"), { recursive: true });
  mkdirSync(join(packageRoot, "dist", "cli"), { recursive: true });
  copyFileSync(join(repoRoot, "bin", "qmd"), join(packageRoot, "bin", "qmd"));
  chmodSync(join(packageRoot, "bin", "qmd"), 0o755);
  writeFileSync(join(packageRoot, "dist", "cli", "qmd.js"), "// fixture\n");
  for (const lockfile of lockfiles) {
    writeFileSync(join(packageRoot, lockfile), "");
  }
  return packageRoot;
}
|
||||
|
||||
/**
 * Create a symlink at `linkPath` pointing to `target` through a path relative
 * to the link's own directory. Missing parent directories are created first.
 *
 * @param target - Path the link should resolve to.
 * @param linkPath - Location of the symlink to create.
 */
function symlinkRelative(target: string, linkPath: string) {
  mkdirSync(dirname(linkPath), { recursive: true });
  symlinkSync(relative(dirname(linkPath), target), linkPath);
}
|
||||
|
||||
/**
 * Run `commandPath --version` with the fake runtimes first on `PATH`, then
 * parse what the selected runtime recorded.
 *
 * Any previous capture file is removed before the run. The capture file is
 * read as newline-separated fields: runtime name, script path, then the
 * remaining arguments.
 *
 * @param commandPath - Executable (or symlink to it) to invoke.
 * @param runtimeBin - Directory prepended to `PATH`.
 * @param capturePath - File passed to the child as `QMD_WRAPPER_CAPTURE`.
 * @param env - Extra environment variables layered over `process.env`.
 * @returns The recorded runtime name, script path, and forwarded arguments.
 */
function runWrapper(commandPath: string, runtimeBin: string, capturePath: string, env: Record<string, string> = {}) {
  rmSync(capturePath, { force: true });
  execFileSync(commandPath, ["--version"], {
    env: {
      ...process.env,
      ...env,
      // PATH and the capture target come last so `env` cannot override them.
      PATH: `${runtimeBin}:${process.env.PATH ?? ""}`,
      QMD_WRAPPER_CAPTURE: capturePath,
    },
    stdio: ["ignore", "pipe", "pipe"],
  });
  const [runtime, scriptPath, ...args] = readFileSync(capturePath, "utf8").trimEnd().split("\n");
  return { runtime, scriptPath, args };
}
|
||||
|
||||
// Remove every fixture directory created during the test. splice(0) empties
// the shared list while iterating over its former contents.
afterEach(() => {
  for (const fixture of fixtures.splice(0)) {
    rmSync(fixture, { recursive: true, force: true });
  }
});
|
||||
|
||||
// Each case builds a fake install layout, runs the wrapper through it, and
// checks which fake runtime was chosen and which script path it was handed.
describe("bin/qmd package wrapper", () => {
  test("direct package invocation resolves dist/cli/qmd.js from the package root", () => {
    const { root, runtimeBin, capturePath } = makeTempFixture();
    const packageRoot = makePackage(root, "node_modules/@tobilu/qmd");

    const result = runWrapper(join(packageRoot, "bin", "qmd"), runtimeBin, capturePath);

    expect(result.runtime).toBe("node");
    expect(result.scriptPath).toBe(realpathSync(join(packageRoot, "dist", "cli", "qmd.js")));
    expect(result.args).toEqual(["--version"]);
  });

  test("npm/Homebrew global bin symlink resolves scoped package path", () => {
    const { root, runtimeBin, capturePath } = makeTempFixture();
    const packageRoot = makePackage(root, "opt/homebrew/lib/node_modules/@tobilu/qmd");
    const globalBin = join(root, "opt", "homebrew", "bin", "qmd");
    symlinkRelative(join(packageRoot, "bin", "qmd"), globalBin);

    const result = runWrapper(globalBin, runtimeBin, capturePath);

    expect(result.runtime).toBe("node");
    expect(result.scriptPath).toBe(realpathSync(join(packageRoot, "dist", "cli", "qmd.js")));
  });

  test("multi-hop global bin symlink chain resolves to the real package root", () => {
    const { root, runtimeBin, capturePath } = makeTempFixture();
    const packageRoot = makePackage(root, "opt/homebrew/lib/node_modules/@tobilu/qmd");
    const globalBin = join(root, "opt", "homebrew", "bin", "qmd");
    const shim = join(root, "opt", "homebrew", "Cellar", "qmd", "current", "bin", "qmd");
    // globalBin -> shim -> package bin: two symlink hops.
    symlinkRelative(join(packageRoot, "bin", "qmd"), shim);
    symlinkRelative(shim, globalBin);

    const result = runWrapper(globalBin, runtimeBin, capturePath);

    expect(result.runtime).toBe("node");
    expect(result.scriptPath).toBe(realpathSync(join(packageRoot, "dist", "cli", "qmd.js")));
  });

  test("linuxbrew global bin symlink resolves lib/node_modules scoped package path", () => {
    const { root, runtimeBin, capturePath } = makeTempFixture();
    const packageRoot = makePackage(root, "home/linuxbrew/.linuxbrew/lib/node_modules/@tobilu/qmd");
    const globalBin = join(root, "home", "linuxbrew", ".linuxbrew", "bin", "qmd");
    symlinkRelative(join(packageRoot, "bin", "qmd"), globalBin);

    const result = runWrapper(globalBin, runtimeBin, capturePath);

    expect(result.runtime).toBe("node");
    expect(result.scriptPath).toBe(realpathSync(join(packageRoot, "dist", "cli", "qmd.js")));
  });

  test("npx scoped package .bin symlink resolves @tobilu/qmd package path", () => {
    const { root, runtimeBin, capturePath } = makeTempFixture();
    const packageRoot = makePackage(root, "npm/_npx/abc123/node_modules/@tobilu/qmd");
    const npxBin = join(root, "npm", "_npx", "abc123", "node_modules", ".bin", "qmd");
    symlinkRelative(join(packageRoot, "bin", "qmd"), npxBin);

    const result = runWrapper(npxBin, runtimeBin, capturePath);

    expect(result.runtime).toBe("node");
    expect(result.scriptPath).toBe(realpathSync(join(packageRoot, "dist", "cli", "qmd.js")));
  });

  test("bun global symlink uses bun when package-local bun lockfile exists", () => {
    const { root, runtimeBin, capturePath } = makeTempFixture();
    const packageRoot = makePackage(root, "home/user/.bun/install/global/node_modules/@tobilu/qmd", ["bun.lock"]);
    const bunBin = join(root, "home", "user", ".bun", "bin", "qmd");
    symlinkRelative(join(packageRoot, "bin", "qmd"), bunBin);

    const result = runWrapper(bunBin, runtimeBin, capturePath);

    expect(result.runtime).toBe("bun");
    expect(result.scriptPath).toBe(realpathSync(join(packageRoot, "dist", "cli", "qmd.js")));
  });

  test("ambient BUN_INSTALL alone does not select bun for an npm-installed package", () => {
    const { root, runtimeBin, capturePath } = makeTempFixture();
    const packageRoot = makePackage(root, "opt/homebrew/lib/node_modules/@tobilu/qmd");
    const globalBin = join(root, "opt", "homebrew", "bin", "qmd");
    symlinkRelative(join(packageRoot, "bin", "qmd"), globalBin);

    // No lockfile in the package: only the environment hints at bun.
    const result = runWrapper(globalBin, runtimeBin, capturePath, { BUN_INSTALL: join(root, ".bun") });

    expect(result.runtime).toBe("node");
    expect(result.scriptPath).toBe(realpathSync(join(packageRoot, "dist", "cli", "qmd.js")));
  });

  test("package-lock.json takes priority over bun lockfiles", () => {
    const { root, runtimeBin, capturePath } = makeTempFixture();
    const packageRoot = makePackage(root, "node_modules/@tobilu/qmd", ["package-lock.json", "bun.lock"]);

    const result = runWrapper(join(packageRoot, "bin", "qmd"), runtimeBin, capturePath);

    expect(result.runtime).toBe("node");
    expect(result.scriptPath).toBe(realpathSync(join(packageRoot, "dist", "cli", "qmd.js")));
  });
});
|
||||
82
test/cli-exit-lifecycle.test.ts
Normal file
82
test/cli-exit-lifecycle.test.ts
Normal file
@ -0,0 +1,82 @@
|
||||
import { describe, expect, test } from "vitest";
|
||||
import { finishSuccessfulCliCommand } from "../src/cli/qmd.ts";
|
||||
import { LlamaCpp } from "../src/llm.ts";
|
||||
|
||||
describe("CLI successful-exit lifecycle", () => {
  test("exits 0 after successful JSON output when post-output LLM cleanup fails", async () => {
    const exitCodes: number[] = [];
    const stderr: string[] = [];
    const flushed: string[] = [];

    await finishSuccessfulCliCommand({
      command: "query",
      format: "json",
      platform: "linux",
      // Simulated cleanup failure: the command must still exit 0.
      cleanup: async () => {
        throw new Error("ggml_metal_device_free abort simulation");
      },
      exit: (code) => {
        exitCodes.push(code);
      },
      stdout: {
        write: (chunk: string | Uint8Array, cb?: (error?: Error | null) => void) => {
          flushed.push(String(chunk));
          cb?.();
          return true;
        },
      },
      stderr: {
        write: (chunk: string | Uint8Array, cb?: (error?: Error | null) => void) => {
          stderr.push(String(chunk));
          cb?.();
          return true;
        },
      },
    });

    expect(exitCodes).toEqual([0]);
    expect(stderr.join("")).toContain("QMD Warning: cleanup after successful output failed");
    // Exactly one empty write reaches stdout.
    expect(flushed).toEqual([""]);
  });

  test("uses immediate exit for successful macOS JSON query after stdout flush", async () => {
    const calls: string[] = [];

    await finishSuccessfulCliCommand({
      command: "query",
      format: "json",
      platform: "darwin",
      cleanup: async () => {
        calls.push("cleanup");
      },
      exit: (code) => {
        calls.push(`exit:${code}`);
      },
      immediateExit: (code) => {
        calls.push(`immediate-exit:${code}`);
      },
      stdout: {
        write: (_chunk: string | Uint8Array, cb?: (error?: Error | null) => void) => {
          calls.push("stdout-flush");
          cb?.();
          return true;
        },
      },
      stderr: {
        write: (_chunk: string | Uint8Array, cb?: (error?: Error | null) => void) => {
          calls.push("stderr-flush");
          cb?.();
          return true;
        },
      },
    });

    // Neither cleanup nor the regular exit runs on this path.
    expect(calls).toEqual(["stdout-flush", "stderr-flush", "immediate-exit:0"]);
  });

  test("disposes Llama resources in dependency order before CLI exit", async () => {
    const calls: string[] = [];
    const llm = new LlamaCpp({ inactivityTimeoutMs: 0 });
    // Stand-in resource that records its name when disposed.
    const disposable = (name: string) => ({
      dispose: async () => {
        calls.push(name);
      },
    });

    // Inject the stand-ins over the instance's private fields.
    Object.assign(llm as unknown as Record<string, unknown>, {
      embedContexts: [disposable("embed-context")],
      rerankContexts: [disposable("rerank-context")],
      embedModel: disposable("embed-model"),
      generateModel: disposable("generate-model"),
      rerankModel: disposable("rerank-model"),
      llama: disposable("llama"),
    });

    await llm.dispose();

    // Contexts first, then models, then the llama runtime.
    expect(calls).toEqual([
      "embed-context",
      "rerank-context",
      "embed-model",
      "generate-model",
      "rerank-model",
      "llama",
    ]);
  });
});
|
||||
@ -233,6 +233,7 @@ describe("CLI Help", () => {
|
||||
expect(stdout).toContain("Usage:");
|
||||
expect(stdout).toContain("qmd collection add");
|
||||
expect(stdout).toContain("qmd search");
|
||||
expect(stdout).toContain("--no-gpu");
|
||||
expect(stdout).toContain("qmd skill show/install");
|
||||
});
|
||||
|
||||
@ -507,6 +508,16 @@ describe("CLI Search Command", () => {
|
||||
// Error message goes to stderr
|
||||
expect(stderr).toContain("Usage:");
|
||||
});
|
||||
|
||||
// With --full the JSON result must carry both a positive line number and the body.
test("--json --full includes line field for round-tripping to qmd get", async () => {
  const { stdout, exitCode } = await runQmd(["search", "meeting", "--json", "--full", "-n", "1"]);
  expect(exitCode).toBe(0);
  const results = JSON.parse(stdout);
  expect(results.length).toBeGreaterThan(0);
  expect(results[0].line).toBeTypeOf("number");
  expect(results[0].line).toBeGreaterThan(0);
  expect(results[0].body).toBeTypeOf("string");
});
|
||||
});
|
||||
|
||||
describe("CLI Get Command", () => {
|
||||
@ -532,6 +543,13 @@ describe("CLI Get Command", () => {
|
||||
// Should indicate file not found
|
||||
expect(exitCode).toBe(1);
|
||||
});
|
||||
|
||||
// A negative --from must produce the same output as no --from at all.
test("clamps negative --from to top of file (no silent tail content)", async () => {
  const baseline = await runQmd(["get", "README.md"]);
  const negative = await runQmd(["get", "README.md", "--from", "-19"]);
  expect(negative.exitCode).toBe(0);
  expect(negative.stdout).toBe(baseline.stdout);
});
|
||||
});
|
||||
|
||||
describe("CLI Multi-Get Command", () => {
|
||||
|
||||
@ -13,6 +13,8 @@ import {
|
||||
getDefaultLlamaCpp,
|
||||
disposeDefaultLlamaCpp,
|
||||
resolveLlamaGpuMode,
|
||||
setNodeLlamaCppModuleForTest,
|
||||
withNativeStdoutRedirectedToStderr,
|
||||
resolveParallelismOverride,
|
||||
resolveSafeParallelism,
|
||||
withLLMSession,
|
||||
@ -78,6 +80,29 @@ describe("QMD_LLAMA_GPU resolution", () => {
|
||||
expect(resolveLlamaGpuMode(" cuda ")).toBe("cuda");
|
||||
});
|
||||
|
||||
test("QMD_FORCE_CPU disables GPU before QMD_LLAMA_GPU auto-detection", () => {
  const prevForceCpu = process.env.QMD_FORCE_CPU;
  process.env.QMD_FORCE_CPU = "1";
  try {
    // The override wins even when an explicit backend is requested.
    expect(resolveLlamaGpuMode(undefined)).toBe(false);
    expect(resolveLlamaGpuMode("cuda")).toBe(false);
  } finally {
    // Restore the caller's environment, deleting the key if it was unset.
    if (prevForceCpu === undefined) delete process.env.QMD_FORCE_CPU;
    else process.env.QMD_FORCE_CPU = prevForceCpu;
  }
});
|
||||
|
||||
test("QMD_FORCE_CPU ignores false-ish values", () => {
  const prevForceCpu = process.env.QMD_FORCE_CPU;
  process.env.QMD_FORCE_CPU = "0";
  try {
    // "0" must not force CPU; resolution stays on auto.
    expect(resolveLlamaGpuMode(undefined)).toBe("auto");
  } finally {
    // Restore the caller's environment, deleting the key if it was unset.
    if (prevForceCpu === undefined) delete process.env.QMD_FORCE_CPU;
    else process.env.QMD_FORCE_CPU = prevForceCpu;
  }
});
|
||||
|
||||
test("warns and falls back to auto for unsupported values", () => {
|
||||
const stderrSpy = vi.spyOn(process.stderr, "write").mockReturnValue(true);
|
||||
try {
|
||||
@ -90,6 +115,71 @@ describe("QMD_LLAMA_GPU resolution", () => {
|
||||
});
|
||||
});
|
||||
|
||||
describe("native llama stdout containment", () => {
  test("redirects native stdout noise to stderr while JSON callers are initializing llama", async () => {
    const stdoutSpy = vi.spyOn(process.stdout, "write").mockReturnValue(true);
    const stderrSpy = vi.spyOn(process.stderr, "write").mockReturnValue(true);
    try {
      await withNativeStdoutRedirectedToStderr(async () => {
        process.stdout.write("cmake build spam\n");
        return "ok";
      });

      // The write made inside the callback must land on stderr only.
      expect(stdoutSpy).not.toHaveBeenCalled();
      expect(stderrSpy).toHaveBeenCalledWith("cmake build spam\n", undefined, undefined);
    } finally {
      stdoutSpy.mockRestore();
      stderrSpy.mockRestore();
    }
  });

  test("keeps native GPU failure noise off stdout and caches failed GPU init", async () => {
    const prevGpu = process.env.QMD_LLAMA_GPU;
    const prevForceCpu = process.env.QMD_FORCE_CPU;
    process.env.QMD_LLAMA_GPU = "cuda";
    delete process.env.QMD_FORCE_CPU;

    // Records the `gpu` option of every getLlama call, in order.
    const calls: unknown[] = [];
    const fakeLlama = { gpu: false, cpuMathCores: 4 };
    setNodeLlamaCppModuleForTest({
      LlamaLogLevel: { error: "error" },
      resolveModelFile: vi.fn(),
      LlamaChatSession: vi.fn() as any,
      getLlama: vi.fn(async (options: Record<string, unknown>) => {
        calls.push(options.gpu);
        // Simulate a CUDA probe that prints to stdout and then fails.
        if (options.gpu === "cuda") {
          process.stdout.write("cmake build spam\n");
          throw new Error("CUDA unavailable");
        }
        return fakeLlama as any;
      }),
    });

    const stdoutSpy = vi.spyOn(process.stdout, "write").mockReturnValue(true);
    const stderrSpy = vi.spyOn(process.stderr, "write").mockReturnValue(true);
    try {
      const first = new LlamaCpp();
      const second = new LlamaCpp();

      await (first as any).ensureLlama();
      await (second as any).ensureLlama();

      expect(stdoutSpy).not.toHaveBeenCalled();
      expect(stderrSpy).toHaveBeenCalledWith("cmake build spam\n", undefined, undefined);
      // "cuda" is requested only once across both instances; the other two
      // calls ask for gpu: false.
      expect(calls).toEqual(["cuda", false, false]);
      expect(String(stderrSpy.mock.calls.map(call => call[0]).join(""))).toContain("skipping previously failed GPU init");
    } finally {
      stdoutSpy.mockRestore();
      stderrSpy.mockRestore();
      setNodeLlamaCppModuleForTest(null);
      if (prevGpu === undefined) delete process.env.QMD_LLAMA_GPU;
      else process.env.QMD_LLAMA_GPU = prevGpu;
      if (prevForceCpu === undefined) delete process.env.QMD_FORCE_CPU;
      else process.env.QMD_FORCE_CPU = prevForceCpu;
    }
  });
});
|
||||
|
||||
describe("LLM context parallelism safety", () => {
|
||||
test("defaults Windows CUDA to one context to avoid ggml-cuda.cu:98 crashes", () => {
|
||||
expect(resolveSafeParallelism({
|
||||
|
||||
@ -913,6 +913,22 @@ describe.skipIf(!!process.env.CI)("MCP HTTP Transport", () => {
|
||||
initTestDatabase(db);
|
||||
seedTestData(db);
|
||||
|
||||
// 300 pad lines (37 chars each = 11100 chars) puts the marker past the
|
||||
// first chunk boundary at CHUNK_SIZE_CHARS = 3600.
|
||||
{
|
||||
const padLine = "Pad line for chunk boundary coverage\n";
|
||||
const absLineFixtureBody =
|
||||
padLine.repeat(300) +
|
||||
"UNIQUE_KEYWORD_XYZ marker\n" +
|
||||
padLine.repeat(20);
|
||||
const fixtureHash = "hash-abslines";
|
||||
const now = new Date().toISOString();
|
||||
db.prepare(`INSERT OR IGNORE INTO content (hash, doc, created_at) VALUES (?, ?, ?)`)
|
||||
.run(fixtureHash, absLineFixtureBody, now);
|
||||
db.prepare(`INSERT INTO documents (collection, path, title, hash, created_at, modified_at, active) VALUES ('docs', ?, ?, ?, ?, ?, 1)`)
|
||||
.run("absolute-line-fixture.md", "Absolute Line Fixture", fixtureHash, now, now);
|
||||
}
|
||||
|
||||
// Sync config into SQLite
|
||||
const httpTestConfig: CollectionConfig = {
|
||||
collections: {
|
||||
@ -1074,4 +1090,29 @@ describe.skipIf(!!process.env.CI)("MCP HTTP Transport", () => {
|
||||
expect(json.result).toBeDefined();
|
||||
expect(json.result.content.length).toBeGreaterThan(0);
|
||||
});
|
||||
|
||||
test("POST /mcp tools/call query returns absolute source-file line numbers, not chunk-local", async () => {
  // The session is initialized before the tool call is issued.
  await mcpRequest({
    jsonrpc: "2.0", id: 1, method: "initialize",
    params: { protocolVersion: "2025-03-26", capabilities: {}, clientInfo: { name: "test", version: "1.0" } },
  });

  const { status, json } = await mcpRequest({
    jsonrpc: "2.0", id: 5, method: "tools/call",
    params: {
      name: "query",
      arguments: {
        searches: [{ type: "lex", query: "UNIQUE_KEYWORD_XYZ" }],
        rerank: false,
      },
    },
  });
  expect(status).toBe(200);
  const results = json.result.structuredContent.results;
  expect(results.length).toBeGreaterThan(0);
  const hit = results.find((r: any) => r.file === "docs/absolute-line-fixture.md");
  expect(hit).toBeDefined();
  // The fixture places the marker after 300 pad lines, i.e. on line 301.
  expect(hit.line).toBe(301);
  // The snippet header must also carry a 3xx start line.
  expect(hit.snippet).toMatch(/^\d+: @@ -3\d\d,/);
});
|
||||
});
|
||||
|
||||
@ -1713,6 +1713,21 @@ describe("Document Retrieval", () => {
|
||||
expect(body).toBeNull();
|
||||
await cleanupTestDb(store);
|
||||
});
|
||||
|
||||
test("getDocumentBody clamps negative fromLine to top of document", async () => {
  const store = await createTestStore();
  const collectionName = await createTestCollection({ pwd: "/path" });
  await insertTestDocument(store.db, collectionName, {
    name: "mydoc",
    displayPath: "mydoc.md",
    body: "Line 1\nLine 2\nLine 3\nLine 4\nLine 5",
  });

  // fromLine = -19 with maxLines = 80 must return the whole five-line body.
  const body = store.getDocumentBody({ filepath: "/path/mydoc.md" }, -19, 80);
  expect(body).toBe("Line 1\nLine 2\nLine 3\nLine 4\nLine 5");

  await cleanupTestDb(store);
});
|
||||
});
|
||||
|
||||
describe("findDocuments (multi-get)", () => {
|
||||
@ -2001,6 +2016,33 @@ describe("Snippet Extraction", () => {
|
||||
expect(line).toBe(51); // "Target keyword" is line 51
|
||||
expect(linesBefore).toBeGreaterThan(40); // Many lines before
|
||||
});
|
||||
|
||||
test("extractSnippet anchors on chunkPos when lexical scoring finds no match", () => {
  // The snippet tokenizer does not strip FTS5 syntax, so a quoted-phrase query
  // tokenises into terms with embedded quotes that never appear in body text.
  // bestScore stays at 0 even though the reranker correctly identified a chunk;
  // the fallback should anchor on chunkPos rather than defaulting to line 1.
  const padLine = "Lorem ipsum dolor sit amet\n";
  const padding = padLine.repeat(100);
  const body = padding + "chunk content here\nmore chunk content\n" + padding;
  const chunkPos = padding.length;

  const { line } = extractSnippet(body, '"unrelated quoted phrase"', 200, chunkPos);

  expect(line).toBeGreaterThan(50);
  expect(line).toBeLessThan(110);
});
|
||||
|
||||
test("extractSnippet with chunkPos=0 falls back to full-body scan when chunk has no match", () => {
  // chunkPos=0 may be the chunk selector's bestIdx=0 default rather than a real
  // first-chunk hit, so the fallback must consider matches outside chunk 0.
  const padding = "Lorem ipsum dolor sit amet\n".repeat(200);
  const body = padding + "TARGET_KEYWORD line content\ntail line\n";

  const { line } = extractSnippet(body, "TARGET_KEYWORD", 200, 0);

  // 200 padding lines precede the keyword.
  expect(line).toBe(201);
});
|
||||
});
|
||||
|
||||
// =============================================================================
|
||||
@ -2239,6 +2281,26 @@ describe("Index Status", () => {
|
||||
await cleanupTestDb(store);
|
||||
});
|
||||
|
||||
test("embedding health is scoped to the active embed model", async () => {
  const store = await createTestStore();
  const collectionName = await createTestCollection();
  const activeModel = "hf:active/embed-model.gguf";
  const staleModel = "hf:stale/embed-model.gguf";
  const now = new Date().toISOString();

  store.llm = { embedModelName: activeModel } as any;
  store.ensureVecTable(3);
  await insertTestDocument(store.db, collectionName, { name: "doc1", hash: "hash1" });
  // The only stored vector belongs to the stale model.
  store.insertEmbedding("hash1", 0, 0, new Float32Array([1, 2, 3]), staleModel, now, 1);

  // With no explicit model the store's active model applies, so doc1 is pending.
  expect(store.getHashesNeedingEmbedding()).toBe(1);
  expect(store.getStatus().needsEmbedding).toBe(1);
  expect(store.getIndexHealth().needsEmbedding).toBe(1);
  // Asking for the stale model explicitly finds its vector.
  expect(store.getHashesNeedingEmbedding(staleModel)).toBe(0);

  await cleanupTestDb(store);
});
|
||||
|
||||
test("getIndexHealth returns health info", async () => {
|
||||
const store = await createTestStore();
|
||||
const collectionName = await createTestCollection();
|
||||
@ -3051,6 +3113,68 @@ describe("Embedding batching", () => {
|
||||
}
|
||||
});
|
||||
|
||||
test("generateEmbeddings does not mark a partially embedded multi-chunk document complete", async () => {
  // Simulates a batch embed in which only the first text of each batch gets a
  // vector and every other text comes back as null. The document must then be
  // left with no stored vectors and still be reported as needing embedding.
  const store = await createTestStore();
  const database = store.db;

  const partialLlm = {
    embed: async (_text: string, _options?: { model?: string }) => {
      return { embedding: [0.1, 0.2, 0.3], model: "fake-embed" };
    },
    embedBatch: async (texts: string[], _options?: { model?: string }) => {
      return texts.map((_text, position) => {
        if (position === 0) {
          return { embedding: [1, 2, 3], model: "fake-embed" };
        }
        return null;
      });
    },
  };

  setDefaultLlamaCpp(createFakeTokenizer() as any);
  store.llm = partialLlm as any;

  try {
    const longBody = "# Long doc\n\n" + "partial embedding regression ".repeat(260);
    await insertTestDocument(database, "docs", { name: "long-doc", body: longBody });

    const outcome = await generateEmbeddings(store);

    expect(outcome.errors).toBeGreaterThan(0);

    // Neither vector table may keep rows for the incomplete document.
    const rowCountQueries = [
      `SELECT COUNT(*) as count FROM content_vectors`,
      `SELECT COUNT(*) as count FROM vectors_vec`,
    ];
    for (const sql of rowCountQueries) {
      expect(database.prepare(sql).get()).toEqual({ count: 0 });
    }

    // The document stays pending in both status views.
    expect(store.getHashesNeedingEmbedding()).toBe(1);
    expect(store.getStatus().needsEmbedding).toBe(1);
  } finally {
    setDefaultLlamaCpp(null);
    await cleanupTestDb(store);
  }
});
|
||||
|
||||
test("generateEmbeddings opens a long-lived LLM session for embed runs", async () => {
  // Verifies that an embed run goes through withLLMSessionForLlm with the
  // store's LLM, a callback, and session options carrying the expected
  // maxDuration and name.
  const store = await createTestStore();
  const fakeLlm = createFakeEmbedLlm();
  const sessionSpy = vi.spyOn(llmModule, "withLLMSessionForLlm");

  setDefaultLlamaCpp(createFakeTokenizer() as any);
  store.llm = fakeLlm as any;

  try {
    await insertTestDocument(store.db, "docs", { name: "one", body: "# One\n\nAlpha" });
    await generateEmbeddings(store);

    // 30 * 60 * 1000 is thirty minutes if maxDuration is in milliseconds.
    const expectedMaxDuration = 30 * 60 * 1000;
    const expectedSessionOptions = expect.objectContaining({
      maxDuration: expectedMaxDuration,
      name: "generateEmbeddings",
    });

    expect(sessionSpy).toHaveBeenCalledWith(fakeLlm, expect.any(Function), expectedSessionOptions);
  } finally {
    // Undo the spy and the global tokenizer override before dropping the store.
    sessionSpy.mockRestore();
    setDefaultLlamaCpp(null);
    await cleanupTestDb(store);
  }
});
|
||||
|
||||
test("vectorSearchQuery uses the active llm embed model for vector lookups", async () => {
|
||||
const store = await createTestStore();
|
||||
const model = "hf:Qwen/Qwen3-Embedding-0.6B-GGUF/Qwen3-Embedding-0.6B-Q8_0.gguf";
|
||||
|
||||
Loading…
Reference in New Issue
Block a user