Improve qmd diagnostics and embed resilience
This commit is contained in:
parent
105c577b3b
commit
b5f156c313
@ -10,6 +10,13 @@
|
||||
|
||||
### Fixes
|
||||
|
||||
- Launcher: prefer runnable TypeScript source in git checkouts even when ignored `dist/` artifacts exist, while packaged installs continue to run `dist/`.
|
||||
- GPU: keep node-llama-cpp's documented `gpu: "auto"` initialization as the primary path, then perform no-build packaged CUDA/Vulkan/Metal probes only if auto falls back to CPU.
|
||||
- CLI: move GPU/CPU runtime diagnostics out of `qmd status`; use `qmd doctor` for device probing and related environment guidance.
|
||||
- CLI: point unexpected command/setup failures toward `qmd doctor` so diagnostics are the default next step when QMD behaves incorrectly.
|
||||
- Doctor: explicitly warn when `content_vectors` contains multiple non-empty embedding fingerprint names, with the per-fingerprint document/chunk breakdown.
|
||||
- Embed: make the TTY progress line label byte-based input progress explicitly, show embedded chunks as a count, and shorten the displayed model name.
|
||||
- Embed: retain per-chunk failure details, retry failed chunks after later successful embeds and again when no other chunks remain, clear recovered errors, and cap retries to avoid endless loops.
|
||||
- Embedding: fingerprint vector metadata using the active embedding model and formatting/chunking parameters so stale vectors are treated as pending after search semantics change. Legacy `content_vectors` columns are migrated lazily on first vector-health/write use to preserve fast QMD startup.
|
||||
|
||||
- Skill: expand the packaged QMD skill with retrieval-first workflows, structured query examples, wiki/source collection guidance, and safe fallbacks when model-backed search is unavailable.
|
||||
|
||||
29
bin/qmd
29
bin/qmd
@ -26,27 +26,28 @@ if [ "$1" = "mcp" ]; then
|
||||
fi
|
||||
|
||||
JS="$DIR/dist/cli/qmd.js"
|
||||
TS="$DIR/src/cli/qmd.ts"
|
||||
|
||||
# In published packages dist/ is always present. In a fresh checkout, however,
|
||||
# people often run ./bin/qmd before building. Prefer a source-mode fallback when
|
||||
# dependencies are installed; otherwise fail with an actionable message instead
|
||||
# of a low-level "Module not found" from Node/Bun.
|
||||
if [ ! -f "$JS" ]; then
|
||||
TS="$DIR/src/cli/qmd.ts"
|
||||
if [ -f "$TS" ]; then
|
||||
if [ -f "$DIR/bun.lock" ] || [ -f "$DIR/bun.lockb" ]; then
|
||||
if command -v bun >/dev/null 2>&1; then
|
||||
exec bun "$TS" "$@"
|
||||
fi
|
||||
fi
|
||||
if [ -f "$DIR/node_modules/tsx/dist/cli.mjs" ]; then
|
||||
exec node "$DIR/node_modules/tsx/dist/cli.mjs" "$TS" "$@"
|
||||
# In published packages, bin/qmd must run dist/. In a git checkout, however,
|
||||
# dist/ is often ignored and can be stale after git reset or branch switches.
|
||||
# Prefer source mode only for checkouts so ./bin/qmd reflects the checked-out
|
||||
# source without changing packaged/runtime behavior.
|
||||
if [ -e "$DIR/.git" ] && [ -f "$TS" ]; then
|
||||
if [ -f "$DIR/bun.lock" ] || [ -f "$DIR/bun.lockb" ]; then
|
||||
if command -v bun >/dev/null 2>&1; then
|
||||
exec bun "$TS" "$@"
|
||||
fi
|
||||
fi
|
||||
if [ -f "$DIR/node_modules/tsx/dist/cli.mjs" ]; then
|
||||
exec node "$DIR/node_modules/tsx/dist/cli.mjs" "$TS" "$@"
|
||||
fi
|
||||
fi
|
||||
|
||||
if [ ! -f "$JS" ]; then
|
||||
echo "qmd is not built: missing $JS" >&2
|
||||
echo "Run: bun install && bun run build" >&2
|
||||
echo "Or: npm install && npm run build" >&2
|
||||
echo "After building, run: qmd doctor" >&2
|
||||
exit 1
|
||||
fi
|
||||
|
||||
|
||||
@ -260,16 +260,18 @@ async function main() {
|
||||
const r = await benchmarkConfig(model, llama, docs, p, true);
|
||||
results.push(r);
|
||||
process.stdout.write(` ${r.medianMs.toFixed(0)}ms (${r.docsPerSec.toFixed(1)} docs/s)\n`);
|
||||
} catch (e: any) {
|
||||
process.stdout.write(` failed: ${e.message}\n`);
|
||||
} catch (e: unknown) {
|
||||
const message = e instanceof Error ? e.message : String(e);
|
||||
process.stdout.write(` failed: ${message}\n`);
|
||||
// Try without flash
|
||||
process.stdout.write(` [${p} ctx, no flash] running...`);
|
||||
try {
|
||||
const r = await benchmarkConfig(model, llama, docs, p, false);
|
||||
results.push(r);
|
||||
process.stdout.write(` ${r.medianMs.toFixed(0)}ms (${r.docsPerSec.toFixed(1)} docs/s)\n`);
|
||||
} catch (e2: any) {
|
||||
process.stdout.write(` failed: ${e2.message}\n`);
|
||||
} catch (e2: unknown) {
|
||||
const message = e2 instanceof Error ? e2.message : String(e2);
|
||||
process.stdout.write(` failed: ${message}\n`);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -176,7 +176,7 @@ async function runQuery(
|
||||
let resultFiles: string[];
|
||||
try {
|
||||
resultFiles = await backend.run(store, query, limit, collection);
|
||||
} catch (err: any) {
|
||||
} catch {
|
||||
// Backend may not be available (e.g., no embeddings for vector search)
|
||||
return {
|
||||
precision_at_k: 0,
|
||||
|
||||
181
src/cli/qmd.ts
181
src/cli/qmd.ts
@ -1,5 +1,5 @@
|
||||
import { isBun, openDatabase } from "../db.js";
|
||||
import type { Database } from "../db.js";
|
||||
import type { Database, SQLiteValue } from "../db.js";
|
||||
import fastGlob from "fast-glob";
|
||||
import { execSync, spawn as nodeSpawn } from "child_process";
|
||||
import { fileURLToPath } from "url";
|
||||
@ -623,40 +623,6 @@ async function showStatus(): Promise<void> {
|
||||
console.log(` Generation: ${hfLink(activeModels.generate)}`);
|
||||
}
|
||||
|
||||
// Device / GPU info
|
||||
// Important: probing node-llama-cpp can abort the whole process on machines with
|
||||
// incompatible GPU drivers (for example Vulkan loader present but no usable driver).
|
||||
// Keep the native probe opt-in, but always show how QMD is configured and how to probe.
|
||||
console.log(`\n${c.bold}Device${c.reset}`);
|
||||
const configuredGpuMode = configuredGpuModeLabel();
|
||||
console.log(` Mode: ${configuredGpuMode}`);
|
||||
if (process.env.QMD_STATUS_DEVICE_PROBE !== "1") {
|
||||
console.log(` Status: ${c.dim}not probed${c.reset} (set QMD_STATUS_DEVICE_PROBE=1 to test GPU/CPU backend)`);
|
||||
} else {
|
||||
console.log(` Status: probing native llama backend...`);
|
||||
try {
|
||||
const llm = getDefaultLlamaCpp();
|
||||
const device = await llm.getDeviceInfo({ allowBuild: false });
|
||||
if (device.gpu) {
|
||||
console.log(` GPU: ${c.green}${device.gpu}${c.reset} (offloading: ${device.gpuOffloading ? 'yes' : 'no'})`);
|
||||
if (device.gpuDevices.length > 0) {
|
||||
console.log(` Devices: ${summarizeDeviceNames(device.gpuDevices)}`);
|
||||
}
|
||||
if (device.vram) {
|
||||
console.log(` VRAM: ${formatBytes(device.vram.free)} free / ${formatBytes(device.vram.total)} total`);
|
||||
}
|
||||
} else {
|
||||
console.log(` GPU: ${c.yellow}none${c.reset} (running on CPU — models will be slow)`);
|
||||
console.log(` ${c.dim}Tip: Install CUDA, Vulkan, or Metal support for GPU acceleration.${c.reset}`);
|
||||
}
|
||||
console.log(` CPU: ${device.cpuCores} math cores`);
|
||||
} catch (error) {
|
||||
console.log(` Status: ${c.dim}probe failed${c.reset}`);
|
||||
if (error instanceof Error && error.message) {
|
||||
console.log(` ${c.dim}${sanitizeDiagnosticMessage(error.message)}${c.reset}`);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Tips section
|
||||
const tips: string[] = [];
|
||||
@ -1514,7 +1480,7 @@ function listFiles(pathArg?: string): void {
|
||||
|
||||
// List files in the collection with size and modification time
|
||||
let query: string;
|
||||
let params: any[];
|
||||
let params: SQLiteValue[];
|
||||
|
||||
if (pathPrefix) {
|
||||
// List files under a specific path
|
||||
@ -1764,7 +1730,7 @@ async function indexFiles(pwd?: string, globPattern: string = DEFAULT_GLOB, coll
|
||||
let content: string;
|
||||
try {
|
||||
content = readFileSync(filepath, "utf-8");
|
||||
} catch (err: any) {
|
||||
} catch {
|
||||
// Skip files that can't be read (e.g. iCloud evicted files returning EAGAIN)
|
||||
processed++;
|
||||
progress.set((processed / total) * 100);
|
||||
@ -1929,7 +1895,7 @@ async function vectorIndex(
|
||||
return;
|
||||
}
|
||||
|
||||
console.log(`${c.dim}Model: ${model}${c.reset}\n`);
|
||||
console.log(`${c.dim}Model: ${shortModelName(model)}${c.reset}\n`);
|
||||
if (batchOptions?.maxDocsPerBatch !== undefined || batchOptions?.maxBatchBytes !== undefined) {
|
||||
const maxDocsPerBatch = batchOptions.maxDocsPerBatch ?? DEFAULT_EMBED_MAX_DOCS_PER_BATCH;
|
||||
const maxBatchBytes = batchOptions.maxBatchBytes ?? DEFAULT_EMBED_MAX_BATCH_BYTES;
|
||||
@ -1949,21 +1915,28 @@ async function vectorIndex(
|
||||
chunkStrategy: batchOptions?.chunkStrategy,
|
||||
onProgress: (info) => {
|
||||
if (info.totalBytes === 0) return;
|
||||
const percent = (info.bytesProcessed / info.totalBytes) * 100;
|
||||
// Progress is measured by input bytes, not by chunks. The final chunk
|
||||
// count is discovered lazily batch-by-batch, so displaying
|
||||
// chunksEmbedded/totalChunks makes the percent look wrong when a few
|
||||
// large documents remain. Show chunks as a count and label the byte
|
||||
// percentage explicitly as input progress.
|
||||
const percent = Math.min(100, (info.bytesProcessed / info.totalBytes) * 100);
|
||||
progress.set(percent);
|
||||
|
||||
const elapsed = (Date.now() - startTime) / 1000;
|
||||
const bytesPerSec = info.bytesProcessed / elapsed;
|
||||
const remainingBytes = info.totalBytes - info.bytesProcessed;
|
||||
const etaSec = remainingBytes / bytesPerSec;
|
||||
const bytesPerSec = elapsed > 0 ? info.bytesProcessed / elapsed : 0;
|
||||
const remainingBytes = Math.max(0, info.totalBytes - info.bytesProcessed);
|
||||
const etaSec = bytesPerSec > 0 ? remainingBytes / bytesPerSec : Number.POSITIVE_INFINITY;
|
||||
|
||||
const bar = renderProgressBar(percent);
|
||||
const percentStr = percent.toFixed(0).padStart(3);
|
||||
const throughput = `${formatBytes(bytesPerSec)}/s`;
|
||||
const eta = elapsed > 2 ? formatETA(etaSec) : "...";
|
||||
const errStr = info.errors > 0 ? ` ${c.yellow}${info.errors} err${c.reset}` : "";
|
||||
const throughput = bytesPerSec > 0 ? `${formatBytes(bytesPerSec)}/s` : ".../s";
|
||||
const eta = elapsed > 2 && Number.isFinite(etaSec) ? formatETA(etaSec) : "...";
|
||||
const inputStr = `${formatBytes(info.bytesProcessed)}/${formatBytes(info.totalBytes)} input`;
|
||||
const chunkStr = `${formatCount(info.chunksEmbedded)} chunks`;
|
||||
const errStr = info.errors > 0 ? ` ${c.yellow}${formatCount(info.errors)} err${c.reset}` : "";
|
||||
|
||||
if (isTTY) process.stderr.write(`\r${c.cyan}${bar}${c.reset} ${c.bold}${percentStr}%${c.reset} ${c.dim}${info.chunksEmbedded}/${info.totalChunks}${c.reset}${errStr} ${c.dim}${throughput} ETA ${eta}${c.reset} `);
|
||||
if (isTTY) process.stderr.write(`\r${c.cyan}${bar}${c.reset} ${c.bold}${percentStr}% input${c.reset} ${c.dim}${chunkStr}${errStr} · ${inputStr} · ${throughput} · ETA ${eta}${c.reset} `);
|
||||
},
|
||||
});
|
||||
|
||||
@ -1978,7 +1951,13 @@ async function vectorIndex(
|
||||
console.log(`\r${c.green}${renderProgressBar(100)}${c.reset} ${c.bold}100%${c.reset} `);
|
||||
console.log(`\n${c.green}✓ Done!${c.reset} Embedded ${c.bold}${result.chunksEmbedded}${c.reset} chunks from ${c.bold}${result.docsProcessed}${c.reset} documents in ${c.bold}${formatETA(totalTimeSec)}${c.reset}`);
|
||||
if (result.errors > 0) {
|
||||
console.log(`${c.yellow}⚠ ${result.errors} chunks failed${c.reset}`);
|
||||
console.log(`${c.yellow}⚠ ${formatCount(result.errors)} chunks still failed after retries${c.reset}`);
|
||||
for (const failure of (result.failures ?? []).slice(0, 8)) {
|
||||
console.log(` ${c.dim}${failure.path}#${failure.seq} (${failure.attempts} attempts): ${failure.reason}${c.reset}`);
|
||||
}
|
||||
if ((result.failures?.length ?? 0) > 8) {
|
||||
console.log(` ${c.dim}...and ${formatCount((result.failures?.length ?? 0) - 8)} more${c.reset}`);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -3457,7 +3436,6 @@ function collectEnvironmentOverrides(activeModels: { embed: string; generate: st
|
||||
add("QMD_FORCE_CPU", "forces llama.cpp to bypass GPU backends; embeddings/query will be slower but GPU crashes are avoided");
|
||||
add("QMD_LLAMA_GPU", "selects llama.cpp GPU backend (metal/cuda/vulkan) or disables GPU when set to false/off/0");
|
||||
add("QMD_DOCTOR_DEVICE_PROBE", "controls qmd doctor native device probing; 0/off skips GPU probing");
|
||||
add("QMD_STATUS_DEVICE_PROBE", "controls qmd status native device probing only; qmd doctor probes independently");
|
||||
add("QMD_EMBED_PARALLELISM", "overrides embedding parallel context count; too high can exhaust RAM/VRAM");
|
||||
add("QMD_EXPAND_CONTEXT_SIZE", "overrides query expansion context size; larger values use more memory");
|
||||
add("QMD_RERANK_CONTEXT_SIZE", "overrides reranker context size; larger values use more memory");
|
||||
@ -3655,6 +3633,60 @@ async function checkEmbeddingVectorSamples(db: Database, model: string, fingerpr
|
||||
};
|
||||
}
|
||||
|
||||
/**
 * Reports whether any directory in `dirs` contains a file named exactly
 * `libraryBaseName`, or `libraryBaseName` followed by a dot-separated suffix
 * (for example `libcudart.so.12` for the base name `libcudart.so`).
 *
 * Empty entries, directories that do not exist, and directories that cannot
 * be listed are skipped rather than treated as errors.
 *
 * @param libraryBaseName - Unversioned shared-library file name to look for.
 * @param dirs - Directories to scan (non-recursively).
 * @returns `true` as soon as one matching directory entry is found.
 */
function hasLibraryInDirs(libraryBaseName: string, dirs: string[]): boolean {
  const versionedPrefix = `${libraryBaseName}.`;
  const matchesLibrary = (fileName: string): boolean =>
    fileName === libraryBaseName || fileName.startsWith(versionedPrefix);

  return dirs.some((candidateDir) => {
    if (!candidateDir || !existsSync(candidateDir)) return false;
    try {
      return readdirSync(candidateDir).some((fileName) => matchesLibrary(fileName));
    } catch {
      // Unreadable system library directories are ignored on purpose.
      return false;
    }
  });
}
|
||||
|
||||
/**
 * Heuristic diagnostic for Linux hosts that fall back to CPU even though an
 * NVIDIA driver is installed.
 *
 * The function builds a list of candidate library directories (from
 * `LD_LIBRARY_PATH`, `CUDA_PATH`, common system library directories, and any
 * `/usr/local/cuda*` toolkit roots) and checks them with `hasLibraryInDirs`.
 * If a driver library (`libcuda.so` or `libnvidia-ml.so`) is visible but one or
 * more CUDA user-space libraries are not, it returns a sentence naming the
 * missing pieces.
 *
 * The multiarch and CUDA `targets/` directory names are derived from
 * `process.arch`, so arm64 hosts are probed in their own directories instead of
 * the x86_64 ones. Architectures without a dedicated entry keep the x86_64
 * layout that this check originally used.
 *
 * @returns A human-readable diagnostic, or `null` when not on Linux, when no
 *   NVIDIA driver library is found, or when all checked CUDA libraries exist.
 */
function linuxCudaRuntimeDiagnostic(): string | null {
  if (process.platform !== "linux") return null;

  // Directory names differ per CPU architecture: Debian/Ubuntu multiarch uses
  // `<triplet>` under /usr/lib, and the CUDA toolkit uses `targets/<name>/lib`.
  const x64Layout = { multiarch: "x86_64-linux-gnu", cudaTargets: ["x86_64-linux"] };
  const arm64Layout = { multiarch: "aarch64-linux-gnu", cudaTargets: ["sbsa-linux", "aarch64-linux"] };
  const layout = process.arch === "arm64" ? arm64Layout : x64Layout;

  const dirs = new Set<string>();

  // Registers the library directories of one CUDA toolkit installation root.
  const addCudaRoot = (cudaRoot: string): void => {
    dirs.add(pathJoin(cudaRoot, "lib64"));
    for (const target of layout.cudaTargets) {
      dirs.add(pathJoin(cudaRoot, "targets", target, "lib"));
    }
  };

  // Every ":"-separated entry of these variables is searched as-is.
  for (const value of [process.env.LD_LIBRARY_PATH, process.env.CUDA_PATH]) {
    for (const part of (value ?? "").split(":")) {
      if (part) dirs.add(part);
    }
  }
  if (process.env.CUDA_PATH) {
    addCudaRoot(process.env.CUDA_PATH);
  }

  for (const dir of ["/usr/lib", "/usr/lib64", `/usr/lib/${layout.multiarch}`]) {
    dirs.add(dir);
  }
  addCudaRoot("/usr/local/cuda");

  try {
    // Versioned toolkit installs such as /usr/local/cuda-12.4.
    for (const entry of readdirSync("/usr/local")) {
      if (!entry.toLowerCase().startsWith("cuda-")) continue;
      addCudaRoot(pathJoin("/usr/local", entry));
    }
  } catch { /* /usr/local may not be readable in restricted environments */ }

  const searchDirs = [...dirs];
  const hasDriver = hasLibraryInDirs("libcuda.so", searchDirs) || hasLibraryInDirs("libnvidia-ml.so", searchDirs);
  // Without a visible driver there is nothing CUDA-specific to report.
  if (!hasDriver) return null;

  const cudaLibraries: [library: string, label: string][] = [
    ["libcudart.so", "CUDA runtime"],
    ["libcublas.so", "cuBLAS"],
    ["libcublasLt.so", "cuBLASLt"],
  ];
  const missing = cudaLibraries
    .filter(([library]) => !hasLibraryInDirs(library, searchDirs))
    .map(([, label]) => label);

  if (missing.length === 0) return null;
  return `NVIDIA driver libraries are visible, but CUDA user-space libraries are missing from loader paths (${missing.join(", ")})`;
}
|
||||
|
||||
async function runDoctorDeviceChecks(nextSteps: string[]): Promise<void> {
|
||||
const mode = configuredGpuModeLabel();
|
||||
doctorCheck("device mode", true, mode);
|
||||
@ -3691,8 +3723,14 @@ async function runDoctorDeviceChecks(nextSteps: string[]): Promise<void> {
|
||||
nextSteps.push("GPU was detected but offloading is disabled; check `QMD_LLAMA_GPU=metal|cuda|vulkan` and rerun `qmd doctor`.");
|
||||
}
|
||||
} else {
|
||||
doctorCheck("device probe", false, `running on CPU (${device.cpuCores} math cores). Next: install/configure Metal, CUDA, or Vulkan for faster embeddings, or set QMD_FORCE_CPU=1 to make CPU mode explicit`);
|
||||
nextSteps.push("Vector operations are running on CPU; install/configure Metal, CUDA, or Vulkan if embedding/query performance is too slow.");
|
||||
const cudaDiagnostic = linuxCudaRuntimeDiagnostic();
|
||||
const diagnosticSuffix = cudaDiagnostic ? ` ${cudaDiagnostic}.` : "";
|
||||
doctorCheck("device probe", false, `running on CPU (${device.cpuCores} math cores).${diagnosticSuffix} Next: install/configure Metal, CUDA, or Vulkan for faster embeddings, or set QMD_FORCE_CPU=1 to make CPU mode explicit`);
|
||||
if (cudaDiagnostic) {
|
||||
nextSteps.push(`${cudaDiagnostic}; install CUDA runtime/cuBLAS libraries or add their directory to LD_LIBRARY_PATH, then rerun \`qmd doctor\`.`);
|
||||
} else {
|
||||
nextSteps.push("Vector operations are running on CPU; install/configure Metal, CUDA, or Vulkan if embedding/query performance is too slow.");
|
||||
}
|
||||
}
|
||||
} catch (error) {
|
||||
if (process.stdout.isTTY) {
|
||||
@ -3779,6 +3817,15 @@ async function showDoctor(): Promise<void> {
|
||||
const label = row.fingerprint === fingerprint ? "current" : (row.fingerprint || "legacy");
|
||||
return `${shortModelName(row.model)}:${label} ${formatCount(row.docs)} docs/${formatCount(row.chunks)} chunks`;
|
||||
}).join("; ");
|
||||
const namedFingerprintRows = rows.filter(row => row.fingerprint);
|
||||
const namedFingerprints = [...new Set(namedFingerprintRows.map(row => row.fingerprint))];
|
||||
if (namedFingerprints.length > 1) {
|
||||
const namedGroups = namedFingerprintRows
|
||||
.map(row => `${row.fingerprint}${row.fingerprint === fingerprint ? " (current)" : ""}: ${shortModelName(row.model)} ${formatCount(row.docs)} docs/${formatCount(row.chunks)} chunks`)
|
||||
.join("; ");
|
||||
doctorCheck("mixed named embedding fingerprints", false, `content_vectors contains ${namedFingerprints.length} named fingerprints: ${namedGroups}. Next: \`qmd embed\` or \`qmd embed --force\``);
|
||||
nextSteps.push("Run `qmd embed` to converge mixed named embedding fingerprints; use `qmd embed --force` if old named fingerprints or vector sample mismatches remain.");
|
||||
}
|
||||
const details = rows.length === 0
|
||||
? `no vectors yet; current fingerprint ${fingerprint}`
|
||||
: ok
|
||||
@ -3815,7 +3862,23 @@ async function showDoctor(): Promise<void> {
|
||||
closeDb();
|
||||
}
|
||||
|
||||
function readPackageJson(): any {
|
||||
/**
 * Writes a one-line reminder to stderr that `qmd doctor` is the next
 * diagnostic step when qmd misbehaves.
 */
function printDoctorHint(): void {
  const hint = "If qmd still behaves unexpectedly, run 'qmd doctor' for diagnostics.";
  console.error(hint);
}
|
||||
|
||||
/**
 * Reports a failure on stderr, appends the `qmd doctor` hint, and terminates
 * the process.
 *
 * @param error - The thrown value. `Error` instances contribute their
 *   `message`; anything else is converted with `String()`.
 * @param code - Process exit status; defaults to 1.
 * @returns Never returns; the process exits.
 */
function exitWithError(error: unknown, code = 1): never {
  let message: string;
  if (error instanceof Error) {
    message = error.message;
  } else {
    message = String(error);
  }
  console.error(message);
  printDoctorHint();
  process.exit(code);
}
|
||||
|
||||
/**
 * Shape assumed for the parsed `package.json` returned by `readPackageJson`.
 * The value comes straight from `JSON.parse`, so these fields are a typing
 * convenience and are not validated at runtime.
 */
type PackageJson = {
|
||||
/** The `version` field of package.json. */
version: string;
|
||||
/** The `dependencies` map of package.json, when present. */
dependencies?: Record<string, string>;
|
||||
/** The `devDependencies` map of package.json, when present. */
devDependencies?: Record<string, string>;
|
||||
};
|
||||
|
||||
function readPackageJson(): PackageJson {
|
||||
const scriptDir = dirname(fileURLToPath(import.meta.url));
|
||||
const pkgPath = resolve(scriptDir, "..", "..", "package.json");
|
||||
return JSON.parse(readFileSync(pkgPath, "utf-8"));
|
||||
@ -4122,6 +4185,7 @@ if (isMain) {
|
||||
default:
|
||||
console.error(`Unknown subcommand: ${subcommand}`);
|
||||
console.error("Run 'qmd collection help' for usage");
|
||||
printDoctorHint();
|
||||
process.exit(1);
|
||||
}
|
||||
break;
|
||||
@ -4131,8 +4195,7 @@ if (isMain) {
|
||||
try {
|
||||
initLocalIndex();
|
||||
} catch (error) {
|
||||
console.error(error instanceof Error ? error.message : String(error));
|
||||
process.exit(1);
|
||||
exitWithError(error);
|
||||
}
|
||||
break;
|
||||
|
||||
@ -4166,8 +4229,7 @@ if (isMain) {
|
||||
collection: embedCollection,
|
||||
});
|
||||
} catch (error) {
|
||||
console.error(error instanceof Error ? error.message : String(error));
|
||||
process.exit(1);
|
||||
exitWithError(error);
|
||||
}
|
||||
break;
|
||||
|
||||
@ -4314,8 +4376,8 @@ if (isMain) {
|
||||
const { startMcpHttpServer } = await import("../mcp/server.js");
|
||||
try {
|
||||
await startMcpHttpServer(port, { dbPath: getDbPath() });
|
||||
} catch (e: any) {
|
||||
if (e?.code === "EADDRINUSE") {
|
||||
} catch (e: unknown) {
|
||||
if (typeof e === "object" && e !== null && "code" in e && e.code === "EADDRINUSE") {
|
||||
console.error(`Port ${port} already in use. Try a different port with --port.`);
|
||||
process.exit(1);
|
||||
}
|
||||
@ -4359,8 +4421,7 @@ if (isMain) {
|
||||
try {
|
||||
await installSkill(Boolean(cli.values.global), Boolean(cli.values.force), Boolean(cli.values.yes));
|
||||
} catch (error) {
|
||||
console.error(error instanceof Error ? error.message : String(error));
|
||||
process.exit(1);
|
||||
exitWithError(error);
|
||||
}
|
||||
break;
|
||||
}
|
||||
@ -4383,6 +4444,7 @@ if (isMain) {
|
||||
default:
|
||||
console.error(`Unknown subcommand: ${subcommand}`);
|
||||
console.error("Run 'qmd skill help' for usage");
|
||||
printDoctorHint();
|
||||
process.exit(1);
|
||||
}
|
||||
break;
|
||||
@ -4420,6 +4482,7 @@ if (isMain) {
|
||||
default:
|
||||
console.error(`Unknown command: ${cli.command}`);
|
||||
console.error("Run 'qmd --help' for usage.");
|
||||
printDoctorHint();
|
||||
process.exit(1);
|
||||
}
|
||||
|
||||
|
||||
26
src/db.ts
26
src/db.ts
@ -11,10 +11,16 @@
|
||||
* SQLite build before creating any database instances.
|
||||
*/
|
||||
|
||||
export const isBun = typeof globalThis.Bun !== "undefined";
|
||||
export const isBun = "Bun" in globalThis;
|
||||
|
||||
let _Database: any;
|
||||
let _sqliteVecLoad: ((db: any) => void) | null;
|
||||
/**
 * A single value accepted as a bound parameter by `Statement.run`,
 * `Statement.get`, and `Statement.all`.
 */
export type SQLiteValue = string | number | bigint | Buffer | Uint8Array | Float32Array | null;
|
||||
/** A read-only list of bound parameter values. */
export type SQLiteParams = readonly SQLiteValue[];
|
||||
|
||||
/** Constructor signature held in `_Database`: opens a database from a path. */
type DatabaseConstructor = new (path: string) => Database;
|
||||
/** The only part of `Database` the sqlite-vec loader callback needs. */
type LoadableSqliteDatabase = Pick<Database, "loadExtension">;
|
||||
|
||||
let _Database: DatabaseConstructor;
|
||||
let _sqliteVecLoad: ((db: LoadableSqliteDatabase) => void) | null;
|
||||
|
||||
if (isBun) {
|
||||
// Dynamic string prevents tsc from resolving bun:sqlite on Node.js builds
|
||||
@ -44,15 +50,15 @@ if (isBun) {
|
||||
const testDb = new BunDatabase(":memory:");
|
||||
testDb.loadExtension(vecPath);
|
||||
testDb.close();
|
||||
_sqliteVecLoad = (db: any) => db.loadExtension(vecPath);
|
||||
_sqliteVecLoad = (db: LoadableSqliteDatabase) => db.loadExtension(vecPath);
|
||||
} catch {
|
||||
// Vector search won't work, but BM25 and other operations are unaffected.
|
||||
_sqliteVecLoad = null;
|
||||
}
|
||||
} else {
|
||||
_Database = (await import("better-sqlite3")).default;
|
||||
_Database = (await import("better-sqlite3")).default as unknown as DatabaseConstructor;
|
||||
const sqliteVec = await import("sqlite-vec");
|
||||
_sqliteVecLoad = (db: any) => sqliteVec.load(db);
|
||||
_sqliteVecLoad = (db: LoadableSqliteDatabase) => sqliteVec.load(db as Parameters<typeof sqliteVec.load>[0]);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -69,14 +75,14 @@ export interface Database {
|
||||
exec(sql: string): void;
|
||||
prepare(sql: string): Statement;
|
||||
loadExtension(path: string): void;
|
||||
transaction<T extends (...args: any[]) => any>(fn: T): T;
|
||||
transaction<T extends (...args: SQLiteValue[]) => unknown>(fn: T): T;
|
||||
close(): void;
|
||||
}
|
||||
|
||||
export interface Statement {
|
||||
run(...params: any[]): { changes: number; lastInsertRowid: number | bigint };
|
||||
get(...params: any[]): any;
|
||||
all(...params: any[]): any[];
|
||||
run(...params: SQLiteValue[]): { changes: number; lastInsertRowid: number | bigint };
|
||||
get<T = unknown>(...params: SQLiteValue[]): T | undefined;
|
||||
all<T = unknown>(...params: SQLiteValue[]): T[];
|
||||
}
|
||||
|
||||
/**
|
||||
|
||||
55
src/llm.ts
55
src/llm.ts
@ -11,8 +11,12 @@ import type {
|
||||
Token as LlamaToken,
|
||||
} from "node-llama-cpp";
|
||||
|
||||
/** Chunk type accepted by the temporary `process.stdout.write` replacement. */
type StdoutChunk = string | Uint8Array;
|
||||
/** Completion callback forwarded to `process.stderr.write`. */
type WriteCallback = (err?: Error | null) => void;
|
||||
|
||||
type NodeLlamaCppModule = {
|
||||
getLlama: (options: Record<string, unknown>) => Promise<Llama>;
|
||||
getLlamaGpuTypes?: (include?: "supported" | "allValid") => Promise<LlamaGpuMode[]>;
|
||||
resolveModelFile: (model: string, cacheDir: string) => Promise<string>;
|
||||
LlamaChatSession: new (options: { contextSequence: unknown }) => {
|
||||
prompt: (prompt: string, options?: Record<string, unknown>) => Promise<string>;
|
||||
@ -47,8 +51,11 @@ let originalStdoutWrite: StdoutWrite | null = null;
|
||||
export async function withNativeStdoutRedirectedToStderr<T>(fn: () => Promise<T>): Promise<T> {
|
||||
if (nativeStdoutRedirectDepth === 0) {
|
||||
originalStdoutWrite = process.stdout.write.bind(process.stdout) as StdoutWrite;
|
||||
process.stdout.write = ((chunk: any, encoding?: any, cb?: any) => {
|
||||
return process.stderr.write(chunk, encoding, cb as any);
|
||||
process.stdout.write = ((chunk: StdoutChunk, encodingOrCallback?: BufferEncoding | WriteCallback, callback?: WriteCallback) => {
|
||||
if (typeof encodingOrCallback === "function") {
|
||||
return process.stderr.write(chunk, encodingOrCallback);
|
||||
}
|
||||
return process.stderr.write(chunk, encodingOrCallback, callback);
|
||||
}) as StdoutWrite;
|
||||
}
|
||||
nativeStdoutRedirectDepth++;
|
||||
@ -839,14 +846,15 @@ export class LlamaCpp implements LLM {
|
||||
if (!this.llama) {
|
||||
const gpuMode = resolveLlamaGpuMode();
|
||||
|
||||
const { getLlama, LlamaLogLevel } = await loadNodeLlamaCpp();
|
||||
const loadLlama = async (gpu: LlamaGpuMode, sourceBuildAllowed = allowBuild) =>
|
||||
const { getLlama, getLlamaGpuTypes, LlamaLogLevel } = await loadNodeLlamaCpp();
|
||||
const loadLlama = async (gpu: LlamaGpuMode, sourceBuildAllowed = allowBuild, buildOverride?: "auto" | "never") =>
|
||||
await withNativeStdoutRedirectedToStderr(() => getLlama({
|
||||
// Prefer packaged prebuilt bindings before compiling llama.cpp locally.
|
||||
// "autoAttempt" can try to compile a missing requested backend before
|
||||
// falling back to another prebuilt backend; "auto" uses prebuilt/local
|
||||
// binaries first and only builds when none are usable.
|
||||
build: sourceBuildAllowed ? "auto" : "never",
|
||||
// node-llama-cpp documents gpu:"auto" as the best default: Metal on
|
||||
// Apple Silicon, CUDA when fully available, Vulkan where available,
|
||||
// then CPU. Use build:"auto" for normal loads and build:"never" for
|
||||
// diagnostic/probe paths that must not compile llama.cpp.
|
||||
build: buildOverride ?? (sourceBuildAllowed ? "auto" : "never"),
|
||||
logLevel: LlamaLogLevel.error,
|
||||
gpu,
|
||||
progressLogs: false,
|
||||
@ -881,6 +889,30 @@ export class LlamaCpp implements LLM {
|
||||
} else {
|
||||
try {
|
||||
llama = await loadLlama(gpuMode);
|
||||
|
||||
// If node-llama-cpp auto-detection chose CPU, do one no-build pass
|
||||
// over all OS-valid packaged GPU backends. This preserves the
|
||||
// documented auto mode for Metal/CUDA/Vulkan while recovering on
|
||||
// systems where a packaged backend can load but detection is too
|
||||
// conservative. Never compile during these extra probes.
|
||||
if (gpuMode === "auto" && llama.gpu === false && getLlamaGpuTypes) {
|
||||
const candidates = (await getLlamaGpuTypes("allValid"))
|
||||
.filter((candidate): candidate is Exclude<LlamaGpuMode, "auto" | false> => candidate !== false && candidate !== "auto");
|
||||
for (const candidate of candidates) {
|
||||
if (failedGpuInitModes.has(candidate)) continue;
|
||||
try {
|
||||
const gpuLlama = await loadLlama(candidate, false, "never");
|
||||
if (gpuLlama.gpu !== false) {
|
||||
await disposeWithTimeout("CPU llama runtime", () => llama.dispose());
|
||||
llama = gpuLlama;
|
||||
break;
|
||||
}
|
||||
await disposeWithTimeout(`${candidate} probe runtime`, () => gpuLlama.dispose());
|
||||
} catch {
|
||||
failedGpuInitModes.add(candidate);
|
||||
}
|
||||
}
|
||||
}
|
||||
} catch (err) {
|
||||
// GPU backend (e.g. Vulkan/CUDA on headless/driverless machines) can throw at init.
|
||||
// Fall back to CPU so qmd still works, and cache the failure to avoid repeated
|
||||
@ -896,7 +928,7 @@ export class LlamaCpp implements LLM {
|
||||
if (llama.gpu === false && !noGpuAccelerationWarningShown) {
|
||||
noGpuAccelerationWarningShown = true;
|
||||
process.stderr.write(
|
||||
"QMD Warning: no GPU acceleration, running on CPU (slow). Run 'QMD_STATUS_DEVICE_PROBE=1 qmd status' for device details.\n"
|
||||
"QMD Warning: no GPU acceleration, running on CPU (slow). Run 'qmd doctor' for device diagnostics.\n"
|
||||
);
|
||||
}
|
||||
this.llama = llama;
|
||||
@ -1143,9 +1175,8 @@ export class LlamaCpp implements LLM {
|
||||
try {
|
||||
this.rerankContexts.push(await model.createRankingContext({
|
||||
contextSize: LlamaCpp.RERANK_CONTEXT_SIZE,
|
||||
flashAttention: true,
|
||||
...(threads > 0 ? { threads } : {}),
|
||||
} as any));
|
||||
}));
|
||||
} catch {
|
||||
if (this.rerankContexts.length === 0) {
|
||||
// Flash attention might not be supported — retry without it
|
||||
@ -1359,7 +1390,7 @@ export class LlamaCpp implements LLM {
|
||||
temperature,
|
||||
topK: 20,
|
||||
topP: 0.8,
|
||||
onTextChunk: (text) => {
|
||||
onTextChunk: (text: string) => {
|
||||
result += text;
|
||||
},
|
||||
});
|
||||
|
||||
@ -626,9 +626,21 @@ export async function startMcpHttpServer(
|
||||
return new Date().toISOString().slice(11, 23); // HH:mm:ss.SSS
|
||||
}
|
||||
|
||||
/**
 * Loosely-typed view of a JSON-RPC request body, used by `describeRequest`
 * to build a human-readable label. Fields are `unknown` because the body is
 * external input and must be narrowed before use.
 */
type JsonRpcLikeBody = {
|
||||
/** JSON-RPC method; only used when it is a string, otherwise "unknown". */
method?: unknown;
|
||||
/** Request parameters; read when the method is "tools/call". */
params?: {
|
||||
/** Tool name for "tools/call" requests. */
name?: unknown;
|
||||
/** Tool arguments for "tools/call" requests. */
arguments?: Record<string, unknown>;
|
||||
};
|
||||
};
|
||||
/**
 * One element of the `searches` array in a POST /query (alias /search) body,
 * before it is mapped to an `ExpandedQuery`.
 */
type RestSearchInput = {
|
||||
/** Search kind; the handler casts it to 'lex' | 'vec' | 'hyde' without validation. */
type?: unknown;
|
||||
/** Query text; the handler coerces it with `String(s.query || "")`. */
query?: unknown;
|
||||
};
|
||||
|
||||
/** Extract a human-readable label from a JSON-RPC body */
|
||||
function describeRequest(body: any): string {
|
||||
const method = body?.method ?? "unknown";
|
||||
function describeRequest(body: JsonRpcLikeBody): string {
|
||||
const method = typeof body.method === "string" ? body.method : "unknown";
|
||||
if (method === "tools/call") {
|
||||
const tool = body.params?.name ?? "?";
|
||||
const args = body.params?.arguments;
|
||||
@ -672,7 +684,7 @@ export async function startMcpHttpServer(
|
||||
// REST endpoint: POST /query (alias: /search) — structured search without MCP protocol
|
||||
if ((pathname === "/query" || pathname === "/search") && nodeReq.method === "POST") {
|
||||
const rawBody = await collectBody(nodeReq);
|
||||
const params = JSON.parse(rawBody);
|
||||
const params = JSON.parse(rawBody) as Record<string, unknown>;
|
||||
|
||||
// Validate required fields
|
||||
if (!params.searches || !Array.isArray(params.searches)) {
|
||||
@ -682,31 +694,32 @@ export async function startMcpHttpServer(
|
||||
}
|
||||
|
||||
// Map to internal format
|
||||
const queries: ExpandedQuery[] = params.searches.map((s: any) => ({
|
||||
const searches = params.searches as RestSearchInput[];
|
||||
const queries: ExpandedQuery[] = searches.map((s) => ({
|
||||
type: s.type as 'lex' | 'vec' | 'hyde',
|
||||
query: String(s.query || ""),
|
||||
}));
|
||||
|
||||
// Use default collections if none specified
|
||||
const effectiveCollections = params.collections ?? defaultCollectionNames;
|
||||
const effectiveCollections = Array.isArray(params.collections) ? params.collections.map(String) : defaultCollectionNames;
|
||||
|
||||
const results = await store.search({
|
||||
queries,
|
||||
collections: effectiveCollections.length > 0 ? effectiveCollections : undefined,
|
||||
limit: params.limit ?? 10,
|
||||
minScore: params.minScore ?? 0,
|
||||
candidateLimit: params.candidateLimit,
|
||||
intent: params.intent,
|
||||
rerank: params.rerank,
|
||||
limit: typeof params.limit === "number" ? params.limit : 10,
|
||||
minScore: typeof params.minScore === "number" ? params.minScore : 0,
|
||||
candidateLimit: typeof params.candidateLimit === "number" ? params.candidateLimit : undefined,
|
||||
intent: typeof params.intent === "string" ? params.intent : undefined,
|
||||
rerank: typeof params.rerank === "boolean" ? params.rerank : undefined,
|
||||
});
|
||||
|
||||
// Use first lex or vec query for snippet extraction
|
||||
const primaryQuery = params.searches.find((s: any) => s.type === 'lex')?.query
|
||||
|| params.searches.find((s: any) => s.type === 'vec')?.query
|
||||
|| params.searches[0]?.query || "";
|
||||
const primaryQuery = searches.find((s) => s.type === 'lex')?.query
|
||||
|| searches.find((s) => s.type === 'vec')?.query
|
||||
|| searches[0]?.query || "";
|
||||
|
||||
const formatted = results.map(r => {
|
||||
const { line, snippet } = extractSnippet(r.body, primaryQuery, 300, r.bestChunkPos, r.bestChunk.length, params.intent);
|
||||
const { line, snippet } = extractSnippet(r.body, String(primaryQuery), 300, r.bestChunkPos, r.bestChunk.length, typeof params.intent === "string" ? params.intent : undefined);
|
||||
return {
|
||||
docid: `#${r.docid}`,
|
||||
file: r.displayPath,
|
||||
|
||||
161
src/store.ts
161
src/store.ts
@ -1371,18 +1371,30 @@ export async function reindexCollection(
|
||||
return { indexed, updated, unchanged, removed, orphanedCleaned };
|
||||
}
|
||||
|
||||
/**
 * Details about one chunk whose embedding attempt failed.
 * Entries are created and updated by `recordFailure` inside `generateEmbeddings`
 * and surfaced through `EmbedProgress.failures` / `EmbedResult.failures`.
 */
export type EmbedFailure = {
|
||||
/** Path of the document the chunk belongs to (copied from the chunk item). */
path: string;
|
||||
/** Content hash of the document the chunk belongs to. */
hash: string;
|
||||
/** Sequence number of the chunk within its document. */
seq: number;
|
||||
/** How many times a failure has been recorded for this chunk; incremented on every recorded failure. */
attempts: number;
|
||||
/** Human-readable reason for the most recently recorded failure. */
reason: string;
|
||||
};
|
||||
|
||||
/** Progress snapshot passed to the `onProgress` callback while `generateEmbeddings` runs. */
export type EmbedProgress = {
|
||||
/** Number of chunks embedded so far. */
chunksEmbedded: number;
|
||||
/** Chunk total reported next to `chunksEmbedded` (presumably chunks discovered so far — confirm against the batch loop). */
totalChunks: number;
|
||||
/** Input bytes processed so far; drives byte-based progress reporting. */
bytesProcessed: number;
|
||||
/** Total input bytes for the run. */
totalBytes: number;
|
||||
/** Active failed chunks still awaiting a successful retry. */
|
||||
errors: number;
|
||||
/** Per-chunk details for the failures counted in `errors`, when provided. */
failures?: EmbedFailure[];
|
||||
};
|
||||
|
||||
/** Final summary returned by `generateEmbeddings`. */
export type EmbedResult = {
|
||||
/** Number of documents handled by the run (taken from the run's document total). */
docsProcessed: number;
|
||||
/** Number of chunks embedded; reduced when incomplete embeddings are removed after a batch. */
chunksEmbedded: number;
|
||||
/** Active failed chunks that did not recover after retries. */
|
||||
errors: number;
|
||||
/** Per-chunk details for the failures counted in `errors`, when provided. */
failures?: EmbedFailure[];
|
||||
/** Wall-clock duration of the run in milliseconds. */
durationMs: number;
|
||||
};
|
||||
|
||||
@ -1412,12 +1424,14 @@ type EmbeddingDoc = PendingEmbeddingDoc & {
|
||||
|
||||
/** One chunk of a document queued for embedding, with everything needed to embed, store and report on it. */
type ChunkItem = {
|
||||
/** Content hash of the source document. */
hash: string;
|
||||
/** Path of the source document; used when reporting failures. */
path: string;
|
||||
/** Document title passed to the embedding formatter along with the chunk text. */
title: string;
|
||||
/** Raw chunk text. */
text: string;
|
||||
/** Index of this chunk within its document (0-based loop index). */
seq: number;
|
||||
/** Position of the chunk as reported by the chunker; stored with the embedding. */
pos: number;
|
||||
/** Token count reported by the chunker for this chunk. */
tokens: number;
|
||||
/** Encoded byte length of `text`, used for byte-based progress accounting. */
bytes: number;
|
||||
/** Total number of chunks produced for the source document; stored with each embedding. */
expectedTotalChunks: number;
|
||||
};
|
||||
|
||||
function validatePositiveIntegerOption(name: string, value: number | undefined, fallback: number): number {
|
||||
@ -1591,11 +1605,81 @@ export async function generateEmbeddings(
|
||||
// Create a session manager for this llm instance
|
||||
const result = await withLLMSessionForLlm(llm, async (session) => {
|
||||
let chunksEmbedded = 0;
|
||||
let errors = 0;
|
||||
let bytesProcessed = 0;
|
||||
let totalChunks = 0;
|
||||
let vectorTableInitialized = false;
|
||||
const BATCH_SIZE = 32;
|
||||
const RETRY_AFTER_SUCCESSFUL_CHUNKS = 64;
|
||||
const MAX_RETRY_ATTEMPTS = 3;
|
||||
const failures = new Map<string, EmbedFailure>();
|
||||
const retryQueue = new Map<string, ChunkItem>();
|
||||
let successesSinceRetry = 0;
|
||||
|
||||
// Snapshot of every outstanding failure as a fresh array (safe to hand to callers).
const failureList = () => [...failures.values()];
|
||||
// Number of chunks that currently have an unresolved failure recorded.
const activeErrorCount = () => failures.size;
|
||||
// Key shared by `failures` and `retryQueue`: document hash plus chunk sequence number.
const chunkKey = (chunk: ChunkItem) => `${chunk.hash}:${chunk.seq}`;
|
||||
/**
 * Convert an arbitrary thrown value into a short failure reason string.
 *
 * - `Error` instances use their message, falling back to the error name when
 *   the message is empty so the reason is never blank.
 * - Other objects are serialized with `JSON.stringify` when possible, because
 *   `String({})` would only yield the uninformative "[object Object]".
 * - Everything else goes through `String(...)`.
 *
 * The result is capped at 180 characters (177 characters plus "...").
 *
 * @param error The value caught from a failed embedding attempt.
 * @returns A reason of at most 180 characters.
 */
const reasonFromError = (error: unknown): string => {
  const MAX_REASON_LENGTH = 180;
  let raw: string;
  if (error instanceof Error) {
    // An empty message would otherwise produce a blank, useless reason.
    raw = error.message || error.name;
  } else if (typeof error === "object" && error !== null) {
    try {
      // JSON.stringify can return undefined at runtime (e.g. toJSON() returning undefined).
      raw = JSON.stringify(error) ?? String(error);
    } catch {
      // Circular structures and similar: fall back to the default string form.
      raw = String(error);
    }
  } else {
    raw = String(error);
  }
  return raw.length > MAX_REASON_LENGTH ? `${raw.slice(0, MAX_REASON_LENGTH - 3)}...` : raw;
};
|
||||
// Record (or update) a failure for `chunk` and queue the chunk for a later retry.
// Each call bumps the chunk's attempt counter and overwrites the stored reason
// with the latest one.
const recordFailure = (chunk: ChunkItem, reason: string) => {
|
||||
const key = chunkKey(chunk);
|
||||
const previous = failures.get(key);
|
||||
failures.set(key, {
|
||||
path: chunk.path,
|
||||
hash: chunk.hash,
|
||||
seq: chunk.seq,
|
||||
// First recorded failure counts as attempt 1; later failures add to the previous count.
attempts: (previous?.attempts ?? 0) + 1,
|
||||
reason,
|
||||
});
|
||||
// Keep the chunk itself so a retry pass can re-embed it.
retryQueue.set(key, chunk);
|
||||
};
|
||||
// Forget any recorded failure for `chunk` and drop it from the retry queue.
// Called after a successful embed so the error count only reflects outstanding failures.
const clearFailure = (chunk: ChunkItem) => {
|
||||
const key = chunkKey(chunk);
|
||||
failures.delete(key);
|
||||
retryQueue.delete(key);
|
||||
};
|
||||
// Embed a single chunk through the session and store its vector.
// Resolves to true when the vector was stored (and any earlier failure for the
// chunk is cleared); resolves to false after recording a failure. Never throws:
// any error raised inside is converted into a recorded failure.
const tryEmbedChunk = async (chunk: ChunkItem): Promise<boolean> => {
|
||||
try {
|
||||
const text = formatDocForEmbedding(chunk.text, chunk.title, embedModelUri);
|
||||
const result = await session.embed(text, { model });
|
||||
// A falsy result means the embedder produced no vector for this chunk.
if (!result) {
|
||||
recordFailure(chunk, "embedding returned no vector");
|
||||
return false;
|
||||
}
|
||||
insertEmbedding(db, chunk.hash, chunk.seq, chunk.pos, new Float32Array(result.embedding), model, now, chunk.expectedTotalChunks, fingerprint);
|
||||
chunksEmbedded++;
|
||||
// Counts toward the threshold that triggers the next non-forced retry pass.
successesSinceRetry++;
|
||||
clearFailure(chunk);
|
||||
return true;
|
||||
} catch (error) {
|
||||
// Covers errors from formatting, embedding and the insert alike.
recordFailure(chunk, reasonFromError(error));
|
||||
return false;
|
||||
}
|
||||
};
|
||||
// Retry chunks that previously failed.
//   force = false: run at most one pass, and only once at least
//                  RETRY_AFTER_SUCCESSFUL_CHUNKS successes have accumulated
//                  since the last retry pass.
//   force = true:  keep running passes until every queued chunk has either
//                  recovered or reached MAX_RETRY_ATTEMPTS recorded failures.
// Does nothing when the session is invalid or nothing is queued.
const retryFailedChunks = async (force = false) => {
|
||||
if (!session.isValid || retryQueue.size === 0) return;
|
||||
if (!force && successesSinceRetry < RETRY_AFTER_SUCCESSFUL_CHUNKS) return;
|
||||
// Reset so the next non-forced pass waits for a fresh run of successes.
successesSinceRetry = 0;
|
||||
|
||||
// Normal mode: one retry pass after enough unrelated chunks succeeded.
|
||||
// Force mode: we have run out of other chunks for this batch, so keep
|
||||
// retrying outstanding failures until they recover or hit the cap. The
|
||||
// cap prevents endless loops on permanently bad chunks.
|
||||
do {
|
||||
let retried = 0;
|
||||
// Iterate over a copy: tryEmbedChunk mutates retryQueue on success and failure.
// NOTE(review): session.isValid is not re-checked per chunk inside this pass,
// so an expiry mid-pass still spends one attempt on each remaining chunk — confirm intended.
for (const [key, chunk] of [...retryQueue]) {
|
||||
const failure = failures.get(key);
|
||||
// Skip entries with no recorded failure or that already reached the attempt cap.
// The cap counts the initial failure too, since every recorded failure increments attempts.
if (!failure || failure.attempts >= MAX_RETRY_ATTEMPTS) continue;
|
||||
retried++;
|
||||
await tryEmbedChunk(chunk);
|
||||
}
|
||||
// Non-forced mode stops after one pass; forced mode stops once a pass had nothing to retry.
if (!force || retried === 0) break;
|
||||
} while (session.isValid && [...retryQueue].some(([key]) => {
|
||||
const failure = failures.get(key);
|
||||
return !!failure && failure.attempts < MAX_RETRY_ATTEMPTS;
|
||||
}));
|
||||
};
|
||||
const batches = buildEmbeddingBatches(docsToEmbed, maxDocsPerBatch, maxBatchBytes);
|
||||
|
||||
for (const batchMeta of batches) {
|
||||
@ -1625,12 +1709,14 @@ export async function generateEmbeddings(
|
||||
for (let seq = 0; seq < chunks.length; seq++) {
|
||||
batchChunks.push({
|
||||
hash: doc.hash,
|
||||
path: doc.path,
|
||||
title,
|
||||
text: chunks[seq]!.text,
|
||||
seq,
|
||||
pos: chunks[seq]!.pos,
|
||||
tokens: chunks[seq]!.tokens,
|
||||
bytes: encoder.encode(chunks[seq]!.text).length,
|
||||
expectedTotalChunks: chunks.length,
|
||||
});
|
||||
}
|
||||
expectedChunksByHash.set(doc.hash, chunks.length);
|
||||
@ -1640,7 +1726,7 @@ export async function generateEmbeddings(
|
||||
|
||||
if (batchChunks.length === 0) {
|
||||
bytesProcessed += batchBytes;
|
||||
options?.onProgress?.({ chunksEmbedded, totalChunks, bytesProcessed, totalBytes, errors });
|
||||
options?.onProgress?.({ chunksEmbedded, totalChunks, bytesProcessed, totalBytes, errors: activeErrorCount(), failures: failureList() });
|
||||
continue;
|
||||
}
|
||||
|
||||
@ -1661,18 +1747,18 @@ export async function generateEmbeddings(
|
||||
for (let batchStart = 0; batchStart < batchChunks.length; batchStart += BATCH_SIZE) {
|
||||
// Abort early if session has been invalidated (e.g. max duration exceeded)
|
||||
if (!session.isValid) {
|
||||
const remaining = batchChunks.length - batchStart;
|
||||
errors += remaining;
|
||||
console.warn(`⚠ Session expired — skipping ${remaining} remaining chunks`);
|
||||
const remainingChunks = batchChunks.slice(batchStart);
|
||||
for (const chunk of remainingChunks) recordFailure(chunk, "LLM session expired before embedding chunk");
|
||||
console.warn(`⚠ Session expired — skipping ${remainingChunks.length} remaining chunks`);
|
||||
break;
|
||||
}
|
||||
|
||||
// Abort early if error rate is too high (>80% of processed chunks failed)
|
||||
const processed = chunksEmbedded + errors;
|
||||
if (processed >= BATCH_SIZE && errors > processed * 0.8) {
|
||||
const remaining = batchChunks.length - batchStart;
|
||||
errors += remaining;
|
||||
console.warn(`⚠ Error rate too high (${errors}/${processed}) — aborting embedding`);
|
||||
// Abort early if active error rate is too high (>80% of attempted chunks failed)
|
||||
const processed = chunksEmbedded + activeErrorCount();
|
||||
if (processed >= BATCH_SIZE && activeErrorCount() > processed * 0.8) {
|
||||
const remainingChunks = batchChunks.slice(batchStart);
|
||||
for (const chunk of remainingChunks) recordFailure(chunk, "embedding aborted because error rate was too high");
|
||||
console.warn(`⚠ Error rate too high (${activeErrorCount()}/${processed}) — aborting embedding`);
|
||||
break;
|
||||
}
|
||||
|
||||
@ -1686,34 +1772,29 @@ export async function generateEmbeddings(
|
||||
const chunk = chunkBatch[i]!;
|
||||
const embedding = embeddings[i];
|
||||
if (embedding) {
|
||||
insertEmbedding(db, chunk.hash, chunk.seq, chunk.pos, new Float32Array(embedding.embedding), model, now, expectedChunksByHash.get(chunk.hash) ?? 1, fingerprint);
|
||||
insertEmbedding(db, chunk.hash, chunk.seq, chunk.pos, new Float32Array(embedding.embedding), model, now, chunk.expectedTotalChunks, fingerprint);
|
||||
chunksEmbedded++;
|
||||
successesSinceRetry++;
|
||||
clearFailure(chunk);
|
||||
} else {
|
||||
errors++;
|
||||
recordFailure(chunk, "batch embedding returned no vector");
|
||||
}
|
||||
batchChunkBytesProcessed += chunk.bytes;
|
||||
}
|
||||
} catch {
|
||||
// Batch failed — try individual embeddings as fallback
|
||||
// But skip if session is already invalid (avoids N doomed retries)
|
||||
await retryFailedChunks();
|
||||
} catch (error) {
|
||||
// Batch failed — try individual embeddings as fallback. If an
|
||||
// individual retry succeeds, any prior failure for that chunk is
|
||||
// cleared, so the visible error count reflects outstanding failures.
|
||||
const batchReason = reasonFromError(error);
|
||||
if (!session.isValid) {
|
||||
errors += chunkBatch.length;
|
||||
for (const chunk of chunkBatch) recordFailure(chunk, `batch failed and session expired: ${batchReason}`);
|
||||
batchChunkBytesProcessed += chunkBatch.reduce((sum, c) => sum + c.bytes, 0);
|
||||
} else {
|
||||
for (const chunk of chunkBatch) {
|
||||
try {
|
||||
const text = formatDocForEmbedding(chunk.text, chunk.title, embedModelUri);
|
||||
const result = await session.embed(text, { model });
|
||||
if (result) {
|
||||
insertEmbedding(db, chunk.hash, chunk.seq, chunk.pos, new Float32Array(result.embedding), model, now, expectedChunksByHash.get(chunk.hash) ?? 1, fingerprint);
|
||||
chunksEmbedded++;
|
||||
} else {
|
||||
errors++;
|
||||
}
|
||||
} catch {
|
||||
errors++;
|
||||
}
|
||||
await tryEmbedChunk(chunk);
|
||||
batchChunkBytesProcessed += chunk.bytes;
|
||||
await retryFailedChunks();
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -1726,26 +1807,30 @@ export async function generateEmbeddings(
|
||||
totalChunks,
|
||||
bytesProcessed: bytesProcessed + proportionalBytes,
|
||||
totalBytes,
|
||||
errors,
|
||||
errors: activeErrorCount(),
|
||||
failures: failureList(),
|
||||
});
|
||||
}
|
||||
|
||||
await retryFailedChunks(true);
|
||||
|
||||
const removedPartialChunks = removeIncompleteEmbeddings(db, expectedChunksByHash, model);
|
||||
if (removedPartialChunks > 0) {
|
||||
chunksEmbedded = Math.max(0, chunksEmbedded - removedPartialChunks);
|
||||
}
|
||||
|
||||
bytesProcessed += batchBytes;
|
||||
options?.onProgress?.({ chunksEmbedded, totalChunks, bytesProcessed, totalBytes, errors });
|
||||
options?.onProgress?.({ chunksEmbedded, totalChunks, bytesProcessed, totalBytes, errors: activeErrorCount(), failures: failureList() });
|
||||
}
|
||||
|
||||
return { chunksEmbedded, errors };
|
||||
return { chunksEmbedded, errors: activeErrorCount(), failures: failureList() };
|
||||
}, { maxDuration: 30 * 60 * 1000, name: 'generateEmbeddings' });
|
||||
|
||||
return {
|
||||
docsProcessed: totalDocs,
|
||||
chunksEmbedded: result.chunksEmbedded,
|
||||
errors: result.errors,
|
||||
failures: result.failures,
|
||||
durationMs: Date.now() - startTime,
|
||||
};
|
||||
}
|
||||
@ -3635,12 +3720,14 @@ export async function expandQuery(query: string, model: string = DEFAULT_QUERY_M
|
||||
const cached = getCachedResult(db, cacheKey);
|
||||
if (cached) {
|
||||
try {
|
||||
const parsed = JSON.parse(cached) as any[];
|
||||
const parsed = JSON.parse(cached) as unknown;
|
||||
if (!Array.isArray(parsed)) return [];
|
||||
const rows = parsed as Array<Record<string, unknown>>;
|
||||
// Migrate old cache format: { type, text } → { type, query }
|
||||
if (parsed.length > 0 && parsed[0].query) {
|
||||
return parsed as ExpandedQuery[];
|
||||
} else if (parsed.length > 0 && parsed[0].text) {
|
||||
return parsed.map((r: any) => ({ type: r.type, query: r.text }));
|
||||
if (rows.length > 0 && typeof rows[0]?.query === "string") {
|
||||
return rows.map((r) => ({ type: r.type as ExpandedQuery["type"], query: String(r.query) }));
|
||||
} else if (rows.length > 0 && typeof rows[0]?.text === "string") {
|
||||
return rows.map((r) => ({ type: r.type as ExpandedQuery["type"], query: String(r.text) }));
|
||||
}
|
||||
} catch {
|
||||
// Old cache format (pre-typed, newline-separated text) — re-expand
|
||||
|
||||
4
src/types/picomatch.d.ts
vendored
Normal file
4
src/types/picomatch.d.ts
vendored
Normal file
@ -0,0 +1,4 @@
|
||||
// Minimal local type declaration for the "picomatch" module.
// It describes only the default export, called with one or more patterns and an
// optional options bag, which returns a string predicate.
declare module "picomatch" {
|
||||
/** Predicate produced by `picomatch(...)`: reports whether `input` matches. */
export type Matcher = (input: string) => boolean;
|
||||
/** Build a matcher for a single pattern or a list of patterns; `options` is left untyped here. */
export default function picomatch(pattern: string | string[], options?: Record<string, unknown>): Matcher;
|
||||
}
|
||||
@ -27,7 +27,7 @@ function makeTempFixture() {
|
||||
return { root, capturePath, runtimeBin };
|
||||
}
|
||||
|
||||
function makePackage(root: string, packagePath: string, lockfiles: string[] = [], options: { dist?: boolean; source?: boolean; tsx?: boolean } = {}) {
|
||||
function makePackage(root: string, packagePath: string, lockfiles: string[] = [], options: { dist?: boolean; source?: boolean; tsx?: boolean; git?: boolean } = {}) {
|
||||
const packageRoot = join(root, packagePath);
|
||||
const includeDist = options.dist ?? true;
|
||||
mkdirSync(join(packageRoot, "bin"), { recursive: true });
|
||||
@ -45,6 +45,9 @@ function makePackage(root: string, packagePath: string, lockfiles: string[] = []
|
||||
mkdirSync(join(packageRoot, "node_modules", "tsx", "dist"), { recursive: true });
|
||||
writeFileSync(join(packageRoot, "node_modules", "tsx", "dist", "cli.mjs"), "// tsx fixture\n");
|
||||
}
|
||||
if (options.git) {
|
||||
mkdirSync(join(packageRoot, ".git"), { recursive: true });
|
||||
}
|
||||
for (const lockfile of lockfiles) {
|
||||
writeFileSync(join(packageRoot, lockfile), "");
|
||||
}
|
||||
@ -173,9 +176,19 @@ describe("bin/qmd package wrapper", () => {
|
||||
expect(result.scriptPath).toBe(realpathSync(join(packageRoot, "dist", "cli", "qmd.js")));
|
||||
});
|
||||
|
||||
test("falls back to source with bun in an unbuilt Bun checkout", () => {
|
||||
test("packaged tree uses dist even if source files are present", () => {
|
||||
const { root, runtimeBin, capturePath } = makeTempFixture();
|
||||
const packageRoot = makePackage(root, "qmd", ["bun.lock"], { dist: false, source: true });
|
||||
const packageRoot = makePackage(root, "node_modules/@tobilu/qmd", ["bun.lock"], { source: true });
|
||||
|
||||
const result = runWrapper(join(packageRoot, "bin", "qmd"), runtimeBin, capturePath);
|
||||
|
||||
expect(result.runtime).toBe("bun");
|
||||
expect(result.scriptPath).toBe(realpathSync(join(packageRoot, "dist", "cli", "qmd.js")));
|
||||
});
|
||||
|
||||
test("prefers source with bun in a Bun checkout even when dist exists", () => {
|
||||
const { root, runtimeBin, capturePath } = makeTempFixture();
|
||||
const packageRoot = makePackage(root, "qmd", ["bun.lock"], { source: true, git: true });
|
||||
|
||||
const result = runWrapper(join(packageRoot, "bin", "qmd"), runtimeBin, capturePath);
|
||||
|
||||
@ -184,9 +197,9 @@ describe("bin/qmd package wrapper", () => {
|
||||
expect(result.args).toEqual(["--version"]);
|
||||
});
|
||||
|
||||
test("falls back to source through tsx in an unbuilt Node checkout", () => {
|
||||
test("prefers source through tsx in a Node checkout even when dist exists", () => {
|
||||
const { root, runtimeBin, capturePath } = makeTempFixture();
|
||||
const packageRoot = makePackage(root, "qmd", [], { dist: false, source: true, tsx: true });
|
||||
const packageRoot = makePackage(root, "qmd", [], { source: true, tsx: true, git: true });
|
||||
|
||||
const result = runWrapper(join(packageRoot, "bin", "qmd"), runtimeBin, capturePath);
|
||||
|
||||
@ -212,5 +225,6 @@ describe("bin/qmd package wrapper", () => {
|
||||
expect(result.stderr).toContain("qmd is not built");
|
||||
expect(result.stderr).toContain("bun install && bun run build");
|
||||
expect(result.stderr).toContain("npm install && npm run build");
|
||||
expect(result.stderr).toContain("qmd doctor");
|
||||
});
|
||||
});
|
||||
|
||||
@ -630,7 +630,6 @@ describe("CLI Status Command", () => {
|
||||
const overrides = {
|
||||
XDG_CACHE_HOME: join(env.configDir, "cache"),
|
||||
QMD_DOCTOR_DEVICE_PROBE: "0",
|
||||
QMD_STATUS_DEVICE_PROBE: "1",
|
||||
QMD_FORCE_CPU: "1",
|
||||
QMD_LLAMA_GPU: "metal",
|
||||
QMD_EMBED_PARALLELISM: "2",
|
||||
@ -665,15 +664,21 @@ describe("CLI Status Command", () => {
|
||||
test("qmd doctor flags mixed embedding fingerprints", async () => {
|
||||
const db = openDatabase(testDbPath);
|
||||
const doc = db.prepare(`SELECT hash FROM documents WHERE active = 1 LIMIT 1`).get() as { hash: string };
|
||||
const now = new Date().toISOString();
|
||||
db.prepare(`
|
||||
INSERT OR REPLACE INTO content_vectors (hash, seq, pos, model, embed_fingerprint, total_chunks, embedded_at)
|
||||
VALUES (?, 0, 0, ?, 'stale1', 1, ?)
|
||||
`).run(doc.hash, resolveEmbedModelForCli(), new Date().toISOString());
|
||||
VALUES (?, 0, 0, ?, 'stale1', 2, ?)
|
||||
`).run(doc.hash, resolveEmbedModelForCli(), now);
|
||||
db.prepare(`
|
||||
INSERT OR REPLACE INTO content_vectors (hash, seq, pos, model, embed_fingerprint, total_chunks, embedded_at)
|
||||
VALUES (?, 1, 1, ?, 'stale2', 2, ?)
|
||||
`).run(doc.hash, resolveEmbedModelForCli(), now);
|
||||
db.close();
|
||||
|
||||
const { stdout, exitCode } = await runQmd(["doctor"]);
|
||||
expect(exitCode).toBe(0);
|
||||
expect(stdout).toContain("embedding fingerprints");
|
||||
expect(stdout).toContain("mixed named embedding fingerprints");
|
||||
expect(stdout).toContain("stale1");
|
||||
}, 20000);
|
||||
|
||||
@ -684,13 +689,12 @@ describe("CLI Status Command", () => {
|
||||
expect(stdout).toContain("Collection");
|
||||
});
|
||||
|
||||
test("shows device mode without native probing by default", async () => {
|
||||
test("status omits device probing details; doctor owns GPU diagnostics", async () => {
|
||||
const { stdout, exitCode } = await runQmd(["status"]);
|
||||
expect(exitCode).toBe(0);
|
||||
expect(stdout).toContain("Device");
|
||||
expect(stdout).toContain("Mode:");
|
||||
expect(stdout).toContain("not probed");
|
||||
expect(stdout).toContain("QMD_STATUS_DEVICE_PROBE=1");
|
||||
expect(stdout).not.toContain("Device");
|
||||
expect(stdout).not.toContain("QMD_STATUS_DEVICE_PROBE");
|
||||
expect(stdout).not.toContain("not probed");
|
||||
});
|
||||
});
|
||||
|
||||
@ -973,8 +977,9 @@ describe("CLI Error Handling", () => {
|
||||
test("handles unknown command", async () => {
|
||||
const { stderr, exitCode } = await runQmd(["unknowncommand"]);
|
||||
expect(exitCode).toBe(1);
|
||||
// Should indicate unknown command
|
||||
// Should indicate unknown command and point users to diagnostics
|
||||
expect(stderr).toContain("Unknown command");
|
||||
expect(stderr).toContain("qmd doctor");
|
||||
});
|
||||
|
||||
test("uses INDEX_PATH environment variable", async () => {
|
||||
@ -1750,11 +1755,15 @@ describe("status and collection list hide filesystem paths", () => {
|
||||
});
|
||||
|
||||
test("doctor does not show full filesystem paths", async () => {
|
||||
const { stdout, exitCode } = await runQmd(["doctor"], { dbPath: localDbPath, configDir: localConfigDir });
|
||||
const { stdout, exitCode } = await runQmd(["doctor"], {
|
||||
dbPath: localDbPath,
|
||||
configDir: localConfigDir,
|
||||
env: { QMD_DOCTOR_DEVICE_PROBE: "0" },
|
||||
});
|
||||
expect(exitCode).toBe(0);
|
||||
|
||||
expect(stdout).toContain("QMD Doctor");
|
||||
const lines = stdout.split('\n').filter(l => !l.includes('Index:'));
|
||||
const lines = stdout.split('\n').filter(l => !l.includes('Index:') && !l.includes('INDEX_PATH=') && !l.includes('QMD_CONFIG_DIR='));
|
||||
const pathLines = lines.filter(l => l.includes('/Users/') || l.includes('/home/') || l.includes('/tmp/'));
|
||||
expect(pathLines.length).toBe(0);
|
||||
}, 20000);
|
||||
@ -2079,6 +2088,7 @@ describe("mcp stdio launcher", () => {
|
||||
try {
|
||||
await mkdir(join(tempPackage, "bin"), { recursive: true });
|
||||
await mkdir(join(tempPackage, "dist", "cli"), { recursive: true });
|
||||
await writeFile(join(tempPackage, "dist", "cli", "qmd.js"), "// fixture\n");
|
||||
await mkdir(join(tempPackage, "fake-bin"), { recursive: true });
|
||||
|
||||
const qmdBin = join(tempPackage, "bin", "qmd");
|
||||
|
||||
@ -263,7 +263,8 @@ describe("native llama stdout containment", () => {
|
||||
|
||||
const stderr = String(stderrSpy.mock.calls.map(call => call[0]).join(""));
|
||||
expect(stderr.match(/no GPU acceleration/g)?.length).toBe(1);
|
||||
expect(stderr).toContain("QMD_STATUS_DEVICE_PROBE=1 qmd status");
|
||||
expect(stderr).toContain("qmd doctor");
|
||||
expect(stderr).not.toContain("QMD_STATUS_DEVICE_PROBE");
|
||||
} finally {
|
||||
stderrSpy.mockRestore();
|
||||
setNodeLlamaCppModuleForTest(null);
|
||||
|
||||
@ -3242,9 +3242,13 @@ describe("Embedding batching", () => {
|
||||
test("generateEmbeddings does not mark a partially embedded multi-chunk document complete", async () => {
|
||||
const store = await createTestStore();
|
||||
const db = store.db;
|
||||
let embedCalls = 0;
|
||||
const fakeLlm = {
|
||||
async embed(_text: string, _options?: { model?: string }) {
|
||||
return { embedding: [0.1, 0.2, 0.3], model: "fake-embed" };
|
||||
embedCalls++;
|
||||
return embedCalls === 1
|
||||
? { embedding: [0.1, 0.2, 0.3], model: "fake-embed" }
|
||||
: null;
|
||||
},
|
||||
async embedBatch(texts: string[], _options?: { model?: string }) {
|
||||
return texts.map((_text, index) => index === 0
|
||||
@ -3266,6 +3270,7 @@ describe("Embedding batching", () => {
|
||||
const result = await generateEmbeddings(store);
|
||||
|
||||
expect(result.errors).toBeGreaterThan(0);
|
||||
expect(result.failures?.[0]?.attempts).toBe(3);
|
||||
expect(db.prepare(`SELECT COUNT(*) as count FROM content_vectors`).get()).toEqual({ count: 0 });
|
||||
expect(db.prepare(`SELECT COUNT(*) as count FROM vectors_vec`).get()).toEqual({ count: 0 });
|
||||
expect(store.getHashesNeedingEmbedding()).toBe(1);
|
||||
@ -3276,6 +3281,42 @@ describe("Embedding batching", () => {
|
||||
}
|
||||
});
|
||||
|
||||
// Verifies that failures recorded during batch embedding disappear from the
// result once the affected chunks are embedded successfully on a later attempt.
test("generateEmbeddings clears chunk errors after successful retry", async () => {
|
||||
const store = await createTestStore();
|
||||
const db = store.db;
|
||||
const fakeLlm = {
|
||||
// Single-chunk embedding always succeeds, so individual retries can recover.
async embed(_text: string, _options?: { model?: string }) {
|
||||
return { embedding: [0.1, 0.2, 0.3], model: "fake-embed" };
|
||||
},
|
||||
// Batch embedding yields a vector only for the first text and null for the rest,
// which simulates transient per-chunk failures in the batch path.
async embedBatch(texts: string[], _options?: { model?: string }) {
|
||||
return texts.map((_text, index) => index === 0
|
||||
? { embedding: [1, 2, 3], model: "fake-embed" }
|
||||
: null
|
||||
);
|
||||
},
|
||||
};
|
||||
|
||||
setDefaultLlamaCpp(createFakeTokenizer() as any);
|
||||
store.llm = fakeLlm as any;
|
||||
|
||||
try {
|
||||
// Long body, presumably so the document splits into several chunks — confirm against the chunker settings.
await insertTestDocument(db, "docs", {
|
||||
name: "retry-doc",
|
||||
body: "# Retry doc\n\n" + "transient embedding failure ".repeat(260),
|
||||
});
|
||||
|
||||
const result = await generateEmbeddings(store);
|
||||
|
||||
// No outstanding failures may remain after the retries.
expect(result.errors).toBe(0);
|
||||
expect(result.failures).toEqual([]);
|
||||
// Every chunk counted as embedded must have a stored vector row.
expect(db.prepare(`SELECT COUNT(*) as count FROM content_vectors`).get()).toEqual({ count: result.chunksEmbedded });
|
||||
expect(store.getHashesNeedingEmbedding()).toBe(0);
|
||||
} finally {
|
||||
// Restore global tokenizer state and remove the temporary database.
setDefaultLlamaCpp(null);
|
||||
await cleanupTestDb(store);
|
||||
}
|
||||
});
|
||||
|
||||
test("generateEmbeddings opens a long-lived LLM session for embed runs", async () => {
|
||||
const store = await createTestStore();
|
||||
const fakeLlm = createFakeEmbedLlm();
|
||||
|
||||
@ -4,7 +4,7 @@
|
||||
"noEmit": false,
|
||||
"outDir": "dist",
|
||||
"declaration": true,
|
||||
"noImplicitAny": false
|
||||
"noImplicitAny": true
|
||||
},
|
||||
"include": ["src/**/*.ts"],
|
||||
"exclude": ["src/**/*.test.ts", "src/test-preload.ts", "src/bench-*.ts"]
|
||||
|
||||
Loading…
Reference in New Issue
Block a user