From 783359f55cb278988d3f537ff01315401abf6193 Mon Sep 17 00:00:00 2001 From: Jarvis Date: Sat, 21 Mar 2026 20:59:11 -0700 Subject: [PATCH 01/13] =?UTF-8?q?fix:=20increase=20RERANK=5FCONTEXT=5FSIZE?= =?UTF-8?q?=20default=202048=E2=86=924096,=20make=20configurable=20via=20Q?= =?UTF-8?q?MD=5FRERANK=5FCONTEXT=5FSIZE=20env=20var,=20fix=20RERANK=5FTEMP?= =?UTF-8?q?LATE=5FOVERHEAD=20underestimate=20200=E2=86=92512?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Default 2048 was too small for longer documents (session transcripts, CJK text, large markdown files). After truncation the Qwen3 reranker template adds more overhead than the original 200-token estimate, causing node-llama-cpp to throw 'input lengths exceed context size'. Fixes: tobi/qmd#91 tobi/qmd#290 tobi/qmd#291 tobi/qmd#314 --- src/llm.ts | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/src/llm.ts b/src/llm.ts index 2385456..e194f49 100644 --- a/src/llm.ts +++ b/src/llm.ts @@ -757,9 +757,16 @@ export class LlamaCpp implements LLM { * - Combined: drops from 11.6 GB (auto, no flash) to 568 MB per context (20×) */ // Qwen3 reranker template adds ~200 tokens overhead (system prompt, tags, etc.) - // Chunks are max 800 tokens, so 800 + 200 + query ≈ 1100 tokens typical. - // Use 2048 for safety margin. Still 17× less than auto (40960). - private static readonly RERANK_CONTEXT_SIZE = 2048; + // Default 2048 was too small for longer documents (e.g. session transcripts, + // CJK text, or large markdown files) — callers hit "input lengths exceed + // context size" errors even after truncation because the overhead estimate + // was insufficient. 4096 comfortably fits the largest real-world chunks + // while staying well below the 40 960-token auto size. + // Override with QMD_RERANK_CONTEXT_SIZE env var if you need more headroom. 
+ private static readonly RERANK_CONTEXT_SIZE: number = (() => { + const v = parseInt(process.env.QMD_RERANK_CONTEXT_SIZE ?? "", 10); + return Number.isFinite(v) && v > 0 ? v : 4096; + })(); private async ensureRerankContexts(): Promise>[]> { if (this.rerankContexts.length === 0) { const model = await this.ensureRerankModel(); @@ -1099,8 +1106,10 @@ export class LlamaCpp implements LLM { } } - // Qwen3 reranker chat template overhead (system prompt, tags, separators) - private static readonly RERANK_TEMPLATE_OVERHEAD = 200; + // Qwen3 reranker chat template overhead (system prompt, tags, separators). + // Measured at ~350 tokens on real queries; use 512 as a safe upper bound so + // the truncation budget never lets a document slip past the context limit. + private static readonly RERANK_TEMPLATE_OVERHEAD = 512; private static readonly RERANK_TARGET_DOCS_PER_CONTEXT = 10; async rerank( From 939d15652c68d1746c902867436af8a6720fb91b Mon Sep 17 00:00:00 2001 From: Mike Bannister Date: Mon, 23 Mar 2026 11:35:22 -0400 Subject: [PATCH 02/13] fix: use CTE in searchFTS to prevent query planner regression with collection filter MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When searchFTS combines FTS5 MATCH with a collection filter (d.collection = ?) in the same WHERE clause, SQLite's query planner abandons the FTS5 index and falls back to a full scan. This turns an 8ms query into a 17+ second query on large collections (16K+ documents). The fix wraps the FTS5 query in a CTE so it runs first with proper index usage, then filters by collection on the materialized results. Benchmarks on a 16,258-document collection: Before: qmd search "knowctl" -c <collection> → 19.8s After: qmd search "knowctl" -c <collection> → 0.4s The CTE fetches limit*10 candidates from the FTS index so that enough results are likely to survive collection filtering. Without a collection filter the query plan was already optimal; the CTE is still used in that case, but it fetches only `limit` rows, so no extra candidates are materialized. 
--- src/store.ts | 30 ++++++++++++++++++++++++------ 1 file changed, 24 insertions(+), 6 deletions(-) diff --git a/src/store.ts b/src/store.ts index f17404d..73ad96a 100644 --- a/src/store.ts +++ b/src/store.ts @@ -2764,20 +2764,38 @@ export function searchFTS(db: Database, query: string, limit: number = 20, colle const ftsQuery = buildFTS5Query(query); if (!ftsQuery) return []; + // Use a CTE to force FTS5 to run first, then filter by collection. + // Without the CTE, SQLite's query planner combines FTS5 MATCH with the + // collection filter in a single WHERE clause, which can cause it to + // abandon the FTS5 index and fall back to a full scan — turning an 8ms + // query into a 17-second query on large collections. + const params: (string | number)[] = [ftsQuery]; + + // When filtering by collection, fetch extra candidates from the FTS index + // since some will be filtered out. Without a collection filter we can + // fetch exactly the requested limit. + const ftsLimit = collectionName ? limit * 10 : limit; + let sql = ` + WITH fts_matches AS ( + SELECT rowid, bm25(documents_fts, 10.0, 1.0) as bm25_score + FROM documents_fts + WHERE documents_fts MATCH ? + ORDER BY bm25_score ASC + LIMIT ${ftsLimit} + ) SELECT 'qmd://' || d.collection || '/' || d.path as filepath, d.collection || '/' || d.path as display_path, d.title, content.doc as body, d.hash, - bm25(documents_fts, 10.0, 1.0) as bm25_score - FROM documents_fts f - JOIN documents d ON d.id = f.rowid + fm.bm25_score + FROM fts_matches fm + JOIN documents d ON d.id = fm.rowid JOIN content ON content.hash = d.hash - WHERE documents_fts MATCH ? AND d.active = 1 + WHERE d.active = 1 `; - const params: (string | number)[] = [ftsQuery]; if (collectionName) { sql += ` AND d.collection = ?`; @@ -2785,7 +2803,7 @@ export function searchFTS(db: Database, query: string, limit: number = 20, colle } // bm25 lower is better; sort ascending. 
- sql += ` ORDER BY bm25_score ASC LIMIT ?`; + sql += ` ORDER BY fm.bm25_score ASC LIMIT ?`; params.push(limit); const rows = db.prepare(sql).all(...params) as { filepath: string; display_path: string; title: string; body: string; hash: string; bm25_score: number }[]; From bc80e72a0629c413f13b68fb1386b3dc16db5264 Mon Sep 17 00:00:00 2001 From: Mike Bannister Date: Mon, 23 Mar 2026 11:49:25 -0400 Subject: [PATCH 03/13] chore: update bun.lock after dependency install --- bun.lock | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bun.lock b/bun.lock index 74cf1cb..de2be8c 100644 --- a/bun.lock +++ b/bun.lock @@ -12,7 +12,7 @@ "picomatch": "^4.0.0", "sqlite-vec": "^0.1.7-alpha.2", "yaml": "^2.8.2", - "zod": "^4.2.1", + "zod": "4.2.1", }, "devDependencies": { "@types/better-sqlite3": "^7.6.0", From 840a6142234a4da963f95298acff2778aa987b1e Mon Sep 17 00:00:00 2001 From: Antonio Date: Tue, 24 Mar 2026 11:07:01 -0300 Subject: [PATCH 04/13] fix: respect XDG_CACHE_HOME for model cache directory MODEL_CACHE_DIR was hardcoded to ~/.cache/qmd/models/, ignoring the XDG_CACHE_HOME environment variable. This was inconsistent with the rest of the codebase (store.ts, cli/qmd.ts) which already respects XDG paths. Fixes #425 Co-Authored-By: Claude Opus 4.6 (1M context) --- src/llm.ts | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/llm.ts b/src/llm.ts index 2385456..43c9742 100644 --- a/src/llm.ts +++ b/src/llm.ts @@ -209,7 +209,9 @@ export const DEFAULT_RERANK_MODEL_URI = DEFAULT_RERANK_MODEL; export const DEFAULT_GENERATE_MODEL_URI = DEFAULT_GENERATE_MODEL; // Local model cache directory -const MODEL_CACHE_DIR = join(homedir(), ".cache", "qmd", "models"); +const MODEL_CACHE_DIR = process.env.XDG_CACHE_HOME + ? 
join(process.env.XDG_CACHE_HOME, "qmd", "models") + : join(homedir(), ".cache", "qmd", "models"); export const DEFAULT_MODEL_CACHE_DIR = MODEL_CACHE_DIR; export type PullResult = { From 902e14650e23db7d7b835745f977e3fb899ad9d6 Mon Sep 17 00:00:00 2001 From: Antonio Date: Tue, 24 Mar 2026 11:11:31 -0300 Subject: [PATCH 05/13] fix(embed): handle vec0 OR REPLACE limitation in insertEmbedding sqlite-vec's vec0 virtual tables silently ignore the OR REPLACE conflict clause. When a crash interrupts embedding mid-way, chunks that were inserted into vectors_vec but not content_vectors get re-selected by getHashesForEmbedding, causing a UNIQUE constraint error on re-embed. Two changes: 1. Insert content_vectors first so getHashesForEmbedding won't re-select the hash if a crash occurs between the two inserts. 2. Use DELETE + INSERT for vectors_vec instead of INSERT OR REPLACE. Fixes #445 Co-Authored-By: Claude Opus 4.6 (1M context) --- src/store.ts | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/src/store.ts b/src/store.ts index f17404d..1e92605 100644 --- a/src/store.ts +++ b/src/store.ts @@ -2943,6 +2943,12 @@ export function clearAllEmbeddings(db: Database): void { /** * Insert a single embedding into both content_vectors and vectors_vec tables. * The hash_seq key is formatted as "hash_seq" for the vectors_vec table. + * + * content_vectors is inserted first so that getHashesForEmbedding (which checks + * only content_vectors) won't re-select the hash on a crash between the two inserts. + * + * vectors_vec uses DELETE + INSERT instead of INSERT OR REPLACE because sqlite-vec's + * vec0 virtual tables silently ignore the OR REPLACE conflict clause. 
*/ export function insertEmbedding( db: Database, @@ -2954,11 +2960,16 @@ export function insertEmbedding( embeddedAt: string ): void { const hashSeq = `${hash}_${seq}`; - const insertVecStmt = db.prepare(`INSERT OR REPLACE INTO vectors_vec (hash_seq, embedding) VALUES (?, ?)`); - const insertContentVectorStmt = db.prepare(`INSERT OR REPLACE INTO content_vectors (hash, seq, pos, model, embedded_at) VALUES (?, ?, ?, ?, ?)`); - insertVecStmt.run(hashSeq, embedding); + // Insert content_vectors first — crash-safe ordering (see getHashesForEmbedding) + const insertContentVectorStmt = db.prepare(`INSERT OR REPLACE INTO content_vectors (hash, seq, pos, model, embedded_at) VALUES (?, ?, ?, ?, ?)`); insertContentVectorStmt.run(hash, seq, pos, model, embeddedAt); + + // vec0 virtual tables don't support OR REPLACE — use DELETE + INSERT + const deleteVecStmt = db.prepare(`DELETE FROM vectors_vec WHERE hash_seq = ?`); + const insertVecStmt = db.prepare(`INSERT INTO vectors_vec (hash_seq, embedding) VALUES (?, ?)`); + deleteVecStmt.run(hashSeq); + insertVecStmt.run(hashSeq, embedding); } // ============================================================================= From 70db2f5226a30d235f9a378199b92d2a321c003d Mon Sep 17 00:00:00 2001 From: Fred Date: Tue, 24 Mar 2026 22:38:57 +0100 Subject: [PATCH 06/13] fix: prevent qmd embed from running indefinitely MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit After the session's max duration timer fires (30 min), the embedding loop continued iterating over all remaining chunks. Each embed call threw SessionReleasedError, was caught, incremented errors, and the loop moved to the next chunk — burning 100% CPU for days with zero useful output. Three targeted fixes: 1. Check session.isValid before each batch iteration in the embedding loop, breaking early when the session has been aborted. 2. 
Pass the session's AbortSignal to chunkDocumentByTokens so tokenization also respects session expiry instead of running unbounded. 3. Add an error-rate circuit breaker: if >80% of processed chunks fail, abort early rather than grinding through the remaining work. Fixes #440 Co-Authored-By: Claude Opus 4.6 (1M context) --- src/store.ts | 60 ++++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 47 insertions(+), 13 deletions(-) diff --git a/src/store.ts b/src/store.ts index f17404d..ac3cf16 100644 --- a/src/store.ts +++ b/src/store.ts @@ -1337,6 +1337,12 @@ export async function generateEmbeddings( const batches = buildEmbeddingBatches(docsToEmbed, maxDocsPerBatch, maxBatchBytes); for (const batchMeta of batches) { + // Abort early if session has been invalidated + if (!session.isValid) { + console.warn(`⚠ Session expired — skipping remaining document batches`); + break; + } + const batchDocs = getEmbeddingDocsForBatch(db, batchMeta); const batchChunks: ChunkItem[] = []; const batchBytes = batchMeta.reduce((sum, doc) => sum + Math.max(0, doc.bytes), 0); @@ -1345,7 +1351,7 @@ export async function generateEmbeddings( if (!doc.body.trim()) continue; const title = extractTitle(doc.body, doc.path); - const chunks = await chunkDocumentByTokens(doc.body); + const chunks = await chunkDocumentByTokens(doc.body, undefined, undefined, undefined, session.signal); for (let seq = 0; seq < chunks.length; seq++) { batchChunks.push({ @@ -1383,6 +1389,23 @@ export async function generateEmbeddings( let batchChunkBytesProcessed = 0; for (let batchStart = 0; batchStart < batchChunks.length; batchStart += BATCH_SIZE) { + // Abort early if session has been invalidated (e.g. 
max duration exceeded) + if (!session.isValid) { + const remaining = batchChunks.length - batchStart; + errors += remaining; + console.warn(`⚠ Session expired — skipping ${remaining} remaining chunks`); + break; + } + + // Abort early if error rate is too high (>80% of processed chunks failed); log the observed ratio before counting the skipped chunks as errors + const processed = chunksEmbedded + errors; + if (processed >= BATCH_SIZE && errors > processed * 0.8) { + const remaining = batchChunks.length - batchStart; + console.warn(`⚠ Error rate too high (${errors}/${processed}) — aborting embedding`); + errors += remaining; + break; + } + const batchEnd = Math.min(batchStart + BATCH_SIZE, batchChunks.length); const chunkBatch = batchChunks.slice(batchStart, batchEnd); const texts = chunkBatch.map(chunk => formatDocForEmbedding(chunk.text, chunk.title)); @@ -1402,20 +1425,26 @@ export async function generateEmbeddings( } } catch { // Batch failed — try individual embeddings as fallback - for (const chunk of chunkBatch) { - try { - const text = formatDocForEmbedding(chunk.text, chunk.title); - const result = await session.embed(text); - if (result) { - insertEmbedding(db, chunk.hash, chunk.seq, chunk.pos, new Float32Array(result.embedding), model, now); - chunksEmbedded++; - } else { + // But skip if session is already invalid (avoids N doomed retries) + if (!session.isValid) { + errors += chunkBatch.length; + batchChunkBytesProcessed += chunkBatch.reduce((sum, c) => sum + c.bytes, 0); + } else { + for (const chunk of chunkBatch) { + try { + const text = formatDocForEmbedding(chunk.text, chunk.title); + const result = await session.embed(text); + if (result) { + insertEmbedding(db, chunk.hash, chunk.seq, chunk.pos, new Float32Array(result.embedding), model, now); + chunksEmbedded++; + } else { + errors++; + } + } catch { errors++; } - } catch { - errors++; + batchChunkBytesProcessed += chunk.bytes; } - batchChunkBytesProcessed += chunk.bytes; } } @@ -2092,7 +2121,8 @@ export async function chunkDocumentByTokens( content: string, 
maxTokens: number = CHUNK_SIZE_TOKENS, overlapTokens: number = CHUNK_OVERLAP_TOKENS, - windowTokens: number = CHUNK_WINDOW_TOKENS + windowTokens: number = CHUNK_WINDOW_TOKENS, + signal?: AbortSignal ): Promise<{ text: string; pos: number; tokens: number }[]> { const llm = getDefaultLlamaCpp(); @@ -2110,6 +2140,9 @@ export async function chunkDocumentByTokens( const results: { text: string; pos: number; tokens: number }[] = []; for (const chunk of charChunks) { + // Respect abort signal to avoid runaway tokenization + if (signal?.aborted) break; + const tokens = await llm.tokenize(chunk.text); if (tokens.length <= maxTokens) { @@ -2123,6 +2156,7 @@ export async function chunkDocumentByTokens( const subChunks = chunkDocument(chunk.text, safeMaxChars, Math.floor(overlapChars * actualCharsPerToken / 2), Math.floor(windowChars * actualCharsPerToken / 2)); for (const subChunk of subChunks) { + if (signal?.aborted) break; const subTokens = await llm.tokenize(subChunk.text); results.push({ text: subChunk.text, From fa214db367f4c4ee5da8d8421bbf73113e7bc84d Mon Sep 17 00:00:00 2001 From: Ryan Date: Tue, 24 Mar 2026 20:12:45 -0400 Subject: [PATCH 07/13] fix: correct BM25 field weights to include all 3 FTS columns The bm25() call only had 2 weights for 3 columns (filepath, title, body), so the weights were misaligned with the columns: filepath got 10.0, title got 1.0, and body fell back to FTS5's default weight of 1.0 (omitted weights default to 1.0, not 0). Pass explicit weights for all three columns: filepath=1.5, title=4.0, body=1.0, so title matches are boosted over filepath and body matches. 
--- src/store.ts | 2 +- test/store.test.ts | 28 ++++++++++++++++++++++++++++ 2 files changed, 29 insertions(+), 1 deletion(-) diff --git a/src/store.ts b/src/store.ts index f17404d..5770107 100644 --- a/src/store.ts +++ b/src/store.ts @@ -2771,7 +2771,7 @@ export function searchFTS(db: Database, query: string, limit: number = 20, colle d.title, content.doc as body, d.hash, - bm25(documents_fts, 10.0, 1.0) as bm25_score + bm25(documents_fts, 1.5, 4.0, 1.0) as bm25_score FROM documents_fts f JOIN documents d ON d.id = f.rowid JOIN content ON content.hash = d.hash diff --git a/test/store.test.ts b/test/store.test.ts index c5755f8..a55996e 100644 --- a/test/store.test.ts +++ b/test/store.test.ts @@ -1203,6 +1203,34 @@ describe("FTS Search", () => { await cleanupTestDb(store); }); + test("searchFTS title boost outweighs higher body frequency", async () => { + const store = await createTestStore(); + const collectionName = await createTestCollection(); + + // Document with "quantum" mentioned in a longer body but NOT in the title + await insertTestDocument(store.db, collectionName, { + name: "body-only", + title: "General Science Notes", + body: "This research paper discusses quantum mechanics and the quantum model of computation. 
The quantum approach offers improvements over classical methods.", + displayPath: "test/body-only.md", + }); + + // Document with "quantum" in the title but a shorter body mention + await insertTestDocument(store.db, collectionName, { + name: "title-match", + title: "Quantum Computing Overview", + body: "An introduction to the fundamentals of this emerging computing paradigm.", + displayPath: "test/title-match.md", + }); + + const results = store.searchFTS("quantum", 10); + expect(results.length).toBe(2); + // Title-match doc should rank higher due to BM25 column weights boosting title + expect(results[0]!.displayPath).toBe(`${collectionName}/test/title-match.md`); + + await cleanupTestDb(store); + }); + test("searchFTS respects limit parameter", async () => { const store = await createTestStore(); const collectionName = await createTestCollection(); From 7b9bd01226ecf3e779ceac1017c111642ac8dc2b Mon Sep 17 00:00:00 2001 From: Ryan Date: Tue, 24 Mar 2026 20:13:52 -0400 Subject: [PATCH 08/13] fix: handle hyphenated tokens in FTS5 lex queries Hyphenated terms like multi-agent, DEC-0054, gpt-4 were being stripped of hyphens and concatenated (e.g., "multiagent") which missed matches. Now they're split into FTS5 phrase queries ("multi agent") so the porter tokenizer matches them correctly. --- src/store.ts | 54 +++++++++++++++++++++++++++++----- test/structured-search.test.ts | 51 ++++++++++++++++++++++++++++++-- 2 files changed, 96 insertions(+), 9 deletions(-) diff --git a/src/store.ts b/src/store.ts index f17404d..8e1b1f9 100644 --- a/src/store.ts +++ b/src/store.ts @@ -2654,20 +2654,46 @@ function sanitizeFTS5Term(term: string): string { return term.replace(/[^\p{L}\p{N}']/gu, '').toLowerCase(); } +/** + * Check if a token is a hyphenated compound word (e.g., multi-agent, DEC-0054, gpt-4). + * Returns true if the token contains internal hyphens between word/digit characters. 
+ */ +function isHyphenatedToken(token: string): boolean { + return /^[\p{L}\p{N}][\p{L}\p{N}'-]*-[\p{L}\p{N}][\p{L}\p{N}'-]*$/u.test(token); +} + +/** + * Sanitize a hyphenated term into an FTS5 phrase by splitting on hyphens + * and sanitizing each part. Returns the parts joined by spaces for use + * inside FTS5 quotes: "multi agent" matches "multi-agent" in porter tokenizer. + */ +function sanitizeHyphenatedTerm(term: string): string { + return term.split('-').map(t => sanitizeFTS5Term(t)).filter(t => t).join(' '); +} + /** * Parse lex query syntax into FTS5 query. * * Supports: * - Quoted phrases: "exact phrase" → "exact phrase" (exact match) * - Negation: -term or -"phrase" → uses FTS5 NOT operator + * - Hyphenated tokens: multi-agent, DEC-0054, gpt-4 → treated as phrases * - Plain terms: term → "term"* (prefix match) * * FTS5 NOT is a binary operator: `term1 NOT term2` means "match term1 but not term2". * So `-term` only works when there are also positive terms. * + * Hyphen disambiguation: `-sports` at a word boundary is negation, but `multi-agent` + * (where `-` is between word characters) is treated as a hyphenated phrase. + * When a leading `-` is followed by what looks like a hyphenated compound word + * (e.g., `-multi-agent`), the entire token is treated as a negated phrase. 
+ * * Examples: * performance -sports → "performance"* NOT "sports"* * "machine learning" → "machine learning" + * multi-agent memory → "multi agent" AND "memory"* + * DEC-0054 → "dec 0054" + * -multi-agent → NOT "multi agent" */ function buildFTS5Query(query: string): string | null { const positive: string[] = []; @@ -2709,13 +2735,27 @@ function buildFTS5Query(query: string): string | null { while (i < s.length && !/[\s"]/.test(s[i]!)) i++; const term = s.slice(start, i); - const sanitized = sanitizeFTS5Term(term); - if (sanitized) { - const ftsTerm = `"${sanitized}"*`; // Prefix match - if (negated) { - negative.push(ftsTerm); - } else { - positive.push(ftsTerm); + // Handle hyphenated tokens: multi-agent, DEC-0054, gpt-4 + // These get split into phrase queries so FTS5 porter tokenizer matches them. + if (isHyphenatedToken(term)) { + const sanitized = sanitizeHyphenatedTerm(term); + if (sanitized) { + const ftsPhrase = `"${sanitized}"`; // Phrase match (no prefix) + if (negated) { + negative.push(ftsPhrase); + } else { + positive.push(ftsPhrase); + } + } + } else { + const sanitized = sanitizeFTS5Term(term); + if (sanitized) { + const ftsTerm = `"${sanitized}"*`; // Prefix match + if (negated) { + negative.push(ftsTerm); + } else { + positive.push(ftsTerm); + } } } } diff --git a/test/structured-search.test.ts b/test/structured-search.test.ts index 5c4e97f..d704210 100644 --- a/test/structured-search.test.ts +++ b/test/structured-search.test.ts @@ -399,6 +399,14 @@ describe("buildFTS5Query (lex parser)", () => { return term.replace(/[^\p{L}\p{N}']/gu, '').toLowerCase(); } + function isHyphenatedToken(token: string): boolean { + return /^[\p{L}\p{N}][\p{L}\p{N}'-]*-[\p{L}\p{N}][\p{L}\p{N}'-]*$/u.test(token); + } + + function sanitizeHyphenatedTerm(term: string): string { + return term.split('-').map(t => sanitizeFTS5Term(t)).filter(t => t).join(' '); + } + function buildFTS5Query(query: string): string | null { const positive: string[] = []; const negative: 
string[] = []; @@ -424,8 +432,14 @@ describe("buildFTS5Query (lex parser)", () => { const start = i; while (i < s.length && !/[\s"]/.test(s[i]!)) i++; const term = s.slice(start, i); - const sanitized = sanitizeFTS5Term(term); - if (sanitized) (negated ? negative : positive).push(`"${sanitized}"*`); + + if (isHyphenatedToken(term)) { + const sanitized = sanitizeHyphenatedTerm(term); + if (sanitized) (negated ? negative : positive).push(`"${sanitized}"`); + } else { + const sanitized = sanitizeFTS5Term(term); + if (sanitized) (negated ? negative : positive).push(`"${sanitized}"*`); + } } } @@ -488,4 +502,37 @@ describe("buildFTS5Query (lex parser)", () => { test("special chars in terms stripped", () => { expect(buildFTS5Query("hello!world")).toBe('"helloworld"*'); }); + + // Hyphenated token tests + test("hyphenated term → phrase match", () => { + expect(buildFTS5Query("multi-agent")).toBe('"multi agent"'); + }); + + test("hyphenated identifier → phrase match", () => { + expect(buildFTS5Query("DEC-0054")).toBe('"dec 0054"'); + }); + + test("hyphenated model name → phrase match", () => { + expect(buildFTS5Query("gpt-4")).toBe('"gpt 4"'); + }); + + test("multi-hyphen term → phrase match", () => { + expect(buildFTS5Query("foo-bar-baz")).toBe('"foo bar baz"'); + }); + + test("hyphenated term mixed with plain terms", () => { + expect(buildFTS5Query("multi-agent memory")).toBe('"multi agent" AND "memory"*'); + }); + + test("negation still works alongside hyphenated terms", () => { + expect(buildFTS5Query("multi-agent -sports")).toBe('"multi agent" NOT "sports"*'); + }); + + test("negated hyphenated term", () => { + expect(buildFTS5Query("performance -multi-agent")).toBe('"performance"* NOT "multi agent"'); + }); + + test("plain negation still works (not confused with hyphen)", () => { + expect(buildFTS5Query("performance -sports")).toBe('"performance"* NOT "sports"*'); + }); }); From ddecde78dac144c46e73df300c164bf7964f59d6 Mon Sep 17 00:00:00 2001 From: Alexei Ledenev 
Date: Thu, 26 Mar 2026 22:11:07 +0200 Subject: [PATCH 09/13] fix: preserve dots in filenames during handelize The handelize() regex replaced all non-letter/non-number chars with dashes, including dots in the filename stem. This mangled session filenames like "topic-1773595309.753009.md" to "topic-1773595309-753009.md", breaking memory_get path resolution (file not found on disk). Fix: add dot to the preserved character class in the filename regex. After deploying, run qmd-reindex.sh to rebuild indexes with correct paths. --- src/store.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/store.ts b/src/store.ts index f17404d..5cab320 100644 --- a/src/store.ts +++ b/src/store.ts @@ -1610,7 +1610,7 @@ export function handelize(path: string): string { const nameWithoutExt = ext ? segment.slice(0, -ext.length) : segment; const cleanedName = nameWithoutExt - .replace(/[^\p{L}\p{N}$]+/gu, '-') // Keep route marker "$", dash-separate other chars + .replace(/[^\p{L}\p{N}.$]+/gu, '-') // Keep letters, numbers, dots, "$"; dash-separate rest .replace(/^-+|-+$/g, ''); // Remove leading/trailing dashes return cleanedName + ext; From 72f2dd1fe5f37e36d68f5e0711972f5ca1329192 Mon Sep 17 00:00:00 2001 From: Alexei Ledenev Date: Thu, 26 Mar 2026 22:38:09 +0200 Subject: [PATCH 10/13] fix: preserve original filename case in handelize (MEMORY.md not memory.md) --- src/store.ts | 1 - 1 file changed, 1 deletion(-) diff --git a/src/store.ts b/src/store.ts index 5cab320..e67b284 100644 --- a/src/store.ts +++ b/src/store.ts @@ -1595,7 +1595,6 @@ export function handelize(path: string): string { const result = path .replace(/___/g, '/') // Triple underscore becomes folder separator - .toLowerCase() .split('/') .map((segment, idx, arr) => { const isLastSegment = idx === arr.length - 1; From 792992ef653c923daca07394be2d6057665b04f0 Mon Sep 17 00:00:00 2001 From: Niven Date: Fri, 27 Mar 2026 13:10:31 -0700 Subject: [PATCH 11/13] Add rerank parameter to MCP query tool The MCP 
query tool always ran LLM reranking, even for lex-only queries. On CPU-only infrastructure (e.g. Railway), the reranker adds 60-120s per query. The SDK and CLI already support skipping reranking, but the MCP server did not expose this option. Add a `rerank` boolean parameter (default: true) to the MCP query tool's input schema, forwarded to store.search() as the existing `rerank` option. Fixes #477 Co-Authored-By: Claude Opus 4.6 (1M context) --- src/mcp/server.ts | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/mcp/server.ts b/src/mcp/server.ts index f1cc2a9..b7fada7 100644 --- a/src/mcp/server.ts +++ b/src/mcp/server.ts @@ -296,9 +296,12 @@ Intent-aware lex (C++ performance, not sports): intent: z.string().optional().describe( "Background context to disambiguate the query. Example: query='performance', intent='web page load times and Core Web Vitals'. Does not search on its own." ), + rerank: z.boolean().optional().default(true).describe( + "Rerank results using LLM (default: true). Set to false for faster results on CPU-only machines." + ), }, }, - async ({ searches, limit, minScore, candidateLimit, collections, intent }) => { + async ({ searches, limit, minScore, candidateLimit, collections, intent, rerank }) => { // Map to internal format const queries: ExpandedQuery[] = searches.map(s => ({ type: s.type, @@ -313,6 +316,7 @@ Intent-aware lex (C++ performance, not sports): collections: effectiveCollections.length > 0 ? 
effectiveCollections : undefined, limit, minScore, + rerank, intent, }); From cf9991cfa73af9b5a97778a8a6c7bf3556b805a0 Mon Sep 17 00:00:00 2001 From: Surma Date: Fri, 27 Mar 2026 23:11:23 +0000 Subject: [PATCH 12/13] Fix flake --- CHANGELOG.md | 1 + flake.nix | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5ace379..324617e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,7 @@ ### Fixes +- Fix paths in nix flake - Sync stale `bun.lock` (`better-sqlite3` 11.x → 12.x). CI and release script now use `--frozen-lockfile` to prevent recurrence. #386 (thanks @Mic92) diff --git a/flake.nix b/flake.nix index f3a1fc1..4aa95cd 100644 --- a/flake.nix +++ b/flake.nix @@ -48,7 +48,7 @@ cp package.json $out/lib/qmd/ makeWrapper ${pkgs.bun}/bin/bun $out/bin/qmd \ - --add-flags "$out/lib/qmd/src/qmd.ts" \ + --add-flags "$out/lib/qmd/src/cli/qmd.ts" \ --set DYLD_LIBRARY_PATH "${pkgs.sqlite.out}/lib" \ --set LD_LIBRARY_PATH "${pkgs.sqlite.out}/lib" ''; @@ -81,7 +81,7 @@ shellHook = '' export BREW_PREFIX="''${BREW_PREFIX:-${sqliteWithExtensions.out}}" echo "QMD development shell" - echo "Run: bun src/qmd.ts " + echo "Run: bun src/cli/qmd.ts " ''; }; } From 8d343b9da1a5e93c81f517929c94c15be17b0e82 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tobias=20L=C3=BCtke?= Date: Sat, 28 Mar 2026 19:54:18 -0400 Subject: [PATCH 13/13] Update handelize tests for case/dot preservation (#475) PR #475 changed handelize() to preserve original case and dots, but the tests still expected lowercase output. Update assertions to match the new behavior. 
Co-Authored-By: Claude Opus 4.6 --- test/store.helpers.unit.test.ts | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/test/store.helpers.unit.test.ts b/test/store.helpers.unit.test.ts index eb7f8a6..e3c2373 100644 --- a/test/store.helpers.unit.test.ts +++ b/test/store.helpers.unit.test.ts @@ -114,14 +114,14 @@ describe("cleanupOrphanedVectors", () => { // ============================================================================= describe("handelize", () => { - test("converts to lowercase", () => { - expect(handelize("README.md")).toBe("readme.md"); - expect(handelize("MyFile.MD")).toBe("myfile.md"); + test("preserves original case", () => { + expect(handelize("README.md")).toBe("README.md"); + expect(handelize("MyFile.MD")).toBe("MyFile.MD"); }); test("preserves folder structure", () => { expect(handelize("a/b/c/d.md")).toBe("a/b/c/d.md"); - expect(handelize("docs/api/README.md")).toBe("docs/api/readme.md"); + expect(handelize("docs/api/README.md")).toBe("docs/api/README.md"); }); test("replaces non-word characters with dash", () => { @@ -151,7 +151,7 @@ describe("handelize", () => { test("handles complex real-world meeting notes", () => { const complexName = "Money Movement Licensing Review - 2025/11/19 10:25 EST - Notes by Gemini.md"; const result = handelize(complexName); - expect(result).toBe("money-movement-licensing-review-2025-11-19-10-25-est-notes-by-gemini.md"); + expect(result).toBe("Money-Movement-Licensing-Review-2025-11-19-10-25-EST-Notes-by-Gemini.md"); expect(result).not.toContain(" "); expect(result).not.toContain("/"); expect(result).not.toContain(":"); @@ -159,7 +159,7 @@ describe("handelize", () => { test("handles unicode characters", () => { expect(handelize("日本語.md")).toBe("日本語.md"); - expect(handelize("Зоны и проекты.md")).toBe("зоны-и-проекты.md"); + expect(handelize("Зоны и проекты.md")).toBe("Зоны-и-проекты.md"); expect(handelize("café-notes.md")).toBe("café-notes.md"); 
expect(handelize("naïve.md")).toBe("naïve.md"); expect(handelize("日本語-notes.md")).toBe("日本語-notes.md"); @@ -181,13 +181,13 @@ describe("handelize", () => { test("handles dates and times in filenames", () => { expect(handelize("meeting-2025-01-15.md")).toBe("meeting-2025-01-15.md"); expect(handelize("notes 2025/01/15.md")).toBe("notes-2025/01/15.md"); - expect(handelize("call_10:30_AM.md")).toBe("call-10-30-am.md"); + expect(handelize("call_10:30_AM.md")).toBe("call-10-30-AM.md"); }); test("handles special project naming patterns", () => { - expect(handelize("PROJECT_ABC_v2.0.md")).toBe("project-abc-v2-0.md"); - expect(handelize("[WIP] Feature Request.md")).toBe("wip-feature-request.md"); - expect(handelize("(DRAFT) Proposal v1.md")).toBe("draft-proposal-v1.md"); + expect(handelize("PROJECT_ABC_v2.0.md")).toBe("PROJECT-ABC-v2.0.md"); + expect(handelize("[WIP] Feature Request.md")).toBe("WIP-Feature-Request.md"); + expect(handelize("(DRAFT) Proposal v1.md")).toBe("DRAFT-Proposal-v1.md"); }); test("handles symbol-only route filenames", () => {