From 783359f55cb278988d3f537ff01315401abf6193 Mon Sep 17 00:00:00 2001
From: Jarvis <jarvis@sauce.sh>
Date: Sat, 21 Mar 2026 20:59:11 -0700
Subject: [PATCH 01/13] =?UTF-8?q?fix:=20increase=20RERANK=5FCONTEXT=5FSIZE?=
 =?UTF-8?q?=20default=202048=E2=86=924096,=20make=20configurable=20via=20Q?=
 =?UTF-8?q?MD=5FRERANK=5FCONTEXT=5FSIZE=20env=20var,=20fix=20RERANK=5FTEMP?=
 =?UTF-8?q?LATE=5FOVERHEAD=20underestimate=20200=E2=86=92512?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Default 2048 was too small for longer documents (session transcripts, CJK
text, large markdown files). After truncation the Qwen3 reranker template
adds more overhead than the original 200-token estimate, causing node-llama-cpp
to throw 'input lengths exceed context size'.

Fixes: tobi/qmd#91 tobi/qmd#290 tobi/qmd#291 tobi/qmd#314
---
 src/llm.ts | 19 ++++++++++++++-----
 1 file changed, 14 insertions(+), 5 deletions(-)
diff --git a/src/llm.ts b/src/llm.ts
index 2385456..e194f49 100644
--- a/src/llm.ts
+++ b/src/llm.ts
@@ -757,9 +757,16 @@ export class LlamaCpp implements LLM {
    * - Combined: drops from 11.6 GB (auto, no flash) to 568 MB per context (20×)
    */
   // Qwen3 reranker template adds ~200 tokens overhead (system prompt, tags, etc.)
-  // Chunks are max 800 tokens, so 800 + 200 + query ≈ 1100 tokens typical.
-  // Use 2048 for safety margin. Still 17× less than auto (40960).
-  private static readonly RERANK_CONTEXT_SIZE = 2048;
+  // The previous default of 2048 was too small for longer documents (e.g.
+  // session transcripts, CJK text, or large markdown files): callers hit "input
+  // lengths exceed context size" errors even after truncation because the
+  // overhead estimate was too low. The default is now 4096, which comfortably
+  // fits the largest real-world chunks and is still well below auto (40960).
+  // Override with QMD_RERANK_CONTEXT_SIZE env var if you need more headroom.
+  private static readonly RERANK_CONTEXT_SIZE: number = (() => {
+    const v = parseInt(process.env.QMD_RERANK_CONTEXT_SIZE ?? "", 10);
+    return Number.isFinite(v) && v > 0 ? v : 4096;
+  })();
   private async ensureRerankContexts(): Promise<Awaited<ReturnType<LlamaModel["createRankingContext"]>>[]> {
     if (this.rerankContexts.length === 0) {
       const model = await this.ensureRerankModel();
@@ -1099,8 +1106,10 @@ export class LlamaCpp implements LLM {
     }
   }
 
-  // Qwen3 reranker chat template overhead (system prompt, tags, separators)
-  private static readonly RERANK_TEMPLATE_OVERHEAD = 200;
+  // Qwen3 reranker chat template overhead (system prompt, tags, separators).
+  // Measured at ~350 tokens on real queries; use 512 as a safe upper bound so
+  // the truncation budget never lets a document slip past the context limit.
+  private static readonly RERANK_TEMPLATE_OVERHEAD = 512;
   private static readonly RERANK_TARGET_DOCS_PER_CONTEXT = 10;
 
   async rerank(

From 939d15652c68d1746c902867436af8a6720fb91b Mon Sep 17 00:00:00 2001
From: Mike Bannister <notimpossiblemike@gmail.com>
Date: Mon, 23 Mar 2026 11:35:22 -0400
Subject: [PATCH 02/13] fix: use CTE in searchFTS to prevent query planner
 regression with collection filter
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When searchFTS combines FTS5 MATCH with a collection filter (d.collection = ?)
in the same WHERE clause, SQLite's query planner abandons the FTS5 index and
falls back to a full scan. This turns an 8ms query into a 17+ second query on
large collections (16K+ documents).

The fix wraps the FTS5 query in a CTE so it runs first with proper index usage,
then filters by collection on the materialized results.

Benchmarks on a 16,258-document collection:
  Before: qmd search "knowctl" -c <collection> → 19.8s
  After:  qmd search "knowctl" -c <collection> → 0.4s

When a collection filter is present, the CTE fetches limit*10 candidates from
the FTS index so that enough results are likely to survive collection
filtering. Without a collection filter the CTE is still used, but it fetches
only `limit` candidates, so no extra rows are read in that case.
---
 src/store.ts | 30 ++++++++++++++++++++++++------
 1 file changed, 24 insertions(+), 6 deletions(-)

diff --git a/src/store.ts b/src/store.ts
index f17404d..73ad96a 100644
--- a/src/store.ts
+++ b/src/store.ts
@@ -2764,20 +2764,38 @@ export function searchFTS(db: Database, query: string, limit: number = 20, colle
   const ftsQuery = buildFTS5Query(query);
   if (!ftsQuery) return [];
 
+  // Use a CTE to force FTS5 to run first, then filter by collection.
+  // Without the CTE, SQLite's query planner combines FTS5 MATCH with the
+  // collection filter in a single WHERE clause, which can cause it to
+  // abandon the FTS5 index and fall back to a full scan — turning an 8ms
+  // query into a 17-second query on large collections.
+  const params: (string | number)[] = [ftsQuery];
+
+  // When filtering by collection, fetch extra candidates from the FTS index
+  // since some will be filtered out. Without a collection filter we can
+  // fetch exactly the requested limit.
+  const ftsLimit = collectionName ? limit * 10 : limit;
+
   let sql = `
+    WITH fts_matches AS (
+      SELECT rowid, bm25(documents_fts, 10.0, 1.0) as bm25_score
+      FROM documents_fts
+      WHERE documents_fts MATCH ?
+      ORDER BY bm25_score ASC
+      LIMIT ${ftsLimit}
+    )
     SELECT
       'qmd://' || d.collection || '/' || d.path as filepath,
       d.collection || '/' || d.path as display_path,
       d.title,
       content.doc as body,
       d.hash,
-      bm25(documents_fts, 10.0, 1.0) as bm25_score
-    FROM documents_fts f
-    JOIN documents d ON d.id = f.rowid
+      fm.bm25_score
+    FROM fts_matches fm
+    JOIN documents d ON d.id = fm.rowid
     JOIN content ON content.hash = d.hash
-    WHERE documents_fts MATCH ? AND d.active = 1
+    WHERE d.active = 1
   `;
-  const params: (string | number)[] = [ftsQuery];
 
   if (collectionName) {
     sql += ` AND d.collection = ?`;
@@ -2785,7 +2803,7 @@ export function searchFTS(db: Database, query: string, limit: number = 20, colle
   }
 
   // bm25 lower is better; sort ascending.
-  sql += ` ORDER BY bm25_score ASC LIMIT ?`;
+  sql += ` ORDER BY fm.bm25_score ASC LIMIT ?`;
   params.push(limit);
 
   const rows = db.prepare(sql).all(...params) as { filepath: string; display_path: string; title: string; body: string; hash: string; bm25_score: number }[];

From bc80e72a0629c413f13b68fb1386b3dc16db5264 Mon Sep 17 00:00:00 2001
From: Mike Bannister <notimpossiblemike@gmail.com>
Date: Mon, 23 Mar 2026 11:49:25 -0400
Subject: [PATCH 03/13] chore: update bun.lock after dependency install

---
 bun.lock | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/bun.lock b/bun.lock
index 74cf1cb..de2be8c 100644
--- a/bun.lock
+++ b/bun.lock
@@ -12,7 +12,7 @@
         "picomatch": "^4.0.0",
         "sqlite-vec": "^0.1.7-alpha.2",
         "yaml": "^2.8.2",
-        "zod": "^4.2.1",
+        "zod": "4.2.1",
       },
       "devDependencies": {
         "@types/better-sqlite3": "^7.6.0",

From 840a6142234a4da963f95298acff2778aa987b1e Mon Sep 17 00:00:00 2001
From: Antonio <ajgcvm@gmail.com>
Date: Tue, 24 Mar 2026 11:07:01 -0300
Subject: [PATCH 04/13] fix: respect XDG_CACHE_HOME for model cache directory

MODEL_CACHE_DIR was hardcoded to ~/.cache/qmd/models/, ignoring the
XDG_CACHE_HOME environment variable. This was inconsistent with the rest
of the codebase (store.ts, cli/qmd.ts) which already respects XDG paths.

Fixes #425

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 src/llm.ts | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/llm.ts b/src/llm.ts
index 2385456..43c9742 100644
--- a/src/llm.ts
+++ b/src/llm.ts
@@ -209,7 +209,9 @@ export const DEFAULT_RERANK_MODEL_URI = DEFAULT_RERANK_MODEL;
 export const DEFAULT_GENERATE_MODEL_URI = DEFAULT_GENERATE_MODEL;
 
 // Local model cache directory
-const MODEL_CACHE_DIR = join(homedir(), ".cache", "qmd", "models");
+const MODEL_CACHE_DIR = process.env.XDG_CACHE_HOME
+  ? join(process.env.XDG_CACHE_HOME, "qmd", "models")
+  : join(homedir(), ".cache", "qmd", "models");
 export const DEFAULT_MODEL_CACHE_DIR = MODEL_CACHE_DIR;
 
 export type PullResult = {

From 902e14650e23db7d7b835745f977e3fb899ad9d6 Mon Sep 17 00:00:00 2001
From: Antonio <ajgcvm@gmail.com>
Date: Tue, 24 Mar 2026 11:11:31 -0300
Subject: [PATCH 05/13] fix(embed): handle vec0 OR REPLACE limitation in
 insertEmbedding

sqlite-vec's vec0 virtual tables silently ignore the OR REPLACE conflict
clause. When a crash interrupts embedding mid-way, chunks that were
inserted into vectors_vec but not content_vectors get re-selected by
getHashesForEmbedding, causing a UNIQUE constraint error on re-embed.

Two changes:
1. Insert content_vectors first so getHashesForEmbedding won't re-select
   the hash if a crash occurs between the two inserts.
2. Use DELETE + INSERT for vectors_vec instead of INSERT OR REPLACE.

Fixes #445

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 src/store.ts | 17 ++++++++++++++---
 1 file changed, 14 insertions(+), 3 deletions(-)

diff --git a/src/store.ts b/src/store.ts
index f17404d..1e92605 100644
--- a/src/store.ts
+++ b/src/store.ts
@@ -2943,6 +2943,12 @@ export function clearAllEmbeddings(db: Database): void {
 /**
  * Insert a single embedding into both content_vectors and vectors_vec tables.
  * The hash_seq key is formatted as "hash_seq" for the vectors_vec table.
+ *
+ * content_vectors is inserted first so that getHashesForEmbedding (which checks
+ * only content_vectors) won't re-select the hash on a crash between the two inserts.
+ *
+ * vectors_vec uses DELETE + INSERT instead of INSERT OR REPLACE because sqlite-vec's
+ * vec0 virtual tables silently ignore the OR REPLACE conflict clause.
  */
 export function insertEmbedding(
   db: Database,
@@ -2954,11 +2960,16 @@ export function insertEmbedding(
   embeddedAt: string
 ): void {
   const hashSeq = `${hash}_${seq}`;
-  const insertVecStmt = db.prepare(`INSERT OR REPLACE INTO vectors_vec (hash_seq, embedding) VALUES (?, ?)`);
-  const insertContentVectorStmt = db.prepare(`INSERT OR REPLACE INTO content_vectors (hash, seq, pos, model, embedded_at) VALUES (?, ?, ?, ?, ?)`);
 
-  insertVecStmt.run(hashSeq, embedding);
+  // Insert content_vectors first — crash-safe ordering (see getHashesForEmbedding)
+  const insertContentVectorStmt = db.prepare(`INSERT OR REPLACE INTO content_vectors (hash, seq, pos, model, embedded_at) VALUES (?, ?, ?, ?, ?)`);
   insertContentVectorStmt.run(hash, seq, pos, model, embeddedAt);
+
+  // vec0 virtual tables don't support OR REPLACE — use DELETE + INSERT
+  const deleteVecStmt = db.prepare(`DELETE FROM vectors_vec WHERE hash_seq = ?`);
+  const insertVecStmt = db.prepare(`INSERT INTO vectors_vec (hash_seq, embedding) VALUES (?, ?)`);
+  deleteVecStmt.run(hashSeq);
+  insertVecStmt.run(hashSeq, embedding);
 }
 
 // =============================================================================

From 70db2f5226a30d235f9a378199b92d2a321c003d Mon Sep 17 00:00:00 2001
From: Fred <frederic@fornini.com>
Date: Tue, 24 Mar 2026 22:38:57 +0100
Subject: [PATCH 06/13] fix: prevent qmd embed from running indefinitely
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

After the session's max duration timer fires (30 min), the embedding loop
continued iterating over all remaining chunks. Each embed call threw
SessionReleasedError, was caught, incremented errors, and the loop moved
to the next chunk — burning 100% CPU for days with zero useful output.

Three targeted fixes:

1. Check session.isValid before each batch iteration in the embedding loop,
   breaking early when the session has been aborted.

2. Pass the session's AbortSignal to chunkDocumentByTokens so tokenization
   also respects session expiry instead of running unbounded.

3. Add an error-rate circuit breaker: if >80% of processed chunks fail,
   abort early rather than grinding through the remaining work.

Fixes #440

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 src/store.ts | 60 ++++++++++++++++++++++++++++++++++++++++------------
 1 file changed, 47 insertions(+), 13 deletions(-)

diff --git a/src/store.ts b/src/store.ts
index f17404d..ac3cf16 100644
--- a/src/store.ts
+++ b/src/store.ts
@@ -1337,6 +1337,12 @@ export async function generateEmbeddings(
     const batches = buildEmbeddingBatches(docsToEmbed, maxDocsPerBatch, maxBatchBytes);
 
     for (const batchMeta of batches) {
+      // Abort early if session has been invalidated
+      if (!session.isValid) {
+        console.warn(`⚠ Session expired — skipping remaining document batches`);
+        break;
+      }
+
       const batchDocs = getEmbeddingDocsForBatch(db, batchMeta);
       const batchChunks: ChunkItem[] = [];
       const batchBytes = batchMeta.reduce((sum, doc) => sum + Math.max(0, doc.bytes), 0);
@@ -1345,7 +1351,7 @@ export async function generateEmbeddings(
         if (!doc.body.trim()) continue;
 
         const title = extractTitle(doc.body, doc.path);
-        const chunks = await chunkDocumentByTokens(doc.body);
+        const chunks = await chunkDocumentByTokens(doc.body, undefined, undefined, undefined, session.signal);
 
         for (let seq = 0; seq < chunks.length; seq++) {
           batchChunks.push({
@@ -1383,6 +1389,23 @@ export async function generateEmbeddings(
       let batchChunkBytesProcessed = 0;
 
       for (let batchStart = 0; batchStart < batchChunks.length; batchStart += BATCH_SIZE) {
+        // Abort early if session has been invalidated (e.g. max duration exceeded)
+        if (!session.isValid) {
+          const remaining = batchChunks.length - batchStart;
+          errors += remaining;
+          console.warn(`⚠ Session expired — skipping ${remaining} remaining chunks`);
+          break;
+        }
+
+        // Abort early if error rate is too high (>80% of processed chunks failed)
+        const processed = chunksEmbedded + errors;
+        if (processed >= BATCH_SIZE && errors > processed * 0.8) {
+          const remaining = batchChunks.length - batchStart;
+          errors += remaining;
+          console.warn(`⚠ Error rate too high (${errors}/${processed}) — aborting embedding`);
+          break;
+        }
+
         const batchEnd = Math.min(batchStart + BATCH_SIZE, batchChunks.length);
         const chunkBatch = batchChunks.slice(batchStart, batchEnd);
         const texts = chunkBatch.map(chunk => formatDocForEmbedding(chunk.text, chunk.title));
@@ -1402,20 +1425,26 @@ export async function generateEmbeddings(
           }
         } catch {
           // Batch failed — try individual embeddings as fallback
-          for (const chunk of chunkBatch) {
-            try {
-              const text = formatDocForEmbedding(chunk.text, chunk.title);
-              const result = await session.embed(text);
-              if (result) {
-                insertEmbedding(db, chunk.hash, chunk.seq, chunk.pos, new Float32Array(result.embedding), model, now);
-                chunksEmbedded++;
-              } else {
+          // But skip if session is already invalid (avoids N doomed retries)
+          if (!session.isValid) {
+            errors += chunkBatch.length;
+            batchChunkBytesProcessed += chunkBatch.reduce((sum, c) => sum + c.bytes, 0);
+          } else {
+            for (const chunk of chunkBatch) {
+              try {
+                const text = formatDocForEmbedding(chunk.text, chunk.title);
+                const result = await session.embed(text);
+                if (result) {
+                  insertEmbedding(db, chunk.hash, chunk.seq, chunk.pos, new Float32Array(result.embedding), model, now);
+                  chunksEmbedded++;
+                } else {
+                  errors++;
+                }
+              } catch {
                 errors++;
               }
-            } catch {
-              errors++;
+              batchChunkBytesProcessed += chunk.bytes;
             }
-            batchChunkBytesProcessed += chunk.bytes;
           }
         }
 
@@ -2092,7 +2121,8 @@ export async function chunkDocumentByTokens(
   content: string,
   maxTokens: number = CHUNK_SIZE_TOKENS,
   overlapTokens: number = CHUNK_OVERLAP_TOKENS,
-  windowTokens: number = CHUNK_WINDOW_TOKENS
+  windowTokens: number = CHUNK_WINDOW_TOKENS,
+  signal?: AbortSignal
 ): Promise<{ text: string; pos: number; tokens: number }[]> {
   const llm = getDefaultLlamaCpp();
 
@@ -2110,6 +2140,9 @@ export async function chunkDocumentByTokens(
   const results: { text: string; pos: number; tokens: number }[] = [];
 
   for (const chunk of charChunks) {
+    // Respect abort signal to avoid runaway tokenization
+    if (signal?.aborted) break;
+
     const tokens = await llm.tokenize(chunk.text);
 
     if (tokens.length <= maxTokens) {
@@ -2123,6 +2156,7 @@ export async function chunkDocumentByTokens(
       const subChunks = chunkDocument(chunk.text, safeMaxChars, Math.floor(overlapChars * actualCharsPerToken / 2), Math.floor(windowChars * actualCharsPerToken / 2));
 
       for (const subChunk of subChunks) {
+        if (signal?.aborted) break;
         const subTokens = await llm.tokenize(subChunk.text);
         results.push({
           text: subChunk.text,

From fa214db367f4c4ee5da8d8421bbf73113e7bc84d Mon Sep 17 00:00:00 2001
From: Ryan <Goldstein>
Date: Tue, 24 Mar 2026 20:12:45 -0400
Subject: [PATCH 07/13] fix: correct BM25 field weights to include all 3 FTS
 columns

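The bm25() call only had 2 weights for 3 columns (filepath, title, body).
FTS5 assigns omitted trailing weights 1.0, so the effective weights were
filepath=10.0, title=1.0, body=1.0 — the boost landed on filepath instead of
title. Pass all three weights explicitly: filepath=1.5, title=4.0, body=1.0,
so title matches are boosted over filepath and body matches.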
---
 src/store.ts       |  2 +-
 test/store.test.ts | 28 ++++++++++++++++++++++++++++
 2 files changed, 29 insertions(+), 1 deletion(-)

diff --git a/src/store.ts b/src/store.ts
index f17404d..5770107 100644
--- a/src/store.ts
+++ b/src/store.ts
@@ -2771,7 +2771,7 @@ export function searchFTS(db: Database, query: string, limit: number = 20, colle
       d.title,
       content.doc as body,
       d.hash,
-      bm25(documents_fts, 10.0, 1.0) as bm25_score
+      bm25(documents_fts, 1.5, 4.0, 1.0) as bm25_score
     FROM documents_fts f
     JOIN documents d ON d.id = f.rowid
     JOIN content ON content.hash = d.hash
diff --git a/test/store.test.ts b/test/store.test.ts
index c5755f8..a55996e 100644
--- a/test/store.test.ts
+++ b/test/store.test.ts
@@ -1203,6 +1203,34 @@ describe("FTS Search", () => {
     await cleanupTestDb(store);
   });
 
+  test("searchFTS title boost outweighs higher body frequency", async () => {
+    const store = await createTestStore();
+    const collectionName = await createTestCollection();
+
+    // Document with "quantum" mentioned in a longer body but NOT in the title
+    await insertTestDocument(store.db, collectionName, {
+      name: "body-only",
+      title: "General Science Notes",
+      body: "This research paper discusses quantum mechanics and the quantum model of computation. The quantum approach offers improvements over classical methods.",
+      displayPath: "test/body-only.md",
+    });
+
+    // Document with "quantum" in the title but a shorter body mention
+    await insertTestDocument(store.db, collectionName, {
+      name: "title-match",
+      title: "Quantum Computing Overview",
+      body: "An introduction to the fundamentals of this emerging computing paradigm.",
+      displayPath: "test/title-match.md",
+    });
+
+    const results = store.searchFTS("quantum", 10);
+    expect(results.length).toBe(2);
+    // Title-match doc should rank higher due to BM25 column weights boosting title
+    expect(results[0]!.displayPath).toBe(`${collectionName}/test/title-match.md`);
+
+    await cleanupTestDb(store);
+  });
+
   test("searchFTS respects limit parameter", async () => {
     const store = await createTestStore();
     const collectionName = await createTestCollection();

From 7b9bd01226ecf3e779ceac1017c111642ac8dc2b Mon Sep 17 00:00:00 2001
From: Ryan <Goldstein>
Date: Tue, 24 Mar 2026 20:13:52 -0400
Subject: [PATCH 08/13] fix: handle hyphenated tokens in FTS5 lex queries

Hyphenated terms like multi-agent, DEC-0054, gpt-4 were being stripped
of hyphens and concatenated (e.g., "multiagent") which missed matches.
Now they're split into FTS5 phrase queries ("multi agent") so the porter
tokenizer matches them correctly.
---
 src/store.ts                   | 54 +++++++++++++++++++++++++++++-----
 test/structured-search.test.ts | 51 ++++++++++++++++++++++++++++++--
 2 files changed, 96 insertions(+), 9 deletions(-)

diff --git a/src/store.ts b/src/store.ts
index f17404d..8e1b1f9 100644
--- a/src/store.ts
+++ b/src/store.ts
@@ -2654,20 +2654,46 @@ function sanitizeFTS5Term(term: string): string {
   return term.replace(/[^\p{L}\p{N}']/gu, '').toLowerCase();
 }
 
+/**
+ * Check if a token is a hyphenated compound word (e.g., multi-agent, DEC-0054, gpt-4).
+ * Returns true if the token contains internal hyphens between word/digit characters.
+ */
+function isHyphenatedToken(token: string): boolean {
+  return /^[\p{L}\p{N}][\p{L}\p{N}'-]*-[\p{L}\p{N}][\p{L}\p{N}'-]*$/u.test(token);
+}
+
+/**
+ * Sanitize a hyphenated term into an FTS5 phrase by splitting on hyphens
+ * and sanitizing each part. Returns the parts joined by spaces for use
+ * inside FTS5 quotes: "multi agent" matches "multi-agent" in porter tokenizer.
+ */
+function sanitizeHyphenatedTerm(term: string): string {
+  return term.split('-').map(t => sanitizeFTS5Term(t)).filter(t => t).join(' ');
+}
+
 /**
  * Parse lex query syntax into FTS5 query.
  *
  * Supports:
  * - Quoted phrases: "exact phrase" → "exact phrase" (exact match)
  * - Negation: -term or -"phrase" → uses FTS5 NOT operator
+ * - Hyphenated tokens: multi-agent, DEC-0054, gpt-4 → treated as phrases
  * - Plain terms: term → "term"* (prefix match)
  *
  * FTS5 NOT is a binary operator: `term1 NOT term2` means "match term1 but not term2".
  * So `-term` only works when there are also positive terms.
  *
+ * Hyphen disambiguation: `-sports` at a word boundary is negation, but `multi-agent`
+ * (where `-` is between word characters) is treated as a hyphenated phrase.
+ * When a leading `-` is followed by what looks like a hyphenated compound word
+ * (e.g., `-multi-agent`), the entire token is treated as a negated phrase.
+ *
  * Examples:
  *   performance -sports     → "performance"* NOT "sports"*
  *   "machine learning"      → "machine learning"
+ *   multi-agent memory      → "multi agent" AND "memory"*
+ *   DEC-0054               → "dec 0054"
+ *   -multi-agent            → NOT "multi agent"
  */
 function buildFTS5Query(query: string): string | null {
   const positive: string[] = [];
@@ -2709,13 +2735,27 @@ function buildFTS5Query(query: string): string | null {
       while (i < s.length && !/[\s"]/.test(s[i]!)) i++;
       const term = s.slice(start, i);
 
-      const sanitized = sanitizeFTS5Term(term);
-      if (sanitized) {
-        const ftsTerm = `"${sanitized}"*`;  // Prefix match
-        if (negated) {
-          negative.push(ftsTerm);
-        } else {
-          positive.push(ftsTerm);
+      // Handle hyphenated tokens: multi-agent, DEC-0054, gpt-4
+      // These get split into phrase queries so FTS5 porter tokenizer matches them.
+      if (isHyphenatedToken(term)) {
+        const sanitized = sanitizeHyphenatedTerm(term);
+        if (sanitized) {
+          const ftsPhrase = `"${sanitized}"`;  // Phrase match (no prefix)
+          if (negated) {
+            negative.push(ftsPhrase);
+          } else {
+            positive.push(ftsPhrase);
+          }
+        }
+      } else {
+        const sanitized = sanitizeFTS5Term(term);
+        if (sanitized) {
+          const ftsTerm = `"${sanitized}"*`;  // Prefix match
+          if (negated) {
+            negative.push(ftsTerm);
+          } else {
+            positive.push(ftsTerm);
+          }
         }
       }
     }
diff --git a/test/structured-search.test.ts b/test/structured-search.test.ts
index 5c4e97f..d704210 100644
--- a/test/structured-search.test.ts
+++ b/test/structured-search.test.ts
@@ -399,6 +399,14 @@ describe("buildFTS5Query (lex parser)", () => {
     return term.replace(/[^\p{L}\p{N}']/gu, '').toLowerCase();
   }
 
+  function isHyphenatedToken(token: string): boolean {
+    return /^[\p{L}\p{N}][\p{L}\p{N}'-]*-[\p{L}\p{N}][\p{L}\p{N}'-]*$/u.test(token);
+  }
+
+  function sanitizeHyphenatedTerm(term: string): string {
+    return term.split('-').map(t => sanitizeFTS5Term(t)).filter(t => t).join(' ');
+  }
+
   function buildFTS5Query(query: string): string | null {
     const positive: string[] = [];
     const negative: string[] = [];
@@ -424,8 +432,14 @@ describe("buildFTS5Query (lex parser)", () => {
         const start = i;
         while (i < s.length && !/[\s"]/.test(s[i]!)) i++;
         const term = s.slice(start, i);
-        const sanitized = sanitizeFTS5Term(term);
-        if (sanitized) (negated ? negative : positive).push(`"${sanitized}"*`);
+
+        if (isHyphenatedToken(term)) {
+          const sanitized = sanitizeHyphenatedTerm(term);
+          if (sanitized) (negated ? negative : positive).push(`"${sanitized}"`);
+        } else {
+          const sanitized = sanitizeFTS5Term(term);
+          if (sanitized) (negated ? negative : positive).push(`"${sanitized}"*`);
+        }
       }
     }
 
@@ -488,4 +502,37 @@ describe("buildFTS5Query (lex parser)", () => {
   test("special chars in terms stripped", () => {
     expect(buildFTS5Query("hello!world")).toBe('"helloworld"*');
   });
+
+  // Hyphenated token tests
+  test("hyphenated term → phrase match", () => {
+    expect(buildFTS5Query("multi-agent")).toBe('"multi agent"');
+  });
+
+  test("hyphenated identifier → phrase match", () => {
+    expect(buildFTS5Query("DEC-0054")).toBe('"dec 0054"');
+  });
+
+  test("hyphenated model name → phrase match", () => {
+    expect(buildFTS5Query("gpt-4")).toBe('"gpt 4"');
+  });
+
+  test("multi-hyphen term → phrase match", () => {
+    expect(buildFTS5Query("foo-bar-baz")).toBe('"foo bar baz"');
+  });
+
+  test("hyphenated term mixed with plain terms", () => {
+    expect(buildFTS5Query("multi-agent memory")).toBe('"multi agent" AND "memory"*');
+  });
+
+  test("negation still works alongside hyphenated terms", () => {
+    expect(buildFTS5Query("multi-agent -sports")).toBe('"multi agent" NOT "sports"*');
+  });
+
+  test("negated hyphenated term", () => {
+    expect(buildFTS5Query("performance -multi-agent")).toBe('"performance"* NOT "multi agent"');
+  });
+
+  test("plain negation still works (not confused with hyphen)", () => {
+    expect(buildFTS5Query("performance -sports")).toBe('"performance"* NOT "sports"*');
+  });
 });

From ddecde78dac144c46e73df300c164bf7964f59d6 Mon Sep 17 00:00:00 2001
From: Alexei Ledenev <alexei.led@gmail.com>
Date: Thu, 26 Mar 2026 22:11:07 +0200
Subject: [PATCH 09/13] fix: preserve dots in filenames during handelize

The handelize() regex replaced all non-letter/non-number chars with
dashes, including dots in the filename stem. This mangled session
filenames like "topic-1773595309.753009.md" to "topic-1773595309-753009.md",
breaking memory_get path resolution (file not found on disk).

Fix: add dot to the preserved character class in the filename regex.
After deploying, run qmd-reindex.sh to rebuild indexes with correct paths.
---
 src/store.ts | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/store.ts b/src/store.ts
index f17404d..5cab320 100644
--- a/src/store.ts
+++ b/src/store.ts
@@ -1610,7 +1610,7 @@ export function handelize(path: string): string {
         const nameWithoutExt = ext ? segment.slice(0, -ext.length) : segment;
 
         const cleanedName = nameWithoutExt
-          .replace(/[^\p{L}\p{N}$]+/gu, '-')  // Keep route marker "$", dash-separate other chars
+          .replace(/[^\p{L}\p{N}.$]+/gu, '-')  // Keep letters, numbers, dots, "$"; dash-separate rest
           .replace(/^-+|-+$/g, ''); // Remove leading/trailing dashes
 
         return cleanedName + ext;

From 72f2dd1fe5f37e36d68f5e0711972f5ca1329192 Mon Sep 17 00:00:00 2001
From: Alexei Ledenev <alexei.led@gmail.com>
Date: Thu, 26 Mar 2026 22:38:09 +0200
Subject: [PATCH 10/13] fix: preserve original filename case in handelize
 (MEMORY.md not memory.md)

---
 src/store.ts | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/store.ts b/src/store.ts
index 5cab320..e67b284 100644
--- a/src/store.ts
+++ b/src/store.ts
@@ -1595,7 +1595,6 @@ export function handelize(path: string): string {
 
   const result = path
     .replace(/___/g, '/')       // Triple underscore becomes folder separator
-    .toLowerCase()
     .split('/')
     .map((segment, idx, arr) => {
       const isLastSegment = idx === arr.length - 1;

From 792992ef653c923daca07394be2d6057665b04f0 Mon Sep 17 00:00:00 2001
From: Niven <nivsubscriber@gmail.com>
Date: Fri, 27 Mar 2026 13:10:31 -0700
Subject: [PATCH 11/13] Add rerank parameter to MCP query tool

The MCP query tool always ran LLM reranking, even for lex-only queries.
On CPU-only infrastructure (e.g. Railway), the reranker adds 60-120s
per query. The SDK and CLI already support skipping reranking, but the
MCP server did not expose this option.

Add a `rerank` boolean parameter (default: true) to the MCP query
tool's input schema, forwarded to store.search() as the existing
`rerank` option.

Fixes #477

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 src/mcp/server.ts | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/src/mcp/server.ts b/src/mcp/server.ts
index f1cc2a9..b7fada7 100644
--- a/src/mcp/server.ts
+++ b/src/mcp/server.ts
@@ -296,9 +296,12 @@ Intent-aware lex (C++ performance, not sports):
         intent: z.string().optional().describe(
           "Background context to disambiguate the query. Example: query='performance', intent='web page load times and Core Web Vitals'. Does not search on its own."
         ),
+        rerank: z.boolean().optional().default(true).describe(
+          "Rerank results using LLM (default: true). Set to false for faster results on CPU-only machines."
+        ),
       },
     },
-    async ({ searches, limit, minScore, candidateLimit, collections, intent }) => {
+    async ({ searches, limit, minScore, candidateLimit, collections, intent, rerank }) => {
       // Map to internal format
       const queries: ExpandedQuery[] = searches.map(s => ({
         type: s.type,
@@ -313,6 +316,7 @@ Intent-aware lex (C++ performance, not sports):
         collections: effectiveCollections.length > 0 ? effectiveCollections : undefined,
         limit,
         minScore,
+        rerank,
         intent,
       });
 

From cf9991cfa73af9b5a97778a8a6c7bf3556b805a0 Mon Sep 17 00:00:00 2001
From: Surma <surma@surma.dev>
Date: Fri, 27 Mar 2026 23:11:23 +0000
Subject: [PATCH 12/13] Fix nix flake CLI entry-point path

---
 CHANGELOG.md | 1 +
 flake.nix    | 4 ++--
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 5ace379..324617e 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -4,6 +4,7 @@
 
 ### Fixes
 
+- Fix CLI entry-point path in nix flake (`src/qmd.ts` → `src/cli/qmd.ts`).
 - Sync stale `bun.lock` (`better-sqlite3` 11.x → 12.x). CI and release
   script now use `--frozen-lockfile` to prevent recurrence. #386
   (thanks @Mic92)
diff --git a/flake.nix b/flake.nix
index f3a1fc1..4aa95cd 100644
--- a/flake.nix
+++ b/flake.nix
@@ -48,7 +48,7 @@
             cp package.json $out/lib/qmd/
 
             makeWrapper ${pkgs.bun}/bin/bun $out/bin/qmd \
-              --add-flags "$out/lib/qmd/src/qmd.ts" \
+              --add-flags "$out/lib/qmd/src/cli/qmd.ts" \
               --set DYLD_LIBRARY_PATH "${pkgs.sqlite.out}/lib" \
               --set LD_LIBRARY_PATH "${pkgs.sqlite.out}/lib"
           '';
@@ -81,7 +81,7 @@
           shellHook = ''
             export BREW_PREFIX="''${BREW_PREFIX:-${sqliteWithExtensions.out}}"
             echo "QMD development shell"
-            echo "Run: bun src/qmd.ts <command>"
+            echo "Run: bun src/cli/qmd.ts <command>"
           '';
         };
       }

From 8d343b9da1a5e93c81f517929c94c15be17b0e82 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tobias=20L=C3=BCtke?= <tobi@shopify.com>
Date: Sat, 28 Mar 2026 19:54:18 -0400
Subject: [PATCH 13/13] Update handelize tests for case/dot preservation (#475)

PR #475 changed handelize() to preserve original case and dots,
but the tests still expected lowercase output. Update assertions
to match the new behavior.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 test/store.helpers.unit.test.ts | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/test/store.helpers.unit.test.ts b/test/store.helpers.unit.test.ts
index eb7f8a6..e3c2373 100644
--- a/test/store.helpers.unit.test.ts
+++ b/test/store.helpers.unit.test.ts
@@ -114,14 +114,14 @@ describe("cleanupOrphanedVectors", () => {
 // =============================================================================
 
 describe("handelize", () => {
-  test("converts to lowercase", () => {
-    expect(handelize("README.md")).toBe("readme.md");
-    expect(handelize("MyFile.MD")).toBe("myfile.md");
+  test("preserves original case", () => {
+    expect(handelize("README.md")).toBe("README.md");
+    expect(handelize("MyFile.MD")).toBe("MyFile.MD");
   });
 
   test("preserves folder structure", () => {
     expect(handelize("a/b/c/d.md")).toBe("a/b/c/d.md");
-    expect(handelize("docs/api/README.md")).toBe("docs/api/readme.md");
+    expect(handelize("docs/api/README.md")).toBe("docs/api/README.md");
   });
 
   test("replaces non-word characters with dash", () => {
@@ -151,7 +151,7 @@ describe("handelize", () => {
   test("handles complex real-world meeting notes", () => {
     const complexName = "Money Movement Licensing Review - 2025／11／19 10:25 EST - Notes by Gemini.md";
     const result = handelize(complexName);
-    expect(result).toBe("money-movement-licensing-review-2025-11-19-10-25-est-notes-by-gemini.md");
+    expect(result).toBe("Money-Movement-Licensing-Review-2025-11-19-10-25-EST-Notes-by-Gemini.md");
     expect(result).not.toContain(" ");
     expect(result).not.toContain("／");
     expect(result).not.toContain(":");
@@ -159,7 +159,7 @@ describe("handelize", () => {
 
   test("handles unicode characters", () => {
     expect(handelize("日本語.md")).toBe("日本語.md");
-    expect(handelize("Зоны и проекты.md")).toBe("зоны-и-проекты.md");
+    expect(handelize("Зоны и проекты.md")).toBe("Зоны-и-проекты.md");
     expect(handelize("café-notes.md")).toBe("café-notes.md");
     expect(handelize("naïve.md")).toBe("naïve.md");
     expect(handelize("日本語-notes.md")).toBe("日本語-notes.md");
@@ -181,13 +181,13 @@ describe("handelize", () => {
   test("handles dates and times in filenames", () => {
     expect(handelize("meeting-2025-01-15.md")).toBe("meeting-2025-01-15.md");
     expect(handelize("notes 2025/01/15.md")).toBe("notes-2025/01/15.md");
-    expect(handelize("call_10:30_AM.md")).toBe("call-10-30-am.md");
+    expect(handelize("call_10:30_AM.md")).toBe("call-10-30-AM.md");
   });
 
   test("handles special project naming patterns", () => {
-    expect(handelize("PROJECT_ABC_v2.0.md")).toBe("project-abc-v2-0.md");
-    expect(handelize("[WIP] Feature Request.md")).toBe("wip-feature-request.md");
-    expect(handelize("(DRAFT) Proposal v1.md")).toBe("draft-proposal-v1.md");
+    expect(handelize("PROJECT_ABC_v2.0.md")).toBe("PROJECT-ABC-v2.0.md");
+    expect(handelize("[WIP] Feature Request.md")).toBe("WIP-Feature-Request.md");
+    expect(handelize("(DRAFT) Proposal v1.md")).toBe("DRAFT-Proposal-v1.md");
   });
 
   test("handles symbol-only route filenames", () => {