From 216793380afb1b1ad4084cc8d945e23bf46285e7 Mon Sep 17 00:00:00 2001 From: Brendan McCord Date: Sun, 11 Jan 2026 22:08:32 -0600 Subject: [PATCH] Fix vsearch/query hang caused by sqlite-vec JOIN incompatibility sqlite-vec virtual tables don't work correctly with JOINs in the same query - they cause the query to hang indefinitely. Changes: - searchVec: Rewrite to use two-step approach 1. Query vectors_vec table alone (no JOINs) 2. Look up document info separately using result hash_seqs - vsearch: Change from Promise.all to sequential for loop (node-llama-cpp embedding context doesn't handle concurrent calls) This fixes vsearch and hybrid query commands that were hanging at "Searching N vector queries..." Co-Authored-By: Claude Opus 4.5 --- src/qmd.ts | 6 +++--- src/store.ts | 59 ++++++++++++++++++++++++++++++++-------------------- 2 files changed, 39 insertions(+), 26 deletions(-) diff --git a/src/qmd.ts b/src/qmd.ts index f508914..bdcac36 100755 --- a/src/qmd.ts +++ b/src/qmd.ts @@ -1972,8 +1972,8 @@ async function vectorSearch(query: string, opts: OutputOptions, model: string = const perQueryLimit = opts.all ? 500 : 20; const allResults = new Map(); - // Use Promise.all for concurrent vector searches - await Promise.all(vectorQueries.map(async (q) => { + // Run vector searches sequentially (node-llama-cpp embedding context doesn't handle concurrent calls) + for (const q of vectorQueries) { const vecResults = await searchVec(db, q, model, perQueryLimit, collectionName as any); for (const r of vecResults) { const existing = allResults.get(r.filepath); @@ -1981,7 +1981,7 @@ async function vectorSearch(query: string, opts: OutputOptions, model: string = allResults.set(r.filepath, { file: r.filepath, displayPath: r.displayPath, title: r.title, body: r.body || "", score: r.score, hash: r.hash }); } } - })); + } // Sort by max score and limit to requested count const results = Array.from(allResults.values()) diff --git a/src/store.ts b/src/store.ts index e14c7ae..ca34b2c 100644 --- a/src/store.ts +++ b/src/store.ts @@ -1679,48 +1679,61 @@ export async function searchVec(db: Database, query: string, model: string, limi const embedding = await getEmbedding(query, model, true); if (!embedding) return []; - // sqlite-vec requires "k = ?" for KNN queries - let sql = ` + // Step 1: Get vector matches (sqlite-vec doesn't work with JOINs) + const vecResults = db.prepare(` + SELECT hash_seq, distance + FROM vectors_vec + WHERE embedding MATCH ? AND k = ? + `).all(new Float32Array(embedding), limit * 3) as { hash_seq: string; distance: number }[]; + + if (vecResults.length === 0) return []; + + // Step 2: Get chunk info and document data + const hashSeqs = vecResults.map(r => r.hash_seq); + const distanceMap = new Map(vecResults.map(r => [r.hash_seq, r.distance])); + + // Build query for document lookup + const placeholders = hashSeqs.map(() => '?').join(','); + let docSql = ` SELECT - v.hash_seq, - v.distance, + cv.hash || '_' || cv.seq as hash_seq, + cv.hash, + cv.pos, 'qmd://' || d.collection || '/' || d.path as filepath, d.collection || '/' || d.path as display_path, d.title, - content.doc as body, - cv.hash, - cv.pos - FROM vectors_vec v - JOIN content_vectors cv ON cv.hash || '_' || cv.seq = v.hash_seq + content.doc as body + FROM content_vectors cv JOIN documents d ON d.hash = cv.hash AND d.active = 1 JOIN content ON content.hash = d.hash - WHERE v.embedding MATCH ? AND k = ? + WHERE cv.hash || '_' || cv.seq IN (${placeholders}) `; - - const params: (Float32Array | number | string)[] = [new Float32Array(embedding), limit * 3]; + const params: string[] = [...hashSeqs]; if (collectionId) { - // Filter by collection name - sql += ` AND d.collection = ?`; + docSql += ` AND d.collection = ?`; params.push(String(collectionId)); } - sql += ` ORDER BY v.distance`; + const docRows = db.prepare(docSql).all(...params) as { + hash_seq: string; hash: string; pos: number; filepath: string; + display_path: string; title: string; body: string; + }[]; - const rows = db.prepare(sql).all(...params) as { hash_seq: string; distance: number; filepath: string; display_path: string; title: string; body: string; hash: string; pos: number }[]; - - const seen = new Map(); - for (const row of rows) { + // Combine with distances and dedupe by filepath + const seen = new Map(); + for (const row of docRows) { + const distance = distanceMap.get(row.hash_seq) ?? 1; const existing = seen.get(row.filepath); - if (!existing || row.distance < existing.bestDist) { - seen.set(row.filepath, { row, bestDist: row.distance }); + if (!existing || distance < existing.bestDist) { + seen.set(row.filepath, { row, bestDist: distance }); } } return Array.from(seen.values()) .sort((a, b) => a.bestDist - b.bestDist) .slice(0, limit) - .map(({ row }) => { + .map(({ row, bestDist }) => { const collectionName = row.filepath.split('//')[1]?.split('/')[0] || ""; return { filepath: row.filepath, @@ -1733,7 +1746,7 @@ export async function searchVec(db: Database, query: string, model: string, limi bodyLength: row.body.length, body: row.body, context: getContextForFile(db, row.filepath), - score: 1 - row.distance, // Cosine similarity = 1 - cosine distance + score: 1 - bestDist, // Cosine similarity = 1 - cosine distance source: "vec" as const, chunkPos: row.pos, };