Fix vsearch/query hang caused by sqlite-vec JOIN incompatibility

sqlite-vec virtual tables don't work correctly when JOINed with other
tables in the same query: the query hangs indefinitely.

Changes:
- searchVec: Rewrite to use a two-step approach
  1. Query vectors_vec table alone (no JOINs)
  2. Look up document info separately using result hash_seqs
- vsearch: Change from Promise.all to a sequential for loop
  (the node-llama-cpp embedding context doesn't handle concurrent calls)

This fixes the vsearch and hybrid query commands, which were hanging at
"Searching N vector queries..."

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
Brendan McCord 2026-01-11 22:08:32 -06:00
parent 5fa66fd228
commit 216793380a
2 changed files with 39 additions and 26 deletions

View File

@ -1972,8 +1972,8 @@ async function vectorSearch(query: string, opts: OutputOptions, model: string =
const perQueryLimit = opts.all ? 500 : 20;
const allResults = new Map<string, { file: string; displayPath: string; title: string; body: string; score: number; hash: string }>();
// Use Promise.all for concurrent vector searches
await Promise.all(vectorQueries.map(async (q) => {
// Run vector searches sequentially (node-llama-cpp embedding context doesn't handle concurrent calls)
for (const q of vectorQueries) {
const vecResults = await searchVec(db, q, model, perQueryLimit, collectionName as any);
for (const r of vecResults) {
const existing = allResults.get(r.filepath);
@ -1981,7 +1981,7 @@ async function vectorSearch(query: string, opts: OutputOptions, model: string =
allResults.set(r.filepath, { file: r.filepath, displayPath: r.displayPath, title: r.title, body: r.body || "", score: r.score, hash: r.hash });
}
}
}));
}
// Sort by max score and limit to requested count
const results = Array.from(allResults.values())

View File

@ -1679,48 +1679,61 @@ export async function searchVec(db: Database, query: string, model: string, limi
const embedding = await getEmbedding(query, model, true);
if (!embedding) return [];
// sqlite-vec requires "k = ?" for KNN queries
let sql = `
// Step 1: Get vector matches (sqlite-vec doesn't work with JOINs)
const vecResults = db.prepare(`
SELECT hash_seq, distance
FROM vectors_vec
WHERE embedding MATCH ? AND k = ?
`).all(new Float32Array(embedding), limit * 3) as { hash_seq: string; distance: number }[];
if (vecResults.length === 0) return [];
// Step 2: Get chunk info and document data
const hashSeqs = vecResults.map(r => r.hash_seq);
const distanceMap = new Map(vecResults.map(r => [r.hash_seq, r.distance]));
// Build query for document lookup
const placeholders = hashSeqs.map(() => '?').join(',');
let docSql = `
SELECT
v.hash_seq,
v.distance,
cv.hash || '_' || cv.seq as hash_seq,
cv.hash,
cv.pos,
'qmd://' || d.collection || '/' || d.path as filepath,
d.collection || '/' || d.path as display_path,
d.title,
content.doc as body,
cv.hash,
cv.pos
FROM vectors_vec v
JOIN content_vectors cv ON cv.hash || '_' || cv.seq = v.hash_seq
content.doc as body
FROM content_vectors cv
JOIN documents d ON d.hash = cv.hash AND d.active = 1
JOIN content ON content.hash = d.hash
WHERE v.embedding MATCH ? AND k = ?
WHERE cv.hash || '_' || cv.seq IN (${placeholders})
`;
const params: (Float32Array | number | string)[] = [new Float32Array(embedding), limit * 3];
const params: string[] = [...hashSeqs];
if (collectionId) {
// Filter by collection name
sql += ` AND d.collection = ?`;
docSql += ` AND d.collection = ?`;
params.push(String(collectionId));
}
sql += ` ORDER BY v.distance`;
const docRows = db.prepare(docSql).all(...params) as {
hash_seq: string; hash: string; pos: number; filepath: string;
display_path: string; title: string; body: string;
}[];
const rows = db.prepare(sql).all(...params) as { hash_seq: string; distance: number; filepath: string; display_path: string; title: string; body: string; hash: string; pos: number }[];
const seen = new Map<string, { row: typeof rows[0]; bestDist: number }>();
for (const row of rows) {
// Combine with distances and dedupe by filepath
const seen = new Map<string, { row: typeof docRows[0]; bestDist: number }>();
for (const row of docRows) {
const distance = distanceMap.get(row.hash_seq) ?? 1;
const existing = seen.get(row.filepath);
if (!existing || row.distance < existing.bestDist) {
seen.set(row.filepath, { row, bestDist: row.distance });
if (!existing || distance < existing.bestDist) {
seen.set(row.filepath, { row, bestDist: distance });
}
}
return Array.from(seen.values())
.sort((a, b) => a.bestDist - b.bestDist)
.slice(0, limit)
.map(({ row }) => {
.map(({ row, bestDist }) => {
const collectionName = row.filepath.split('//')[1]?.split('/')[0] || "";
return {
filepath: row.filepath,
@ -1733,7 +1746,7 @@ export async function searchVec(db: Database, query: string, model: string, limi
bodyLength: row.body.length,
body: row.body,
context: getContextForFile(db, row.filepath),
score: 1 - row.distance, // Cosine similarity = 1 - cosine distance
score: 1 - bestDist, // Cosine similarity = 1 - cosine distance
source: "vec" as const,
chunkPos: row.pos,
};