Fix vsearch/query hang caused by sqlite-vec JOIN incompatibility
sqlite-vec virtual tables don't work correctly with JOINs in the same query - they cause the query to hang indefinitely.

Changes:
- searchVec: Rewrite to use two-step approach
  1. Query vectors_vec table alone (no JOINs)
  2. Look up document info separately using result hash_seqs
- vsearch: Change from Promise.all to sequential for loop (node-llama-cpp embedding context doesn't handle concurrent calls)

This fixes vsearch and hybrid query commands that were hanging at "Searching N vector queries..."

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
parent
5fa66fd228
commit
216793380a
@@ -1972,8 +1972,8 @@ async function vectorSearch(query: string, opts: OutputOptions, model: string =
|
||||
const perQueryLimit = opts.all ? 500 : 20;
|
||||
const allResults = new Map<string, { file: string; displayPath: string; title: string; body: string; score: number; hash: string }>();
|
||||
|
||||
// Use Promise.all for concurrent vector searches
|
||||
await Promise.all(vectorQueries.map(async (q) => {
|
||||
// Run vector searches sequentially (node-llama-cpp embedding context doesn't handle concurrent calls)
|
||||
for (const q of vectorQueries) {
|
||||
const vecResults = await searchVec(db, q, model, perQueryLimit, collectionName as any);
|
||||
for (const r of vecResults) {
|
||||
const existing = allResults.get(r.filepath);
|
||||
@@ -1981,7 +1981,7 @@ async function vectorSearch(query: string, opts: OutputOptions, model: string =
|
||||
allResults.set(r.filepath, { file: r.filepath, displayPath: r.displayPath, title: r.title, body: r.body || "", score: r.score, hash: r.hash });
|
||||
}
|
||||
}
|
||||
}));
|
||||
}
|
||||
|
||||
// Sort by max score and limit to requested count
|
||||
const results = Array.from(allResults.values())
|
||||
|
||||
59
src/store.ts
59
src/store.ts
@@ -1679,48 +1679,61 @@ export async function searchVec(db: Database, query: string, model: string, limi
|
||||
const embedding = await getEmbedding(query, model, true);
|
||||
if (!embedding) return [];
|
||||
|
||||
// sqlite-vec requires "k = ?" for KNN queries
|
||||
let sql = `
|
||||
// Step 1: Get vector matches (sqlite-vec doesn't work with JOINs)
|
||||
const vecResults = db.prepare(`
|
||||
SELECT hash_seq, distance
|
||||
FROM vectors_vec
|
||||
WHERE embedding MATCH ? AND k = ?
|
||||
`).all(new Float32Array(embedding), limit * 3) as { hash_seq: string; distance: number }[];
|
||||
|
||||
if (vecResults.length === 0) return [];
|
||||
|
||||
// Step 2: Get chunk info and document data
|
||||
const hashSeqs = vecResults.map(r => r.hash_seq);
|
||||
const distanceMap = new Map(vecResults.map(r => [r.hash_seq, r.distance]));
|
||||
|
||||
// Build query for document lookup
|
||||
const placeholders = hashSeqs.map(() => '?').join(',');
|
||||
let docSql = `
|
||||
SELECT
|
||||
v.hash_seq,
|
||||
v.distance,
|
||||
cv.hash || '_' || cv.seq as hash_seq,
|
||||
cv.hash,
|
||||
cv.pos,
|
||||
'qmd://' || d.collection || '/' || d.path as filepath,
|
||||
d.collection || '/' || d.path as display_path,
|
||||
d.title,
|
||||
content.doc as body,
|
||||
cv.hash,
|
||||
cv.pos
|
||||
FROM vectors_vec v
|
||||
JOIN content_vectors cv ON cv.hash || '_' || cv.seq = v.hash_seq
|
||||
content.doc as body
|
||||
FROM content_vectors cv
|
||||
JOIN documents d ON d.hash = cv.hash AND d.active = 1
|
||||
JOIN content ON content.hash = d.hash
|
||||
WHERE v.embedding MATCH ? AND k = ?
|
||||
WHERE cv.hash || '_' || cv.seq IN (${placeholders})
|
||||
`;
|
||||
|
||||
const params: (Float32Array | number | string)[] = [new Float32Array(embedding), limit * 3];
|
||||
const params: string[] = [...hashSeqs];
|
||||
|
||||
if (collectionId) {
|
||||
// Filter by collection name
|
||||
sql += ` AND d.collection = ?`;
|
||||
docSql += ` AND d.collection = ?`;
|
||||
params.push(String(collectionId));
|
||||
}
|
||||
|
||||
sql += ` ORDER BY v.distance`;
|
||||
const docRows = db.prepare(docSql).all(...params) as {
|
||||
hash_seq: string; hash: string; pos: number; filepath: string;
|
||||
display_path: string; title: string; body: string;
|
||||
}[];
|
||||
|
||||
const rows = db.prepare(sql).all(...params) as { hash_seq: string; distance: number; filepath: string; display_path: string; title: string; body: string; hash: string; pos: number }[];
|
||||
|
||||
const seen = new Map<string, { row: typeof rows[0]; bestDist: number }>();
|
||||
for (const row of rows) {
|
||||
// Combine with distances and dedupe by filepath
|
||||
const seen = new Map<string, { row: typeof docRows[0]; bestDist: number }>();
|
||||
for (const row of docRows) {
|
||||
const distance = distanceMap.get(row.hash_seq) ?? 1;
|
||||
const existing = seen.get(row.filepath);
|
||||
if (!existing || row.distance < existing.bestDist) {
|
||||
seen.set(row.filepath, { row, bestDist: row.distance });
|
||||
if (!existing || distance < existing.bestDist) {
|
||||
seen.set(row.filepath, { row, bestDist: distance });
|
||||
}
|
||||
}
|
||||
|
||||
return Array.from(seen.values())
|
||||
.sort((a, b) => a.bestDist - b.bestDist)
|
||||
.slice(0, limit)
|
||||
.map(({ row }) => {
|
||||
.map(({ row, bestDist }) => {
|
||||
const collectionName = row.filepath.split('//')[1]?.split('/')[0] || "";
|
||||
return {
|
||||
filepath: row.filepath,
|
||||
@@ -1733,7 +1746,7 @@ export async function searchVec(db: Database, query: string, model: string, limi
|
||||
bodyLength: row.body.length,
|
||||
body: row.body,
|
||||
context: getContextForFile(db, row.filepath),
|
||||
score: 1 - row.distance, // Cosine similarity = 1 - cosine distance
|
||||
score: 1 - bestDist, // Cosine similarity = 1 - cosine distance
|
||||
source: "vec" as const,
|
||||
chunkPos: row.pos,
|
||||
};
|
||||
|
||||
Loading…
Reference in New Issue
Block a user