feat(cli): show timing for expand, embed, and rerank steps
Each LLM invocation now prints elapsed time:
Expanding query...
(4.2s) → searching 6 queries
Embedding 4 queries...
(605ms)
Reranking 40 chunks...
(3.2s)
This commit is contained in:
parent
a81f013126
commit
24ffa4876b
32
src/qmd.ts
32
src/qmd.ts
@ -81,6 +81,7 @@ import {
|
||||
import {
|
||||
getCollection as getCollectionFromYaml,
|
||||
listCollections as yamlListCollections,
|
||||
getDefaultCollectionNames,
|
||||
addContext as yamlAddContext,
|
||||
removeContext as yamlRemoveContext,
|
||||
setGlobalContext,
|
||||
@ -256,6 +257,11 @@ function formatTimeAgo(date: Date): string {
|
||||
return `${days}d ago`;
|
||||
}
|
||||
|
||||
function formatMs(ms: number): string {
|
||||
if (ms < 1000) return `${ms}ms`;
|
||||
return `${(ms / 1000).toFixed(1)}s`;
|
||||
}
|
||||
|
||||
function formatBytes(bytes: number): string {
|
||||
if (bytes < 1024) return `${bytes} B`;
|
||||
if (bytes < 1024 * 1024) return `${(bytes / 1024).toFixed(1)} KB`;
|
||||
@ -1901,7 +1907,6 @@ function outputResults(results: { file: string; displayPath: string; title: stri
|
||||
function resolveCollectionFilter(raw: string | string[] | undefined, useDefaults: boolean = false): string[] {
|
||||
// If no filter specified and useDefaults is true, use default collections
|
||||
if (!raw && useDefaults) {
|
||||
const { getDefaultCollectionNames } = require("./collections.js");
|
||||
return getDefaultCollectionNames();
|
||||
}
|
||||
if (!raw) return [];
|
||||
@ -2130,12 +2135,19 @@ async function querySearch(query: string, opts: OutputOptions, _embedModel: stri
|
||||
limit: opts.all ? 500 : (opts.limit || 10),
|
||||
minScore: opts.minScore || 0,
|
||||
hooks: {
|
||||
onEmbedStart: (count) => {
|
||||
process.stderr.write(`${c.dim}Embedding ${count} ${count === 1 ? 'query' : 'queries'}...${c.reset}\n`);
|
||||
},
|
||||
onEmbedDone: (ms) => {
|
||||
process.stderr.write(`${c.dim} (${formatMs(ms)})${c.reset}\n`);
|
||||
},
|
||||
onRerankStart: (chunkCount) => {
|
||||
process.stderr.write(`${c.dim}Reranking ${chunkCount} chunks...${c.reset}\n`);
|
||||
progress.indeterminate();
|
||||
},
|
||||
onRerankDone: () => {
|
||||
onRerankDone: (ms) => {
|
||||
progress.clear();
|
||||
process.stderr.write(`${c.dim} (${formatMs(ms)})${c.reset}\n`);
|
||||
},
|
||||
},
|
||||
});
|
||||
@ -2149,16 +2161,26 @@ async function querySearch(query: string, opts: OutputOptions, _embedModel: stri
|
||||
onStrongSignal: (score) => {
|
||||
process.stderr.write(`${c.dim}Strong BM25 signal (${score.toFixed(2)}) — skipping expansion${c.reset}\n`);
|
||||
},
|
||||
onExpand: (original, expanded) => {
|
||||
onExpandStart: () => {
|
||||
process.stderr.write(`${c.dim}Expanding query...${c.reset}\n`);
|
||||
},
|
||||
onExpand: (original, expanded, ms) => {
|
||||
logExpansionTree(original, expanded);
|
||||
process.stderr.write(`${c.dim}Searching ${expanded.length + 1} queries...${c.reset}\n`);
|
||||
process.stderr.write(`${c.dim} (${formatMs(ms)}) → searching ${expanded.length + 1} queries${c.reset}\n`);
|
||||
},
|
||||
onEmbedStart: (count) => {
|
||||
process.stderr.write(`${c.dim}Embedding ${count} ${count === 1 ? 'query' : 'queries'}...${c.reset}\n`);
|
||||
},
|
||||
onEmbedDone: (ms) => {
|
||||
process.stderr.write(`${c.dim} (${formatMs(ms)})${c.reset}\n`);
|
||||
},
|
||||
onRerankStart: (chunkCount) => {
|
||||
process.stderr.write(`${c.dim}Reranking ${chunkCount} chunks...${c.reset}\n`);
|
||||
progress.indeterminate();
|
||||
},
|
||||
onRerankDone: () => {
|
||||
onRerankDone: (ms) => {
|
||||
progress.clear();
|
||||
process.stderr.write(`${c.dim} (${formatMs(ms)})${c.reset}\n`);
|
||||
},
|
||||
},
|
||||
});
|
||||
|
||||
33
src/store.ts
33
src/store.ts
@ -2848,12 +2848,18 @@ export function addLineNumbers(text: string, startLine: number = 1): string {
|
||||
export interface SearchHooks {
|
||||
/** BM25 probe found strong signal — expansion will be skipped */
|
||||
onStrongSignal?: (topScore: number) => void;
|
||||
/** Query expansion complete. Empty array = strong signal skip (no expansion). */
|
||||
onExpand?: (original: string, expanded: ExpandedQuery[]) => void;
|
||||
/** Query expansion starting */
|
||||
onExpandStart?: () => void;
|
||||
/** Query expansion complete. Empty array = strong signal skip. elapsedMs = time taken. */
|
||||
onExpand?: (original: string, expanded: ExpandedQuery[], elapsedMs: number) => void;
|
||||
/** Embedding starting (vec/hyde queries) */
|
||||
onEmbedStart?: (count: number) => void;
|
||||
/** Embedding complete */
|
||||
onEmbedDone?: (elapsedMs: number) => void;
|
||||
/** Reranking is about to start */
|
||||
onRerankStart?: (chunkCount: number) => void;
|
||||
/** Reranking finished */
|
||||
onRerankDone?: () => void;
|
||||
onRerankDone?: (elapsedMs: number) => void;
|
||||
}
|
||||
|
||||
export interface HybridQueryOptions {
|
||||
@ -2918,11 +2924,13 @@ export async function hybridQuery(
|
||||
if (hasStrongSignal) hooks?.onStrongSignal?.(topScore);
|
||||
|
||||
// Step 2: Expand query (or skip if strong signal)
|
||||
hooks?.onExpandStart?.();
|
||||
const expandStart = Date.now();
|
||||
const expanded = hasStrongSignal
|
||||
? []
|
||||
: await store.expandQuery(query);
|
||||
|
||||
hooks?.onExpand?.(query, expanded);
|
||||
hooks?.onExpand?.(query, expanded, Date.now() - expandStart);
|
||||
|
||||
// Seed with initial FTS results (avoid re-running original query FTS)
|
||||
if (initialFts.length > 0) {
|
||||
@ -2967,7 +2975,10 @@ export async function hybridQuery(
|
||||
// Batch embed all vector queries in a single call
|
||||
const llm = getDefaultLlamaCpp();
|
||||
const textsToEmbed = vecQueries.map(q => formatQueryForEmbedding(q.text));
|
||||
hooks?.onEmbedStart?.(textsToEmbed.length);
|
||||
const embedStart = Date.now();
|
||||
const embeddings = await llm.embedBatch(textsToEmbed);
|
||||
hooks?.onEmbedDone?.(Date.now() - embedStart);
|
||||
|
||||
// Run sqlite-vec lookups with pre-computed embeddings
|
||||
for (let i = 0; i < vecQueries.length; i++) {
|
||||
@ -3020,8 +3031,9 @@ export async function hybridQuery(
|
||||
|
||||
// Step 6: Rerank chunks (NOT full bodies)
|
||||
hooks?.onRerankStart?.(chunksToRerank.length);
|
||||
const rerankStart = Date.now();
|
||||
const reranked = await store.rerank(query, chunksToRerank);
|
||||
hooks?.onRerankDone?.();
|
||||
hooks?.onRerankDone?.(Date.now() - rerankStart);
|
||||
|
||||
// Step 7: Blend RRF position score with reranker score
|
||||
// Position-aware weights: top retrieval results get more protection from reranker disagreement
|
||||
@ -3111,9 +3123,10 @@ export async function vectorSearchQuery(
|
||||
if (!hasVectors) return [];
|
||||
|
||||
// Expand query — filter to vec/hyde only (lex queries target FTS, not vector)
|
||||
const expandStart = Date.now();
|
||||
const allExpanded = await store.expandQuery(query);
|
||||
const vecExpanded = allExpanded.filter(q => q.type !== 'lex');
|
||||
options?.hooks?.onExpand?.(query, vecExpanded);
|
||||
options?.hooks?.onExpand?.(query, vecExpanded, Date.now() - expandStart);
|
||||
|
||||
// Run original + vec/hyde expanded through vector, sequentially — concurrent embed() hangs
|
||||
const queryTexts = [query, ...vecExpanded.map(q => q.text)];
|
||||
@ -3260,7 +3273,10 @@ export async function structuredSearch(
|
||||
if (vecSearches.length > 0) {
|
||||
const llm = getDefaultLlamaCpp();
|
||||
const textsToEmbed = vecSearches.map(s => formatQueryForEmbedding(s.query));
|
||||
hooks?.onEmbedStart?.(textsToEmbed.length);
|
||||
const embedStart = Date.now();
|
||||
const embeddings = await llm.embedBatch(textsToEmbed);
|
||||
hooks?.onEmbedDone?.(Date.now() - embedStart);
|
||||
|
||||
for (let i = 0; i < vecSearches.length; i++) {
|
||||
const embedding = embeddings[i]?.embedding;
|
||||
@ -3292,7 +3308,7 @@ export async function structuredSearch(
|
||||
|
||||
if (candidates.length === 0) return [];
|
||||
|
||||
hooks?.onExpand?.("", []); // Signal no expansion (pre-expanded)
|
||||
hooks?.onExpand?.("", [], 0); // Signal no expansion (pre-expanded)
|
||||
|
||||
// Step 4: Chunk documents, pick best chunk per doc for reranking
|
||||
// Use first lex query as the "query" for keyword matching, or first vec if no lex
|
||||
@ -3322,8 +3338,9 @@ export async function structuredSearch(
|
||||
|
||||
// Step 5: Rerank chunks
|
||||
hooks?.onRerankStart?.(chunksToRerank.length);
|
||||
const rerankStart2 = Date.now();
|
||||
const reranked = await store.rerank(primaryQuery, chunksToRerank);
|
||||
hooks?.onRerankDone?.();
|
||||
hooks?.onRerankDone?.(Date.now() - rerankStart2);
|
||||
|
||||
// Step 6: Blend RRF position score with reranker score
|
||||
const candidateMap = new Map(candidates.map(c => [c.file, {
|
||||
|
||||
Loading…
Reference in New Issue
Block a user