feat(cli): show timing for expand, embed, and rerank steps

Each LLM invocation now prints elapsed time:
  Expanding query...
    (4.2s) → searching 6 queries
  Embedding 4 queries...
    (605ms)
  Reranking 40 chunks...
    (3.2s)
This commit is contained in:
Tobi Lütke 2026-02-19 06:08:02 -05:00
parent a81f013126
commit 24ffa4876b
No known key found for this signature in database
2 changed files with 52 additions and 13 deletions

View File

@ -81,6 +81,7 @@ import {
import {
getCollection as getCollectionFromYaml,
listCollections as yamlListCollections,
getDefaultCollectionNames,
addContext as yamlAddContext,
removeContext as yamlRemoveContext,
setGlobalContext,
@ -256,6 +257,11 @@ function formatTimeAgo(date: Date): string {
return `${days}d ago`;
}
function formatMs(ms: number): string {
if (ms < 1000) return `${ms}ms`;
return `${(ms / 1000).toFixed(1)}s`;
}
function formatBytes(bytes: number): string {
if (bytes < 1024) return `${bytes} B`;
if (bytes < 1024 * 1024) return `${(bytes / 1024).toFixed(1)} KB`;
@ -1901,7 +1907,6 @@ function outputResults(results: { file: string; displayPath: string; title: stri
function resolveCollectionFilter(raw: string | string[] | undefined, useDefaults: boolean = false): string[] {
// If no filter specified and useDefaults is true, use default collections
if (!raw && useDefaults) {
const { getDefaultCollectionNames } = require("./collections.js");
return getDefaultCollectionNames();
}
if (!raw) return [];
@ -2130,12 +2135,19 @@ async function querySearch(query: string, opts: OutputOptions, _embedModel: stri
limit: opts.all ? 500 : (opts.limit || 10),
minScore: opts.minScore || 0,
hooks: {
onEmbedStart: (count) => {
process.stderr.write(`${c.dim}Embedding ${count} ${count === 1 ? 'query' : 'queries'}...${c.reset}\n`);
},
onEmbedDone: (ms) => {
process.stderr.write(`${c.dim} (${formatMs(ms)})${c.reset}\n`);
},
onRerankStart: (chunkCount) => {
process.stderr.write(`${c.dim}Reranking ${chunkCount} chunks...${c.reset}\n`);
progress.indeterminate();
},
onRerankDone: () => {
onRerankDone: (ms) => {
progress.clear();
process.stderr.write(`${c.dim} (${formatMs(ms)})${c.reset}\n`);
},
},
});
@ -2149,16 +2161,26 @@ async function querySearch(query: string, opts: OutputOptions, _embedModel: stri
onStrongSignal: (score) => {
process.stderr.write(`${c.dim}Strong BM25 signal (${score.toFixed(2)}) — skipping expansion${c.reset}\n`);
},
onExpand: (original, expanded) => {
onExpandStart: () => {
process.stderr.write(`${c.dim}Expanding query...${c.reset}\n`);
},
onExpand: (original, expanded, ms) => {
logExpansionTree(original, expanded);
process.stderr.write(`${c.dim}Searching ${expanded.length + 1} queries...${c.reset}\n`);
process.stderr.write(`${c.dim} (${formatMs(ms)}) → searching ${expanded.length + 1} queries${c.reset}\n`);
},
onEmbedStart: (count) => {
process.stderr.write(`${c.dim}Embedding ${count} ${count === 1 ? 'query' : 'queries'}...${c.reset}\n`);
},
onEmbedDone: (ms) => {
process.stderr.write(`${c.dim} (${formatMs(ms)})${c.reset}\n`);
},
onRerankStart: (chunkCount) => {
process.stderr.write(`${c.dim}Reranking ${chunkCount} chunks...${c.reset}\n`);
progress.indeterminate();
},
onRerankDone: () => {
onRerankDone: (ms) => {
progress.clear();
process.stderr.write(`${c.dim} (${formatMs(ms)})${c.reset}\n`);
},
},
});

View File

@ -2848,12 +2848,18 @@ export function addLineNumbers(text: string, startLine: number = 1): string {
export interface SearchHooks {
/** BM25 probe found strong signal — expansion will be skipped */
onStrongSignal?: (topScore: number) => void;
/** Query expansion complete. Empty array = strong signal skip (no expansion). */
onExpand?: (original: string, expanded: ExpandedQuery[]) => void;
/** Query expansion starting */
onExpandStart?: () => void;
/** Query expansion complete. Empty array = strong signal skip. elapsedMs = time taken. */
onExpand?: (original: string, expanded: ExpandedQuery[], elapsedMs: number) => void;
/** Embedding starting (vec/hyde queries) */
onEmbedStart?: (count: number) => void;
/** Embedding complete */
onEmbedDone?: (elapsedMs: number) => void;
/** Reranking is about to start */
onRerankStart?: (chunkCount: number) => void;
/** Reranking finished */
onRerankDone?: () => void;
onRerankDone?: (elapsedMs: number) => void;
}
export interface HybridQueryOptions {
@ -2918,11 +2924,13 @@ export async function hybridQuery(
if (hasStrongSignal) hooks?.onStrongSignal?.(topScore);
// Step 2: Expand query (or skip if strong signal)
hooks?.onExpandStart?.();
const expandStart = Date.now();
const expanded = hasStrongSignal
? []
: await store.expandQuery(query);
hooks?.onExpand?.(query, expanded);
hooks?.onExpand?.(query, expanded, Date.now() - expandStart);
// Seed with initial FTS results (avoid re-running original query FTS)
if (initialFts.length > 0) {
@ -2967,7 +2975,10 @@ export async function hybridQuery(
// Batch embed all vector queries in a single call
const llm = getDefaultLlamaCpp();
const textsToEmbed = vecQueries.map(q => formatQueryForEmbedding(q.text));
hooks?.onEmbedStart?.(textsToEmbed.length);
const embedStart = Date.now();
const embeddings = await llm.embedBatch(textsToEmbed);
hooks?.onEmbedDone?.(Date.now() - embedStart);
// Run sqlite-vec lookups with pre-computed embeddings
for (let i = 0; i < vecQueries.length; i++) {
@ -3020,8 +3031,9 @@ export async function hybridQuery(
// Step 6: Rerank chunks (NOT full bodies)
hooks?.onRerankStart?.(chunksToRerank.length);
const rerankStart = Date.now();
const reranked = await store.rerank(query, chunksToRerank);
hooks?.onRerankDone?.();
hooks?.onRerankDone?.(Date.now() - rerankStart);
// Step 7: Blend RRF position score with reranker score
// Position-aware weights: top retrieval results get more protection from reranker disagreement
@ -3111,9 +3123,10 @@ export async function vectorSearchQuery(
if (!hasVectors) return [];
// Expand query — filter to vec/hyde only (lex queries target FTS, not vector)
const expandStart = Date.now();
const allExpanded = await store.expandQuery(query);
const vecExpanded = allExpanded.filter(q => q.type !== 'lex');
options?.hooks?.onExpand?.(query, vecExpanded);
options?.hooks?.onExpand?.(query, vecExpanded, Date.now() - expandStart);
// Run original + vec/hyde expanded through vector, sequentially — concurrent embed() hangs
const queryTexts = [query, ...vecExpanded.map(q => q.text)];
@ -3260,7 +3273,10 @@ export async function structuredSearch(
if (vecSearches.length > 0) {
const llm = getDefaultLlamaCpp();
const textsToEmbed = vecSearches.map(s => formatQueryForEmbedding(s.query));
hooks?.onEmbedStart?.(textsToEmbed.length);
const embedStart = Date.now();
const embeddings = await llm.embedBatch(textsToEmbed);
hooks?.onEmbedDone?.(Date.now() - embedStart);
for (let i = 0; i < vecSearches.length; i++) {
const embedding = embeddings[i]?.embedding;
@ -3292,7 +3308,7 @@ export async function structuredSearch(
if (candidates.length === 0) return [];
hooks?.onExpand?.("", []); // Signal no expansion (pre-expanded)
hooks?.onExpand?.("", [], 0); // Signal no expansion (pre-expanded)
// Step 4: Chunk documents, pick best chunk per doc for reranking
// Use first lex query as the "query" for keyword matching, or first vec if no lex
@ -3322,8 +3338,9 @@ export async function structuredSearch(
// Step 5: Rerank chunks
hooks?.onRerankStart?.(chunksToRerank.length);
const rerankStart2 = Date.now();
const reranked = await store.rerank(primaryQuery, chunksToRerank);
hooks?.onRerankDone?.();
hooks?.onRerankDone?.(Date.now() - rerankStart2);
// Step 6: Blend RRF position score with reranker score
const candidateMap = new Map(candidates.map(c => [c.file, {