feat(query): add --explain score traces for hybrid search
This commit is contained in:
parent
7904ab9a9d
commit
b068ad0dd6
@ -2,6 +2,12 @@
|
||||
|
||||
## [Unreleased]
|
||||
|
||||
### Changes
|
||||
|
||||
- Query: add `--explain` for `qmd query` to expose retrieval score traces
  in JSON and CLI output. Includes backend scores (FTS/vector), per-list
  RRF contributions, top-rank bonus, reranker score, and final blended score.
|
||||
|
||||
## [1.1.1] - 2026-03-06
|
||||
|
||||
### Fixes
|
||||
|
||||
@ -388,6 +388,7 @@ qmd query "user authentication"
|
||||
--min-score <num> # Minimum score threshold (default: 0)
|
||||
--full # Show full document content
|
||||
--line-numbers # Add line numbers to output
|
||||
--explain # Include retrieval score traces (query, JSON/CLI output)
|
||||
--index <name> # Use named index
|
||||
|
||||
# Output formats (for search and multi-get)
|
||||
@ -450,6 +451,9 @@ qmd search --md --full "error handling"
|
||||
# JSON output for scripting
|
||||
qmd query --json "quarterly reports"
|
||||
|
||||
# Inspect how each result was scored (RRF + rerank blend)
|
||||
qmd query --json --explain "quarterly reports"
|
||||
|
||||
# Use separate index for different knowledge base
|
||||
qmd --index work search "quarterly reports"
|
||||
```
|
||||
|
||||
50
src/qmd.ts
50
src/qmd.ts
@ -62,6 +62,7 @@ import {
|
||||
structuredSearch,
|
||||
addLineNumbers,
|
||||
type ExpandedQuery,
|
||||
type HybridQueryExplain,
|
||||
type StructuredSubSearch,
|
||||
DEFAULT_EMBED_MODEL,
|
||||
DEFAULT_RERANK_MODEL,
|
||||
@ -1767,6 +1768,7 @@ type OutputOptions = {
|
||||
all?: boolean;
|
||||
collection?: string | string[]; // Filter by collection name(s)
|
||||
lineNumbers?: boolean; // Add line numbers to output
|
||||
explain?: boolean; // Include retrieval score traces (query only)
|
||||
context?: string; // Optional context for query expansion
|
||||
candidateLimit?: number; // Max candidates to rerank (default: 40)
|
||||
};
|
||||
@ -1792,6 +1794,10 @@ function formatScore(score: number): string {
|
||||
return `${c.dim}${pct}%${c.reset}`;
|
||||
}
|
||||
|
||||
/**
 * Format a raw score for `--explain` output.
 *
 * @param value - Score to render (backend, RRF, rerank, or blended).
 * @returns The value in fixed-point notation with exactly four decimals.
 */
function formatExplainNumber(value: number): string {
  return value.toFixed(4);
}
|
||||
|
||||
// Shorten directory path for display - relative to $HOME (used for context paths, not documents)
|
||||
function shortPath(dirpath: string): string {
|
||||
const home = homedir();
|
||||
@ -1828,7 +1834,20 @@ function printEmptySearchResults(format: OutputFormat, reason: EmptySearchReason
|
||||
console.log("No results found.");
|
||||
}
|
||||
|
||||
function outputResults(results: { file: string; displayPath: string; title: string; body: string; score: number; context?: string | null; chunkPos?: number; hash?: string; docid?: string }[], query: string, opts: OutputOptions): void {
|
||||
/**
 * One result row as consumed by `outputResults`.
 *
 * `explain` is only emitted when the caller also sets `opts.explain`;
 * rows without a trace are printed exactly as before.
 */
type OutputRow = {
  file: string;
  displayPath: string;
  title: string;
  body: string;
  score: number;
  context?: string | null;
  chunkPos?: number;
  hash?: string;
  docid?: string;
  /** Retrieval score trace attached by the query pipeline (optional). */
  explain?: HybridQueryExplain;
};
|
||||
|
||||
function outputResults(results: OutputRow[], query: string, opts: OutputOptions): void {
|
||||
const filtered = results.filter(r => r.score >= opts.minScore).slice(0, opts.limit);
|
||||
|
||||
if (filtered.length === 0) {
|
||||
@ -1857,6 +1876,7 @@ function outputResults(results: { file: string; displayPath: string; title: stri
|
||||
...(row.context && { context: row.context }),
|
||||
...(body && { body }),
|
||||
...(snippet && { snippet }),
|
||||
...(opts.explain && row.explain && { explain: row.explain }),
|
||||
};
|
||||
});
|
||||
console.log(JSON.stringify(output, null, 2));
|
||||
@ -1896,6 +1916,28 @@ function outputResults(results: { file: string; displayPath: string; title: stri
|
||||
// Line 4: Score
|
||||
const score = formatScore(row.score);
|
||||
console.log(`Score: ${c.bold}${score}${c.reset}`);
|
||||
if (opts.explain && row.explain) {
|
||||
const explain = row.explain;
|
||||
const ftsScores = explain.ftsScores.length > 0
|
||||
? explain.ftsScores.map(formatExplainNumber).join(", ")
|
||||
: "none";
|
||||
const vecScores = explain.vectorScores.length > 0
|
||||
? explain.vectorScores.map(formatExplainNumber).join(", ")
|
||||
: "none";
|
||||
const contribSummary = explain.rrf.contributions
|
||||
.slice()
|
||||
.sort((a, b) => b.rrfContribution - a.rrfContribution)
|
||||
.slice(0, 3)
|
||||
.map(c => `${c.source}/${c.queryType}#${c.rank}:${formatExplainNumber(c.rrfContribution)}`)
|
||||
.join(" | ");
|
||||
|
||||
console.log(`${c.dim}Explain: fts=[${ftsScores}] vec=[${vecScores}]${c.reset}`);
|
||||
console.log(`${c.dim} RRF: total=${formatExplainNumber(explain.rrf.totalScore)} base=${formatExplainNumber(explain.rrf.baseScore)} bonus=${formatExplainNumber(explain.rrf.topRankBonus)} rank=${explain.rrf.rank}${c.reset}`);
|
||||
console.log(`${c.dim} Blend: ${Math.round(explain.rrf.weight * 100)}%*${formatExplainNumber(explain.rrf.positionScore)} + ${Math.round((1 - explain.rrf.weight) * 100)}%*${formatExplainNumber(explain.rerankScore)} = ${formatExplainNumber(explain.blendedScore)}${c.reset}`);
|
||||
if (contribSummary.length > 0) {
|
||||
console.log(`${c.dim} Top RRF contributions: ${contribSummary}${c.reset}`);
|
||||
}
|
||||
}
|
||||
console.log();
|
||||
|
||||
// Snippet with highlighting (diff-style header included)
|
||||
@ -2179,6 +2221,7 @@ async function querySearch(query: string, opts: OutputOptions, _embedModel: stri
|
||||
limit: opts.all ? 500 : (opts.limit || 10),
|
||||
minScore: opts.minScore || 0,
|
||||
candidateLimit: opts.candidateLimit,
|
||||
explain: !!opts.explain,
|
||||
hooks: {
|
||||
onEmbedStart: (count) => {
|
||||
process.stderr.write(`${c.dim}Embedding ${count} ${count === 1 ? 'query' : 'queries'}...${c.reset}`);
|
||||
@ -2203,6 +2246,7 @@ async function querySearch(query: string, opts: OutputOptions, _embedModel: stri
|
||||
limit: opts.all ? 500 : (opts.limit || 10),
|
||||
minScore: opts.minScore || 0,
|
||||
candidateLimit: opts.candidateLimit,
|
||||
explain: !!opts.explain,
|
||||
hooks: {
|
||||
onStrongSignal: (score) => {
|
||||
process.stderr.write(`${c.dim}Strong BM25 signal (${score.toFixed(2)}) — skipping expansion${c.reset}\n`);
|
||||
@ -2263,6 +2307,7 @@ async function querySearch(query: string, opts: OutputOptions, _embedModel: stri
|
||||
score: r.score,
|
||||
context: r.context,
|
||||
docid: r.docid,
|
||||
explain: r.explain,
|
||||
})), displayQuery, { ...opts, limit: results.length });
|
||||
}, { maxDuration: 10 * 60 * 1000, name: 'querySearch' });
|
||||
}
|
||||
@ -2292,6 +2337,7 @@ function parseCLI() {
|
||||
xml: { type: "boolean" },
|
||||
files: { type: "boolean" },
|
||||
json: { type: "boolean" },
|
||||
explain: { type: "boolean" },
|
||||
collection: { type: "string", short: "c", multiple: true }, // Filter by collection(s)
|
||||
// Collection options
|
||||
name: { type: "string" }, // collection name
|
||||
@ -2346,6 +2392,7 @@ function parseCLI() {
|
||||
collection: values.collection as string[] | undefined,
|
||||
lineNumbers: !!values["line-numbers"],
|
||||
candidateLimit: values["candidate-limit"] ? parseInt(String(values["candidate-limit"]), 10) : undefined,
|
||||
explain: !!values.explain,
|
||||
};
|
||||
|
||||
return {
|
||||
@ -2449,6 +2496,7 @@ function showHelp(): void {
|
||||
console.log(" --full - Output full document instead of snippet");
|
||||
console.log(" -C, --candidate-limit <n> - Max candidates to rerank (default 40, lower = faster)");
|
||||
console.log(" --line-numbers - Include line numbers in output");
|
||||
console.log(" --explain - Include retrieval score traces (query --json/CLI)");
|
||||
console.log(" --files | --json | --csv | --md | --xml - Output format");
|
||||
console.log(" -c, --collection <name> - Filter by one or more collections");
|
||||
console.log("");
|
||||
|
||||
178
src/store.ts
178
src/store.ts
@ -1040,6 +1040,41 @@ export type RankedResult = {
|
||||
score: number;
|
||||
};
|
||||
|
||||
/**
 * A single ranked list's contribution to one document's RRF score.
 */
export type RRFContributionTrace = {
  /** Index of the ranked list this contribution came from. */
  listIndex: number;
  /** Backend that produced the list. */
  source: "fts" | "vec";
  /** Kind of query that produced the list. */
  queryType: "original" | "lex" | "vec" | "hyde";
  /** Query text that produced the list. */
  query: string;
  /** 1-indexed rank within list. */
  rank: number;
  /** Weight applied to the list during fusion. */
  weight: number;
  /** Backend-normalized score before fusion. */
  backendScore: number;
  /** weight / (k + rank) */
  rrfContribution: number;
};
|
||||
|
||||
/**
 * Aggregated RRF trace for one document across all ranked lists.
 */
export type RRFScoreTrace = {
  /** One entry per ranked list the document appeared in. */
  contributions: RRFContributionTrace[];
  /** Sum of reciprocal-rank contributions. */
  baseScore: number;
  /** Best (lowest) rank seen across lists. */
  topRank: number;
  /** +0.05 for rank 1, +0.02 for rank 2-3. */
  topRankBonus: number;
  /** baseScore + topRankBonus */
  totalScore: number;
};
|
||||
|
||||
/**
 * Per-result score trace returned when a query runs with `explain` enabled.
 */
export type HybridQueryExplain = {
  /** Backend scores from every FTS list the document appeared in. */
  ftsScores: number[];
  /** Backend scores from every vector list the document appeared in. */
  vectorScores: number[];
  rrf: {
    /** Rank after RRF fusion (1-indexed). */
    rank: number;
    /** 1 / rank used in position-aware blending. */
    positionScore: number;
    /** Position-aware RRF weight (0.75 / 0.60 / 0.40). */
    weight: number;
    /** Sum of reciprocal-rank contributions. */
    baseScore: number;
    /** Bonus granted for the best rank seen across lists. */
    topRankBonus: number;
    /** baseScore + topRankBonus */
    totalScore: number;
    /** Per-list breakdown behind `baseScore`. */
    contributions: RRFContributionTrace[];
  };
  /** Score assigned by the reranker. */
  rerankScore: number;
  /** Final score after blending the RRF position score with the rerank score. */
  blendedScore: number;
};
|
||||
|
||||
/**
|
||||
* Error result when document is not found
|
||||
*/
|
||||
@ -2430,6 +2465,72 @@ export function reciprocalRankFusion(
|
||||
.map(e => ({ ...e.result, score: e.rrfScore }));
|
||||
}
|
||||
|
||||
/**
 * Build per-document RRF contribution traces for explain/debug output.
 *
 * Each document is keyed by `result.file`. For every ranked list in which it
 * appears, a contribution of `weight / (k + rank)` is recorded, where `rank`
 * is 1-indexed. The contributions are summed into `baseScore`, and a top-rank
 * bonus is then added: +0.05 if the best rank is 1, +0.02 if it is 2 or 3.
 *
 * @param resultLists - Ranked result lists, best match first.
 * @param weights - Per-list weights, index-aligned with `resultLists`;
 *   a missing entry defaults to 1.0.
 * @param listMeta - Per-list provenance, index-aligned with `resultLists`;
 *   a missing entry falls back to `{ source: "fts", queryType: "original", query: "" }`.
 * @param k - RRF smoothing constant (default 60).
 * @returns Map from file identifier to its aggregated score trace.
 */
export function buildRrfTrace(
  resultLists: RankedResult[][],
  weights: number[] = [],
  listMeta: RankedListMeta[] = [],
  k: number = 60
): Map<string, RRFScoreTrace> {
  // Used when the caller supplies no metadata for a list.
  const fallbackMeta: RankedListMeta = { source: "fts", queryType: "original", query: "" };
  const traces = new Map<string, RRFScoreTrace>();

  for (const [listIndex, list] of resultLists.entries()) {
    if (!list) continue; // tolerate holes in the outer array
    const weight = weights[listIndex] ?? 1.0;
    const { source, queryType, query } = listMeta[listIndex] ?? fallbackMeta;

    for (const [position, result] of list.entries()) {
      if (!result) continue;
      const rank = position + 1; // 1-indexed rank for explain output
      const rrfContribution = weight / (k + rank);

      const detail: RRFContributionTrace = {
        listIndex,
        source,
        queryType,
        query,
        rank,
        weight,
        backendScore: result.score,
        rrfContribution,
      };

      const trace = traces.get(result.file);
      if (trace === undefined) {
        // First sighting: bonus and total are filled in by the pass below.
        traces.set(result.file, {
          contributions: [detail],
          baseScore: rrfContribution,
          topRank: rank,
          topRankBonus: 0,
          totalScore: 0,
        });
        continue;
      }

      trace.baseScore += rrfContribution;
      trace.topRank = Math.min(trace.topRank, rank);
      trace.contributions.push(detail);
    }
  }

  // The bonus depends on the best rank over all lists, so it is applied last.
  for (const trace of traces.values()) {
    let bonus = 0;
    if (trace.topRank === 1) bonus = 0.05;
    else if (trace.topRank <= 3) bonus = 0.02;
    trace.topRankBonus = bonus;
    trace.totalScore = trace.baseScore + bonus;
  }

  return traces;
}
|
||||
|
||||
// =============================================================================
|
||||
// Document retrieval
|
||||
// =============================================================================
|
||||
@ -2891,6 +2992,7 @@ export interface HybridQueryOptions {
|
||||
limit?: number; // default 10
|
||||
minScore?: number; // default 0
|
||||
candidateLimit?: number; // default RERANK_CANDIDATE_LIMIT
|
||||
explain?: boolean; // include backend/RRF/rerank score traces
|
||||
hooks?: SearchHooks;
|
||||
}
|
||||
|
||||
@ -2904,8 +3006,15 @@ export interface HybridQueryResult {
|
||||
score: number; // blended score (full precision)
|
||||
context: string | null; // user-set context
|
||||
docid: string; // content hash prefix (6 chars)
|
||||
explain?: HybridQueryExplain;
|
||||
}
|
||||
|
||||
/**
 * Provenance of one ranked list fed into RRF fusion, index-aligned with the
 * list array so explain traces can say where each contribution came from.
 */
export type RankedListMeta = {
  /** Backend that produced the list. */
  source: "fts" | "vec";
  /** Kind of query that produced the list. */
  queryType: "original" | "lex" | "vec" | "hyde";
  /** Query text that was run. */
  query: string;
};
|
||||
|
||||
/**
|
||||
* Hybrid search: BM25 + vector + query expansion + RRF + chunked reranking.
|
||||
*
|
||||
@ -2928,9 +3037,11 @@ export async function hybridQuery(
|
||||
const minScore = options?.minScore ?? 0;
|
||||
const candidateLimit = options?.candidateLimit ?? RERANK_CANDIDATE_LIMIT;
|
||||
const collection = options?.collection;
|
||||
const explain = options?.explain ?? false;
|
||||
const hooks = options?.hooks;
|
||||
|
||||
const rankedLists: RankedResult[][] = [];
|
||||
const rankedListMeta: RankedListMeta[] = [];
|
||||
const docidMap = new Map<string, string>(); // filepath -> docid
|
||||
const hasVectors = !!store.db.prepare(
|
||||
`SELECT name FROM sqlite_master WHERE type='table' AND name='vectors_vec'`
|
||||
@ -2963,6 +3074,7 @@ export async function hybridQuery(
|
||||
file: r.filepath, displayPath: r.displayPath,
|
||||
title: r.title, body: r.body || "", score: r.score,
|
||||
})));
|
||||
rankedListMeta.push({ source: "fts", queryType: "original", query });
|
||||
}
|
||||
|
||||
// Step 3: Route searches by query type
|
||||
@ -2981,18 +3093,19 @@ export async function hybridQuery(
|
||||
file: r.filepath, displayPath: r.displayPath,
|
||||
title: r.title, body: r.body || "", score: r.score,
|
||||
})));
|
||||
rankedListMeta.push({ source: "fts", queryType: "lex", query: q.text });
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// 3b: Collect all texts that need vector search (original query + vec/hyde expansions)
|
||||
if (hasVectors) {
|
||||
const vecQueries: { text: string; isOriginal: boolean }[] = [
|
||||
{ text: query, isOriginal: true },
|
||||
const vecQueries: { text: string; queryType: "original" | "vec" | "hyde" }[] = [
|
||||
{ text: query, queryType: "original" },
|
||||
];
|
||||
for (const q of expanded) {
|
||||
if (q.type === 'vec' || q.type === 'hyde') {
|
||||
vecQueries.push({ text: q.text, isOriginal: false });
|
||||
vecQueries.push({ text: q.text, queryType: q.type });
|
||||
}
|
||||
}
|
||||
|
||||
@ -3019,6 +3132,11 @@ export async function hybridQuery(
|
||||
file: r.filepath, displayPath: r.displayPath,
|
||||
title: r.title, body: r.body || "", score: r.score,
|
||||
})));
|
||||
rankedListMeta.push({
|
||||
source: "vec",
|
||||
queryType: vecQueries[i]!.queryType,
|
||||
query: vecQueries[i]!.text,
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -3026,6 +3144,7 @@ export async function hybridQuery(
|
||||
// Step 4: RRF fusion — first 2 lists (original FTS + first vec) get 2x weight
|
||||
const weights = rankedLists.map((_, i) => i < 2 ? 2.0 : 1.0);
|
||||
const fused = reciprocalRankFusion(rankedLists, weights);
|
||||
const rrfTraceByFile = explain ? buildRrfTrace(rankedLists, weights, rankedListMeta) : null;
|
||||
const candidates = fused.slice(0, candidateLimit);
|
||||
|
||||
if (candidates.length === 0) return [];
|
||||
@ -3080,6 +3199,22 @@ export async function hybridQuery(
|
||||
const bestIdx = chunkInfo?.bestIdx ?? 0;
|
||||
const bestChunk = chunkInfo?.chunks[bestIdx]?.text || candidate?.body || "";
|
||||
const bestChunkPos = chunkInfo?.chunks[bestIdx]?.pos || 0;
|
||||
const trace = rrfTraceByFile?.get(r.file);
|
||||
const explainData: HybridQueryExplain | undefined = explain ? {
|
||||
ftsScores: trace?.contributions.filter(c => c.source === "fts").map(c => c.backendScore) ?? [],
|
||||
vectorScores: trace?.contributions.filter(c => c.source === "vec").map(c => c.backendScore) ?? [],
|
||||
rrf: {
|
||||
rank: rrfRank,
|
||||
positionScore: rrfScore,
|
||||
weight: rrfWeight,
|
||||
baseScore: trace?.baseScore ?? 0,
|
||||
topRankBonus: trace?.topRankBonus ?? 0,
|
||||
totalScore: trace?.totalScore ?? 0,
|
||||
contributions: trace?.contributions ?? [],
|
||||
},
|
||||
rerankScore: r.score,
|
||||
blendedScore,
|
||||
} : undefined;
|
||||
|
||||
return {
|
||||
file: r.file,
|
||||
@ -3091,6 +3226,7 @@ export async function hybridQuery(
|
||||
score: blendedScore,
|
||||
context: store.getContextForFile(r.file),
|
||||
docid: docidMap.get(r.file) || "",
|
||||
...(explainData ? { explain: explainData } : {}),
|
||||
};
|
||||
}).sort((a, b) => b.score - a.score);
|
||||
|
||||
@ -3201,6 +3337,7 @@ export interface StructuredSearchOptions {
|
||||
limit?: number; // default 10
|
||||
minScore?: number; // default 0
|
||||
candidateLimit?: number; // default RERANK_CANDIDATE_LIMIT
|
||||
explain?: boolean; // include backend/RRF/rerank score traces
|
||||
/** Future: domain intent hint for routing/boosting */
|
||||
intent?: string;
|
||||
hooks?: SearchHooks;
|
||||
@ -3232,6 +3369,7 @@ export async function structuredSearch(
|
||||
const limit = options?.limit ?? 10;
|
||||
const minScore = options?.minScore ?? 0;
|
||||
const candidateLimit = options?.candidateLimit ?? RERANK_CANDIDATE_LIMIT;
|
||||
const explain = options?.explain ?? false;
|
||||
const hooks = options?.hooks;
|
||||
|
||||
const collections = options?.collections;
|
||||
@ -3258,6 +3396,7 @@ export async function structuredSearch(
|
||||
}
|
||||
|
||||
const rankedLists: RankedResult[][] = [];
|
||||
const rankedListMeta: RankedListMeta[] = [];
|
||||
const docidMap = new Map<string, string>(); // filepath -> docid
|
||||
const hasVectors = !!store.db.prepare(
|
||||
`SELECT name FROM sqlite_master WHERE type='table' AND name='vectors_vec'`
|
||||
@ -3277,6 +3416,11 @@ export async function structuredSearch(
|
||||
file: r.filepath, displayPath: r.displayPath,
|
||||
title: r.title, body: r.body || "", score: r.score,
|
||||
})));
|
||||
rankedListMeta.push({
|
||||
source: "fts",
|
||||
queryType: "lex",
|
||||
query: search.query,
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -3284,7 +3428,10 @@ export async function structuredSearch(
|
||||
|
||||
// Step 2: Batch embed and run vector searches for vec/hyde
|
||||
if (hasVectors) {
|
||||
const vecSearches = searches.filter(s => s.type === 'vec' || s.type === 'hyde');
|
||||
const vecSearches = searches.filter(
|
||||
(s): s is StructuredSubSearch & { type: 'vec' | 'hyde' } =>
|
||||
s.type === 'vec' || s.type === 'hyde'
|
||||
);
|
||||
if (vecSearches.length > 0) {
|
||||
const llm = getDefaultLlamaCpp();
|
||||
const textsToEmbed = vecSearches.map(s => formatQueryForEmbedding(s.query));
|
||||
@ -3308,6 +3455,11 @@ export async function structuredSearch(
|
||||
file: r.filepath, displayPath: r.displayPath,
|
||||
title: r.title, body: r.body || "", score: r.score,
|
||||
})));
|
||||
rankedListMeta.push({
|
||||
source: "vec",
|
||||
queryType: vecSearches[i]!.type,
|
||||
query: vecSearches[i]!.query,
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -3319,6 +3471,7 @@ export async function structuredSearch(
|
||||
// Step 3: RRF fusion — first list gets 2x weight (assume caller ordered by importance)
|
||||
const weights = rankedLists.map((_, i) => i === 0 ? 2.0 : 1.0);
|
||||
const fused = reciprocalRankFusion(rankedLists, weights);
|
||||
const rrfTraceByFile = explain ? buildRrfTrace(rankedLists, weights, rankedListMeta) : null;
|
||||
const candidates = fused.slice(0, candidateLimit);
|
||||
|
||||
if (candidates.length === 0) return [];
|
||||
@ -3377,6 +3530,22 @@ export async function structuredSearch(
|
||||
const bestIdx = chunkInfo?.bestIdx ?? 0;
|
||||
const bestChunk = chunkInfo?.chunks[bestIdx]?.text || candidate?.body || "";
|
||||
const bestChunkPos = chunkInfo?.chunks[bestIdx]?.pos || 0;
|
||||
const trace = rrfTraceByFile?.get(r.file);
|
||||
const explainData: HybridQueryExplain | undefined = explain ? {
|
||||
ftsScores: trace?.contributions.filter(c => c.source === "fts").map(c => c.backendScore) ?? [],
|
||||
vectorScores: trace?.contributions.filter(c => c.source === "vec").map(c => c.backendScore) ?? [],
|
||||
rrf: {
|
||||
rank: rrfRank,
|
||||
positionScore: rrfScore,
|
||||
weight: rrfWeight,
|
||||
baseScore: trace?.baseScore ?? 0,
|
||||
topRankBonus: trace?.topRankBonus ?? 0,
|
||||
totalScore: trace?.totalScore ?? 0,
|
||||
contributions: trace?.contributions ?? [],
|
||||
},
|
||||
rerankScore: r.score,
|
||||
blendedScore,
|
||||
} : undefined;
|
||||
|
||||
return {
|
||||
file: r.file,
|
||||
@ -3388,6 +3557,7 @@ export async function structuredSearch(
|
||||
score: blendedScore,
|
||||
context: store.getContextForFile(r.file),
|
||||
docid: docidMap.get(r.file) || "",
|
||||
...(explainData ? { explain: explainData } : {}),
|
||||
};
|
||||
}).sort((a, b) => b.score - a.score);
|
||||
|
||||
|
||||
55
test/rrf-trace.test.ts
Normal file
55
test/rrf-trace.test.ts
Normal file
@ -0,0 +1,55 @@
|
||||
import { describe, expect, test } from "vitest";
|
||||
import { buildRrfTrace, reciprocalRankFusion, type RankedResult } from "../src/store";
|
||||
|
||||
// Checks that buildRrfTrace reproduces reciprocalRankFusion's totals and
// records where each contribution came from.
describe("buildRrfTrace", () => {
  test("matches reciprocalRankFusion totals and records per-list contributions", () => {
    const list1: RankedResult[] = [
      { file: "qmd://docs/a.md", displayPath: "docs/a.md", title: "A", body: "", score: 0.92 },
      { file: "qmd://docs/b.md", displayPath: "docs/b.md", title: "B", body: "", score: 0.81 },
    ];
    const list2: RankedResult[] = [
      { file: "qmd://docs/b.md", displayPath: "docs/b.md", title: "B", body: "", score: 0.77 },
      { file: "qmd://docs/a.md", displayPath: "docs/a.md", title: "A", body: "", score: 0.65 },
    ];

    const weights = [2.0, 1.0];
    const traces = buildRrfTrace(
      [list1, list2],
      weights,
      [
        { source: "fts", queryType: "lex", query: "lex query" },
        { source: "vec", queryType: "vec", query: "vec query" },
      ]
    );
    const fused = reciprocalRankFusion([list1, list2], weights);

    // Every fused document must have a trace whose total equals its fused score.
    for (const result of fused) {
      const trace = traces.get(result.file);
      expect(trace).toBeDefined();
      expect(trace!.totalScore).toBeCloseTo(result.score, 10);
    }

    const aTrace = traces.get("qmd://docs/a.md")!;
    expect(aTrace.contributions).toHaveLength(2);
    expect(aTrace.contributions[0]?.source).toBe("fts");
    expect(aTrace.contributions[1]?.source).toBe("vec");
    expect(aTrace.topRank).toBe(1);
    expect(aTrace.topRankBonus).toBeCloseTo(0.05, 10);

    // Per-list details: rank is 1-indexed, contribution is weight / (60 + rank).
    expect(aTrace.contributions[0]?.rank).toBe(1);
    expect(aTrace.contributions[0]?.weight).toBe(2.0);
    expect(aTrace.contributions[0]?.backendScore).toBe(0.92);
    expect(aTrace.contributions[0]?.rrfContribution).toBeCloseTo(2 / 61, 10);
    expect(aTrace.contributions[1]?.rank).toBe(2);
    expect(aTrace.contributions[1]?.weight).toBe(1.0);
    expect(aTrace.contributions[1]?.backendScore).toBe(0.65);
    expect(aTrace.contributions[1]?.rrfContribution).toBeCloseTo(1 / 62, 10);
  });

  test("applies top-rank bonus thresholds correctly", () => {
    const list: RankedResult[] = [
      { file: "qmd://docs/r1.md", displayPath: "docs/r1.md", title: "R1", body: "", score: 0.9 },
      { file: "qmd://docs/r2.md", displayPath: "docs/r2.md", title: "R2", body: "", score: 0.8 },
      { file: "qmd://docs/r3.md", displayPath: "docs/r3.md", title: "R3", body: "", score: 0.7 },
      { file: "qmd://docs/r4.md", displayPath: "docs/r4.md", title: "R4", body: "", score: 0.6 },
    ];

    const traces = buildRrfTrace([list], [1.0], [{ source: "fts", queryType: "lex", query: "rank" }]);

    expect(traces.get("qmd://docs/r1.md")?.topRankBonus).toBeCloseTo(0.05, 10);
    expect(traces.get("qmd://docs/r2.md")?.topRankBonus).toBeCloseTo(0.02, 10);
    expect(traces.get("qmd://docs/r3.md")?.topRankBonus).toBeCloseTo(0.02, 10);
    expect(traces.get("qmd://docs/r4.md")?.topRankBonus).toBeCloseTo(0.0, 10);
  });
});
|
||||
Loading…
Reference in New Issue
Block a user