feat(cli): add --no-rerank flag to skip reranking in qmd query

Exposes the existing skipRerank option as a --no-rerank CLI flag for
qmd query. On CPU-only machines, reranking takes 120s+ for 20 chunks;
this flag lets users get RRF-fused results without the reranking penalty.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Matt Van Horn 2026-03-10 23:41:43 -07:00
parent ae3604cb88
commit 11b3f17fba

View File

@ -1724,6 +1724,7 @@ type OutputOptions = {
context?: string; // Optional context for query expansion
candidateLimit?: number; // Max candidates to rerank (default: 40)
intent?: string; // Domain intent for disambiguation
skipRerank?: boolean; // Skip LLM reranking, use RRF scores only
};
// Highlight query terms in text (skip short words < 3 chars)
@ -2206,6 +2207,7 @@ async function querySearch(query: string, opts: OutputOptions, _embedModel: stri
limit: opts.all ? 500 : (opts.limit || 10),
minScore: opts.minScore || 0,
candidateLimit: opts.candidateLimit,
skipRerank: opts.skipRerank,
explain: !!opts.explain,
intent,
hooks: {
@ -2232,6 +2234,7 @@ async function querySearch(query: string, opts: OutputOptions, _embedModel: stri
limit: opts.all ? 500 : (opts.limit || 10),
minScore: opts.minScore || 0,
candidateLimit: opts.candidateLimit,
skipRerank: opts.skipRerank,
explain: !!opts.explain,
intent,
hooks: {
@ -2344,6 +2347,7 @@ function parseCLI() {
"line-numbers": { type: "boolean" }, // add line numbers to output
// Query options
"candidate-limit": { type: "string", short: "C" },
"no-rerank": { type: "boolean", default: false },
intent: { type: "string" },
// MCP HTTP transport options
http: { type: "boolean" },
@ -2383,6 +2387,7 @@ function parseCLI() {
collection: values.collection as string[] | undefined,
lineNumbers: !!values["line-numbers"],
candidateLimit: values["candidate-limit"] ? parseInt(String(values["candidate-limit"]), 10) : undefined,
skipRerank: !!values["no-rerank"],
explain: !!values.explain,
intent: values.intent as string | undefined,
};
@ -2599,6 +2604,7 @@ function showHelp(): void {
console.log(" --min-score <num> - Minimum similarity score");
console.log(" --full - Output full document instead of snippet");
console.log(" -C, --candidate-limit <n> - Max candidates to rerank (default 40, lower = faster)");
console.log(" --no-rerank - Skip LLM reranking (use RRF scores only, much faster on CPU)");
console.log(" --line-numbers - Include line numbers in output");
console.log(" --explain - Include retrieval score traces (query --json/CLI)");
console.log(" --files | --json | --csv | --md | --xml - Output format");