feat: expose candidateLimit as MCP tool parameter and CLI flag

Reranking 40 chunks (the default candidateLimit) takes ~2 min on CPU.
The option already exists in hybridQuery()/structuredSearch() but was
never surfaced to users. This adds:

- `candidateLimit` param to the MCP `query` tool inputSchema
- `candidateLimit` field to the REST /query endpoint
- `--candidate-limit` / `-C` CLI flag for `qmd query`

Default stays 40 (no behavior change). Users on CPU-only machines can
lower it for a speed/recall tradeoff. Complements #231.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Andreas Spannagel 2026-02-24 14:13:49 +01:00
parent d6f3688d91
commit 87bd968d7b
2 changed files with 13 additions and 1 deletions

View File

@ -307,10 +307,13 @@ Intent-aware lex (C++ performance, not sports):
),
limit: z.number().optional().default(10).describe("Max results (default: 10)"),
minScore: z.number().optional().default(0).describe("Min relevance 0-1 (default: 0)"),
candidateLimit: z.number().optional().describe(
"Maximum candidates to rerank (default: 40, lower = faster but may miss results)"
),
collections: z.array(z.string()).optional().describe("Filter to collections (OR match)"),
},
},
async ({ searches, limit, minScore, collections }) => {
async ({ searches, limit, minScore, candidateLimit, collections }) => {
// Map to internal format
const subSearches: StructuredSubSearch[] = searches.map(s => ({
type: s.type,
@ -324,6 +327,7 @@ Intent-aware lex (C++ performance, not sports):
collections: effectiveCollections.length > 0 ? effectiveCollections : undefined,
limit,
minScore,
candidateLimit,
});
// Use first lex or vec query for snippet extraction
@ -635,6 +639,7 @@ export async function startMcpHttpServer(port: number, options?: { quiet?: boole
collections: effectiveCollections.length > 0 ? effectiveCollections : undefined,
limit: params.limit ?? 10,
minScore: params.minScore ?? 0,
candidateLimit: params.candidateLimit,
});
// Use first lex or vec query for snippet extraction

View File

@ -1751,6 +1751,7 @@ type OutputOptions = {
collection?: string | string[]; // Filter by collection name(s)
lineNumbers?: boolean; // Add line numbers to output
context?: string; // Optional context for query expansion
candidateLimit?: number; // Max candidates to rerank (default: 40)
};
// Highlight query terms in text (skip short words < 3 chars)
@ -2141,6 +2142,7 @@ async function querySearch(query: string, opts: OutputOptions, _embedModel: stri
collections: singleCollection ? [singleCollection] : undefined,
limit: opts.all ? 500 : (opts.limit || 10),
minScore: opts.minScore || 0,
candidateLimit: opts.candidateLimit,
hooks: {
onEmbedStart: (count) => {
process.stderr.write(`${c.dim}Embedding ${count} ${count === 1 ? 'query' : 'queries'}...${c.reset}`);
@ -2164,6 +2166,7 @@ async function querySearch(query: string, opts: OutputOptions, _embedModel: stri
collection: singleCollection,
limit: opts.all ? 500 : (opts.limit || 10),
minScore: opts.minScore || 0,
candidateLimit: opts.candidateLimit,
hooks: {
onStrongSignal: (score) => {
process.stderr.write(`${c.dim}Strong BM25 signal (${score.toFixed(2)}) — skipping expansion${c.reset}\n`);
@ -2271,6 +2274,8 @@ function parseCLI() {
from: { type: "string" }, // start line
"max-bytes": { type: "string" }, // max bytes for multi-get
"line-numbers": { type: "boolean" }, // add line numbers to output
// Query options
"candidate-limit": { type: "string", short: "C" },
// MCP HTTP transport options
http: { type: "boolean" },
daemon: { type: "boolean" },
@ -2308,6 +2313,7 @@ function parseCLI() {
all: isAll,
collection: values.collection as string[] | undefined,
lineNumbers: !!values["line-numbers"],
candidateLimit: values["candidate-limit"] ? parseInt(String(values["candidate-limit"]), 10) : undefined,
};
return {
@ -2409,6 +2415,7 @@ function showHelp(): void {
console.log(" --all - Return all matches (pair with --min-score)");
console.log(" --min-score <num> - Minimum similarity score");
console.log(" --full - Output full document instead of snippet");
console.log(" -C, --candidate-limit <n> - Max candidates to rerank (default 40, lower = faster)");
console.log(" --line-numbers - Include line numbers in output");
console.log(" --files | --json | --csv | --md | --xml - Output format");
console.log(" -c, --collection <name> - Filter by one or more collections");