feat: expose candidateLimit as MCP tool parameter and CLI flag
With the default candidateLimit of 40, reranking the candidate chunks takes ~2 min on CPU. The option already exists in hybridQuery()/structuredSearch() but was never surfaced to users.

This adds:
- `candidateLimit` param to the MCP `query` tool inputSchema
- `candidateLimit` field to the REST /query endpoint
- `--candidate-limit` / `-C` CLI flag for `qmd query`

The default stays 40 (no behavior change). Users on CPU-only machines can lower it to trade recall for speed. Complements #231.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
d6f3688d91
commit
87bd968d7b
@ -307,10 +307,13 @@ Intent-aware lex (C++ performance, not sports):
|
||||
),
|
||||
limit: z.number().optional().default(10).describe("Max results (default: 10)"),
|
||||
minScore: z.number().optional().default(0).describe("Min relevance 0-1 (default: 0)"),
|
||||
candidateLimit: z.number().optional().describe(
|
||||
"Maximum candidates to rerank (default: 40, lower = faster but may miss results)"
|
||||
),
|
||||
collections: z.array(z.string()).optional().describe("Filter to collections (OR match)"),
|
||||
},
|
||||
},
|
||||
async ({ searches, limit, minScore, collections }) => {
|
||||
async ({ searches, limit, minScore, candidateLimit, collections }) => {
|
||||
// Map to internal format
|
||||
const subSearches: StructuredSubSearch[] = searches.map(s => ({
|
||||
type: s.type,
|
||||
@ -324,6 +327,7 @@ Intent-aware lex (C++ performance, not sports):
|
||||
collections: effectiveCollections.length > 0 ? effectiveCollections : undefined,
|
||||
limit,
|
||||
minScore,
|
||||
candidateLimit,
|
||||
});
|
||||
|
||||
// Use first lex or vec query for snippet extraction
|
||||
@ -635,6 +639,7 @@ export async function startMcpHttpServer(port: number, options?: { quiet?: boole
|
||||
collections: effectiveCollections.length > 0 ? effectiveCollections : undefined,
|
||||
limit: params.limit ?? 10,
|
||||
minScore: params.minScore ?? 0,
|
||||
candidateLimit: params.candidateLimit,
|
||||
});
|
||||
|
||||
// Use first lex or vec query for snippet extraction
|
||||
|
||||
@ -1751,6 +1751,7 @@ type OutputOptions = {
|
||||
collection?: string | string[]; // Filter by collection name(s)
|
||||
lineNumbers?: boolean; // Add line numbers to output
|
||||
context?: string; // Optional context for query expansion
|
||||
candidateLimit?: number; // Max candidates to rerank (default: 40)
|
||||
};
|
||||
|
||||
// Highlight query terms in text (skip short words < 3 chars)
|
||||
@ -2141,6 +2142,7 @@ async function querySearch(query: string, opts: OutputOptions, _embedModel: stri
|
||||
collections: singleCollection ? [singleCollection] : undefined,
|
||||
limit: opts.all ? 500 : (opts.limit || 10),
|
||||
minScore: opts.minScore || 0,
|
||||
candidateLimit: opts.candidateLimit,
|
||||
hooks: {
|
||||
onEmbedStart: (count) => {
|
||||
process.stderr.write(`${c.dim}Embedding ${count} ${count === 1 ? 'query' : 'queries'}...${c.reset}`);
|
||||
@ -2164,6 +2166,7 @@ async function querySearch(query: string, opts: OutputOptions, _embedModel: stri
|
||||
collection: singleCollection,
|
||||
limit: opts.all ? 500 : (opts.limit || 10),
|
||||
minScore: opts.minScore || 0,
|
||||
candidateLimit: opts.candidateLimit,
|
||||
hooks: {
|
||||
onStrongSignal: (score) => {
|
||||
process.stderr.write(`${c.dim}Strong BM25 signal (${score.toFixed(2)}) — skipping expansion${c.reset}\n`);
|
||||
@ -2271,6 +2274,8 @@ function parseCLI() {
|
||||
from: { type: "string" }, // start line
|
||||
"max-bytes": { type: "string" }, // max bytes for multi-get
|
||||
"line-numbers": { type: "boolean" }, // add line numbers to output
|
||||
// Query options
|
||||
"candidate-limit": { type: "string", short: "C" },
|
||||
// MCP HTTP transport options
|
||||
http: { type: "boolean" },
|
||||
daemon: { type: "boolean" },
|
||||
@ -2308,6 +2313,7 @@ function parseCLI() {
|
||||
all: isAll,
|
||||
collection: values.collection as string[] | undefined,
|
||||
lineNumbers: !!values["line-numbers"],
|
||||
candidateLimit: values["candidate-limit"] ? parseInt(String(values["candidate-limit"]), 10) : undefined,
|
||||
};
|
||||
|
||||
return {
|
||||
@ -2409,6 +2415,7 @@ function showHelp(): void {
|
||||
console.log(" --all - Return all matches (pair with --min-score)");
|
||||
console.log(" --min-score <num> - Minimum similarity score");
|
||||
console.log(" --full - Output full document instead of snippet");
|
||||
console.log(" -C, --candidate-limit <n> - Max candidates to rerank (default 40, lower = faster)");
|
||||
console.log(" --line-numbers - Include line numbers in output");
|
||||
console.log(" --files | --json | --csv | --md | --xml - Output format");
|
||||
console.log(" -c, --collection <name> - Filter by one or more collections");
|
||||
|
||||
Loading…
Reference in New Issue
Block a user