diff --git a/skills/qmd/SKILL.md b/skills/qmd/SKILL.md index 6ad55fb..1bdb2a7 100644 --- a/skills/qmd/SKILL.md +++ b/skills/qmd/SKILL.md @@ -5,7 +5,7 @@ license: MIT compatibility: Requires qmd CLI or MCP server. Install via `bun install -g https://github.com/tobi/qmd`. metadata: author: tobi - version: "1.1.1" + version: "1.2.0" allowed-tools: Bash(qmd:*), mcp__qmd__* --- @@ -15,137 +15,150 @@ QMD is a local, on-device search engine for markdown content. It indexes your no ## QMD Status -!`qmd status 2>/dev/null || echo "Not installed. Run: bun install -g https://github.com/tobi/qmd"` +!`qmd status 2>/dev/null || echo "Not installed. See installation instructions below."` -## When to Use This Skill +## Installation -- User asks to search their notes, documents, or knowledge base -- User needs to find information in their markdown files -- User wants to retrieve specific documents or search across collections -- User asks "what did I write about X" or "find my notes on Y" -- User needs semantic search (conceptual similarity) not just keyword matching -- User mentions meeting notes, transcripts, or documentation lookup - -## Search Commands - -Choose the right search mode for the task: - -| Command | Use When | Speed | -|---------|----------|-------| -| `qmd search` | Exact keyword matches needed | Fast | -| `qmd vsearch` | Keywords aren't working, need conceptual matches | Medium | -| `qmd query` | Best results needed, speed not critical | Slower | +### Install QMD ```bash -# Fast keyword search (BM25) -qmd search "your query" +# Install globally with bun +bun install -g https://github.com/tobi/qmd -# Semantic vector search (finds conceptually similar content) +# Or with npm +npm install -g https://github.com/tobi/qmd +``` + +### Configure MCP Server + +**Claude Code** — add to `~/.claude/settings.json`: +```json +{ + "mcpServers": { + "qmd": { "command": "qmd", "args": ["mcp"] } + } +} +``` + +**Claude Desktop** — add to `~/Library/Application Support/Claude/claude_desktop_config.json`: +```json +{ + "mcpServers": { + "qmd": { "command": "qmd", "args": ["mcp"] } + } +} +``` + +**OpenClaw** — add to `~/.openclaw/openclaw.json` under `mcp.servers`: +```json +{ + "mcp": { + "servers": { + "qmd": { "command": "qmd", "args": ["mcp"] } + } + } +} +``` + +### Index Your Content + +```bash +# Add a collection (indexes all markdown files) +qmd collection add ~/Documents/notes --name notes + +# Generate embeddings for semantic search +qmd embed + +# Check status +qmd status +``` + +## Search Strategy — Use `structured_search` + +**You are a capable LLM.** Use `structured_search` instead of `deep_search` — you generate better query expansions than the local model. + +### How structured_search Works + +You provide 2-4 sub-searches, each with a type: + +| Type | Purpose | Example | +|------|---------|---------| +| `lex` | BM25 keywords — exact terms, names, identifiers | `"CAP theorem consistency"` | +| `vec` | Semantic — natural language questions | `"what is the tradeoff between consistency and availability"` | +| `hyde` | Hypothetical document — what the answer looks like | `"The CAP theorem states that distributed systems can only guarantee two of three properties..."` | + +### Example: Finding CAP Theorem Docs + +```json +{ + "searches": [ + { "type": "lex", "query": "CAP theorem consistency availability partition" }, + { "type": "vec", "query": "distributed systems tradeoff between data consistency and availability" }, + { "type": "hyde", "query": "The CAP theorem proves that a distributed system cannot simultaneously provide consistency, availability, and partition tolerance. You must choose two." } + ], + "limit": 10 +} +``` + +### Guidelines for Query Expansion + +1. **lex queries**: 2-5 keyword terms. Include synonyms and related terms. +2. **vec queries**: Full natural language questions. Be specific. +3. **hyde queries**: 50-100 words. Write what the answer *looks like*, not the question. +4. **Order matters**: First search gets 2x weight in fusion. + +### When to Use Each Search Type + +| Situation | Approach | +|-----------|----------| +| Know exact terms (names, code, acronyms) | Start with `lex` | +| Conceptual search, don't know vocabulary | Lead with `vec` | +| Complex topic, want best recall | Use all three types | +| Quick lookup | Single `lex` query is fine | + +## MCP Tools Reference + +| Tool | Speed | Use Case | +|------|-------|----------| +| `structured_search` | ~5s | **Recommended** — you provide query expansions | +| `search` | ~30ms | Fast keyword lookup (BM25) | +| `vector_search` | ~2s | Semantic similarity | +| `deep_search` | ~10s | Auto-expands query (uses small local model) | +| `get` | instant | Retrieve doc by path or `#docid` | +| `multi_get` | instant | Retrieve multiple docs | +| `status` | instant | Index health | + +## CLI Fallback + +If MCP isn't configured, use the CLI: + +```bash +# Keyword search +qmd search "your query" -n 10 + +# Semantic search qmd vsearch "your query" -# Hybrid search with re-ranking (best quality) +# Hybrid with re-ranking (auto-expands) qmd query "your query" -``` -## Common Options - -```bash --n # Number of results (default: 5) --c, --collection # Restrict to specific collection ---all # Return all matches ---min-score # Minimum score threshold (0.0-1.0) ---full # Show full document content ---json # JSON output for processing ---files # List files with scores ---line-numbers # Add line numbers to output -``` - -## Document Retrieval - -```bash -# Get document by path -qmd get "collection/path/to/doc.md" - -# Get document by docid (shown in search results as #abc123) -qmd get "#abc123" - -# Get with line numbers for code review -qmd get "docs/api.md" --line-numbers - -# Get multiple documents by glob pattern -qmd multi-get "docs/*.md" - -# Get multiple documents by list -qmd multi-get "doc1.md, doc2.md, #abc123" -``` - -## Index Management - -```bash -# Check index status and available collections -qmd status - -# List all collections -qmd collection list - -# List files in a collection -qmd ls - -# Update index (re-scan files for changes) -qmd update +# Retrieve document +qmd get "#abc123" --full ``` ## Score Interpretation -| Score | Meaning | Action | -|-------|---------|--------| -| 0.8 - 1.0 | Highly relevant | Show to user | -| 0.5 - 0.8 | Moderately relevant | Include if few results | -| 0.2 - 0.5 | Somewhat relevant | Only if user wants more | -| 0.0 - 0.2 | Low relevance | Usually skip | +| Score | Meaning | +|-------|---------| +| 0.8+ | Highly relevant — show to user | +| 0.5-0.8 | Moderately relevant — include if few results | +| 0.2-0.5 | Weak match — only if user wants more | +| <0.2 | Skip | -## Recommended Workflow +## Workflow Example -1. **Check what's available**: `qmd status` -2. **Start with keyword search**: `qmd search "topic" -n 10` -3. **Try semantic if needed**: `qmd vsearch "describe the concept"` -4. **Use hybrid for best results**: `qmd query "question" --min-score 0.4` -5. **Retrieve full documents**: `qmd get "#docid" --full` - -## Example: Finding Meeting Notes - -```bash -# Search for meetings about a topic -qmd search "quarterly review" -c meetings -n 5 - -# Get semantic matches -qmd vsearch "performance discussion" -c meetings - -# Retrieve the full meeting notes -qmd get "#abc123" --full -``` - -## Example: Research Across All Notes - -```bash -# Hybrid search for best results -qmd query "authentication implementation" --min-score 0.3 --json - -# Get all relevant files for deeper analysis -qmd query "auth flow" --all --files --min-score 0.4 -``` - -## MCP Server Integration - -This plugin configures the qmd MCP server automatically. When available, prefer MCP tools over Bash for tighter integration: - -| MCP Tool | Equivalent CLI | Purpose | -|----------|---------------|---------| -| `qmd_search` | `qmd search` | Fast BM25 keyword search | -| `qmd_vector_search` | `qmd vsearch` | Semantic vector search | -| `qmd_deep_search` | `qmd query` | Deep search with expansion and reranking | -| `qmd_get` | `qmd get` | Retrieve document by path or docid | -| `qmd_multi_get` | `qmd multi-get` | Retrieve multiple documents | -| `qmd_status` | `qmd status` | Index health and collection info | - -For manual MCP setup without the plugin, see [references/mcp-setup.md](references/mcp-setup.md). +1. **Check collections**: `qmd status` or `status` tool +2. **Search with structured_search**: Generate lex + vec + hyde queries +3. **Review results**: Check scores and snippets +4. **Retrieve full docs**: Use `get` with `#docid` from results +5. **Iterate**: Refine queries based on what you find diff --git a/skills/qmd/references/mcp-setup.md b/skills/qmd/references/mcp-setup.md index 2a6a626..e2d55ff 100644 --- a/skills/qmd/references/mcp-setup.md +++ b/skills/qmd/references/mcp-setup.md @@ -1,10 +1,24 @@ # QMD MCP Server Setup -Manual MCP configuration for use without the qmd plugin. +## Quick Start -> **Note**: If using the qmd plugin, MCP configuration is included automatically. This is only needed for manual setup. +1. **Install QMD** + ```bash + bun install -g https://github.com/tobi/qmd + # or: npm install -g https://github.com/tobi/qmd + ``` -## Claude Code +2. **Configure your client** (see below) + +3. **Index your content** + ```bash + qmd collection add ~/path/to/markdown --name myknowledge + qmd embed # Generate embeddings for semantic search + ``` + +## Client Configuration + +### Claude Code Add to `~/.claude/settings.json`: @@ -19,9 +33,9 @@ Add to `~/.claude/settings.json`: } ``` -## Claude Desktop +### Claude Desktop -Add to `~/Library/Application Support/Claude/claude_desktop_config.json`: +Add to `~/Library/Application Support/Claude/claude_desktop_config.json` (macOS) or `%APPDATA%\Claude\claude_desktop_config.json` (Windows): ```json { @@ -34,79 +48,132 @@ Add to `~/Library/Application Support/Claude/claude_desktop_config.json`: } ``` -## Available MCP Tools +### OpenClaw -Once configured, these tools become available: +Add to `~/.openclaw/openclaw.json`: -### qmd_search -Fast BM25 keyword search. +```json +{ + "mcp": { + "servers": { + "qmd": { + "command": "qmd", + "args": ["mcp"] + } + } + } +} +``` -**Parameters:** -- `query` (required): Search query string -- `collection` (optional): Restrict to specific collection -- `limit` (optional): Number of results (default: 5) -- `minScore` (optional): Minimum relevance score +### HTTP Mode (for remote/multi-client) -### qmd_vector_search -Semantic vector search for conceptual similarity. +```bash +# Start HTTP server (default port 8181) +qmd mcp --http -**Parameters:** -- `query` (required): Search query string -- `collection` (optional): Restrict to specific collection -- `limit` (optional): Number of results (default: 5) -- `minScore` (optional): Minimum relevance score +# Or as a background daemon +qmd mcp --http --daemon -### qmd_deep_search -Hybrid search combining BM25, vector search, and LLM re-ranking. +# Stop daemon +qmd mcp stop +``` -**Parameters:** -- `query` (required): Search query string -- `collection` (optional): Restrict to specific collection -- `limit` (optional): Number of results (default: 5) -- `minScore` (optional): Minimum relevance score +## MCP Tools + +### structured_search ⭐ Recommended + +Execute pre-expanded search queries. **Use this** — you're a capable LLM that generates better query expansions than the local model. + +```json +{ + "searches": [ + { "type": "lex", "query": "keyword phrases here" }, + { "type": "vec", "query": "natural language question" }, + { "type": "hyde", "query": "A hypothetical answer passage..." } + ], + "limit": 10, + "collection": "optional-filter", + "minScore": 0.0 +} +``` + +**Search types:** +- `lex` — BM25 keyword search. Short phrases, 2-5 terms. +- `vec` — Semantic vector search. Natural language questions. +- `hyde` — Hypothetical document. Write what the answer looks like (50-100 words). + +### search + +Fast BM25 keyword search (~30ms). + +| Parameter | Type | Description | +|-----------|------|-------------| +| `query` | string | Search query | +| `collection` | string? | Filter by collection | +| `limit` | number? | Max results (default: 5) | +| `minScore` | number? | Min relevance 0-1 | + +### vector_search + +Semantic similarity search (~2s). + +| Parameter | Type | Description | +|-----------|------|-------------| +| `query` | string | Natural language query | +| `collection` | string? | Filter by collection | +| `limit` | number? | Max results (default: 5) | +| `minScore` | number? | Min relevance 0-1 | + +### deep_search + +Hybrid search with automatic query expansion (~10s). Uses a small local model to expand your query. **Prefer `structured_search`** — you generate better expansions. + +| Parameter | Type | Description | +|-----------|------|-------------| +| `query` | string | Search query | +| `collection` | string? | Filter by collection | +| `limit` | number? | Max results (default: 5) | +| `minScore` | number? | Min relevance 0-1 | + +### get -### qmd_get Retrieve a document by path or docid. -**Parameters:** -- `path` (required): Document path or docid (e.g., `#abc123`) -- `full` (optional): Return full content (default: true) -- `lineNumbers` (optional): Include line numbers +| Parameter | Type | Description | +|-----------|------|-------------| +| `path` | string | File path or `#docid` | +| `full` | boolean? | Return full content | +| `lineNumbers` | boolean? | Add line numbers | -### qmd_multi_get -Retrieve multiple documents. +### multi_get -**Parameters:** -- `pattern` (required): Glob pattern or comma-separated list -- `maxBytes` (optional): Skip files larger than this (default: 10KB) +Retrieve multiple documents by glob or list. -### qmd_status -Get index health and collection information. +| Parameter | Type | Description | +|-----------|------|-------------| +| `pattern` | string | Glob pattern or comma-separated paths/docids | +| `maxBytes` | number? | Skip files larger than this (default: 10KB) | -**Parameters:** None +### status + +Get index health and collection info. No parameters. ## Troubleshooting -### MCP server not starting -- Ensure qmd is in your PATH: `which qmd` -- Try running `qmd mcp` manually to see errors -- Check that Bun is installed: `bun --version` +**MCP server not starting** +- Check qmd is in PATH: `which qmd` +- Run manually to see errors: `qmd mcp` +- Verify bun installed: `bun --version` -### No results returned -- Verify collections exist: `qmd collection list` -- Check index status: `qmd status` -- Ensure embeddings are generated: `qmd embed` +**No results / empty index** +- Check collections: `qmd collection list` +- Verify status: `qmd status` +- Generate embeddings: `qmd embed` -### Slow searches -- For faster results, use `qmd_search` instead of `qmd_deep_search` -- The first search may be slow while models load (~3GB) -- Subsequent searches are much faster +**Slow first search** +- Normal — models load on first use (~3GB) +- Subsequent searches are fast -## Choosing Between CLI and MCP - -| Scenario | Recommendation | -|----------|---------------| -| MCP configured | Use `qmd_*` tools directly | -| No MCP | Use Bash with `qmd` commands | -| Complex pipelines | Bash may be more flexible | -| Simple lookups | MCP tools are cleaner | +**structured_search not found** +- Update QMD: `bun install -g https://github.com/tobi/qmd` +- Requires v1.0.7+ diff --git a/src/mcp.ts b/src/mcp.ts index fa67487..02807c7 100644 --- a/src/mcp.ts +++ b/src/mcp.ts @@ -21,9 +21,10 @@ import { addLineNumbers, hybridQuery, vectorSearchQuery, + structuredSearch, DEFAULT_MULTI_GET_MAX_BYTES, } from "./store.js"; -import type { Store } from "./store.js"; +import type { Store, StructuredSubSearch } from "./store.js"; import { getCollection, getGlobalContext } from "./collections.js"; import { disposeDefaultLlamaCpp } from "./llm.js"; @@ -123,9 +124,15 @@ function buildInstructions(store: Store): string { // Tool schemas describe parameters; instructions describe strategy. lines.push(""); lines.push("Search:"); - lines.push(" - `search` (~30ms) — keyword and exact phrase matching."); - lines.push(" - `vector_search` (~2s) — meaning-based, finds adjacent concepts even when vocabulary differs."); - lines.push(" - `deep_search` (~10s) — auto-expands the query into variations, searches each by keyword and meaning, reranks for top hits."); + lines.push(" - `search` (~30ms) — BM25 keyword matching. Fast, exact terms."); + lines.push(" - `vector_search` (~2s) — semantic search. Finds synonyms and related concepts."); + lines.push(" - `deep_search` (~10s) — auto-expands query + reranks. Use when you don't know the exact terms."); + lines.push(" - `structured_search` (~5s) — YOU provide the query variations. Best for complex/nuanced queries."); + lines.push(""); + lines.push("For structured_search, pass 2-4 sub-searches:"); + lines.push(" - type:'lex' for keyword phrases (BM25)"); + lines.push(" - type:'vec' for semantic questions"); + lines.push(" - type:'hyde' for hypothetical answer snippets"); // --- Retrieval workflow --- lines.push(""); @@ -350,6 +357,85 @@ function createMcpServer(store: Store): McpServer { } ); + // --------------------------------------------------------------------------- + // Tool: qmd_structured_search (Pre-expanded queries from LLM) + // --------------------------------------------------------------------------- + + const subSearchSchema = z.object({ + type: z.enum(['lex', 'vec', 'hyde']).describe( + "Search type: 'lex' = BM25 keyword search (exact terms, fast), " + + "'vec' = semantic vector search (meaning-based, finds synonyms/paraphrases), " + + "'hyde' = hypothetical document (imagine what the answer looks like)" + ), + query: z.string().describe("The search query text"), + }); + + server.registerTool( + "structured_search", + { + title: "Structured Search", + description: `Execute pre-expanded search queries. Skips internal query expansion — you provide the search variations directly. + +**When to use:** You're an LLM that can generate better query expansions than a small local model. Pass 2-4 sub-searches for best results. + +**Search types:** +- \`lex\`: BM25 keyword search. Use short keyword phrases (2-5 terms). Good for exact terms, names, code identifiers. +- \`vec\`: Semantic vector search. Use natural language questions or descriptions. Finds documents with similar meaning even when vocabulary differs. +- \`hyde\`: Hypothetical document. Write a short passage (~50-100 words) that looks like what you're searching for. Powerful for finding conceptually similar content. + +**Example:** To find CAP theorem docs, pass: +- { type: "lex", query: "CAP theorem consistency availability" } +- { type: "vec", query: "what is the tradeoff between data consistency and system availability in distributed systems" } +- { type: "hyde", query: "The CAP theorem states that a distributed system can only guarantee two of three properties: Consistency, Availability, and Partition tolerance." }`, + annotations: { readOnlyHint: true, openWorldHint: false }, + inputSchema: { + searches: z.array(subSearchSchema).min(1).max(10).describe( + "Array of sub-searches to execute. Order matters — first search gets higher weight in fusion." + ), + limit: z.number().optional().default(10).describe("Maximum number of results (default: 10)"), + minScore: z.number().optional().default(0).describe("Minimum relevance score 0-1 (default: 0)"), + collection: z.string().optional().describe("Filter to a specific collection by name"), + intent: z.string().optional().describe("(Future) Domain intent hint, e.g., 'distributed systems', 'startup finances'"), + }, + }, + async ({ searches, limit, minScore, collection, intent }) => { + // Map to internal format + const subSearches: StructuredSubSearch[] = searches.map(s => ({ + type: s.type, + query: s.query, + })); + + const results = await structuredSearch(store, subSearches, { + collection, + limit, + minScore, + intent, + }); + + // Use first lex or vec query for snippet extraction + const primaryQuery = searches.find(s => s.type === 'lex')?.query + || searches.find(s => s.type === 'vec')?.query + || searches[0]?.query || ""; + + const filtered: SearchResultItem[] = results.map(r => { + const { line, snippet } = extractSnippet(r.bestChunk, primaryQuery, 300); + return { + docid: `#${r.docid}`, + file: r.displayPath, + title: r.title, + score: Math.round(r.score * 100) / 100, + context: r.context, + snippet: addLineNumbers(snippet, line), + }; + }); + + return { + content: [{ type: "text", text: formatSearchSummary(filtered, primaryQuery) }], + structuredContent: { results: filtered }, + }; + } + ); + // --------------------------------------------------------------------------- // Tool: qmd_get (Retrieve document) // --------------------------------------------------------------------------- @@ -609,6 +695,54 @@ export async function startMcpHttpServer(port: number, options?: { quiet?: boole return; } + // REST endpoint: POST /search — structured search without MCP protocol + if (pathname === "/search" && nodeReq.method === "POST") { + const rawBody = await collectBody(nodeReq); + const params = JSON.parse(rawBody); + + // Validate required fields + if (!params.searches || !Array.isArray(params.searches)) { + nodeRes.writeHead(400, { "Content-Type": "application/json" }); + nodeRes.end(JSON.stringify({ error: "Missing required field: searches (array)" })); + return; + } + + // Map to internal format + const subSearches: StructuredSubSearch[] = params.searches.map((s: any) => ({ + type: s.type as 'lex' | 'vec' | 'hyde', + query: String(s.query || ""), + })); + + const results = await structuredSearch(store, subSearches, { + collection: params.collection, + limit: params.limit ?? 10, + minScore: params.minScore ?? 0, + intent: params.intent, + }); + + // Use first lex or vec query for snippet extraction + const primaryQuery = params.searches.find((s: any) => s.type === 'lex')?.query + || params.searches.find((s: any) => s.type === 'vec')?.query + || params.searches[0]?.query || ""; + + const formatted = results.map(r => { + const { line, snippet } = extractSnippet(r.bestChunk, primaryQuery, 300); + return { + docid: `#${r.docid}`, + file: r.displayPath, + title: r.title, + score: Math.round(r.score * 100) / 100, + context: r.context, + snippet: addLineNumbers(snippet, line), + }; + }); + + nodeRes.writeHead(200, { "Content-Type": "application/json" }); + nodeRes.end(JSON.stringify({ results: formatted })); + log(`${ts()} POST /search ${params.searches.length} queries (${Date.now() - reqStart}ms)`); + return; + } + if (pathname === "/mcp" && nodeReq.method === "POST") { const rawBody = await collectBody(nodeReq); const body = JSON.parse(rawBody); diff --git a/src/store.ts b/src/store.ts index b68f8c0..fbdafc6 100644 --- a/src/store.ts +++ b/src/store.ts @@ -3055,3 +3055,194 @@ export async function vectorSearchQuery( .filter(r => r.score >= minScore) .slice(0, limit); } + +// ============================================================================= +// Structured search — pre-expanded queries from LLM +// ============================================================================= + +/** + * A single sub-search in a structured search request. + * Matches the format used in QMD training data. + */ +export interface StructuredSubSearch { + /** Search type: 'lex' for BM25 keywords, 'vec' for semantic, 'hyde' for hypothetical document */ + type: 'lex' | 'vec' | 'hyde'; + /** The search query text */ + query: string; +} + +export interface StructuredSearchOptions { + collection?: string; + limit?: number; // default 10 + minScore?: number; // default 0 + candidateLimit?: number; // default RERANK_CANDIDATE_LIMIT + /** Future: domain intent hint for routing/boosting */ + intent?: string; + hooks?: SearchHooks; +} + +/** + * Structured search: execute pre-expanded queries without LLM query expansion. + * + * Designed for LLM callers (MCP/HTTP) that generate their own query expansions. + * Skips the internal expandQuery() step — goes directly to: + * + * Pipeline: + * 1. Route searches: lex→FTS, vec/hyde→vector (batch embed) + * 2. RRF fusion across all result lists + * 3. Chunk documents + keyword-best-chunk selection + * 4. Rerank on chunks + * 5. Position-aware score blending + * 6. Dedup, filter, slice + * + * This is the recommended endpoint for capable LLMs — they can generate + * better query variations than our small local model, especially for + * domain-specific or nuanced queries. + */ +export async function structuredSearch( + store: Store, + searches: StructuredSubSearch[], + options?: StructuredSearchOptions +): Promise { + const limit = options?.limit ?? 10; + const minScore = options?.minScore ?? 0; + const candidateLimit = options?.candidateLimit ?? RERANK_CANDIDATE_LIMIT; + const collection = options?.collection; + const hooks = options?.hooks; + + if (searches.length === 0) return []; + + const rankedLists: RankedResult[][] = []; + const docidMap = new Map(); // filepath -> docid + const hasVectors = !!store.db.prepare( + `SELECT name FROM sqlite_master WHERE type='table' AND name='vectors_vec'` + ).get(); + + // Step 1: Run FTS for all lex searches (sync, instant) + for (const search of searches) { + if (search.type === 'lex') { + const ftsResults = store.searchFTS(search.query, 20, collection); + if (ftsResults.length > 0) { + for (const r of ftsResults) docidMap.set(r.filepath, r.docid); + rankedLists.push(ftsResults.map(r => ({ + file: r.filepath, displayPath: r.displayPath, + title: r.title, body: r.body || "", score: r.score, + }))); + } + } + } + + // Step 2: Batch embed and run vector searches for vec/hyde + if (hasVectors) { + const vecSearches = searches.filter(s => s.type === 'vec' || s.type === 'hyde'); + if (vecSearches.length > 0) { + const llm = getDefaultLlamaCpp(); + const textsToEmbed = vecSearches.map(s => formatQueryForEmbedding(s.query)); + const embeddings = await llm.embedBatch(textsToEmbed); + + for (let i = 0; i < vecSearches.length; i++) { + const embedding = embeddings[i]?.embedding; + if (!embedding) continue; + + const vecResults = await store.searchVec( + vecSearches[i]!.query, DEFAULT_EMBED_MODEL, 20, collection, + undefined, embedding + ); + if (vecResults.length > 0) { + for (const r of vecResults) docidMap.set(r.filepath, r.docid); + rankedLists.push(vecResults.map(r => ({ + file: r.filepath, displayPath: r.displayPath, + title: r.title, body: r.body || "", score: r.score, + }))); + } + } + } + } + + if (rankedLists.length === 0) return []; + + // Step 3: RRF fusion — first list gets 2x weight (assume caller ordered by importance) + const weights = rankedLists.map((_, i) => i === 0 ? 2.0 : 1.0); + const fused = reciprocalRankFusion(rankedLists, weights); + const candidates = fused.slice(0, candidateLimit); + + if (candidates.length === 0) return []; + + hooks?.onExpand?.("", []); // Signal no expansion (pre-expanded) + + // Step 4: Chunk documents, pick best chunk per doc for reranking + // Use first lex query as the "query" for keyword matching, or first vec if no lex + const primaryQuery = searches.find(s => s.type === 'lex')?.query + || searches.find(s => s.type === 'vec')?.query + || searches[0]?.query || ""; + const queryTerms = primaryQuery.toLowerCase().split(/\s+/).filter(t => t.length > 2); + const chunksToRerank: { file: string; text: string }[] = []; + const docChunkMap = new Map(); + + for (const cand of candidates) { + const chunks = chunkDocument(cand.body); + if (chunks.length === 0) continue; + + // Pick chunk with most keyword overlap + let bestIdx = 0; + let bestScore = -1; + for (let i = 0; i < chunks.length; i++) { + const chunkLower = chunks[i]!.text.toLowerCase(); + const score = queryTerms.reduce((acc, term) => acc + (chunkLower.includes(term) ? 1 : 0), 0); + if (score > bestScore) { bestScore = score; bestIdx = i; } + } + + chunksToRerank.push({ file: cand.file, text: chunks[bestIdx]!.text }); + docChunkMap.set(cand.file, { chunks, bestIdx }); + } + + // Step 5: Rerank chunks + hooks?.onRerankStart?.(chunksToRerank.length); + const reranked = await store.rerank(primaryQuery, chunksToRerank); + hooks?.onRerankDone?.(); + + // Step 6: Blend RRF position score with reranker score + const candidateMap = new Map(candidates.map(c => [c.file, { + displayPath: c.displayPath, title: c.title, body: c.body, + }])); + const rrfRankMap = new Map(candidates.map((c, i) => [c.file, i + 1])); + + const blended = reranked.map(r => { + const rrfRank = rrfRankMap.get(r.file) || candidateLimit; + let rrfWeight: number; + if (rrfRank <= 3) rrfWeight = 0.75; + else if (rrfRank <= 10) rrfWeight = 0.60; + else rrfWeight = 0.40; + const rrfScore = 1 / rrfRank; + const blendedScore = rrfWeight * rrfScore + (1 - rrfWeight) * r.score; + + const candidate = candidateMap.get(r.file); + const chunkInfo = docChunkMap.get(r.file); + const bestIdx = chunkInfo?.bestIdx ?? 0; + const bestChunk = chunkInfo?.chunks[bestIdx]?.text || candidate?.body || ""; + const bestChunkPos = chunkInfo?.chunks[bestIdx]?.pos || 0; + + return { + file: r.file, + displayPath: candidate?.displayPath || "", + title: candidate?.title || "", + body: candidate?.body || "", + bestChunk, + bestChunkPos, + score: blendedScore, + context: store.getContextForFile(r.file), + docid: docidMap.get(r.file) || "", + }; + }).sort((a, b) => b.score - a.score); + + // Step 7: Dedup by file + const seenFiles = new Set(); + return blended + .filter(r => { + if (seenFiles.has(r.file)) return false; + seenFiles.add(r.file); + return true; + }) + .filter(r => r.score >= minScore) + .slice(0, limit); +}