diff --git a/skills/qmd/SKILL.md b/skills/qmd/SKILL.md index e57d43f..4fa3479 100644 --- a/skills/qmd/SKILL.md +++ b/skills/qmd/SKILL.md @@ -11,54 +11,87 @@ allowed-tools: Bash(qmd:*), mcp__qmd__* # QMD - Quick Markdown Search -Local search engine for markdown content. Indexes notes, docs, and knowledge bases. +Local search engine for markdown content. ## Status !`qmd status 2>/dev/null || echo "Not installed: npm install -g @tobilu/qmd"` -## MCP Search — `structured_search` - -Pass 1-4 sub-queries with type `lex`, `vec`, or `hyde`: +## MCP: `structured_search` ```json { "searches": [ { "type": "lex", "query": "CAP theorem consistency" }, { "type": "vec", "query": "tradeoff between consistency and availability" } - ] + ], + "collections": ["notes", "docs"], + "limit": 10 } ``` -| Type | Method | What to Write | -|------|--------|---------------| -| `lex` | BM25 keywords | Short phrases — exact terms, names, code | -| `vec` | Vector search | Natural language question | -| `hyde` | Vector search | Hypothetical answer (50-100 words) | +### Search Types -**Tips:** -- Quick lookup → single `lex` query -- Don't know exact terms → use `vec` -- Best results → combine `lex` + `vec` (+ `hyde` for complex topics) -- First query gets 2x weight +| Type | Method | Input | +|------|--------|-------| +| `lex` | BM25 | Keywords — exact terms, names, code | +| `vec` | Vector | Question — natural language | +| `hyde` | Vector | Answer — hypothetical result (50-100 words) | -## MCP Tools +### Writing Good Queries + +**lex (keyword)** +- 2-5 terms, no filler words +- Include synonyms: `"auth authentication login"` +- Use exact names: `"PostgreSQL connection pool"` +- Code identifiers work: `"handleError async"` + +**vec (semantic)** +- Full natural language question +- Be specific: `"how does the rate limiter handle burst traffic"` not `"rate limiting"` +- Include context: `"in the payment service, how are refunds processed"` + +**hyde (hypothetical document)** +- Write 50-100 words of 
what the *answer* looks like +- Use the vocabulary you expect in the result +- Example: `"The rate limiter uses a sliding window algorithm with a 60-second window. When a client exceeds 100 requests per minute, subsequent requests return 429 Too Many Requests until the window resets."` + +### Combining Types + +| Goal | Approach | +|------|----------| +| Know exact terms | `lex` only | +| Don't know vocabulary | `vec` only | +| Best recall | `lex` + `vec` | +| Complex topic | `lex` + `vec` + `hyde` | + +First query gets 2x weight in fusion — put your best guess first. + +### Collection Filtering + +```jsonc +{ "collection": "docs" } // Single collection +{ "collections": ["docs", "notes"] } // Multiple (OR) +``` + +Omit both to search all collections. If both are given, `collections` takes precedence. + +## Other MCP Tools | Tool | Use | |------|-----| -| `structured_search` | Search with lex/vec/hyde queries | | `get` | Retrieve doc by path or `#docid` | -| `multi_get` | Retrieve multiple docs by glob/list | -| `status` | Index health and collections | +| `multi_get` | Retrieve multiple by glob/list | +| `status` | Collections and health | ## CLI ```bash -qmd search "keywords" # BM25 keyword search -qmd vsearch "question" # Vector similarity -qmd query "question" # Auto-expand + rerank -qmd query $'lex: X\nvec: Y' # Structured (same as MCP) -qmd get "#abc123" # Retrieve by docid +qmd query "question" # Auto-expand + rerank +qmd query $'lex: X\nvec: Y' # Structured +qmd search "keywords" # BM25 only +qmd vsearch "question" # Vector only +qmd get "#abc123" # By docid ``` ## Setup @@ -66,10 +99,5 @@ qmd get "#abc123" # Retrieve by docid ```bash npm install -g @tobilu/qmd qmd collection add ~/notes --name notes -qmd embed # Generate embeddings -``` - -MCP config for Claude Code (`~/.claude/settings.json`): -```json -{ "mcpServers": { "qmd": { "command": "qmd", "args": ["mcp"] } } } +qmd embed ``` diff --git a/src/mcp.ts b/src/mcp.ts index 274b6a1..1a6694f 100644 --- a/src/mcp.ts +++ b/src/mcp.ts @@ -261,11 +261,11 @@ 
function createMcpServer(store: Store): McpServer { ), limit: z.number().optional().default(10).describe("Maximum number of results (default: 10)"), minScore: z.number().optional().default(0).describe("Minimum relevance score 0-1 (default: 0)"), - collection: z.string().optional().describe("Filter to a specific collection by name"), - intent: z.string().optional().describe("(Future) Domain intent hint, e.g., 'distributed systems', 'startup finances'"), + collection: z.string().optional().describe("Filter to a single collection by name"), + collections: z.array(z.string()).optional().describe("Filter to multiple collections (OR match)"), }, }, - async ({ searches, limit, minScore, collection, intent }) => { + async ({ searches, limit, minScore, collection, collections }) => { // Map to internal format const subSearches: StructuredSubSearch[] = searches.map(s => ({ type: s.type, @@ -274,9 +274,9 @@ function createMcpServer(store: Store): McpServer { const results = await structuredSearch(store, subSearches, { collection, + collections, limit, minScore, - intent, }); // Use first lex or vec query for snippet extraction @@ -582,9 +582,9 @@ export async function startMcpHttpServer(port: number, options?: { quiet?: boole const results = await structuredSearch(store, subSearches, { collection: params.collection, + collections: params.collections, limit: params.limit ?? 10, minScore: params.minScore ?? 
0, - intent: params.intent, }); // Use first lex or vec query for snippet extraction diff --git a/src/store.ts b/src/store.ts index fbdafc6..505c1c3 100644 --- a/src/store.ts +++ b/src/store.ts @@ -3072,7 +3072,8 @@ export interface StructuredSubSearch { } export interface StructuredSearchOptions { - collection?: string; + collection?: string; // Single collection filter + collections?: string[]; // Multiple collections filter (OR) limit?: number; // default 10 minScore?: number; // default 0 candidateLimit?: number; // default RERANK_CANDIDATE_LIMIT @@ -3107,9 +3108,12 @@ export async function structuredSearch( const limit = options?.limit ?? 10; const minScore = options?.minScore ?? 0; const candidateLimit = options?.candidateLimit ?? RERANK_CANDIDATE_LIMIT; - const collection = options?.collection; const hooks = options?.hooks; + // Normalize collection filter to array (undefined = all collections) + const collections: string[] | undefined = options?.collections + ?? (options?.collection ? [options.collection] : undefined); + if (searches.length === 0) return []; const rankedLists: RankedResult[][] = []; @@ -3118,16 +3122,21 @@ export async function structuredSearch( `SELECT name FROM sqlite_master WHERE type='table' AND name='vectors_vec'` ).get(); + // Helper to run search across collections (or all if undefined) + const collectionList = collections ?? 
[undefined]; // undefined = all collections + // Step 1: Run FTS for all lex searches (sync, instant) for (const search of searches) { if (search.type === 'lex') { - const ftsResults = store.searchFTS(search.query, 20, collection); - if (ftsResults.length > 0) { - for (const r of ftsResults) docidMap.set(r.filepath, r.docid); - rankedLists.push(ftsResults.map(r => ({ - file: r.filepath, displayPath: r.displayPath, - title: r.title, body: r.body || "", score: r.score, - }))); + for (const coll of collectionList) { + const ftsResults = store.searchFTS(search.query, 20, coll); + if (ftsResults.length > 0) { + for (const r of ftsResults) docidMap.set(r.filepath, r.docid); + rankedLists.push(ftsResults.map(r => ({ + file: r.filepath, displayPath: r.displayPath, + title: r.title, body: r.body || "", score: r.score, + }))); + } } } } @@ -3144,16 +3153,18 @@ export async function structuredSearch( const embedding = embeddings[i]?.embedding; if (!embedding) continue; - const vecResults = await store.searchVec( - vecSearches[i]!.query, DEFAULT_EMBED_MODEL, 20, collection, - undefined, embedding - ); - if (vecResults.length > 0) { - for (const r of vecResults) docidMap.set(r.filepath, r.docid); - rankedLists.push(vecResults.map(r => ({ - file: r.filepath, displayPath: r.displayPath, - title: r.title, body: r.body || "", score: r.score, - }))); + for (const coll of collectionList) { + const vecResults = await store.searchVec( + vecSearches[i]!.query, DEFAULT_EMBED_MODEL, 20, coll, + undefined, embedding + ); + if (vecResults.length > 0) { + for (const r of vecResults) docidMap.set(r.filepath, r.docid); + rankedLists.push(vecResults.map(r => ({ + file: r.filepath, displayPath: r.displayPath, + title: r.title, body: r.body || "", score: r.score, + }))); + } } } }