feat: add collections array filter + improve query writing docs

- structured_search now accepts collections[] for OR filtering
- Updated skill docs with detailed query writing guidance
  - lex: 2-5 keywords, include synonyms, exact names
  - vec: full natural language questions with context
  - hyde: 50-100 word hypothetical answer passages
2026-02-18 22:09:24 -05:00 · 2026-02-18 22:09:24 -05:00 · d1ec31eab8
commit d1ec31eab8
parent 6d6bdff09c
3 changed files with 93 additions and 54 deletions
--- a/skills/qmd/SKILL.md
+++ b/skills/qmd/SKILL.md
@@ -11,54 +11,87 @@ allowed-tools: Bash(qmd:*), mcp__qmd__*

 # QMD - Quick Markdown Search

-Local search engine for markdown content. Indexes notes, docs, and knowledge bases.
+Local search engine for markdown content.

 ## Status

 !`qmd status 2>/dev/null || echo "Not installed: npm install -g @tobilu/qmd"`

-## MCP Search — `structured_search`
-
-Pass 1-4 sub-queries with type `lex`, `vec`, or `hyde`:
+## MCP: `structured_search`

 ```json
 {
  "searches": [
    { "type": "lex", "query": "CAP theorem consistency" },
    { "type": "vec", "query": "tradeoff between consistency and availability" }
-  ]
+  ],
+  "collections": ["notes", "docs"],
+  "limit": 10
 }
 ```

-| Type | Method | What to Write |
-|------|--------|---------------|
-| `lex` | BM25 keywords | Short phrases — exact terms, names, code |
-| `vec` | Vector search | Natural language question |
-| `hyde` | Vector search | Hypothetical answer (50-100 words) |
+### Search Types

-**Tips:**
- Quick lookup → single `lex` query
- Don't know exact terms → use `vec`
- Best results → combine `lex` + `vec` (+ `hyde` for complex topics)
- First query gets 2x weight
+| Type | Method | Input |
+|------|--------|-------|
+| `lex` | BM25 | Keywords — exact terms, names, code |
+| `vec` | Vector | Question — natural language |
+| `hyde` | Vector | Answer — hypothetical result (50-100 words) |

-## MCP Tools
+### Writing Good Queries
+
+**lex (keyword)**
+- 2-5 terms, no filler words
+- Include synonyms: `"auth authentication login"`
+- Use exact names: `"PostgreSQL connection pool"`
+- Code identifiers work: `"handleError async"`
+
+**vec (semantic)**
+- Full natural language question
+- Be specific: `"how does the rate limiter handle burst traffic"` not `"rate limiting"`
+- Include context: `"in the payment service, how are refunds processed"`
+
+**hyde (hypothetical document)**
+- Write 50-100 words of what the *answer* looks like
+- Use the vocabulary you expect in the result
+- Example: `"The rate limiter uses a sliding window algorithm with a 60-second window. When a client exceeds 100 requests per minute, subsequent requests return 429 Too Many Requests until the window resets."`
+
+### Combining Types
+
+| Goal | Approach |
+|------|----------|
+| Know exact terms | `lex` only |
+| Don't know vocabulary | `vec` only |
+| Best recall | `lex` + `vec` |
+| Complex topic | `lex` + `vec` + `hyde` |
+
+First query gets 2x weight in fusion — put your best guess first.
+
+### Collection Filtering
+
+```jsonc
+{ "collection": "docs" }           // Single collection
+{ "collections": ["docs", "notes"] }  // Multiple (OR)
+```
+
+Omit both to search all collections. If both are given, `collections` takes precedence.
+
+## Other MCP Tools

 | Tool | Use |
 |------|-----|
-| `structured_search` | Search with lex/vec/hyde queries |
 | `get` | Retrieve doc by path or `#docid` |
-| `multi_get` | Retrieve multiple docs by glob/list |
-| `status` | Index health and collections |
+| `multi_get` | Retrieve multiple docs by glob/list |
+| `status` | Collections and health |

 ## CLI

 ```bash
-qmd search "keywords"           # BM25 keyword search
-qmd vsearch "question"          # Vector similarity
-qmd query "question"            # Auto-expand + rerank
-qmd query $'lex: X\nvec: Y'     # Structured (same as MCP)
-qmd get "#abc123"               # Retrieve by docid
+qmd query "question"              # Auto-expand + rerank
+qmd query $'lex: X\nvec: Y'       # Structured
+qmd search "keywords"             # BM25 only
+qmd vsearch "question"            # Vector only
+qmd get "#abc123"                 # By docid
 ```

 ## Setup
@@ -66,10 +99,5 @@ qmd get "#abc123"               # Retrieve by docid
 ```bash
 npm install -g @tobilu/qmd
 qmd collection add ~/notes --name notes
-qmd embed                       # Generate embeddings
-```
-
-MCP config for Claude Code (`~/.claude/settings.json`):
-```json
-{ "mcpServers": { "qmd": { "command": "qmd", "args": ["mcp"] } } }
+qmd embed
 ```
--- a/src/mcp.ts
+++ b/src/mcp.ts
@@ -261,11 +261,11 @@ function createMcpServer(store: Store): McpServer {
        ),
        limit: z.number().optional().default(10).describe("Maximum number of results (default: 10)"),
        minScore: z.number().optional().default(0).describe("Minimum relevance score 0-1 (default: 0)"),
-        collection: z.string().optional().describe("Filter to a specific collection by name"),
-        intent: z.string().optional().describe("(Future) Domain intent hint, e.g., 'distributed systems', 'startup finances'"),
+        collection: z.string().optional().describe("Filter to a single collection by name"),
+        collections: z.array(z.string()).optional().describe("Filter to multiple collections (OR match)"),
      },
    },
-    async ({ searches, limit, minScore, collection, intent }) => {
+    async ({ searches, limit, minScore, collection, collections }) => {
      // Map to internal format
      const subSearches: StructuredSubSearch[] = searches.map(s => ({
        type: s.type,
@@ -274,9 +274,9 @@ function createMcpServer(store: Store): McpServer {

      const results = await structuredSearch(store, subSearches, {
        collection,
+        collections,
        limit,
        minScore,
-        intent,
      });

      // Use first lex or vec query for snippet extraction
@@ -582,9 +582,9 @@ export async function startMcpHttpServer(port: number, options?: { quiet?: boole

        const results = await structuredSearch(store, subSearches, {
          collection: params.collection,
+          collections: params.collections,
          limit: params.limit ?? 10,
          minScore: params.minScore ?? 0,
-          intent: params.intent,
        });

        // Use first lex or vec query for snippet extraction
--- a/src/store.ts
+++ b/src/store.ts
@@ -3072,7 +3072,8 @@ export interface StructuredSubSearch {
 }

 export interface StructuredSearchOptions {
-  collection?: string;
+  collection?: string;      // Single collection filter
+  collections?: string[];   // Multiple collections filter (OR)
  limit?: number;           // default 10
  minScore?: number;        // default 0
  candidateLimit?: number;  // default RERANK_CANDIDATE_LIMIT
@@ -3107,9 +3108,12 @@ export async function structuredSearch(
  const limit = options?.limit ?? 10;
  const minScore = options?.minScore ?? 0;
  const candidateLimit = options?.candidateLimit ?? RERANK_CANDIDATE_LIMIT;
-  const collection = options?.collection;
  const hooks = options?.hooks;

+  // Normalize collection filter to array (undefined = all collections)
+  const collections: string[] | undefined = options?.collections
+    ?? (options?.collection ? [options.collection] : undefined);
+
  if (searches.length === 0) return [];

  const rankedLists: RankedResult[][] = [];
@@ -3118,16 +3122,21 @@ export async function structuredSearch(
    `SELECT name FROM sqlite_master WHERE type='table' AND name='vectors_vec'`
  ).get();

+  // Helper to run search across collections (or all if undefined)
+  const collectionList = collections ?? [undefined]; // undefined = all collections
+
  // Step 1: Run FTS for all lex searches (sync, instant)
  for (const search of searches) {
    if (search.type === 'lex') {
-      const ftsResults = store.searchFTS(search.query, 20, collection);
-      if (ftsResults.length > 0) {
-        for (const r of ftsResults) docidMap.set(r.filepath, r.docid);
-        rankedLists.push(ftsResults.map(r => ({
-          file: r.filepath, displayPath: r.displayPath,
-          title: r.title, body: r.body || "", score: r.score,
-        })));
+      for (const coll of collectionList) {
+        const ftsResults = store.searchFTS(search.query, 20, coll);
+        if (ftsResults.length > 0) {
+          for (const r of ftsResults) docidMap.set(r.filepath, r.docid);
+          rankedLists.push(ftsResults.map(r => ({
+            file: r.filepath, displayPath: r.displayPath,
+            title: r.title, body: r.body || "", score: r.score,
+          })));
+        }
      }
    }
  }
@@ -3144,16 +3153,18 @@ export async function structuredSearch(
        const embedding = embeddings[i]?.embedding;
        if (!embedding) continue;

-        const vecResults = await store.searchVec(
-          vecSearches[i]!.query, DEFAULT_EMBED_MODEL, 20, collection,
-          undefined, embedding
-        );
-        if (vecResults.length > 0) {
-          for (const r of vecResults) docidMap.set(r.filepath, r.docid);
-          rankedLists.push(vecResults.map(r => ({
-            file: r.filepath, displayPath: r.displayPath,
-            title: r.title, body: r.body || "", score: r.score,
-          })));
+        for (const coll of collectionList) {
+          const vecResults = await store.searchVec(
+            vecSearches[i]!.query, DEFAULT_EMBED_MODEL, 20, coll,
+            undefined, embedding
+          );
+          if (vecResults.length > 0) {
+            for (const r of vecResults) docidMap.set(r.filepath, r.docid);
+            rankedLists.push(vecResults.map(r => ({
+              file: r.filepath, displayPath: r.displayPath,
+              title: r.title, body: r.body || "", score: r.score,
+            })));
+          }
        }
      }
    }