From 7488fe809473ec9d55d108a5fa2e03eea8e2ed40 Mon Sep 17 00:00:00 2001
From: Ryan Malia <ryanmalia@gmail.com>
Date: Sun, 7 Jun 2026 04:35:29 -0700
Subject: [PATCH] docs: CLI reference, collection-flag semantics, MCP params,
 bench, new commands

Documents previously-undocumented surface area surfaced by onboarding feedback
and the bench discoverability report:

- README: collection filtering (-c semantics), collection show/include/exclude/
  update-cmd, --intent/--no-rerank/-C/--full-path, --format <kind> (legacy
  output booleans noted as aliases), vector-search/deep-search aliases, embed
  memory flags, a sample --explain trace, MCP tool parameter reference, qmd
  doctor/init, get :from:count + --no-line-numbers, and a Benchmarking section
  for qmd bench.
- README: removed the misleading `qmd update --pull` example; --pull is parsed
  but never consumed, so it points to `qmd collection update-cmd` (the real
  per-collection pre-reindex mechanism) instead.
- docs/SYNTAX.md: drop the non-existent `q` MCP parameter (the query tool/REST
  endpoint accept only `searches`); add a Scoping section.
- server.ts: buildInstructions now advertises the plural `collections` parameter
  to match the schema (singular was silently stripped, yielding unscoped
  results), and the `get` instruction documents the full file.md:from:count
  range suffix instead of only file.md:100.

Refs #25, #181, #217, #372, #520, #576
---
 CHANGELOG.md      |  25 ++++++
 README.md         | 192 +++++++++++++++++++++++++++++++++++++++++++---
 docs/SYNTAX.md    |  35 ++++++---
 src/mcp/server.ts |   4 +-
 4 files changed, 233 insertions(+), 23 deletions(-)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index b367f40..ee68637 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -2,8 +2,33 @@
 
 ## [Unreleased]
 
+### Documentation
+
+- README: documented collection filtering (`-c` semantics), the `collection
+  show`/`include`/`exclude`/`update-cmd` subcommands, the `--intent`/`--no-rerank`/
+  `-C`/`--full-path` search flags, the `--format <kind>` output selector (with the
+  legacy `--json`/`--csv`/`--md`/`--xml`/`--files` booleans noted as aliases),
+  `vector-search`/`deep-search` aliases, embed
+  memory flags (`--max-docs-per-batch`/`--max-batch-mb`), a sample `--explain`
+  score trace, the `qmd doctor`/`qmd init` commands, the `get` `:from:count`
+  suffix and `--no-line-numbers`, an MCP tool parameter reference, and a
+  Benchmarking section for `qmd bench`.
+- docs/SYNTAX.md: removed the non-existent `q` MCP parameter example (the `query`
+  tool and REST endpoint accept only the `searches` array) and added a Scoping
+  section.
+- README: removed the misleading `qmd update --pull` example. The `--pull` flag is
+  parsed but never consumed (`updateCollections()` ignores it); the real mechanism
+  for running `git pull` before re-indexing is a per-collection `update` command,
+  set via `qmd collection update-cmd`.
+
 ### Fixed
 
+- MCP server instructions now tell agents to scope with the plural `collections`
+  parameter (matching the schema). The previous singular `collection` hint led
+  agents to pass a parameter that Zod silently strips, producing unscoped results.
+  The `get` instruction line also now documents the full `file.md:from:count`
+  range suffix instead of only the single-line `file.md:100` offset.
+
 - Filesystem paths with special characters (`#`, `&`, spaces, `[]`, `()`, etc.)
   now round-trip correctly through index → search → get. Previously
   `reindexCollection` called `handelize()` on relative paths before storing
diff --git a/README.md b/README.md
index 7eadb93..9c28522 100644
--- a/README.md
+++ b/README.md
@@ -135,6 +135,30 @@ LLM models stay loaded in VRAM across requests. Embedding/reranking contexts are
 
 Point any MCP client at `http://localhost:8181/mcp` to connect.
 
+#### MCP Tool Parameters
+
+| Tool | Parameter | Type | Notes |
+|------|-----------|------|-------|
+| `query` | `searches` | array | Typed sub-queries (`lex`/`vec`/`hyde`), 1–10. **Required.** First gets 2x weight. |
+| `query` | `collections` | string[] | Filter by collection names (OR). **Array only** — singular `collection` is silently ignored. |
+| `query` | `intent` | string | Disambiguation context (does not search on its own) |
+| `query` | `limit` | number | Max results (default 10) |
+| `query` | `minScore` | number | Minimum relevance 0–1 (default 0) |
+| `query` | `candidateLimit` | number | Max candidates to rerank (default 40) |
+| `query` | `rerank` | boolean | Run LLM reranking (default **true**); set false for RRF-only |
+| `get` | `file` | string | Path, docid (`#abc123`), or `path:from:count` (e.g. `#abc123:120:40`) |
+| `get` | `fromLine` | number | Start line (1-indexed); overrides the `:from` suffix |
+| `get` | `maxLines` | number | Limit returned lines |
+| `get` | `lineNumbers` | boolean | Prefix lines with numbers (default **true**) |
+| `multi_get` | `pattern` | string | Glob pattern or comma-separated list |
+| `multi_get` | `maxBytes` | number | Skip files larger than N (default 10240) |
+| `multi_get` | `maxLines` | number | Limit lines per file |
+| `multi_get` | `lineNumbers` | boolean | Prefix lines with numbers (default **true**) |
+
+Unknown parameters are silently ignored (not rejected) — double-check names if
+results seem unscoped. The HTTP `/query` and `/search` endpoints return
+`qmd://collection/path` URIs in the `file` field, matching the CLI and MCP output.
+
 ### SDK / Library Usage
 
 Use QMD as a library in your own Node.js or Bun applications.
@@ -555,6 +579,17 @@ qmd collection rename myproject my-project
 # List files in a collection
 qmd ls notes
 qmd ls notes/subfolder
+
+# Show collection details (path, glob mask, include status, context count)
+qmd collection show notes
+
+# Include or exclude a collection from default (unscoped) queries
+qmd collection include notes
+qmd collection exclude notes
+
+# Run a command before every `qmd update` (e.g. git pull); empty arg clears it
+qmd collection update-cmd notes 'git pull --rebase'
+qmd collection update-cmd notes
 ```
 
 ### Generate Vector Embeddings
@@ -571,6 +606,10 @@ qmd embed --chunk-strategy auto
 
 # Also works with query for consistent chunk selection
 qmd query "auth flow" --chunk-strategy auto
+
+# Memory control for large corpora / constrained systems
+qmd embed --max-docs-per-batch 50   # cap docs per embedding batch
+qmd embed --max-batch-mb 64         # cap batch size in MB
 ```
 
 **AST-aware chunking** (`--chunk-strategy auto`) uses tree-sitter to chunk code
@@ -632,6 +671,9 @@ qmd vsearch "how to login"
 qmd query "user authentication"
 ```
 
+Two aliases exist for the semantic/hybrid modes: `vector-search` (→ `vsearch`)
+and `deep-search` (→ `query`).
+
 ### Options
 
 ```sh
@@ -644,24 +686,45 @@ qmd query "user authentication"
 --line-numbers     # Add line numbers to output
 --explain          # Include retrieval score traces (query, JSON/CLI output)
 --index <name>     # Use named index
+--intent "<text>"  # Disambiguation context (e.g. "web page load times")
+--no-rerank        # Skip LLM reranking (RRF scores only; faster on CPU)
+-C, --candidate-limit <n>  # Max candidates to rerank (default: 40)
+--full-path        # Emit on-disk filesystem paths instead of qmd:// URIs
 
 # Output formats (for search and multi-get)
---files            # Output: docid,score,filepath,context
---json             # JSON output with snippets
---csv              # CSV output
---md               # Markdown output
---xml              # XML output
+--format <kind>    # cli (default) | json | csv | md | xml | files
+                   # (--json, --csv, --md, --xml, --files are legacy aliases)
 
 # Get options
-qmd get <file>[:line]  # Get document, optionally starting at line
--l <num>               # Maximum lines to return
---from <num>           # Start from line number
+qmd get <file>[:from[:count]]  # Get document; optional start line and count
+-l <num>                       # Maximum lines to return
+--from <num>                   # Start line (overrides the :from suffix)
+--no-line-numbers              # Disable line numbering (on by default)
 
 # Multi-get options
 -l <num>           # Maximum lines per file
 --max-bytes <num>  # Skip files larger than N bytes (default: 10KB)
 ```
 
+### Collection Filtering
+
+The `-c`/`--collection` flag filters results by collection **name** (as shown by
+`qmd collection list`). Collections are a global registry — you can search any
+collection from any directory:
+
+```sh
+qmd search "auth" -c notes           # single collection
+qmd search "auth" -c notes -c docs   # multiple collections (OR)
+```
+
+With no `-c` flag, all default-included collections are searched. Collections
+marked excluded (`qmd collection exclude <name>`) are skipped unless named
+explicitly with `-c`.
+
+> **Note:** With multiple `-c` flags, results come from a global top-K pool and are
+> then filtered. If one collection dominates the rankings, matches from smaller
+> collections may not appear at the default limit — raise `-n` or use `--all`.
+
 ### Output Format
 
 Default output is colorized CLI format (respects `NO_COLOR` env).
@@ -739,17 +802,48 @@ qmd query --json --explain "quarterly reports"
 qmd --index work search "quarterly reports"
 ```
 
+The `--explain` flag attaches a score breakdown to each result: the FTS/vector
+backend scores plus the RRF fusion math (rank, weight, top-rank bonus) and every
+sub-query's contribution. Abbreviated:
+
+```json
+{
+  "docid": "#6c90f0",
+  "score": 0.89,
+  "file": "qmd://qmd/README.md",
+  "explain": {
+    "ftsScores": [0.892, 0.907],
+    "vectorScores": [0.540, 0.484],
+    "rrf": {
+      "rank": 1,
+      "weight": 0.75,
+      "baseScore": 0.123,
+      "topRankBonus": 0.05,
+      "totalScore": 0.173,
+      "contributions": [
+        { "source": "fts", "queryType": "original", "query": "reranking",
+          "rank": 1, "weight": 2, "backendScore": 0.892, "rrfContribution": 0.0328 }
+      ]
+    }
+  }
+}
+```
+
 ### Index Maintenance
 
 ```sh
 # Show index status and collections with contexts
 qmd status
 
-# Re-index all collections
+# Re-index all collections. If a collection has a configured update command
+# (e.g. `git pull`), it runs first — set one with `qmd collection update-cmd`.
 qmd update
 
-# Re-index with git pull first (for remote repos)
-qmd update --pull
+# Diagnose the install (runtime, sqlite-vec, embedding fingerprints, GPU probe)
+qmd doctor
+
+# Initialize a project-local index in the current directory
+qmd init
 
 # Get document by filepath (with fuzzy matching suggestions)
 qmd get notes/meeting.md
@@ -760,6 +854,13 @@ qmd get "#abc123"
 # Get document starting at line 50, max 100 lines
 qmd get notes/meeting.md:50 -l 100
 
+# Read 40 lines starting at line 120 via the :from:count suffix (works with docids)
+qmd get notes/meeting.md:120:40
+qmd get "#abc123:120:40"
+
+# get / multi-get are line-numbered by default; disable with --no-line-numbers
+qmd get notes/meeting.md --no-line-numbers
+
 # Get multiple documents by glob pattern
 qmd multi-get "journals/2025-05*.md"
 
@@ -776,6 +877,75 @@ qmd multi-get "docs/*.md" --json
 qmd cleanup
 ```
 
+### Benchmarking
+
+Measure search quality across all four backends with `qmd bench` and a fixture file
+of queries with known-relevant documents.
+
+**From a git checkout**, an example fixture and its test corpus ship in the repo:
+
+```sh
+# One-time setup (indexes the repo's test corpus into its own collection)
+qmd collection add test/eval-docs --name eval-docs
+qmd embed -c eval-docs
+
+# Run the benchmark (table output)
+qmd bench src/bench/fixtures/example.json
+
+# JSON output for programmatic analysis
+qmd bench src/bench/fixtures/example.json --json
+```
+
+> The example fixture (`src/bench/fixtures/example.json`) and its test corpus
+> (`test/eval-docs/`) exist only in a git checkout — they are **not** part of the
+> published npm package. If you installed via `npm`/`npx`, write your own fixture
+> (see below) against a collection you have already indexed:
+>
+> ```sh
+> qmd bench my-fixture.json -c my-collection
+> ```
+
+Each query runs against four backends, reporting precision@k, recall, MRR, and F1:
+
+| Backend | What it tests | LLM required |
+|---------|---------------|--------------|
+| `bm25` | Keyword search only (FTS5) | No |
+| `vector` | Semantic similarity only | Embedding model |
+| `hybrid` | BM25 + vector fusion (no reranking) | Embedding model |
+| `full` | Full pipeline with LLM reranking | All three models |
+
+**Score interpretation:** `1.00` = perfect (all expected docs in top results),
+`0.00` = complete miss. The example fixture typically shows bm25 ~0.50, vector
+~0.70, and hybrid/full ~1.00 — a concrete demonstration of why hybrid search beats
+either backend alone.
+
+**Custom fixtures** are JSON:
+
+```json
+{
+  "description": "My benchmark",
+  "version": 1,
+  "collection": "my-collection",
+  "queries": [
+    {
+      "id": "find-auth",
+      "query": "authentication flow",
+      "type": "semantic",
+      "expected_files": ["docs/auth-design.md"],
+      "expected_in_top_k": 3
+    }
+  ]
+}
+```
+
+`expected_files` are collection-relative paths as shown by `qmd ls`. The `type`
+field (`exact`, `semantic`, `topical`, `cross-domain`, `alias`) labels queries for
+grouping — it does not change search behavior.
+
+> **Heads-up:** if the fixture's collection isn't indexed, bench currently runs to
+> completion and reports all zeros with no warning. Verify setup with
+> `qmd ls <collection>` first.
+
 ## Data Storage
 
 Index stored in: `~/.cache/qmd/index.sqlite`
diff --git a/docs/SYNTAX.md b/docs/SYNTAX.md
index b53ec69..8175dca 100644
--- a/docs/SYNTAX.md
+++ b/docs/SYNTAX.md
@@ -127,26 +127,41 @@ Without intent, "performance" is ambiguous (web-perf? team health? fitness?). Wi
 - Empty lines are ignored
 - Leading/trailing whitespace is trimmed
 
-## MCP/HTTP API
+## Scoping
 
-The `query` tool accepts a query document:
+Restrict queries to specific collections with `-c` (CLI) or `collections` (MCP/SDK):
 
-```json
-{
-  "q": "lex: CAP theorem\nvec: consistency vs availability",
-  "collections": ["docs"],
-  "limit": 10
-}
+```bash
+# CLI — by collection name (see `qmd collection list`)
+qmd query -c docs "how does auth work"
+qmd query -c docs -c notes $'lex: auth\nvec: authentication flow'
 ```
 
-Or structured format:
+For MCP / HTTP, pass a plural `collections` array (OR match):
+
+```json
+{ "searches": [ { "type": "lex", "query": "auth" } ], "collections": ["docs", "notes"] }
+```
+
+`-c`/`collections` matches by collection name and works from any directory.
+Multiple values are OR-combined. Without scoping, all default-included collections
+are searched; collections marked excluded (`qmd collection exclude <name>`) are
+skipped unless explicitly named. In MCP the parameter is the plural `collections`
+array — a singular `collection` is silently ignored.
+
+## MCP/HTTP API
+
+The `query` tool (and the REST `/query` endpoint) accept a structured query with a
+`searches` array. There is no `q` string parameter — `searches` is required:
 
 ```json
 {
   "searches": [
     { "type": "lex", "query": "CAP theorem" },
     { "type": "vec", "query": "consistency vs availability" }
-  ]
+  ],
+  "collections": ["docs"],
+  "limit": 10
 }
 ```
 
diff --git a/src/mcp/server.ts b/src/mcp/server.ts
index 3eb80c0..6ddded3 100644
--- a/src/mcp/server.ts
+++ b/src/mcp/server.ts
@@ -120,7 +120,7 @@ async function buildInstructions(store: QMDStore): Promise<string> {
   if (status.collections.length > 0) {
     lines.push("");
     const names = status.collections.map(c => c.name).join(", ");
-    lines.push(`Collections (scope with \`collection\` parameter): ${names}`);
+    lines.push(`Collections (scope with \`collections\` parameter): ${names}`);
     lines.push("Call the `status` tool for collection descriptions, paths, and per-collection doc counts.");
   }
 
@@ -151,7 +151,7 @@ async function buildInstructions(store: QMDStore): Promise<string> {
   // --- Retrieval workflow ---
   lines.push("");
   lines.push("Retrieval:");
-  lines.push("  - `get` — single document by path or docid (#abc123). Supports line offset (`file.md:100`).");
+  lines.push("  - `get` — single document by path or docid (#abc123). Supports a line-range suffix: `file.md:100` (from line 100) or `file.md:100:40` (40 lines from line 100).");
   lines.push("  - `multi_get` — batch retrieve by glob (`journals/2025-05*.md`) or comma-separated list.");
 
   // --- Non-obvious things that prevent mistakes ---