Move frontends into src/cli/ and src/mcp/ to separate them from the core library. The MCP server is fully rewritten to import only from the SDK (src/index.ts) — zero direct store.ts/collections.ts/llm.ts access.

- src/qmd.ts → src/cli/qmd.ts
- src/formatter.ts → src/cli/formatter.ts
- src/mcp.ts → src/mcp/server.ts (rewritten to use QMDStore SDK)
- New src/maintenance.ts: Maintenance class for CLI housekeeping
- SDK gains: getDocumentBody(), getDefaultCollectionNames(), extractSnippet/addLineNumbers/DEFAULT_MULTI_GET_MAX_BYTES exports, getDefaultDbPath re-export, InternalStore type export
- package.json bin/scripts updated for new paths
- All 692 tests pass
514 lines
19 KiB
TypeScript
514 lines
19 KiB
TypeScript
/**
 * intent.test.ts - Tests for the intent feature
 *
 * Tests cover:
 *   - extractIntentTerms: stop word filtering, punctuation, acronyms, edge cases
 *   - extractSnippet with intent: disambiguation across multiple document sections
 *   - parseStructuredQuery with intent: lines (parsing, validation, error cases)
 *   - Chunk selection scoring with intent
 *   - Strong-signal bypass when intent is present
 *   - Intent constants
 *
 * Run with: npx vitest run test/intent.test.ts
 */

import { describe, test, expect } from "vitest";
|
|
import {
|
|
extractSnippet,
|
|
extractIntentTerms,
|
|
INTENT_WEIGHT_SNIPPET,
|
|
INTENT_WEIGHT_CHUNK,
|
|
type ExpandedQuery,
|
|
} from "../src/store.js";
|
|
|
|
// =============================================================================
// parseStructuredQuery — duplicated from src/cli/qmd.ts for unit testing
// (qmd.ts doesn't export it since it's a CLI internal)
// =============================================================================

/**
 * Result of parsing a multi-line query document.
 */
interface ParsedStructuredQuery {
  /** Typed searches collected from the `lex:` / `vec:` / `hyde:` lines, in input order. */
  searches: ExpandedQuery[];
  /** Text of the optional `intent:` line; left undefined when the document has none. */
  intent?: string;
}

/**
 * Parse a multi-line query document into typed searches plus an optional intent.
 *
 * Every non-blank line must start with `lex:`, `vec:`, `hyde:` or `intent:`
 * (prefixes are matched case-insensitively). Line numbers used in error
 * messages and stored on the returned searches are 1-based positions in the
 * original input, counting blank lines.
 *
 * @param query - Raw query text, possibly spanning several lines.
 * @returns The parsed document, or `null` when the input is not a structured
 *   document: empty/blank input, a single unprefixed line, or a single
 *   `expand:` line with text.
 * @throws Error when `expand:` is combined with other lines or has no text,
 *   when a prefixed line has no text or still contains an embedded line break
 *   (e.g. a bare carriage return), when more than one `intent:` line appears,
 *   when `intent:` is the only content, or when a line of a multi-line document
 *   has no recognised prefix.
 */
function parseStructuredQuery(query: string): ParsedStructuredQuery | null {
  // Number the lines before dropping blanks so messages point at the user's input.
  const rawLines = query
    .split('\n')
    .map((line, idx) => ({ trimmed: line.trim(), number: idx + 1 }))
    .filter(line => line.trimmed.length > 0);

  if (rawLines.length === 0) return null;

  const prefixRe = /^(lex|vec|hyde):\s*/i;
  const expandRe = /^expand:\s*/i;
  const intentRe = /^intent:\s*/i;
  const typed: ExpandedQuery[] = [];
  let intent: string | undefined;

  for (const line of rawLines) {
    // expand: is only valid as the sole line; it is validated here but not parsed.
    if (expandRe.test(line.trimmed)) {
      if (rawLines.length > 1) {
        throw new Error(`Line ${line.number} starts with expand:, but query documents cannot mix expand with typed lines. Submit a single expand query instead.`);
      }
      const text = line.trimmed.replace(expandRe, '').trim();
      if (!text) {
        throw new Error('expand: query must include text.');
      }
      return null;
    }

    // intent: may appear anywhere in the document, but at most once.
    if (intentRe.test(line.trimmed)) {
      if (intent !== undefined) {
        throw new Error(`Line ${line.number}: only one intent: line is allowed per query document.`);
      }
      // Only the leading prefix is stripped, so later colons stay in the intent text.
      const text = line.trimmed.replace(intentRe, '').trim();
      if (!text) {
        throw new Error(`Line ${line.number}: intent: must include text.`);
      }
      intent = text;
      continue;
    }

    const match = line.trimmed.match(prefixRe);
    if (match) {
      const type = match[1]!.toLowerCase() as 'lex' | 'vec' | 'hyde';
      const text = line.trimmed.slice(match[0].length).trim();
      if (!text) {
        throw new Error(`Line ${line.number} (${type}:) must include text.`);
      }
      // The input was split on "\n", so this mainly catches a stray "\r" inside a line.
      if (/\r|\n/.test(text)) {
        throw new Error(`Line ${line.number} (${type}:) contains a newline. Keep each query on a single line.`);
      }
      typed.push({ type, query: text, line: line.number });
      continue;
    }

    // A lone unprefixed line is an ordinary query, not a structured document.
    if (rawLines.length === 1) {
      return null;
    }

    throw new Error(`Line ${line.number} is missing a lex:/vec:/hyde:/intent: prefix. Each line in a query document must start with one.`);
  }

  // A stored intent is never empty, so this is equivalent to a truthiness check.
  if (intent !== undefined && typed.length === 0) {
    throw new Error('intent: cannot appear alone. Add at least one lex:, vec:, or hyde: line.');
  }

  return typed.length > 0 ? { searches: typed, intent } : null;
}

// =============================================================================
// extractIntentTerms
// =============================================================================

// Pins the term list extractIntentTerms is expected to return for an intent
// string: lowercased terms with stop words, single characters and surrounding
// punctuation removed, while short acronyms and hyphenated words are kept.
describe("extractIntentTerms", () => {
  test("filters stop words", () => {
    // "looking", "for", "notes", "about" are stop words
    expect(extractIntentTerms("looking for notes about latency optimization"))
      .toEqual(["latency", "optimization"]);
  });

  test("filters common function words", () => {
    // "what", "is", "the", "to", "find" are stop words; "best", "way" survive
    expect(extractIntentTerms("what is the best way to find"))
      .toEqual(["best", "way"]);
  });

  test("preserves domain terms", () => {
    expect(extractIntentTerms("web performance latency page load times"))
      .toEqual(["web", "performance", "latency", "page", "load", "times"]);
  });

  test("handles surrounding punctuation with Unicode awareness", () => {
    expect(extractIntentTerms("personal health, fitness, and endurance"))
      .toEqual(["personal", "health", "fitness", "endurance"]);
  });

  test("preserves internal hyphens", () => {
    expect(extractIntentTerms("self-hosted real-time (decision-making)"))
      .toEqual(["self-hosted", "real-time", "decision-making"]);
  });

  test("short domain terms survive (API, SQL, LLM)", () => {
    expect(extractIntentTerms("API design for LLM agents"))
      .toEqual(["api", "design", "llm", "agents"]);
  });

  test("returns empty for empty input", () => {
    expect(extractIntentTerms("")).toEqual([]);
    expect(extractIntentTerms(" ")).toEqual([]);
  });

  test("filters single-char terms", () => {
    const terms = extractIntentTerms("a b c web");
    expect(terms).toEqual(["web"]);
  });

  test("all stop words returns empty", () => {
    const terms = extractIntentTerms("the and or but in on at to for of with by");
    expect(terms).toEqual([]);
  });

  test("preserves 2-char domain terms (CI, CD, DB)", () => {
    const terms = extractIntentTerms("SQL CI CD DB");
    expect(terms).toContain("sql");
    expect(terms).toContain("ci");
    expect(terms).toContain("cd");
    expect(terms).toContain("db");
  });

  test("lowercases all terms", () => {
    const terms = extractIntentTerms("WebSocket HTTP REST");
    expect(terms).toContain("websocket");
    expect(terms).toContain("http");
    expect(terms).toContain("rest");
  });

  test("handles C++ style punctuation", () => {
    // Only the plain words are asserted; how "C++" itself is tokenized is left open.
    const terms = extractIntentTerms("C++, performance! optimization.");
    expect(terms).toContain("performance");
    expect(terms).toContain("optimization");
  });
});

// =============================================================================
// extractSnippet with intent — disambiguation
// =============================================================================

// Checks that the optional sixth argument of extractSnippet (the intent text)
// steers which section of a document the snippet is taken from.
describe("extractSnippet with intent", () => {
  // Each section contains "performance" so the query score is tied (1.0 each).
  // Intent terms (INTENT_WEIGHT_SNIPPET) then break the tie toward the relevant section.
  const body = [
    "# Notes on Various Topics",
    "",
    "## Web Performance Section",
    "Web performance means optimizing page load times and Core Web Vitals.",
    "Reduce latency, improve rendering speed, and measure performance budgets.",
    "",
    "## Team Performance Section",
    "Team performance depends on trust, psychological safety, and feedback.",
    "Build culture where performance reviews drive growth not fear.",
    "",
    "## Health Performance Section",
    "Health performance comes from consistent exercise, sleep, and endurance.",
    "Track fitness metrics, optimize recovery, and monitor healthspan.",
  ].join("\n");

  test("without intent, anchors on query terms only", () => {
    const result = extractSnippet(body, "performance", 500);
    // "performance" appears in title and multiple sections — should anchor on first match
    expect(result.snippet).toContain("Performance");
  });

  test("with web-perf intent, prefers web performance section", () => {
    const result = extractSnippet(
      body, "performance", 500,
      undefined, undefined,
      "Looking for notes about web performance, latency, and page load times"
    );
    expect(result.snippet).toMatch(/latency|page.*load|Core Web Vitals/i);
  });

  test("with health intent, prefers health section", () => {
    const result = extractSnippet(
      body, "performance", 500,
      undefined, undefined,
      "Looking for notes about personal health, fitness, and endurance"
    );
    expect(result.snippet).toMatch(/health|fitness|endurance|exercise/i);
  });

  test("with team intent, prefers team section", () => {
    const result = extractSnippet(
      body, "performance", 500,
      undefined, undefined,
      "Looking for notes about building high-performing teams and culture"
    );
    expect(result.snippet).toMatch(/team|culture|trust|feedback/i);
  });

  test("intent does not override strong query match", () => {
    // Query "Core Web Vitals" is very specific — intent shouldn't pull away from it
    const result = extractSnippet(
      body, "Core Web Vitals", 500,
      undefined, undefined,
      "Looking for notes about health and fitness"
    );
    expect(result.snippet).toContain("Core Web Vitals");
  });

  test("absent intent produces same result as undefined", () => {
    const withoutIntent = extractSnippet(body, "performance", 500);
    const withUndefined = extractSnippet(body, "performance", 500, undefined, undefined, undefined);
    expect(withoutIntent.line).toBe(withUndefined.line);
    expect(withoutIntent.snippet).toBe(withUndefined.snippet);
  });

  test("intent with no matching terms falls back to query-only scoring", () => {
    const result = extractSnippet(
      body, "performance", 500,
      undefined, undefined,
      "quantum computing and entanglement"
    );
    expect(result.snippet).toContain("Performance");
    expect(result.snippet.length).toBeGreaterThan(0);
  });

  test("intent works with chunk position", () => {
    // Fourth and fifth arguments are passed as the chunk start offset and a length of 200.
    const webPerfStart = body.indexOf("## Web Performance");
    const result = extractSnippet(
      body, "performance", 500,
      webPerfStart, 200,
      "web page load times"
    );
    expect(result.snippet).toMatch(/Web Performance|Core Web Vitals|Page load/i);
  });
});

// =============================================================================
// extractSnippet — intent weight verification
// =============================================================================

// Checks that intent terms act as a tie-breaker when every line matches the
// query equally well.
describe("extractSnippet intent weight behavior", () => {
  // Document where query term appears on every line but intent terms differ
  const body = [
    "performance metrics for team velocity",
    "performance metrics for web latency",
    "performance metrics for athletic endurance",
  ].join("\n");

  test("intent breaks tie when query matches all lines equally", () => {
    const noIntent = extractSnippet(body, "performance metrics", 500);
    // Without intent, first line wins (all equal score)
    expect(noIntent.line).toBe(1);

    const withIntent = extractSnippet(
      body, "performance metrics", 500,
      undefined, undefined,
      "web latency and page speed"
    );
    // Intent terms "web", "latency" match line 2
    expect(withIntent.snippet).toContain("web latency");
  });
});

// =============================================================================
// Chunk selection scoring with intent
// =============================================================================

describe("intent keyword extraction logic", () => {
  /**
   * Mirrors the chunk selection scoring in hybridQuery, using the shared
   * extractIntentTerms helper and INTENT_WEIGHT_CHUNK constant.
   *
   * Each query term longer than two characters that occurs in the text
   * (case-insensitive substring match) adds 1; each intent term that occurs
   * adds INTENT_WEIGHT_CHUNK.
   */
  function scoreChunk(text: string, query: string, intent?: string): number {
    const queryTerms = query.toLowerCase().split(/\s+/).filter(t => t.length > 2);
    const intentTerms = intent ? extractIntentTerms(intent) : [];
    const lower = text.toLowerCase();
    const qScore = queryTerms.reduce((acc, term) => acc + (lower.includes(term) ? 1 : 0), 0);
    const iScore = intentTerms.reduce((acc, term) => acc + (lower.includes(term) ? INTENT_WEIGHT_CHUNK : 0), 0);
    return qScore + iScore;
  }

  const chunks = [
    "Web performance: optimize page load times, reduce latency, improve rendering pipeline.",
    "Team performance: build trust, give feedback, set clear expectations for the group.",
    "Health performance: exercise regularly, sleep 8 hours, manage stress for endurance.",
  ];

  test("without intent, all chunks score equally on 'performance'", () => {
    const scores = chunks.map(c => scoreChunk(c, "performance"));
    // All contain "performance", so all score 1
    expect(scores[0]).toBe(1);
    expect(scores[0]).toBe(scores[1]);
    expect(scores[1]).toBe(scores[2]);
  });

  test("with web intent, web chunk scores highest", () => {
    const intent = "looking for notes about page load times and latency optimization";
    const scores = chunks.map(c => scoreChunk(c, "performance", intent));
    expect(scores[0]).toBeGreaterThan(scores[1]!);
    expect(scores[0]).toBeGreaterThan(scores[2]!);
  });

  test("with health intent, health chunk scores highest", () => {
    const intent = "looking for notes about exercise, sleep, and endurance";
    const scores = chunks.map(c => scoreChunk(c, "performance", intent));
    expect(scores[2]).toBeGreaterThan(scores[0]!);
    expect(scores[2]).toBeGreaterThan(scores[1]!);
  });

  test("intent terms have lower weight than query terms (1.0)", () => {
    const intent = "looking for latency";
    // Chunk 0 has "performance" (query: 1.0) + "latency" (intent: INTENT_WEIGHT_CHUNK) = 1.5
    const withBoth = scoreChunk(chunks[0]!, "performance", intent);
    const queryOnly = scoreChunk(chunks[0]!, "performance");
    expect(withBoth).toBe(queryOnly + INTENT_WEIGHT_CHUNK);
  });

  test("stop words are filtered, short domain terms survive", () => {
    const intent = "the art of web performance";
    // "the" (stop word), "art" (survives), "of" (stop word),
    // "web" (survives), "performance" (survives)
    // intent terms after filtering: ["art", "web", "performance"]
    // Chunk 0 has "web" + "performance" → 2 intent hits (no "art")
    // Chunks 1,2 have "performance" only → 1 intent hit
    // The query "test" occurs in no chunk, so the scores below are intent-only.
    const scores = chunks.map(c => scoreChunk(c, "test", intent));
    expect(scores[0]).toBe(INTENT_WEIGHT_CHUNK * 2); // "web" + "performance"
    expect(scores[1]).toBe(INTENT_WEIGHT_CHUNK); // "performance" only
    expect(scores[2]).toBe(INTENT_WEIGHT_CHUNK); // "performance" only
  });
});

// =============================================================================
// Strong-signal bypass with intent
// =============================================================================

describe("strong-signal bypass logic", () => {
  // Local copies of the thresholds used by the mirrored expression below.
  // NOTE(review): presumably these equal the constants of the same name used by
  // hybridQuery — confirm against the store if those values change.
  const STRONG_SIGNAL_MIN_SCORE = 0.85;
  const STRONG_SIGNAL_MIN_GAP = 0.15;

  // Mirrors the logic in hybridQuery:
  // const hasStrongSignal = !intent && topScore >= STRONG_SIGNAL_MIN_SCORE && gap >= STRONG_SIGNAL_MIN_GAP
  function hasStrongSignal(topScore: number, secondScore: number, intent?: string): boolean {
    return !intent
      && topScore >= STRONG_SIGNAL_MIN_SCORE
      && (topScore - secondScore) >= STRONG_SIGNAL_MIN_GAP;
  }

  test("strong signal detected without intent", () => {
    expect(hasStrongSignal(0.90, 0.70)).toBe(true);
  });

  test("strong signal bypassed when intent provided", () => {
    expect(hasStrongSignal(0.90, 0.70, "looking for health performance")).toBe(false);
  });

  test("weak signal not affected by intent", () => {
    expect(hasStrongSignal(0.50, 0.45)).toBe(false);
    expect(hasStrongSignal(0.50, 0.45, "some intent")).toBe(false);
  });

  test("close scores not strong even without intent", () => {
    expect(hasStrongSignal(0.90, 0.80)).toBe(false); // gap < 0.15
  });
});

// =============================================================================
// parseStructuredQuery with intent
// =============================================================================

// Exercises the local parseStructuredQuery copy: accepted document shapes, the
// inputs that yield null, and each validation error around intent: lines.
describe("parseStructuredQuery with intent", () => {
  test("parses intent + lex query", () => {
    const result = parseStructuredQuery("intent: web performance\nlex: performance");
    expect(result).not.toBeNull();
    expect(result!.intent).toBe("web performance");
    expect(result!.searches).toHaveLength(1);
    expect(result!.searches[0]!.type).toBe("lex");
    expect(result!.searches[0]!.query).toBe("performance");
  });

  test("parses intent + multiple typed lines", () => {
    const result = parseStructuredQuery(
      "intent: web page load times\nlex: performance\nvec: how to improve performance"
    );
    expect(result).not.toBeNull();
    expect(result!.intent).toBe("web page load times");
    expect(result!.searches).toHaveLength(2);
    expect(result!.searches[0]!.type).toBe("lex");
    expect(result!.searches[1]!.type).toBe("vec");
  });

  test("intent can appear after typed lines", () => {
    const result = parseStructuredQuery(
      "lex: performance\nintent: web page load times\nvec: latency"
    );
    expect(result).not.toBeNull();
    expect(result!.intent).toBe("web page load times");
    expect(result!.searches).toHaveLength(2);
  });

  test("intent is case-insensitive prefix", () => {
    const result = parseStructuredQuery("Intent: web perf\nlex: performance");
    expect(result).not.toBeNull();
    expect(result!.intent).toBe("web perf");
  });

  test("no intent returns undefined", () => {
    const result = parseStructuredQuery("lex: performance\nvec: speed");
    expect(result).not.toBeNull();
    expect(result!.intent).toBeUndefined();
  });

  test("intent alone throws error", () => {
    expect(() => parseStructuredQuery("intent: web performance")).toThrow(
      /intent: cannot appear alone/
    );
  });

  test("multiple intent lines throw error", () => {
    expect(() =>
      parseStructuredQuery("intent: web perf\nintent: team health\nlex: performance")
    ).toThrow(/only one intent: line is allowed/);
  });

  test("empty intent text throws error", () => {
    expect(() =>
      parseStructuredQuery("intent:\nlex: performance")
    ).toThrow(/intent: must include text/);
  });

  test("intent with whitespace-only text throws error", () => {
    expect(() =>
      parseStructuredQuery("intent: \nlex: performance")
    ).toThrow(/intent: must include text/);
  });

  test("single plain line still returns null (expand mode)", () => {
    const result = parseStructuredQuery("how does auth work");
    expect(result).toBeNull();
  });

  test("expand: line still returns null", () => {
    const result = parseStructuredQuery("expand: auth stuff");
    expect(result).toBeNull();
  });

  test("intent with expand throws error (expand can't mix)", () => {
    expect(() =>
      parseStructuredQuery("intent: web\nexpand: performance")
    ).toThrow(/cannot mix expand/);
  });

  test("empty query returns null", () => {
    expect(parseStructuredQuery("")).toBeNull();
    expect(parseStructuredQuery(" \n \n ")).toBeNull();
  });

  test("intent with blank lines is fine", () => {
    const result = parseStructuredQuery(
      "intent: web perf\n\nlex: performance\n\nvec: speed"
    );
    expect(result).not.toBeNull();
    expect(result!.intent).toBe("web perf");
    expect(result!.searches).toHaveLength(2);
  });

  test("intent preserves full text including colons", () => {
    const result = parseStructuredQuery(
      "intent: web performance: LCP, FID, CLS\nlex: performance"
    );
    expect(result).not.toBeNull();
    expect(result!.intent).toBe("web performance: LCP, FID, CLS");
  });
});

// =============================================================================
// Constants exported
// =============================================================================

// Pins the exported weight values so a change to either constant is a
// deliberate, test-visible decision.
describe("intent constants", () => {
  test("INTENT_WEIGHT_SNIPPET is 0.3", () => {
    expect(INTENT_WEIGHT_SNIPPET).toBe(0.3);
  });

  test("INTENT_WEIGHT_CHUNK is 0.5", () => {
    expect(INTENT_WEIGHT_CHUNK).toBe(0.5);
  });
});