From c85889df12d59a090e21bbe41a02144a7e251191 Mon Sep 17 00:00:00 2001 From: Tobi Lutke Date: Sun, 21 Dec 2025 14:50:17 -0400 Subject: [PATCH] fixes --- bun.lock | 20 ++- example-index.yml | 42 ++---- export-to-yaml.ts | 108 -------------- package.json | 6 +- src/cli.test.ts | 2 +- src/eval.test.ts | 37 +++-- src/llm.test.ts | 39 ++--- src/llm.ts | 352 +++++++++++++++++++++++++++------------------- src/mcp.test.ts | 192 +++++++++++++------------ src/mcp.ts | 45 ++++-- src/qmd.ts | 222 ++++++++++++----------------- src/store.test.ts | 116 +-------------- src/store.ts | 236 ++++++++----------------------- 13 files changed, 552 insertions(+), 865 deletions(-) delete mode 100644 export-to-yaml.ts diff --git a/bun.lock b/bun.lock index 8d405cf..f5d7aa5 100644 --- a/bun.lock +++ b/bun.lock @@ -5,11 +5,11 @@ "": { "name": "2025-12-07-bm25-q", "dependencies": { - "@modelcontextprotocol/sdk": "^1.24.3", + "@modelcontextprotocol/sdk": "^1.25.1", "node-llama-cpp": "^3.14.5", "sqlite-vec": "^0.1.7-alpha.2", "yaml": "^2.8.2", - "zod": "^4.1.13", + "zod": "^4.2.1", }, "devDependencies": { "@types/bun": "latest", @@ -21,18 +21,20 @@ "sqlite-vec-win32-x64": "^0.1.7-alpha.2", }, "peerDependencies": { - "typescript": "^5", + "typescript": "^5.9.3", }, }, }, "packages": { + "@hono/node-server": ["@hono/node-server@1.19.7", "", { "peerDependencies": { "hono": "^4" } }, "sha512-vUcD0uauS7EU2caukW8z5lJKtoGMokxNbJtBiwHgpqxEXokaHCBkQUmCHhjFB1VUTWdqj25QoMkMKzgjq+uhrw=="], + "@huggingface/jinja": ["@huggingface/jinja@0.5.3", "", {}, "sha512-asqfZ4GQS0hD876Uw4qiUb7Tr/V5Q+JZuo2L+BtdrD4U40QU58nIRq3ZSgAzJgT874VLjhGVacaYfrdpXtEvtA=="], "@kwsites/file-exists": ["@kwsites/file-exists@1.1.1", "", { "dependencies": { "debug": "^4.1.1" } }, "sha512-m9/5YGR18lIwxSFDwfE3oA7bWuq9kdau6ugN4H2rJeyhFQZcG9AgSHkQtSD15a8WvTgfz9aikZMrKPHvbpqFiw=="], "@kwsites/promise-deferred": ["@kwsites/promise-deferred@1.1.1", "", {}, "sha512-GaHYm+c0O9MjZRu0ongGBRbinu8gVAMd2UZjji6jVmqKtZluZnptXGWhz1E8j8D2HJ3f/yMxKAUC0b+57wncIw=="], - "@modelcontextprotocol/sdk": ["@modelcontextprotocol/sdk@1.24.3", "", { "dependencies": { "ajv": "^8.17.1", "ajv-formats": "^3.0.1", "content-type": "^1.0.5", "cors": "^2.8.5", "cross-spawn": "^7.0.5", "eventsource": "^3.0.2", "eventsource-parser": "^3.0.0", "express": "^5.0.1", "express-rate-limit": "^7.5.0", "jose": "^6.1.1", "pkce-challenge": "^5.0.0", "raw-body": "^3.0.0", "zod": "^3.25 || ^4.0", "zod-to-json-schema": "^3.25.0" }, "peerDependencies": { "@cfworker/json-schema": "^4.1.1" }, "optionalPeers": ["@cfworker/json-schema"] }, "sha512-YgSHW29fuzKKAHTGe9zjNoo+yF8KaQPzDC2W9Pv41E7/57IfY+AMGJ/aDFlgTLcVVELoggKE4syABCE75u3NCw=="], + "@modelcontextprotocol/sdk": ["@modelcontextprotocol/sdk@1.25.1", "", { "dependencies": { "@hono/node-server": "^1.19.7", "ajv": "^8.17.1", "ajv-formats": "^3.0.1", "content-type": "^1.0.5", "cors": "^2.8.5", "cross-spawn": "^7.0.5", "eventsource": "^3.0.2", "eventsource-parser": "^3.0.0", "express": "^5.0.1", "express-rate-limit": "^7.5.0", "jose": "^6.1.1", "json-schema-typed": "^8.0.2", "pkce-challenge": "^5.0.0", "raw-body": "^3.0.0", "zod": "^3.25 || ^4.0", "zod-to-json-schema": "^3.25.0" }, "peerDependencies": { "@cfworker/json-schema": "^4.1.1" }, "optionalPeers": ["@cfworker/json-schema"] }, "sha512-yO28oVFFC7EBoiKdAn+VqRm+plcfv4v0xp6osG/VsCB0NlPZWi87ajbCZZ8f/RvOFLEu7//rSRmuZZ7lMoe3gQ=="], "@node-llama-cpp/linux-arm64": ["@node-llama-cpp/linux-arm64@3.14.5", "", { "os": "linux", "cpu": [ "x64", "arm64", ] }, "sha512-58IcWW7EOqc/66mYWXRsoMCy1MR3pTX/YaC0HYF9Rg5XeAPKhUP7NHrglbqgjO62CkcuFZaSEiX2AtG972GQYQ=="], @@ -132,7 +134,7 @@ "@types/aws-lambda": ["@types/aws-lambda@8.10.159", "", {}, "sha512-SAP22WSGNN12OQ8PlCzGzRCZ7QDCwI85dQZbmpz7+mAk+L7j+wI7qnvmdKh+o7A5LaOp6QnOZ2NJphAZQTTHQg=="], - "@types/bun": ["@types/bun@1.3.3", "", { "dependencies": { "bun-types": "1.3.3" } }, "sha512-ogrKbJ2X5N0kWLLFKeytG0eHDleBYtngtlbu9cyBKFtNL3cnpDZkNdQj8flVf6WTZUX5ulI9AY1oa7ljhSrp+g=="], + "@types/bun": ["@types/bun@1.3.5", "", { "dependencies": { "bun-types": "1.3.5" } }, "sha512-RnygCqNrd3srIPEWBd5LFeUYG7plCoH2Yw9WaZGyNmdTEei+gWaHqydbaIRkIkcbXwhBT94q78QljxN0Sk838w=="], "@types/node": ["@types/node@24.10.1", "", { "dependencies": { "undici-types": "~7.16.0" } }, "sha512-GNWcUTRBgIRJD5zj+Tq0fKOJ5XZajIiBroOF0yvj2bSU1WvNdYS/dn9UxwsujGW4JX06dnHyjV2y9rRaybH0iQ=="], @@ -164,7 +166,7 @@ "bottleneck": ["bottleneck@2.19.5", "", {}, "sha512-VHiNCbI1lKdl44tGrhNfU3lup0Tj/ZBMJB5/2ZbNXRCPuRCO7ed2mgcK4r17y+KB2EfuYuRaVlwNbAeaWGSpbw=="], - "bun-types": ["bun-types@1.3.3", "", { "dependencies": { "@types/node": "*" } }, "sha512-z3Xwlg7j2l9JY27x5Qn3Wlyos8YAp0kKRlrePAOjgjMGS5IG6E7Jnlx736vH9UVI4wUICwwhC9anYL++XeOgTQ=="], + "bun-types": ["bun-types@1.3.5", "", { "dependencies": { "@types/node": "*" } }, "sha512-inmAYe2PFLs0SUbFOWSVD24sg1jFlMPxOjOSSCYqUgn4Hsc3rDc7dFvfVYjFPNHtov6kgUeulV4SxbuIV/stPw=="], "bytes": ["bytes@3.1.2", "", {}, "sha512-/Nf7TyzTx6S3yRJObOAV7956r8cr2+Oj8AC5dt8wSP3BQAoeX58NoHyCU8P8zGkNXStjTSi6fzO6F0pBdcYbEg=="], @@ -304,6 +306,8 @@ "hasown": ["hasown@2.0.2", "", { "dependencies": { "function-bind": "^1.1.2" } }, "sha512-0hJU9SCPvmMzIBdZFqNPXWa6dqh7WdH0cII9y+CyS8rG3nL48Bclra9HmKhVVUHyPWNH5Y7xDwAB7bfgSjkUMQ=="], + "hono": ["hono@4.11.1", "", {}, "sha512-KsFcH0xxHes0J4zaQgWbYwmz3UPOOskdqZmItstUG93+Wk1ePBLkLGwbP9zlmh1BFUiL8Qp+Xfu9P7feJWpGNg=="], + "http-errors": ["http-errors@2.0.1", "", { "dependencies": { "depd": "~2.0.0", "inherits": "~2.0.4", "setprototypeof": "~1.2.0", "statuses": "~2.0.2", "toidentifier": "~1.0.1" } }, "sha512-4FbRdAX+bSdmo4AUFuS0WNiPz8NgFt+r8ThgNWmlrjQjt1Q7ZR9+zTlce2859x4KSXrwIsaeTqDoKQmtP8pLmQ=="], "iconv-lite": ["iconv-lite@0.7.0", "", { "dependencies": { "safer-buffer": ">= 2.1.2 < 3.0.0" } }, "sha512-cf6L2Ds3h57VVmkZe+Pn+5APsT7FpqJtEhhieDCvrE2MK5Qk9MyffgQyuxQTm6BChfeZNtcOLHp9IcWRVcIcBQ=="], @@ -332,6 +336,8 @@ "json-schema-traverse": ["json-schema-traverse@1.0.0", "", {}, "sha512-NM8/P9n3XjXhIZn1lLhkFaACTOURQXjWhV4BA/RnOv8xvgqtqpAX9IO4mRQxSx1Rlo4tqzeqb0sOlruaOy3dug=="], + "json-schema-typed": ["json-schema-typed@8.0.2", "", {}, "sha512-fQhoXdcvc3V28x7C7BMs4P5+kNlgUURe2jmUT1T//oBRMDrqy1QPelJimwZGo7Hg9VPV3EQV5Bnq4hbFy2vetA=="], + "jsonfile": ["jsonfile@6.2.0", "", { "dependencies": { "universalify": "^2.0.0" }, "optionalDependencies": { "graceful-fs": "^4.1.6" } }, "sha512-FGuPw30AdOIUTRMC2OMRtQV+jkVj2cfPqSeWXv1NEAJ1qZ5zb1X6z1mFhbfOB/iy3ssJCD+3KuZ8r8C3uVFlAg=="], "lifecycle-utils": ["lifecycle-utils@3.0.1", "", {}, "sha512-Qt/Jl5dsNIsyCAZsHB6x3mbwHFn0HJbdmvF49sVX/bHgX2cW7+G+U+I67Zw+TPM1Sr21Gb2nfJMd2g6iUcI1EQ=="], @@ -542,7 +548,7 @@ "yoctocolors": ["yoctocolors@2.1.2", "", {}, "sha512-CzhO+pFNo8ajLM2d2IW/R93ipy99LWjtwblvC1RsoSUMZgyLbYFr221TnSNT7GjGdYui6P459mw9JH/g/zW2ug=="], - "zod": ["zod@4.1.13", "", {}, "sha512-AvvthqfqrAhNH9dnfmrfKzX5upOdjUVJYFqNSlkmGf64gRaTzlPwz99IHYnVs28qYAybvAlBV+H7pn0saFY4Ig=="], + "zod": ["zod@4.2.1", "", {}, "sha512-0wZ1IRqGGhMP76gLqz8EyfBXKk0J2qo2+H3fi4mcUP/KtTocoX08nmIAHl1Z2kJIZbZee8KOpBCSNPRgauucjw=="], "zod-to-json-schema": ["zod-to-json-schema@3.25.0", "", { "peerDependencies": { "zod": "^3.25 || ^4" } }, "sha512-HvWtU2UG41LALjajJrML6uQejQhNJx+JBO9IflpSja4R03iNWfKXrj6W2h7ljuLyc1nKS+9yDyL/9tD1U/yBnQ=="], diff --git a/example-index.yml b/example-index.yml index eb1b794..a6d2d16 100644 --- a/example-index.yml +++ b/example-index.yml @@ -6,54 +6,28 @@ # Global context applied to all collections # Use this for universal search instructions or patterns -global_context: "If you see relevant [[WikiWord]] you can do a search for WikiWord to get more context on the matter" +global_context: "If you see a relevant [[WikiWord]], you can search for that WikiWord to get more context." # Collection definitions collections: # Meeting notes Meetings: - path: /Users/tobi/Documents/Meetings + path: ~/Documents/Meetings pattern: "**/*.md" context: "/": "Meeting notes and summaries" - # Archived content from Shopify - archive: - path: /Users/tobi/src/github.com/Shopify/archive/obsidian/archive - pattern: "**/*.md" - context: - # Context can be defined at any path level - "/Board of Directors": "Public communications with the Shopify BOD" - "/Context/": "Shopify Internal Podcasts, almost all of them hosted by Tobi" - "/Summit/": "Tobi's major internal Shopify Summit Keynotes" - "/": "Shopify archive - historical documents and communications" - # Daily journal entries journals: - path: /Users/tobi/src/github.com/tobi/Brain/journals + path: ~/Documents/Notes pattern: "**/*.md" context: - "/2024": "Daily notes from 2024" - "/2025": "Daily notes from 2025" - "/": "Logseq - daily notes. Unstructured text in logseq bullet point format" + "/journal/2024": "Daily notes from 2024" + "/journal/2025": "Daily notes from 2025" + "/": "Notes vault" - # Knowledge base pages - pages: - path: /Users/tobi/src/github.com/tobi/Brain/pages - pattern: "**/*.md" - context: - "/": "Logseq knowledge base - structured notes and reference material" - - # Technical RFCs - rfcs: - path: /Users/tobi/src/github.com/Shopify/codex/rfcs - pattern: "**/*.md" - context: - "/": "Request for Comments - technical design documents" - - # Thematic collections - themes: - path: /Users/tobi/src/github.com/Shopify/codex/themes + codex: + path: ~/Documents/Codex pattern: "**/*.md" context: "/": "Thematic collections of important concepts and discussions" diff --git a/export-to-yaml.ts b/export-to-yaml.ts deleted file mode 100644 index b792e08..0000000 --- a/export-to-yaml.ts +++ /dev/null @@ -1,108 +0,0 @@ -#!/usr/bin/env bun -/** - * Export current SQLite collections and contexts to YAML format - * - * This script reads from the current database and creates ~/.config/qmd/index.yml - * Run this once to migrate from database-based to YAML-based configuration. - */ - -import { Database } from "bun:sqlite"; -import { join } from "path"; -import { homedir } from "os"; -import { saveConfig, type CollectionConfig, getConfigPath } from "./src/collections"; - -// Simple colors for output -const c = { - reset: "\x1b[0m", - cyan: "\x1b[36m", - green: "\x1b[32m", - dim: "\x1b[2m", -}; - -// Open the existing database -const dbPath = join(homedir(), ".cache", "qmd", "index.sqlite"); -const db = new Database(dbPath, { readonly: true }); - -console.log(`${c.cyan}Exporting collections from SQLite to YAML...${c.reset}\n`); -console.log(`Database: ${dbPath}`); -console.log(`Output: ${getConfigPath()}\n`); - -// Initialize config -const config: CollectionConfig = { - global_context: "If you see relevant [[WikiWord]] you can do a search for WikiWord to get more context on the matter", - collections: {}, -}; - -// Export collections -interface CollectionRow { - id: number; - name: string; - pwd: string; - glob_pattern: string; -} - -const collections = db - .query("SELECT id, name, pwd, glob_pattern FROM collections ORDER BY name") - .all(); - -console.log(`${c.green}Found ${collections.length} collections:${c.reset}`); - -for (const coll of collections) { - console.log(` - ${coll.name}`); - - config.collections[coll.name] = { - path: coll.pwd, - pattern: coll.glob_pattern, - }; -} - -// Export contexts -interface ContextRow { - collection_id: number; - collection_name: string; - path_prefix: string; - context: string; -} - -const contexts = db - .query(` - SELECT - pc.collection_id, - c.name as collection_name, - pc.path_prefix, - pc.context - FROM path_contexts pc - JOIN collections c ON pc.collection_id = c.id - ORDER BY c.name, pc.path_prefix - `) - .all(); - -console.log(`\n${c.green}Found ${contexts.length} contexts:${c.reset}`); - -for (const ctx of contexts) { - const collection = config.collections[ctx.collection_name]; - if (!collection) continue; - - if (!collection.context) { - collection.context = {}; - } - - // Use "/" for empty path prefix (cleaner YAML) - const pathKey = ctx.path_prefix === "" ? "/" : ctx.path_prefix; - collection.context[pathKey] = ctx.context; - - // Truncate long contexts for display - const displayContext = ctx.context.length > 50 - ? ctx.context.substring(0, 50) + "..." - : ctx.context; - - console.log(` - ${ctx.collection_name}${ctx.path_prefix}: ${displayContext}`); -} - -// Save to YAML -saveConfig(config); - -console.log(`\n${c.green}✓ Successfully exported to ${getConfigPath()}${c.reset}`); -console.log(`\n${c.dim}You can now manually edit this file to adjust your collections.${c.reset}`); - -db.close(); diff --git a/package.json b/package.json index 292c7e0..4ac978a 100644 --- a/package.json +++ b/package.json @@ -18,11 +18,11 @@ "inspector": "npx @modelcontextprotocol/inspector bun src/qmd.ts mcp" }, "dependencies": { - "@modelcontextprotocol/sdk": "^1.24.3", + "@modelcontextprotocol/sdk": "^1.25.1", "node-llama-cpp": "^3.14.5", "sqlite-vec": "^0.1.7-alpha.2", "yaml": "^2.8.2", - "zod": "^4.1.13" + "zod": "^4.2.1" }, "optionalDependencies": { "sqlite-vec-darwin-arm64": "^0.1.7-alpha.2", @@ -34,7 +34,7 @@ "@types/bun": "latest" }, "peerDependencies": { - "typescript": "^5" + "typescript": "^5.9.3" }, "engines": { "bun": ">=1.0.0" diff --git a/src/cli.test.ts b/src/cli.test.ts index 82727ab..89300be 100644 --- a/src/cli.test.ts +++ b/src/cli.test.ts @@ -402,7 +402,7 @@ describe("CLI Add-Context Command", () => { }); test("requires path and text arguments", async () => { - const { stderr, exitCode } = await runQmd(["add-context"], { dbPath: localDbPath, configDir: localConfigDir }); + const { stderr, exitCode } = await runQmd(["context", "add"], { dbPath: localDbPath, configDir: localConfigDir }); expect(exitCode).toBe(1); // Error message goes to stderr expect(stderr).toContain("Usage:"); diff --git a/src/eval.test.ts b/src/eval.test.ts index 7e9a494..ff5d7da 100644 --- a/src/eval.test.ts +++ b/src/eval.test.ts @@ -21,20 +21,18 @@ const tempDir = mkdtempSync(join(tmpdir(), "qmd-eval-")); process.env.INDEX_PATH = join(tempDir, "eval.sqlite"); import { - getDb, - closeDb, + createStore, searchFTS, searchVec, insertDocument, insertContent, - ensureVecTable, insertEmbedding, chunkDocumentByTokens, reciprocalRankFusion, DEFAULT_EMBED_MODEL, type RankedResult, } from "./store"; -import { getDefaultLlamaCpp, formatDocForEmbedding } from "./llm"; +import { getDefaultLlamaCpp, formatDocForEmbedding, disposeDefaultLlamaCpp } from "./llm"; // Eval queries with expected documents const evalQueries: { @@ -100,10 +98,12 @@ function calcHitRate( // ============================================================================= describe("BM25 Search (FTS)", () => { + let store: ReturnType; let db: Database; beforeAll(() => { - db = getDb(); + store = createStore(); + db = store.db; // Load and index eval documents const evalDocsDir = join(import.meta.dir, "../test/eval-docs"); @@ -121,7 +121,7 @@ describe("BM25 Search (FTS)", () => { }); afterAll(() => { - closeDb(); + store.close(); }); test("easy queries: ≥80% Hit@3", () => { @@ -153,11 +153,13 @@ describe("BM25 Search (FTS)", () => { // ============================================================================= describe("Vector Search", () => { + let store: ReturnType; let db: Database; let hasEmbeddings = false; beforeAll(async () => { - db = getDb(); + store = createStore(); + db = store.db; // Check if embeddings already exist (from previous test run) const vecTable = db.prepare( @@ -174,7 +176,7 @@ describe("Vector Search", () => { // Generate embeddings for test documents const llm = getDefaultLlamaCpp(); - ensureVecTable(db, 768); // embeddinggemma uses 768 dimensions + store.ensureVecTable(768); // embeddinggemma uses 768 dimensions const evalDocsDir = join(import.meta.dir, "../test/eval-docs"); const files = readdirSync(evalDocsDir).filter(f => f.endsWith(".md")); @@ -185,9 +187,10 @@ describe("Vector Search", () => { const title = content.split("\n")[0]?.replace(/^#\s*/, "") || file; // Chunk and embed - const chunks = await chunkDocumentByTokens(content, llm); + const chunks = await chunkDocumentByTokens(content); for (let seq = 0; seq < chunks.length; seq++) { const chunk = chunks[seq]; + if (!chunk) continue; const formatted = formatDocForEmbedding(chunk.text, title); const result = await llm.embed(formatted, { model: DEFAULT_EMBED_MODEL, isQuery: false }); if (result?.embedding) { @@ -201,6 +204,10 @@ describe("Vector Search", () => { hasEmbeddings = true; }, 120000); // 2 minute timeout for embedding generation + afterAll(() => { + store.close(); + }); + // Note: Don't dispose here - Hybrid tests also use llama. // Dispose happens in the global afterAll. @@ -258,11 +265,13 @@ describe("Vector Search", () => { // ============================================================================= describe("Hybrid Search (RRF)", () => { + let store: ReturnType; let db: Database; let hasVectors = false; beforeAll(() => { - db = getDb(); + store = createStore(); + db = store.db; // Check if vectors exist const vecTable = db.prepare( `SELECT name FROM sqlite_master WHERE type='table' AND name='vectors_vec'` @@ -273,6 +282,10 @@ describe("Hybrid Search (RRF)", () => { } }); + afterAll(() => { + store.close(); + }); + // Helper: run hybrid search with RRF fusion async function hybridSearch(query: string, limit: number = 10): Promise { const rankedLists: RankedResult[][] = []; @@ -392,6 +405,8 @@ describe("Hybrid Search (RRF)", () => { // Cleanup // ============================================================================= -afterAll(() => { +afterAll(async () => { + // Ensure native resources are released to avoid ggml-metal asserts on process exit. + await disposeDefaultLlamaCpp(); rmSync(tempDir, { recursive: true, force: true }); }); diff --git a/src/llm.test.ts b/src/llm.test.ts index 49ae682..826db1e 100644 --- a/src/llm.test.ts +++ b/src/llm.test.ts @@ -11,7 +11,7 @@ import { describe, test, expect, beforeAll, afterAll } from "bun:test"; import { LlamaCpp, getDefaultLlamaCpp, - setDefaultLlamaCpp, + disposeDefaultLlamaCpp, type RerankDocument, } from "./llm.js"; @@ -20,35 +20,12 @@ import { // ============================================================================= describe("Default LlamaCpp Singleton", () => { - // Don't dispose - let process exit handle Metal cleanup naturally - - test("getDefaultLlamaCpp creates instance on first call", () => { - setDefaultLlamaCpp(null); - const llm = getDefaultLlamaCpp(); - expect(llm).toBeInstanceOf(LlamaCpp); - }); - + // Test singleton behavior without resetting to avoid orphan instances test("getDefaultLlamaCpp returns same instance on subsequent calls", () => { - setDefaultLlamaCpp(null); const llm1 = getDefaultLlamaCpp(); const llm2 = getDefaultLlamaCpp(); expect(llm1).toBe(llm2); - }); - - test("setDefaultLlamaCpp allows replacing the singleton", () => { - const custom = new LlamaCpp({ embedModel: "custom-model" }); - setDefaultLlamaCpp(custom); - - const result = getDefaultLlamaCpp(); - expect(result).toBe(custom); - }); - - test("setDefaultLlamaCpp with null resets singleton", () => { - const original = getDefaultLlamaCpp(); - setDefaultLlamaCpp(null); - const newInstance = getDefaultLlamaCpp(); - - expect(newInstance).not.toBe(original); + expect(llm1).toBeInstanceOf(LlamaCpp); }); }); @@ -82,6 +59,11 @@ describe("LlamaCpp Integration", () => { // Use the singleton to avoid multiple Metal contexts const llm = getDefaultLlamaCpp(); + afterAll(async () => { + // Ensure native resources are released to avoid ggml-metal asserts on process exit. + await disposeDefaultLlamaCpp(); + }); + describe("embed", () => { test("returns embedding with correct dimensions", async () => { const result = await llm.embed("Hello world"); @@ -180,9 +162,8 @@ describe("LlamaCpp Integration", () => { const seqTime = Date.now() - seqStart; console.log(`Batch: ${batchTime}ms, Sequential: ${seqTime}ms`); - // Batch should be faster (or at least not much slower) - // Allow some variance since first call may load the model - expect(batchTime).toBeLessThan(seqTime * 1.5); + // Performance is machine/load dependent. We only assert batch isn't drastically worse. + expect(batchTime).toBeLessThanOrEqual(seqTime * 3); }); }); diff --git a/src/llm.ts b/src/llm.ts index 08b29ef..f6eecdb 100644 --- a/src/llm.ts +++ b/src/llm.ts @@ -4,7 +4,16 @@ * Provides embeddings, text generation, and reranking using local GGUF models. */ -import { getLlama, resolveModelFile, type Llama, type LlamaModel, type LlamaEmbeddingContext, type LlamaContext, type LlamaChatSession } from "node-llama-cpp"; +import { + getLlama, + resolveModelFile, + LlamaChatSession, + LlamaLogLevel, + type Llama, + type LlamaModel, + type LlamaEmbeddingContext, + type Token as LlamaToken, +} from "node-llama-cpp"; import { homedir } from "os"; import { join } from "path"; import { existsSync, mkdirSync } from "fs"; @@ -190,8 +199,21 @@ export type LlamaCppConfig = { generateModel?: string; rerankModel?: string; modelCacheDir?: string; - /** Inactivity timeout in ms before unloading models (default: 2 minutes, 0 to disable) */ + /** + * Inactivity timeout in ms before unloading contexts (default: 2 minutes, 0 to disable). + * + * Per node-llama-cpp lifecycle guidance, we prefer keeping models loaded and only disposing + * contexts when idle, since contexts (and their sequences) are the heavy per-session objects. + * @see https://node-llama-cpp.withcat.ai/guide/objects-lifecycle + */ inactivityTimeoutMs?: number; + /** + * Whether to dispose models on inactivity (default: false). + * + * Keeping models loaded avoids repeated VRAM thrash; set to true only if you need aggressive + * memory reclaim. + */ + disposeModelsOnInactivity?: boolean; }; /** @@ -205,7 +227,6 @@ export class LlamaCpp implements LLM { private embedModel: LlamaModel | null = null; private embedContext: LlamaEmbeddingContext | null = null; private generateModel: LlamaModel | null = null; - private generateContext: LlamaContext | null = null; private rerankModel: LlamaModel | null = null; private rerankContext: Awaited> | null = null; @@ -214,17 +235,19 @@ export class LlamaCpp implements LLM { private rerankModelUri: string; private modelCacheDir: string; - private initPromise: Promise | null = null; + // Ensure we don't load the same model concurrently (which can allocate duplicate VRAM). + private embedModelLoadPromise: Promise | null = null; + private generateModelLoadPromise: Promise | null = null; + private rerankModelLoadPromise: Promise | null = null; // Inactivity timer for auto-unloading models private inactivityTimer: ReturnType | null = null; private inactivityTimeoutMs: number; + private disposeModelsOnInactivity: boolean; // Track disposal state to prevent double-dispose private disposed = false; - // Mutex for generation to prevent "No sequences left" error with single sequence - private generateLock: Promise = Promise.resolve(); constructor(config: LlamaCppConfig = {}) { this.embedModelUri = config.embedModel || DEFAULT_EMBED_MODEL; @@ -232,6 +255,7 @@ export class LlamaCpp implements LLM { this.rerankModelUri = config.rerankModel || DEFAULT_RERANK_MODEL; this.modelCacheDir = config.modelCacheDir || MODEL_CACHE_DIR; this.inactivityTimeoutMs = config.inactivityTimeoutMs ?? DEFAULT_INACTIVITY_TIMEOUT_MS; + this.disposeModelsOnInactivity = config.disposeModelsOnInactivity ?? false; } /** @@ -245,11 +269,11 @@ export class LlamaCpp implements LLM { this.inactivityTimer = null; } - // Only set timer if we have loaded models and timeout is enabled - if (this.inactivityTimeoutMs > 0 && this.hasLoadedModels()) { + // Only set timer if we have disposable contexts and timeout is enabled + if (this.inactivityTimeoutMs > 0 && this.hasLoadedContexts()) { this.inactivityTimer = setTimeout(() => { - this.unloadModels().catch(err => { - console.error("Error unloading models:", err); + this.unloadIdleResources().catch(err => { + console.error("Error unloading idle resources:", err); }); }, this.inactivityTimeoutMs); // Don't keep process alive just for this timer @@ -258,17 +282,19 @@ export class LlamaCpp implements LLM { } /** - * Check if any models are currently loaded + * Check if any contexts are currently loaded (and therefore worth unloading on inactivity). */ - private hasLoadedModels(): boolean { - return !!(this.embedModel || this.generateModel || this.rerankModel); + private hasLoadedContexts(): boolean { + return !!(this.embedContext || this.rerankContext); } /** - * Unload all models but keep the instance alive for future use. - * Models will be reloaded lazily on next operation. + * Unload idle resources but keep the instance alive for future use. + * + * By default, this disposes contexts (and their dependent sequences), while keeping models loaded. + * This matches the intended lifecycle: model → context → sequence, where contexts are per-session. */ - async unloadModels(): Promise { + async unloadIdleResources(): Promise { // Don't unload if already disposed if (this.disposed) { return; @@ -285,27 +311,29 @@ export class LlamaCpp implements LLM { await this.embedContext.dispose(); this.embedContext = null; } - if (this.generateContext) { - await this.generateContext.dispose(); - this.generateContext = null; - } if (this.rerankContext) { await this.rerankContext.dispose(); this.rerankContext = null; } - // Dispose models - if (this.embedModel) { - await this.embedModel.dispose(); - this.embedModel = null; - } - if (this.generateModel) { - await this.generateModel.dispose(); - this.generateModel = null; - } - if (this.rerankModel) { - await this.rerankModel.dispose(); - this.rerankModel = null; + // Optionally dispose models too (opt-in) + if (this.disposeModelsOnInactivity) { + if (this.embedModel) { + await this.embedModel.dispose(); + this.embedModel = null; + } + if (this.generateModel) { + await this.generateModel.dispose(); + this.generateModel = null; + } + if (this.rerankModel) { + await this.rerankModel.dispose(); + this.rerankModel = null; + } + // Reset load promises so models can be reloaded later + this.embedModelLoadPromise = null; + this.generateModelLoadPromise = null; + this.rerankModelLoadPromise = null; } // Note: We keep llama instance alive - it's lightweight @@ -325,7 +353,7 @@ export class LlamaCpp implements LLM { */ private async ensureLlama(): Promise { if (!this.llama) { - this.llama = await getLlama({ logLevel: "error" }); + this.llama = await getLlama({ logLevel: LlamaLogLevel.error }); } return this.llama; } @@ -340,42 +368,107 @@ export class LlamaCpp implements LLM { } /** - * Load embedding model and context (lazy) + * Load embedding model (lazy) + */ + private async ensureEmbedModel(): Promise { + if (this.embedModel) { + return this.embedModel; + } + if (this.embedModelLoadPromise) { + return await this.embedModelLoadPromise; + } + + this.embedModelLoadPromise = (async () => { + const llama = await this.ensureLlama(); + const modelPath = await this.resolveModel(this.embedModelUri); + const model = await llama.loadModel({ modelPath }); + this.embedModel = model; + return model; + })(); + + try { + return await this.embedModelLoadPromise; + } finally { + // Keep the resolved model cached; clear only the in-flight promise. + this.embedModelLoadPromise = null; + } + } + + /** + * Load embedding context (lazy). Context can be disposed and recreated without reloading the model. */ private async ensureEmbedContext(): Promise { if (!this.embedContext) { - const llama = await this.ensureLlama(); - const modelPath = await this.resolveModel(this.embedModelUri); - this.embedModel = await llama.loadModel({ modelPath }); - this.embedContext = await this.embedModel.createEmbeddingContext(); + const model = await this.ensureEmbedModel(); + this.embedContext = await model.createEmbeddingContext(); } this.touchActivity(); return this.embedContext; } /** - * Load generation model and context (lazy) + * Load generation model (lazy) - context is created fresh per call */ - private async ensureGenerateContext(): Promise { - if (!this.generateContext) { - const llama = await this.ensureLlama(); - const modelPath = await this.resolveModel(this.generateModelUri); - this.generateModel = await llama.loadModel({ modelPath }); - this.generateContext = await this.generateModel.createContext(); + private async ensureGenerateModel(): Promise { + if (!this.generateModel) { + if (this.generateModelLoadPromise) { + return await this.generateModelLoadPromise; + } + + this.generateModelLoadPromise = (async () => { + const llama = await this.ensureLlama(); + const modelPath = await this.resolveModel(this.generateModelUri); + const model = await llama.loadModel({ modelPath }); + this.generateModel = model; + return model; + })(); + + try { + await this.generateModelLoadPromise; + } finally { + this.generateModelLoadPromise = null; + } } this.touchActivity(); - return this.generateContext; + if (!this.generateModel) { + throw new Error("Generate model not loaded"); + } + return this.generateModel; } /** - * Load rerank model and context (lazy) + * Load rerank model (lazy) + */ + private async ensureRerankModel(): Promise { + if (this.rerankModel) { + return this.rerankModel; + } + if (this.rerankModelLoadPromise) { + return await this.rerankModelLoadPromise; + } + + this.rerankModelLoadPromise = (async () => { + const llama = await this.ensureLlama(); + const modelPath = await this.resolveModel(this.rerankModelUri); + const model = await llama.loadModel({ modelPath }); + this.rerankModel = model; + return model; + })(); + + try { + return await this.rerankModelLoadPromise; + } finally { + this.rerankModelLoadPromise = null; + } + } + + /** + * Load rerank context (lazy). Context can be disposed and recreated without reloading the model. */ private async ensureRerankContext(): Promise>> { if (!this.rerankContext) { - const llama = await this.ensureLlama(); - const modelPath = await this.resolveModel(this.rerankModelUri); - this.rerankModel = await llama.loadModel({ modelPath }); - this.rerankContext = await this.rerankModel.createRankingContext(); + const model = await this.ensureRerankModel(); + this.rerankContext = await model.createRankingContext(); } this.touchActivity(); return this.rerankContext; @@ -387,9 +480,9 @@ export class LlamaCpp implements LLM { /** * Tokenize text using the embedding model's tokenizer - * Returns array of token IDs + * Returns tokenizer tokens (opaque type from node-llama-cpp) */ - async tokenize(text: string): Promise { + async tokenize(text: string): Promise { await this.ensureEmbedContext(); // Ensure model is loaded if (!this.embedModel) { throw new Error("Embed model not loaded"); @@ -408,7 +501,7 @@ export class LlamaCpp implements LLM { /** * Detokenize token IDs back to text */ - async detokenize(tokens: number[]): Promise { + async detokenize(tokens: readonly LlamaToken[]): Promise { await this.ensureEmbedContext(); if (!this.embedModel) { throw new Error("Embed model not loaded"); @@ -469,46 +562,35 @@ export class LlamaCpp implements LLM { } async generate(prompt: string, options: GenerateOptions = {}): Promise { - // Serialize generation calls to avoid "No sequences left" with single sequence - let unlock: () => void; - const waitForLock = this.generateLock; - this.generateLock = new Promise(resolve => { unlock = resolve; }); - await waitForLock; + // Ensure model is loaded + await this.ensureGenerateModel(); + // Create fresh context -> sequence -> session for each call + const context = await this.generateModel!.createContext(); + const sequence = context.getSequence(); + const session = new LlamaChatSession({ contextSequence: sequence }); + + const maxTokens = options.maxTokens ?? 150; + const temperature = options.temperature ?? 0; + + let result = ""; try { - const context = await this.ensureGenerateContext(); - const { LlamaChatSession } = await import("node-llama-cpp"); - const session = new LlamaChatSession({ - contextSequence: context.getSequence(), + await session.prompt(prompt, { + maxTokens, + temperature, + onTextChunk: (text) => { + result += text; + }, }); - const maxTokens = options.maxTokens ?? 150; - const temperature = options.temperature ?? 0; - - let result = ""; - try { - await session.prompt(prompt, { - maxTokens, - temperature, - onTextChunk: (text) => { - result += text; - }, - }); - } finally { - // Dispose session to release the sequence - await session.dispose(); - } - return { text: result, model: this.generateModelUri, done: true, }; - } catch (error) { - console.error("Generation error:", error); - return null; } finally { - unlock!(); + // Dispose context (which disposes dependent sequences/sessions per lifecycle rules) + await context.dispose(); } } @@ -573,7 +655,7 @@ Output exactly ${numVariations} variations, one per line, no numbering or bullet */ async expandQueryStructured(query: string, includeLexical: boolean = true): Promise { const llama = await this.ensureLlama(); - const context = await this.ensureGenerateContext(); + await this.ensureGenerateModel(); // Define JSON schema for structured output const schema = { @@ -592,7 +674,7 @@ Output exactly ${numVariations} variations, one per line, no numbering or bullet description: "Write a short passage (50-100 words) that directly answers the query as if from a relevant document" } }, - required: ["vectorQuery", "hyde"] as const + required: [] as const }; const grammar = await llama.createGrammarForJsonSchema(schema); @@ -607,25 +689,24 @@ Given a query, generate: Keep proper nouns exactly as written. Be concise.` : `You expand search queries for semantic search. Given a query, generate: -1. vectorQuery: Semantically rephrased query capturing the full intent -2. hyde: Write a brief example passage (50-100 words) that answers the query, as if excerpted from a relevant document +1. vectorQuery: Semantically rephrased query capturing the full intent (must be different from the original query) +2. HyDE: Write a brief example passage (50-100 words) that answers the query, as if excerpted from a relevant document -Keep proper nouns exactly as written. Be concise. Set lexicalQuery to empty string.`; +Keep proper nouns exactly as written. Be concise.`; const prompt = `Query: "${query}" Generate the structured expansion:`; - const { LlamaChatSession } = await import("node-llama-cpp"); - const session = new LlamaChatSession({ - contextSequence: context.getSequence(), - systemPrompt, - }); + // Create fresh context for each call + const context = await this.generateModel!.createContext(); + const sequence = context.getSequence(); + const session = new LlamaChatSession({ contextSequence: sequence, systemPrompt }); try { const result = await session.prompt(prompt, { grammar, - maxTokens: 300, + maxTokens: 500, temperature: 0, }); @@ -649,7 +730,8 @@ Generate the structured expansion:`; hyde: "", }; } finally { - await session.dispose(); + // Dispose context (disposes session too per lifecycle rules) + await context.dispose(); } } @@ -658,47 +740,34 @@ Generate the structured expansion:`; documents: RerankDocument[], options: RerankOptions = {} ): Promise { - try { - const context = await this.ensureRerankContext(); + const context = await this.ensureRerankContext(); - // Build a map from document text to original indices (for lookup after sorting) - const textToDoc = new Map(); - documents.forEach((doc, index) => { - textToDoc.set(doc.text, { file: doc.file, index }); - }); + // Build a map from document text to original indices (for lookup after sorting) + const textToDoc = new Map(); + documents.forEach((doc, index) => { + textToDoc.set(doc.text, { file: doc.file, index }); + }); - // Extract just the text for ranking - const texts = documents.map((doc) => doc.text); + // Extract just the text for ranking + const texts = documents.map((doc) => doc.text); - // Use the proper ranking API - returns [{document: string, score: number}] sorted by score - const ranked = await context.rankAndSort(query, texts); - - // Map back to our result format using the text-to-doc map - const results: RerankDocumentResult[] = ranked.map((item) => { - const docInfo = textToDoc.get(item.document)!; - return { - file: docInfo.file, - score: item.score, - index: docInfo.index, - }; - }); + // Use the proper ranking API - returns [{document: string, score: number}] sorted by score + const ranked = await context.rankAndSort(query, texts); + // Map back to our result format using the text-to-doc map + const results: RerankDocumentResult[] = ranked.map((item) => { + const docInfo = textToDoc.get(item.document)!; return { - results, - model: this.rerankModelUri, + file: docInfo.file, + score: item.score, + index: docInfo.index, }; - } catch (error) { - console.error("Rerank error:", error); - // Return documents in original order with zero scores on error - return { - results: documents.map((doc, index) => ({ - file: doc.file, - score: 0, - index, - })), - model: this.rerankModelUri, - }; - } + }); + + return { + results, + model: this.rerankModelUri, + }; } async dispose(): Promise { @@ -722,12 +791,16 @@ Generate the structured expansion:`; // Clear references this.embedContext = null; - this.generateContext = null; this.rerankContext = null; this.embedModel = null; this.generateModel = null; this.rerankModel = null; this.llama = null; + + // Clear any in-flight load promises + this.embedModelLoadPromise = null; + this.generateModelLoadPromise = null; + this.rerankModelLoadPromise = null; } } @@ -765,18 +838,3 @@ export async function disposeDefaultLlamaCpp(): Promise { } } -// ============================================================================= -// Legacy exports for backwards compatibility -// ============================================================================= - -// Keep Ollama as an alias for now during transition -export { LlamaCpp as Ollama }; -export type { LlamaCppConfig as OllamaConfig }; - -export function getDefaultOllama(): LlamaCpp { - return getDefaultLlamaCpp(); -} - -export function setDefaultOllama(llm: LlamaCpp | null): void { - setDefaultLlamaCpp(llm); -} diff --git a/src/mcp.test.ts b/src/mcp.test.ts index f955ca9..f15b8d4 100644 --- a/src/mcp.test.ts +++ b/src/mcp.test.ts @@ -10,7 +10,7 @@ import { Database } from "bun:sqlite"; import * as sqliteVec from "sqlite-vec"; import { McpServer, ResourceTemplate } from "@modelcontextprotocol/sdk/server/mcp.js"; import { z } from "zod"; -import { setDefaultLlamaCpp, LlamaCpp } from "./llm"; +import { getDefaultLlamaCpp, disposeDefaultLlamaCpp } from "./llm"; import { mkdtemp, writeFile, readdir, unlink, rmdir } from "node:fs/promises"; import { join } from "node:path"; import { tmpdir } from "node:os"; @@ -25,6 +25,11 @@ let testDb: Database; let testDbPath: string; let testConfigDir: string; +afterAll(async () => { + // Ensure native resources are released to avoid ggml-metal asserts on process exit. + await disposeDefaultLlamaCpp(); +}); + function initTestDatabase(db: Database): void { sqliteVec.load(db); db.exec("PRAGMA journal_mode = WAL"); @@ -178,8 +183,9 @@ import { reciprocalRankFusion, extractSnippet, getContextForFile, - getDocument, - getMultipleDocuments, + findDocument, + getDocumentBody, + findDocuments, getStatus, DEFAULT_EMBED_MODEL, DEFAULT_QUERY_MODEL, @@ -197,7 +203,8 @@ import type { RankedResult } from "./store"; describe("MCP Server", () => { beforeAll(async () => { // LlamaCpp uses node-llama-cpp for local model inference (no HTTP mocking needed) - setDefaultLlamaCpp(new LlamaCpp()); + // Use shared singleton to avoid creating multiple instances with separate GPU resources + getDefaultLlamaCpp(); // Set up test config directory const configPrefix = join(tmpdir(), `qmd-mcp-config-${Date.now()}-${Math.random().toString(36).slice(2)}`); @@ -250,7 +257,7 @@ describe("MCP Server", () => { test("returns results for matching query", () => { const results = searchFTS(testDb, "readme", 10); expect(results.length).toBeGreaterThan(0); - expect(results[0].displayPath).toBe("docs/readme.md"); + expect(results[0]!.displayPath).toBe("docs/readme.md"); }); test("returns empty for non-matching query", () => { @@ -271,8 +278,8 @@ describe("MCP Server", () => { file: r.displayPath, title: r.title, score: Math.round(r.score * 100) / 100, - context: getContextForFile(testDb, r.file), - snippet: extractSnippet(r.body, "api", 300, r.chunkPos).snippet, + context: getContextForFile(testDb, r.filepath), + snippet: extractSnippet(r.body || "", "api", 300, r.chunkPos).snippet, })); // MCP now returns structuredContent with results array expect(filtered.length).toBeGreaterThan(0); @@ -345,7 +352,7 @@ describe("MCP Server", () => { ]; const reranked = await rerank("readme", docs, DEFAULT_RERANK_MODEL, testDb); expect(reranked.length).toBe(2); - expect(reranked[0].score).toBeGreaterThan(0); + expect(reranked[0]!.score).toBeGreaterThan(0); }); test("full hybrid search pipeline", async () => { @@ -390,29 +397,29 @@ describe("MCP Server", () => { describe("qmd_get tool", () => { test("retrieves document by display_path", () => { - const result = getDocument(testDb, "readme.md"); - expect("error" in result).toBe(false); - if (!("error" in result)) { - expect(result.displayPath).toBe("docs/readme.md"); - expect(result.body).toContain("Project README"); - } + const meta = findDocument(testDb, "readme.md", { includeBody: false }); + expect("error" in meta).toBe(false); + if ("error" in meta) return; + const body = getDocumentBody(testDb, meta) ?? ""; + + expect(meta.displayPath).toBe("docs/readme.md"); + expect(body).toContain("Project README"); }); test("retrieves document by filepath", () => { - const result = getDocument(testDb, "/test/docs/api.md"); - expect("error" in result).toBe(false); - if (!("error" in result)) { - expect(result.title).toBe("API Documentation"); - } + const meta = findDocument(testDb, "/test/docs/api.md", { includeBody: false }); + expect("error" in meta).toBe(false); + if ("error" in meta) return; + expect(meta.title).toBe("API Documentation"); }); test("retrieves document by partial path", () => { - const result = getDocument(testDb, "api.md"); + const result = findDocument(testDb, "api.md", { includeBody: false }); expect("error" in result).toBe(false); }); test("returns not found for missing document", () => { - const result = getDocument(testDb, "nonexistent.md"); + const result = findDocument(testDb, "nonexistent.md", { includeBody: false }); expect("error" in result).toBe(true); if ("error" in result) { expect(result.error).toBe("not_found"); @@ -420,7 +427,7 @@ describe("MCP Server", () => { }); test("suggests similar files when not found", () => { - const result = getDocument(testDb, "readm.md"); // typo + const result = findDocument(testDb, "readm.md", { includeBody: false }); // typo expect("error" in result).toBe(true); if ("error" in result) { expect(result.similarFiles.length).toBeGreaterThanOrEqual(0); @@ -428,37 +435,36 @@ describe("MCP Server", () => { }); test("supports line range with :line suffix", () => { - const result = getDocument(testDb, "readme.md:2", undefined, 2); - expect("error" in result).toBe(false); - if (!("error" in result)) { - const lines = result.body.split("\n"); - expect(lines.length).toBeLessThanOrEqual(2); - } + const meta = findDocument(testDb, "readme.md:2", { includeBody: false }); + expect("error" in meta).toBe(false); + if ("error" in meta) return; + const body = getDocumentBody(testDb, meta, 2, 2) ?? ""; + const lines = body.split("\n"); + expect(lines.length).toBeLessThanOrEqual(2); }); test("supports fromLine parameter", () => { - const result = getDocument(testDb, "readme.md", 3); - expect("error" in result).toBe(false); - if (!("error" in result)) { - expect(result.body).not.toContain("# Project README"); - } + const meta = findDocument(testDb, "readme.md", { includeBody: false }); + expect("error" in meta).toBe(false); + if ("error" in meta) return; + const body = getDocumentBody(testDb, meta, 3) ?? ""; + expect(body).not.toContain("# Project README"); }); test("supports maxLines parameter", () => { - const result = getDocument(testDb, "api.md", 1, 3); - expect("error" in result).toBe(false); - if (!("error" in result)) { - const lines = result.body.split("\n"); - expect(lines.length).toBeLessThanOrEqual(3); - } + const meta = findDocument(testDb, "api.md", { includeBody: false }); + expect("error" in meta).toBe(false); + if ("error" in meta) return; + const body = getDocumentBody(testDb, meta, 1, 3) ?? ""; + const lines = body.split("\n"); + expect(lines.length).toBeLessThanOrEqual(3); }); test("includes context for documents in context path", () => { - const result = getDocument(testDb, "meetings/meeting-2024-01.md"); + const result = findDocument(testDb, "meetings/meeting-2024-01.md", { includeBody: false }); expect("error" in result).toBe(false); - if (!("error" in result)) { - expect(result.context).toBe("Meeting notes and transcripts"); - } + if ("error" in result) return; + expect(result.context).toBe("Meeting notes and transcripts"); }); }); @@ -468,59 +474,65 @@ describe("MCP Server", () => { describe("qmd_multi_get tool", () => { test("retrieves multiple documents by glob pattern", () => { - const { files, errors } = getMultipleDocuments(testDb, "meetings/*.md"); + const { docs, errors } = findDocuments(testDb, "meetings/*.md", { includeBody: true }); expect(errors.length).toBe(0); - expect(files.length).toBe(2); - expect(files.some(f => f.displayPath === "docs/meetings/meeting-2024-01.md")).toBe(true); - expect(files.some(f => f.displayPath === "docs/meetings/meeting-2024-02.md")).toBe(true); + expect(docs.length).toBe(2); + const paths = docs.map(d => d.doc.displayPath); + expect(paths).toContain("docs/meetings/meeting-2024-01.md"); + expect(paths).toContain("docs/meetings/meeting-2024-02.md"); }); test("retrieves documents by comma-separated list", () => { - const { files, errors } = getMultipleDocuments(testDb, "readme.md, api.md"); + const { docs, errors } = findDocuments(testDb, "readme.md, api.md", { includeBody: true }); expect(errors.length).toBe(0); - expect(files.length).toBe(2); + expect(docs.length).toBe(2); }); test("returns errors for missing files in comma list", () => { - const { files, errors } = getMultipleDocuments(testDb, "readme.md, nonexistent.md"); - expect(files.length).toBe(1); + const { docs, errors } = findDocuments(testDb, "readme.md, nonexistent.md", { includeBody: true }); + expect(docs.length).toBe(1); expect(errors.length).toBe(1); expect(errors[0]).toContain("not found"); }); test("skips files larger than maxBytes", () => { - const { files } = getMultipleDocuments(testDb, "*.md", undefined, 1000); // 1KB limit - const largeFile = files.find(f => f.displayPath === "docs/large-file.md"); - expect(largeFile).toBeDefined(); - expect(largeFile?.skipped).toBe(true); - if (largeFile?.skipped) { - expect(largeFile.skipReason).toContain("too large"); - } + const { docs } = findDocuments(testDb, "*.md", { includeBody: true, maxBytes: 1000 }); // 1KB limit + const large = docs.find(d => d.doc.displayPath === "docs/large-file.md"); + expect(large).toBeDefined(); + expect(large?.skipped).toBe(true); + if (large?.skipped) expect(large.skipReason).toContain("too large"); }); test("respects maxLines parameter", () => { - const { files } = getMultipleDocuments(testDb, "readme.md", 2); - expect(files.length).toBe(1); - if (!files[0].skipped) { - const lines = files[0].body.split("\n"); - // maxLines + truncation message - expect(lines.length).toBeLessThanOrEqual(4); + const { docs } = findDocuments(testDb, "readme.md", { includeBody: true, maxBytes: DEFAULT_MULTI_GET_MAX_BYTES }); + expect(docs.length).toBe(1); + const d = docs[0]!; + expect(d.skipped).toBe(false); + if (d.skipped) return; + if (!("body" in d.doc)) { + throw new Error("Expected body to be included in findDocuments result"); } + const lines = (d.doc.body || "").split("\n").slice(0, 2); + expect(lines.length).toBeLessThanOrEqual(2); }); test("returns error for non-matching glob", () => { - const { files, errors } = getMultipleDocuments(testDb, "nonexistent/*.md"); - expect(files.length).toBe(0); + const { docs, errors } = findDocuments(testDb, "nonexistent/*.md", { includeBody: true }); + expect(docs.length).toBe(0); expect(errors.length).toBe(1); expect(errors[0]).toContain("No files matched"); }); test("includes context in results", () => { - const { files } = getMultipleDocuments(testDb, "meetings/meeting-2024-01.md"); - expect(files.length).toBe(1); - if (!files[0].skipped) { - expect(files[0].context).toBe("Meeting notes and transcripts"); + const { docs } = findDocuments(testDb, "meetings/meeting-2024-01.md", { includeBody: true }); + expect(docs.length).toBe(1); + const d = docs[0]!; + expect(d.skipped).toBe(false); + if (d.skipped) return; + if (!("context" in d.doc)) { + throw new Error("Expected context to be present on document result"); } + expect(d.doc.context).toBe("Meeting notes and transcripts"); }); }); @@ -534,7 +546,7 @@ describe("MCP Server", () => { expect(status.totalDocuments).toBe(5); expect(status.hasVectorIndex).toBe(true); expect(status.collections.length).toBe(1); - expect(status.collections[0].path).toBe("/test/docs"); + expect(status.collections[0]!.path).toBe("/test/docs"); }); test("shows documents needing embedding", () => { @@ -816,12 +828,12 @@ QMD is your on-device search engine for markdown knowledge bases.`; file: r.displayPath, title: r.title, score: Math.round(r.score * 100) / 100, - context: getContextForFile(testDb, r.file), - snippet: extractSnippet(r.body, "readme", 300, r.chunkPos).snippet, + context: getContextForFile(testDb, r.filepath), + snippet: extractSnippet(r.body || "", "readme", 300, r.chunkPos).snippet, })); expect(structured.length).toBeGreaterThan(0); - const item = structured[0]; + const item = structured[0]!; expect(typeof item.file).toBe("string"); expect(typeof item.title).toBe("string"); expect(typeof item.score).toBe("number"); @@ -837,25 +849,25 @@ QMD is your on-device search engine for markdown knowledge bases.`; isError: true, }; expect(errorResponse.isError).toBe(true); - expect(errorResponse.content[0].type).toBe("text"); + expect(errorResponse.content[0]!.type).toBe("text"); }); test("embedded resources include name and title", () => { // Simulate what qmd_get returns - const result = getDocument(testDb, "readme.md"); - expect("error" in result).toBe(false); - if (!("error" in result)) { - const resource = { - uri: `qmd://${result.displayPath}`, - name: result.displayPath, - title: result.title, - mimeType: "text/markdown", - text: result.body, - }; - expect(resource.name).toBe("docs/readme.md"); - expect(resource.title).toBe("Project README"); - expect(resource.mimeType).toBe("text/markdown"); - } + const meta = findDocument(testDb, "readme.md", { includeBody: false }); + expect("error" in meta).toBe(false); + if ("error" in meta) return; + const body = getDocumentBody(testDb, meta) ?? ""; + const resource = { + uri: `qmd://${meta.displayPath}`, + name: meta.displayPath, + title: meta.title, + mimeType: "text/markdown", + text: body, + }; + expect(resource.name).toBe("docs/readme.md"); + expect(resource.title).toBe("Project README"); + expect(resource.mimeType).toBe("text/markdown"); }); test("status response includes structuredContent", () => { @@ -866,7 +878,7 @@ QMD is your on-device search engine for markdown knowledge bases.`; expect(typeof status.hasVectorIndex).toBe("boolean"); expect(Array.isArray(status.collections)).toBe(true); if (status.collections.length > 0) { - const col = status.collections[0]; + const col = status.collections[0]!; expect(typeof col.name).toBe("string"); // Collections now use names, not IDs expect(typeof col.path).toBe("string"); expect(typeof col.pattern).toBe("string"); diff --git a/src/mcp.ts b/src/mcp.ts index 78b80b1..6a647ca 100644 --- a/src/mcp.ts +++ b/src/mcp.ts @@ -457,7 +457,16 @@ You can also access documents directly via the \`qmd://\` URI scheme: }, }, async ({ file, fromLine, maxLines, lineNumbers }) => { - const result = store.getDocument(file, fromLine, maxLines); + // Support :line suffix in `file` (e.g. "foo.md:120") when fromLine isn't provided + let parsedFromLine = fromLine; + let lookup = file; + const colonMatch = lookup.match(/:(\d+)$/); + if (colonMatch && parsedFromLine === undefined) { + parsedFromLine = parseInt(colonMatch[1], 10); + lookup = lookup.slice(0, -colonMatch[0].length); + } + + const result = store.findDocument(lookup, { includeBody: false }); if ("error" in result) { let msg = `Document not found: ${file}`; @@ -470,9 +479,10 @@ You can also access documents directly via the \`qmd://\` URI scheme: }; } - let text = result.body; + const body = store.getDocumentBody(result, parsedFromLine, maxLines) ?? ""; + let text = body; if (lineNumbers) { - const startLine = fromLine || 1; + const startLine = parsedFromLine || 1; text = addLineNumbers(text, startLine); } if (result.context) { @@ -511,9 +521,9 @@ You can also access documents directly via the \`qmd://\` URI scheme: }, }, async ({ pattern, maxLines, maxBytes, lineNumbers }) => { - const { files, errors } = store.getMultipleDocuments(pattern, maxLines, maxBytes || DEFAULT_MULTI_GET_MAX_BYTES); + const { docs, errors } = store.findDocuments(pattern, { includeBody: true, maxBytes: maxBytes || DEFAULT_MULTI_GET_MAX_BYTES }); - if (files.length === 0 && errors.length === 0) { + if (docs.length === 0 && errors.length === 0) { return { content: [{ type: "text", text: `No files matched pattern: ${pattern}` }], isError: true, @@ -526,29 +536,36 @@ You can also access documents directly via the \`qmd://\` URI scheme: content.push({ type: "text", text: `Errors:\n${errors.join('\n')}` }); } - for (const file of files) { - if (file.skipped) { + for (const result of docs) { + if (result.skipped) { content.push({ type: "text", - text: `[SKIPPED: ${file.displayPath} - ${file.skipReason}. Use 'qmd_get' with file="${file.displayPath}" to retrieve.]`, + text: `[SKIPPED: ${result.doc.displayPath} - ${result.skipReason}. Use 'qmd_get' with file="${result.doc.displayPath}" to retrieve.]`, }); continue; } - let text = file.body; + let text = result.doc.body || ""; + if (maxLines !== undefined) { + const lines = text.split("\n"); + text = lines.slice(0, maxLines).join("\n"); + if (lines.length > maxLines) { + text += `\n\n[... truncated ${lines.length - maxLines} more lines]`; + } + } if (lineNumbers) { text = addLineNumbers(text); } - if (file.context) { - text = `\n\n` + text; + if (result.doc.context) { + text = `\n\n` + text; } content.push({ type: "resource", resource: { - uri: `qmd://${encodeQmdPath(file.displayPath)}`, - name: file.displayPath, - title: file.title, + uri: `qmd://${encodeQmdPath(result.doc.displayPath)}`, + name: result.doc.displayPath, + title: result.doc.title, mimeType: "text/markdown", text, }, diff --git a/src/qmd.ts b/src/qmd.ts index 45a1c4d..4ce105a 100755 --- a/src/qmd.ts +++ b/src/qmd.ts @@ -4,14 +4,10 @@ import { Glob, $ } from "bun"; import { parseArgs } from "util"; import * as sqliteVec from "sqlite-vec"; import { - getDb, - closeDb, - getDbPath, getPwd, getRealPath, homedir, resolve, - setCustomIndexName, enableProductionMode, searchFTS, searchVec, @@ -28,8 +24,6 @@ import { getHashesForEmbedding, clearAllEmbeddings, insertEmbedding, - getDocument as storeGetDocument, - getMultipleDocuments as storeMultiGetDocuments, getStatus, hashContent, extractTitle, @@ -37,7 +31,6 @@ import { formatQueryForEmbedding, chunkDocument, chunkDocumentByTokens, - ensureVecTable, clearCache, getCacheKey, getCachedResult, @@ -59,7 +52,6 @@ import { deleteLLMCache, deleteInactiveDocuments, cleanupOrphanedVectors, - cleanupDuplicateCollections, vacuumDatabase, getCollectionsWithoutContext, getTopLevelPathsWithoutContext, @@ -69,6 +61,8 @@ import { DEFAULT_RERANK_MODEL, DEFAULT_GLOB, DEFAULT_MULTI_GET_MAX_BYTES, + createStore, + getDefaultDbPath, } from "./store.js"; import { getDefaultLlamaCpp, disposeDefaultLlamaCpp, type RerankDocument, type ExpandedQuery } from "./llm.js"; import type { SearchResult, RankedResult } from "./store.js"; @@ -92,6 +86,46 @@ import { // Tests must set INDEX_PATH or use createStore() with explicit path enableProductionMode(); +// ============================================================================= +// Store/DB lifecycle (no legacy singletons in store.ts) +// ============================================================================= + +let store: ReturnType | null = null; +let storeDbPathOverride: string | undefined; + +function getStore(): ReturnType { + if (!store) { + store = createStore(storeDbPathOverride); + } + return store; +} + +function getDb(): Database { + return getStore().db; +} + +function closeDb(): void { + if (store) { + store.close(); + store = null; + } +} + +function getDbPath(): string { + return store?.dbPath ?? storeDbPathOverride ?? getDefaultDbPath(); +} + +function setIndexName(name: string | null): void { + storeDbPathOverride = name ? getDefaultDbPath(name) : undefined; + // Reset open handle so next use opens the new index + closeDb(); +} + +function ensureVecTable(_db: Database, dimensions: number): void { + // Store owns the DB; ignore `_db` and ensure vec table on the active store + getStore().ensureVecTable(dimensions); +} + // Terminal colors (respects NO_COLOR env) const useColor = !process.env.NO_COLOR && process.stdout.isTTY; const c = { @@ -239,8 +273,8 @@ function showStatus(): void { const dbPath = getDbPath(); const db = getDb(); - // Cleanup any duplicate collections - cleanupDuplicateCollections(db); + // Collections are defined in YAML; no duplicate cleanup needed. + // Collections are defined in YAML; no duplicate cleanup needed. // Index size let indexSize = 0; @@ -336,7 +370,7 @@ function showStatus(): void { async function updateCollections(): Promise { const db = getDb(); - cleanupDuplicateCollections(db); + // Collections are defined in YAML; no duplicate cleanup needed. // Clear Ollama cache on update clearCache(db); @@ -1679,47 +1713,6 @@ type OutputOptions = { lineNumbers?: boolean; // Add line numbers to output }; -// Extract snippet with more context lines for CLI display -function extractSnippetWithContext(body: string, query: string, contextLines = 3, chunkPos?: number): { line: number; snippet: string; hasMatch: boolean } { - // If chunkPos provided, focus search on that area - let lineOffset = 0; - let searchBody = body; - if (chunkPos && chunkPos > 0) { - const contextStart = Math.max(0, chunkPos - 200); - searchBody = body.slice(contextStart); - if (contextStart > 0) { - lineOffset = body.slice(0, contextStart).split('\n').length - 1; - } - } - - const lines = searchBody.split('\n'); - const queryTerms = query.toLowerCase().split(/\s+/).filter(t => t.length > 0); - let bestLine = 0, bestScore = -1; - - for (let i = 0; i < lines.length; i++) { - const lineLower = lines[i].toLowerCase(); - let score = 0; - for (const term of queryTerms) { - if (lineLower.includes(term)) score++; - } - if (score > bestScore) { - bestScore = score; - bestLine = i; - } - } - - // No query match found - return beginning of chunk area or file - if (bestScore <= 0) { - const preview = lines.slice(0, contextLines * 2).join('\n').trim(); - return { line: lineOffset + 1, snippet: preview, hasMatch: false }; - } - - const startLine = Math.max(0, bestLine - contextLines); - const endLine = Math.min(lines.length, bestLine + contextLines + 1); - const snippet = lines.slice(startLine, endLine).join('\n').trim(); - return { line: lineOffset + bestLine + 1, snippet, hasMatch: true }; -} - // Highlight query terms in text (skip short words < 3 chars) function highlightTerms(text: string, query: string): string { if (!useColor) return text; @@ -1798,11 +1791,14 @@ function outputResults(results: { file: string; displayPath: string; title: stri } else if (opts.format === "cli") { for (let i = 0; i < filtered.length; i++) { const row = filtered[i]; - const { line, snippet, hasMatch } = extractSnippetWithContext(row.body, query, 2, row.chunkPos); + const { line, snippet } = extractSnippet(row.body, query, 500, row.chunkPos); const docid = row.docid || (row.hash ? row.hash.slice(0, 6) : undefined); // Line 1: filepath with docid const path = toQmdPath(row.displayPath); + // Only show :line if we actually found a term match in the snippet body (exclude header line). + const snippetBody = snippet.split("\n").slice(1).join("\n").toLowerCase(); + const hasMatch = query.toLowerCase().split(/\s+/).some(t => t.length > 0 && snippetBody.includes(t)); const lineInfo = hasMatch ? `:${line}` : ""; const docidStr = docid ? ` ${c.dim}#${docid}${c.reset}` : ""; console.log(`${c.cyan}${path}${c.dim}${lineInfo}${c.reset}${docidStr}`); @@ -1822,7 +1818,7 @@ function outputResults(results: { file: string; displayPath: string; title: stri console.log(`Score: ${c.bold}${score}${c.reset}`); console.log(); - // Snippet with highlighting (no leading | chars for better word wrap) + // Snippet with highlighting (diff-style header included) let displaySnippet = opts.lineNumbers ? addLineNumbers(snippet, line) : snippet; const highlighted = highlightTerms(displaySnippet, query); console.log(highlighted); @@ -2009,7 +2005,6 @@ async function expandQueryStructured(query: string, includeLexical: boolean = tr return expanded; } -// Legacy wrapper for backward compatibility async function expandQuery(query: string, _model: string = DEFAULT_QUERY_MODEL, _db?: Database): Promise { const expanded = await expandQueryStructured(query, true); const queries = [query]; @@ -2041,15 +2036,25 @@ async function querySearch(query: string, opts: OutputOptions, embedModel: strin const hasVectors = !!db.prepare(`SELECT name FROM sqlite_master WHERE type='table' AND name='vectors_vec'`).get(); // Check if initial results have strong signals (skip expansion if so) - // Strong signal = top result has high normalized score (> 0.7) - const hasStrongSignal = initialFts.length > 0 && initialFts[0].score > 0.7; + // Strong signal = top result is strong AND clearly separated from runner-up. + // This avoids skipping expansion when BM25 has lots of mediocre matches. + const topScore = initialFts[0]?.score ?? 0; + const secondScore = initialFts[1]?.score ?? 0; + const hasStrongSignal = initialFts.length > 0 && topScore >= 0.85 && (topScore - secondScore) >= 0.15; let ftsQueries: string[] = [query]; let vectorQueries: string[] = [query]; if (hasStrongSignal) { // Strong BM25 signal - skip expensive LLM expansion - process.stderr.write(`${c.dim}Strong BM25 signal (${initialFts[0].score.toFixed(2)}) - skipping expansion${c.reset}\n`); + process.stderr.write(`${c.dim}Strong BM25 signal (${topScore.toFixed(2)}) - skipping expansion${c.reset}\n`); + // Still log the "expansion tree" in the same style as vsearch for consistency. + { + const lines: string[] = []; + lines.push(`${c.dim}├─ ${query} · (lexical+vector)${c.reset}`); + lines[lines.length - 1] = lines[lines.length - 1].replace('├─', '└─'); + for (const line of lines) process.stderr.write(line + '\n'); + } } else { // Weak signal - expand query for better recall const expanded = await expandQueryStructured(query, true); @@ -2102,7 +2107,9 @@ async function querySearch(query: string, opts: OutputOptions, embedModel: strin // Give 2x weight to original query results (first 2 lists: FTS + vector) const weights = rankedLists.map((_, i) => i < 2 ? 2.0 : 1.0); const fused = reciprocalRankFusion(rankedLists, weights); - const candidates = fused.slice(0, 30); // Over-retrieve for reranking + // Hard cap reranking for latency/cost. We rerank per-document (best chunk only). + const RERANK_DOC_LIMIT = 40; + const candidates = fused.slice(0, RERANK_DOC_LIMIT); if (candidates.length === 0) { console.log("No results found."); @@ -2112,69 +2119,44 @@ async function querySearch(query: string, opts: OutputOptions, embedModel: strin // Rerank multiple chunks per document, then aggregate scores // This improves ranking for long documents where keyword-matched chunk isn't always best - const MAX_CHUNKS_PER_DOC = 3; + // We only rerank ONE chunk per document (best chunk by a simple keyword heuristic), + // so we never rerank more than RERANK_DOC_LIMIT items. const chunksToRerank: { file: string; text: string; chunkIdx: number }[] = []; - const docChunkMap = new Map(); + const docChunkMap = new Map(); + const queryTerms = query.toLowerCase().split(/\s+/).filter(t => t.length > 2); for (const c of candidates) { const chunks = chunkDocument(c.body); - if (chunks.length <= MAX_CHUNKS_PER_DOC) { - // Small document - rerank all chunks - for (let i = 0; i < chunks.length; i++) { - chunksToRerank.push({ file: c.file, text: chunks[i].text, chunkIdx: i }); - } - docChunkMap.set(c.file, { chunks, selectedIndices: chunks.map((_, i) => i) }); - } else { - // Score all chunks by keyword match, select top MAX_CHUNKS_PER_DOC - const queryTerms = query.toLowerCase().split(/\s+/).filter(t => t.length > 2); - const scored = chunks.map((chunk, idx) => { - const chunkLower = chunk.text.toLowerCase(); - const score = queryTerms.reduce((acc, term) => acc + (chunkLower.includes(term) ? 1 : 0), 0); - return { idx, score }; - }); - scored.sort((a, b) => b.score - a.score); - const selectedIndices = scored.slice(0, MAX_CHUNKS_PER_DOC).map(s => s.idx); + if (chunks.length === 0) continue; - for (const idx of selectedIndices) { - chunksToRerank.push({ file: c.file, text: chunks[idx].text, chunkIdx: idx }); + // Choose best chunk by keyword matches; fall back to first chunk. + let bestIdx = 0; + let bestScore = -1; + for (let i = 0; i < chunks.length; i++) { + const chunkLower = chunks[i]!.text.toLowerCase(); + const score = queryTerms.reduce((acc, term) => acc + (chunkLower.includes(term) ? 1 : 0), 0); + if (score > bestScore) { + bestScore = score; + bestIdx = i; } - docChunkMap.set(c.file, { chunks, selectedIndices }); } + + chunksToRerank.push({ file: c.file, text: chunks[bestIdx]!.text, chunkIdx: bestIdx }); + docChunkMap.set(c.file, { chunks, bestIdx }); } - // Rerank all selected chunks (with caching) - // Use file:chunkIdx as unique identifier for reranker + // Rerank selected chunks (with caching). One chunk per doc -> one rerank item per doc. const reranked = await rerank( query, - chunksToRerank.map(c => ({ file: `${c.file}:${c.chunkIdx}`, text: c.text })), + chunksToRerank.map(c => ({ file: c.file, text: c.text })), rerankModel, db ); - // Aggregate chunk scores back to document level using top-2 average - // (or max if only 1 chunk) - this balances best chunk with consistency - const docScores = new Map(); - for (const r of reranked) { - const [file, chunkIdxStr] = r.file.split(/:(\d+)$/); - const chunkIdx = parseInt(chunkIdxStr || "0"); - const existing = docScores.get(file); - if (existing) { - existing.scores.push(r.score); - if (r.score > (existing.scores[0] || 0)) { - existing.bestChunkIdx = chunkIdx; - } - } else { - docScores.set(file, { scores: [r.score], bestChunkIdx: chunkIdx }); - } - } - - // Compute aggregated score: top-2 average (rewards consistency across chunks) const aggregatedScores = new Map(); - for (const [file, { scores, bestChunkIdx }] of docScores) { - scores.sort((a, b) => b - a); - const topScores = scores.slice(0, 2); - const avgScore = topScores.reduce((a, b) => a + b, 0) / topScores.length; - aggregatedScores.set(file, { score: avgScore, bestChunkIdx }); + for (const r of reranked) { + const chunkInfo = docChunkMap.get(r.file); + aggregatedScores.set(r.file, { score: r.score, bestChunkIdx: chunkInfo?.bestIdx ?? 0 }); } // Blend RRF position score with aggregated reranker score using position-aware weights @@ -2201,8 +2183,8 @@ async function querySearch(query: string, opts: OutputOptions, embedModel: strin const candidate = candidateMap.get(file); // Use the best-scoring chunk's text for the body (better for snippets) const chunkInfo = docChunkMap.get(file); - const chunkBody = chunkInfo ? chunkInfo.chunks[bestChunkIdx]?.text || chunkInfo.chunks[0].text : candidate?.body || ""; - const chunkPos = chunkInfo ? chunkInfo.chunks[bestChunkIdx]?.pos || 0 : 0; + const chunkBody = chunkInfo ? (chunkInfo.chunks[bestChunkIdx]?.text || chunkInfo.chunks[0]!.text) : candidate?.body || ""; + const chunkPos = chunkInfo ? (chunkInfo.chunks[bestChunkIdx]?.pos || 0) : 0; return { file, displayPath: candidate?.displayPath || "", @@ -2263,9 +2245,9 @@ function parseCLI() { strict: false, // Allow unknown options to pass through }); - // Set global index name in store + // Select index name (default: "index") if (values.index) { - setCustomIndexName(values.index); + setIndexName(values.index); } // Determine output format @@ -2443,26 +2425,6 @@ switch (cli.command) { break; } - // Legacy alias for backwards compatibility - case "add-context": { - console.error(`${c.yellow}Note: 'qmd add-context' is deprecated. Use 'qmd context add' instead.${c.reset}`); - if (cli.args.length === 0) { - console.error("Usage: qmd context add [path] \"text\""); - process.exit(1); - } - let pathArg: string | undefined; - let contextText: string; - if (cli.args.length === 1) { - pathArg = undefined; - contextText = cli.args[0]; - } else { - pathArg = cli.args[0]; - contextText = cli.args.slice(1).join(" "); - } - await contextAdd(pathArg, contextText); - break; - } - case "get": { if (!cli.args[0]) { console.error("Usage: qmd get [:line] [--from ] [-l ] [--line-numbers]"); diff --git a/src/store.test.ts b/src/store.test.ts index 24ce20b..12d977d 100644 --- a/src/store.test.ts +++ b/src/store.test.ts @@ -12,6 +12,7 @@ import { unlink, mkdtemp, rmdir, writeFile } from "node:fs/promises"; import { tmpdir } from "node:os"; import { join } from "node:path"; import YAML from "yaml"; +import { disposeDefaultLlamaCpp } from "./llm.js"; import { createStore, getDefaultDbPath, @@ -218,6 +219,9 @@ beforeAll(async () => { }); afterAll(async () => { + // Ensure native resources are released to avoid ggml-metal asserts on process exit. + await disposeDefaultLlamaCpp(); + try { // Clean up test directory const { readdir, unlink } = await import("node:fs/promises"); @@ -1256,43 +1260,6 @@ describe("Document Retrieval", () => { }); }); - describe("Legacy getDocument", () => { - test("getDocument returns document with body", async () => { - const store = await createTestStore(); - const collectionName = await createTestCollection({ pwd: "/path" }); - await insertTestDocument(store.db, collectionName, { - name: "mydoc", - displayPath: "mydoc.md", - body: "Document body", - }); - - const result = store.getDocument("/path/mydoc.md"); - expect("error" in result).toBe(false); - if (!("error" in result)) { - expect(result.body).toBe("Document body"); - } - - await cleanupTestDb(store); - }); - - test("getDocument supports line range from :line suffix", async () => { - const store = await createTestStore(); - const collectionName = await createTestCollection({ pwd: "/path" }); - await insertTestDocument(store.db, collectionName, { - name: "mydoc", - displayPath: "mydoc.md", - body: "Line 1\nLine 2\nLine 3\nLine 4", - }); - - const result = store.getDocument("mydoc.md:2", undefined, 2); - expect("error" in result).toBe(false); - if (!("error" in result)) { - expect(result.body).toBe("Line 2\nLine 3"); - } - - await cleanupTestDb(store); - }); - }); }); // ============================================================================= @@ -1798,77 +1765,6 @@ describe("Integration", () => { }); }); -// ============================================================================= -// Legacy Compatibility Tests -// ============================================================================= - -describe("Legacy Compatibility", () => { - test("getMultipleDocuments returns files with body", async () => { - const store = await createTestStore(); - const collectionName = await createTestCollection(); - - await insertTestDocument(store.db, collectionName, { - name: "doc1", - filepath: "/path/doc1.md", - displayPath: "doc1.md", - body: "Content 1", - }); - await insertTestDocument(store.db, collectionName, { - name: "doc2", - filepath: "/path/doc2.md", - displayPath: "doc2.md", - body: "Content 2", - }); - - const { files, errors } = store.getMultipleDocuments("*.md"); - expect(errors).toHaveLength(0); - expect(files).toHaveLength(2); - expect(files[0].body).toBeTruthy(); - expect(files[1].body).toBeTruthy(); - - await cleanupTestDb(store); - }); - - test("getMultipleDocuments truncates with maxLines", async () => { - const store = await createTestStore(); - const collectionName = await createTestCollection(); - - await insertTestDocument(store.db, collectionName, { - name: "doc1", - filepath: "/path/doc1.md", - displayPath: "doc1.md", - body: "Line 1\nLine 2\nLine 3\nLine 4\nLine 5", - }); - - const { files } = store.getMultipleDocuments("doc1.md", 2); - expect(files).toHaveLength(1); - expect(files[0].skipped).toBe(false); - if (!files[0].skipped) { - expect(files[0].body).toBe("Line 1\nLine 2\n\n[... truncated 3 more lines]"); - } - - await cleanupTestDb(store); - }); - - test("getMultipleDocuments skips large files", async () => { - const store = await createTestStore(); - const collectionName = await createTestCollection(); - - await insertTestDocument(store.db, collectionName, { - name: "large", - filepath: "/path/large.md", - displayPath: "large.md", - body: "x".repeat(15000), - }); - - const { files } = store.getMultipleDocuments("large.md", undefined, 10000); - expect(files).toHaveLength(1); - expect(files[0].skipped).toBe(true); - - await cleanupTestDb(store); - }); -}); - // ============================================================================= // LlamaCpp Integration Tests (using real local models) // ============================================================================= @@ -1927,7 +1823,7 @@ describe("LlamaCpp Integration", () => { expect(queries.length).toBeGreaterThanOrEqual(1); await cleanupTestDb(store); - }); + }, 30000); test("expandQuery caches results", async () => { const store = await createTestStore(); @@ -1940,7 +1836,7 @@ describe("LlamaCpp Integration", () => { expect(queries1[0]).toBe(queries2[0]); await cleanupTestDb(store); - }); + }, 30000); test("rerank scores documents", async () => { const store = await createTestStore(); diff --git a/src/store.ts b/src/store.ts index 0a86615..ffc8bef 100644 --- a/src/store.ts +++ b/src/store.ts @@ -63,7 +63,10 @@ export function homedir(): string { } export function resolve(...paths: string[]): string { - let result = paths[0].startsWith('/') ? '' : Bun.env.PWD || process.cwd(); + if (paths.length === 0) { + throw new Error("resolve: at least one path segment is required"); + } + let result = paths[0]!.startsWith('/') ? '' : Bun.env.PWD || process.cwd(); for (const p of paths) { if (p.startsWith('/')) { result = p; @@ -175,10 +178,10 @@ export function parseVirtualPath(virtualPath: string): VirtualPath | null { // Match: qmd://collection-name[/optional-path] // Allows: qmd://name, qmd://name/, qmd://name/path const match = normalized.match(/^qmd:\/\/([^\/]+)\/?(.*)$/); - if (!match) return null; + if (!match?.[1]) return null; return { collectionName: match[1], - path: match[2] || '', // Empty string for collection root + path: match[2] ?? '', // Empty string for collection root }; } @@ -309,7 +312,7 @@ function initializeDatabase(db: Database): void { db.exec(`CREATE INDEX IF NOT EXISTS idx_documents_hash ON documents(hash)`); db.exec(`CREATE INDEX IF NOT EXISTS idx_documents_path ON documents(path, active)`); - // Cache table for LLM API calls (table name kept for backwards compatibility) + // Cache table for LLM API calls db.exec(` CREATE TABLE IF NOT EXISTS llm_cache ( hash TEXT PRIMARY KEY, @@ -390,7 +393,8 @@ function ensureVecTableInternal(db: Database, dimensions: number): void { const match = tableInfo.sql.match(/float\[(\d+)\]/); const hasHashSeq = tableInfo.sql.includes('hash_seq'); const hasCosine = tableInfo.sql.includes('distance_metric=cosine'); - if (match && parseInt(match[1]) === dimensions && hasHashSeq && hasCosine) return; + const existingDims = match?.[1] ? parseInt(match[1], 10) : null; + if (existingDims === dimensions && hasHashSeq && hasCosine) return; // Table exists but wrong schema - need to rebuild db.exec("DROP TABLE IF EXISTS vectors_vec"); } @@ -423,7 +427,6 @@ export type Store = { deleteInactiveDocuments: () => number; cleanupOrphanedContent: () => number; cleanupOrphanedVectors: () => number; - cleanupDuplicateCollections: () => number; vacuumDatabase: () => void; // Context @@ -453,10 +456,6 @@ export type Store = { getDocumentBody: (doc: DocumentResult | { filepath: string }, fromLine?: number, maxLines?: number) => string | null; findDocuments: (pattern: string, options?: { includeBody?: boolean; maxBytes?: number }) => { docs: MultiGetResult[]; errors: string[] }; - // Legacy compatibility - getDocument: (filename: string, fromLine?: number, maxLines?: number) => (DocumentResult & { body: string }) | DocumentNotFound; - getMultipleDocuments: (pattern: string, maxLines?: number, maxBytes?: number) => { files: MultiGetFile[]; errors: string[] }; - // Fuzzy matching and docid lookup findSimilarFiles: (query: string, maxDistance?: number, limit?: number) => string[]; matchFilesByGlob: (pattern: string) => { filepath: string; displayPath: string; bodyLength: number }[]; @@ -511,7 +510,6 @@ export function createStore(dbPath?: string): Store { deleteInactiveDocuments: () => deleteInactiveDocuments(db), cleanupOrphanedContent: () => cleanupOrphanedContent(db), cleanupOrphanedVectors: () => cleanupOrphanedVectors(db), - cleanupDuplicateCollections: () => cleanupDuplicateCollections(db), vacuumDatabase: () => vacuumDatabase(db), // Context @@ -541,10 +539,6 @@ export function createStore(dbPath?: string): Store { getDocumentBody: (doc: DocumentResult | { filepath: string }, fromLine?: number, maxLines?: number) => getDocumentBody(db, doc, fromLine, maxLines), findDocuments: (pattern: string, options?: { includeBody?: boolean; maxBytes?: number }) => findDocuments(db, pattern, options), - // Legacy compatibility - getDocument: (filename: string, fromLine?: number, maxLines?: number) => getDocument(db, filename, fromLine, maxLines), - getMultipleDocuments: (pattern: string, maxLines?: number, maxBytes?: number) => getMultipleDocuments(db, pattern, maxLines, maxBytes), - // Fuzzy matching and docid lookup findSimilarFiles: (query: string, maxDistance?: number, limit?: number) => findSimilarFiles(db, query, maxDistance, limit), matchFilesByGlob: (pattern: string) => matchFilesByGlob(db, pattern), @@ -566,46 +560,6 @@ export function createStore(dbPath?: string): Store { }; } -// ============================================================================= -// Legacy compatibility - will be removed -// ============================================================================= - -let _legacyDb: Database | null = null; -let _legacyDbPath: string | null = null; - -/** @deprecated Use createStore() instead */ -export function setCustomIndexName(name: string | null): void { - _legacyDbPath = name ? getDefaultDbPath(name) : null; - _legacyDb = null; // Reset so next getDb() creates new connection -} - -/** @deprecated Use createStore() instead */ -export function getDbPath(): string { - return _legacyDbPath || getDefaultDbPath(); -} - -/** @deprecated Use createStore() instead */ -export function getDb(): Database { - if (!_legacyDb) { - _legacyDb = new Database(getDbPath()); - initializeDatabase(_legacyDb); - } - return _legacyDb; -} - -/** @deprecated Use store.db.close() instead. Closes the legacy db and resets singleton. */ -export function closeDb(): void { - if (_legacyDb) { - _legacyDb.close(); - _legacyDb = null; - } -} - -/** @deprecated Use store.ensureVecTable() instead */ -export function ensureVecTable(db: Database, dimensions: number): void { - ensureVecTableInternal(db, dimensions); -} - // ============================================================================= // Core Document Type // ============================================================================= @@ -891,16 +845,6 @@ export function cleanupOrphanedVectors(db: Database): number { return countResult.c; } -/** - * Remove duplicate collections, keeping the oldest one per (pwd, glob_pattern). - * NOTE: This function is deprecated since collections are now managed in YAML. - * Kept for backwards compatibility but returns 0. - */ -export function cleanupDuplicateCollections(db: Database): number { - // Collections are now managed in YAML, no cleanup needed - return 0; -} - /** * Run VACUUM to reclaim unused space in the database. * This operation rebuilds the database file to eliminate fragmentation. @@ -922,10 +866,10 @@ export async function hashContent(content: string): Promise { export function extractTitle(content: string, filename: string): string { const match = content.match(/^##?\s+(.+)$/m); if (match) { - const title = match[1].trim(); + const title = (match[1] ?? "").trim(); if (title === "📝 Notes" || title === "Notes") { const nextMatch = content.match(/^##\s+(.+)$/m); - if (nextMatch) return nextMatch[1].trim(); + if (nextMatch?.[1]) return nextMatch[1].trim(); } return title; } @@ -1023,7 +967,6 @@ export function getActiveDocumentPaths(db: Database, collectionName: string): st return rows.map(r => r.path); } -// Re-export from llm.ts for backwards compatibility export { formatQueryForEmbedding, formatDocForEmbedding }; export function chunkDocument(content: string, maxChars: number = CHUNK_SIZE_CHARS, overlapChars: number = CHUNK_OVERLAP_CHARS): { text: string; pos: number }[] { @@ -1093,7 +1036,8 @@ export function chunkDocument(content: string, maxChars: number = CHUNK_SIZE_CHA break; } charPos = endPos - overlapChars; - if (charPos <= chunks[chunks.length - 1].pos) { + const lastChunkPos = chunks.at(-1)!.pos; + if (charPos <= lastChunkPos) { // Prevent infinite loop - move forward at least a bit charPos = endPos; } @@ -1200,7 +1144,8 @@ export async function chunkDocumentByTokens( // Calculate overlap in characters based on token ratio const overlapChars = Math.floor(overlapTokens * (slice.length / sliceTokens)); charPos = estimatedEnd - overlapChars; - if (charPos <= chunks[chunks.length - 1].pos) { + const lastChunkPos = chunks.at(-1)!.pos; + if (charPos <= lastChunkPos) { charPos = estimatedEnd; // Prevent infinite loop } } @@ -1216,15 +1161,20 @@ function levenshtein(a: string, b: string): number { const m = a.length, n = b.length; if (m === 0) return n; if (n === 0) return m; - const dp: number[][] = Array.from({ length: m + 1 }, (_, i) => [i]); - for (let j = 1; j <= n; j++) dp[0][j] = j; + const dp: number[][] = Array.from({ length: m + 1 }, () => Array(n + 1).fill(0)); + for (let i = 0; i <= m; i++) dp[i]![0] = i; + for (let j = 0; j <= n; j++) dp[0]![j] = j; for (let i = 1; i <= m; i++) { for (let j = 1; j <= n; j++) { const cost = a[i - 1] === b[j - 1] ? 0 : 1; - dp[i][j] = Math.min(dp[i - 1][j] + 1, dp[i][j - 1] + 1, dp[i - 1][j - 1] + cost); + dp[i]![j] = Math.min( + dp[i - 1]![j]! + 1, + dp[i]![j - 1]! + 1, + dp[i - 1]![j - 1]! + cost + ); } } - return dp[m][n]; + return dp[m]![n]!; } /** @@ -1341,7 +1291,8 @@ export function getContextForPath(db: Database, collectionName: string, path: st } /** - * Legacy function for backward compatibility - resolves filepath to collection+path first + * Get context for a file path (virtual or filesystem). + * Resolves the collection and relative path using the YAML collections config. */ export function getContextForFile(db: Database, filepath: string): string | null { // Handle undefined or null filepath @@ -1352,17 +1303,15 @@ export function getContextForFile(db: Database, filepath: string): string | null const config = collectionsLoadConfig(); // Parse virtual path format: qmd://collection/path - let collectionName: string; - let relativePath: string; + let collectionName: string | null = null; + let relativePath: string | null = null; - if (filepath.startsWith('qmd://')) { - // Virtual path: qmd://collection/path - const parts = filepath.slice(6).split('/'); // Remove 'qmd://' - collectionName = parts[0]; - relativePath = parts.slice(1).join('/'); + const parsedVirtual = filepath.startsWith('qmd://') ? parseVirtualPath(filepath) : null; + if (parsedVirtual) { + collectionName = parsedVirtual.collectionName; + relativePath = parsedVirtual.path; } else { // Filesystem path: find which collection this absolute path belongs to - let found = false; for (const coll of collections) { // Skip collections with missing paths if (!coll || !coll.path) continue; @@ -1373,12 +1322,11 @@ export function getContextForFile(db: Database, filepath: string): string | null relativePath = filepath.startsWith(coll.path + '/') ? filepath.slice(coll.path.length + 1) : ''; - found = true; break; } } - if (!found) return null; + if (!collectionName || relativePath === null) return null; } // Get the collection from config @@ -1655,7 +1603,8 @@ export function getTopLevelPathsWithoutContext(db: Database, collectionName: str for (const { path } of paths) { const parts = path.split('/').filter(Boolean); if (parts.length > 1) { - topLevelDirs.add(parts[0]); + const dir = parts[0]; + if (dir) topLevelDirs.add(dir); } } @@ -1708,7 +1657,7 @@ export function searchFTS(db: Database, query: string, limit: number = 20, colle d.title, content.doc as body, d.hash, - bm25(documents_fts, 10.0, 1.0) as score + bm25(documents_fts, 10.0, 1.0) as bm25_score FROM documents_fts f JOIN documents d ON d.id = f.rowid JOIN content ON content.hash = d.hash @@ -1724,14 +1673,16 @@ export function searchFTS(db: Database, query: string, limit: number = 20, colle params.push(String(collectionId)); } - sql += ` ORDER BY score LIMIT ?`; + // bm25 lower is better; sort ascending. + sql += ` ORDER BY bm25_score ASC LIMIT ?`; params.push(limit); - const rows = db.prepare(sql).all(...params) as { filepath: string; display_path: string; title: string; body: string; hash: string; score: number }[]; - - const maxScore = rows.length > 0 ? Math.max(...rows.map(r => Math.abs(r.score))) : 1; + const rows = db.prepare(sql).all(...params) as { filepath: string; display_path: string; title: string; body: string; hash: string; bm25_score: number }[]; return rows.map(row => { const collectionName = row.filepath.split('//')[1]?.split('/')[0] || ""; + // Convert bm25 (lower is better) into a stable (0..1] score where higher is better. + // Avoid per-query normalization so "strong signal" heuristics can work. + const score = 1 / (1 + Math.max(0, row.bm25_score)); return { filepath: row.filepath, displayPath: row.display_path, @@ -1743,7 +1694,7 @@ export function searchFTS(db: Database, query: string, limit: number = 20, colle bodyLength: row.body.length, body: row.body, context: getContextForFile(db, row.filepath), - score: Math.abs(row.score) / maxScore, + score, source: "fts" as const, }; }); @@ -1953,10 +1904,12 @@ export function reciprocalRankFusion( for (let listIdx = 0; listIdx < resultLists.length; listIdx++) { const list = resultLists[listIdx]; + if (!list) continue; const weight = weights[listIdx] ?? 1.0; for (let rank = 0; rank < list.length; rank++) { const result = list[rank]; + if (!result) continue; const rrfContribution = weight / (k + rank + 1); const existing = scores.get(result.file); @@ -1992,6 +1945,7 @@ export function reciprocalRankFusion( // ============================================================================= type DbDocRow = { + virtual_path: string; display_path: string; title: string; hash: string; @@ -2122,7 +2076,7 @@ export function findDocument(db: Database, filename: string, options: { includeB * Optionally slice by line range */ export function getDocumentBody(db: Database, doc: DocumentResult | { filepath: string }, fromLine?: number, maxLines?: number): string | null { - const filepath = 'filepath' in doc ? doc.filepath : doc.filepath; + const filepath = doc.filepath; // Try to resolve document by filepath (absolute or virtual) let row: { body: string } | null = null; @@ -2167,34 +2121,6 @@ export function getDocumentBody(db: Database, doc: DocumentResult | { filepath: return body; } -/** - * Legacy function for backwards compatibility - * Combines findDocument + getDocumentBody with line slicing - */ -export function getDocument(db: Database, filename: string, fromLine?: number, maxLines?: number): (DocumentResult & { body: string }) | DocumentNotFound { - // Parse :line suffix - let parsedFromLine = fromLine; - let filepath = filename; - const colonMatch = filepath.match(/:(\d+)$/); - if (colonMatch && !parsedFromLine) { - parsedFromLine = parseInt(colonMatch[1], 10); - filepath = filepath.slice(0, -colonMatch[0].length); - } - - const result = findDocument(db, filepath, { includeBody: true }); - if ("error" in result) return result; - - let body = result.body || ""; - if (parsedFromLine !== undefined || maxLines !== undefined) { - const lines = body.split('\n'); - const start = (parsedFromLine || 1) - 1; - const end = maxLines !== undefined ? start + maxLines : lines.length; - body = lines.slice(start, end).join('\n'); - } - - return { ...result, body }; -} - /** * Find multiple documents by glob pattern or comma-separated list * Returns documents without body by default (use getDocumentBody to load) @@ -2305,65 +2231,6 @@ export function findDocuments( return { docs: results, errors }; } -/** - * Legacy function for backwards compatibility - */ -export function getMultipleDocuments(db: Database, pattern: string, maxLines?: number, maxBytes: number = DEFAULT_MULTI_GET_MAX_BYTES): { files: MultiGetFile[]; errors: string[] } { - const { docs, errors } = findDocuments(db, pattern, { includeBody: true, maxBytes }); - - const files: MultiGetFile[] = docs.map(result => { - if (result.skipped) { - return { - filepath: result.doc.filepath, - displayPath: result.doc.displayPath, - title: "", - body: "", - context: null, - skipped: true as const, - skipReason: result.skipReason, - }; - } - - let body = result.doc.body || ""; - if (maxLines !== undefined) { - const lines = body.split('\n'); - body = lines.slice(0, maxLines).join('\n'); - if (lines.length > maxLines) { - body += `\n\n[... truncated ${lines.length - maxLines} more lines]`; - } - } - - return { - filepath: result.doc.filepath, - displayPath: result.doc.displayPath, - title: result.doc.title, - body, - context: result.doc.context, - skipped: false as const, - }; - }); - - return { files, errors }; -} - -// Keep the old MultiGetFile type for backwards compatibility -export type MultiGetFile = { - filepath: string; - displayPath: string; - title: string; - body: string; - context: string | null; - skipped: false; -} | { - filepath: string; - displayPath: string; - title: string; - body: string; - context: string | null; - skipped: true; - skipReason: string; -}; - // ============================================================================= // Status // ============================================================================= @@ -2441,7 +2308,7 @@ export function extractSnippet(body: string, query: string, maxLen = 500, chunkP let bestLine = 0, bestScore = -1; for (let i = 0; i < lines.length; i++) { - const lineLower = lines[i].toLowerCase(); + const lineLower = (lines[i] ?? "").toLowerCase(); let score = 0; for (const term of queryTerms) { if (lineLower.includes(term)) score++; @@ -2456,6 +2323,13 @@ export function extractSnippet(body: string, query: string, maxLen = 500, chunkP const end = Math.min(lines.length, bestLine + 3); const snippetLines = lines.slice(start, end); let snippetText = snippetLines.join('\n'); + + // If we focused on a chunk window and it produced an empty/whitespace-only snippet, + // fall back to a full-document snippet so we always show something useful. + if (chunkPos && chunkPos > 0 && snippetText.trim().length === 0) { + return extractSnippet(body, query, maxLen, undefined); + } + if (snippetText.length > maxLen) snippetText = snippetText.substring(0, maxLen - 3) + "..."; const absoluteStart = lineOffset + start + 1; // 1-indexed