This commit is contained in:
Tobi Lutke 2025-12-12 15:02:00 -05:00
parent 3b22f88c9f
commit e67fb83a17
No known key found for this signature in database
4 changed files with 478 additions and 37 deletions

3
.beads/issues.jsonl Normal file
View File

@ -0,0 +1,3 @@
{"id":"qmd-ama","title":"Refactor database system","description":"All documents should be stored as content addressable hash, e.g. hash, doc, created_at,\n┃ updated_at. documents should be a file system layer on top e.g. collection, path, hash,\n┃ created_at, updated_at. (collection,path)\n┃\n┃\n\n┃ All documents should be stored as content addressable hash, e.g. hash, doc, created_at,\n┃ updated_at. documents should be a file system layer on top e.g. collection_id, path, hash,\n┃ created_at, updated_at. (collection,path) is unique. There is also collection which stores PWD\n┃ + glob pattern, name (\\w+). Every document is treated as path qmd://collection.name/","status":"open","priority":2,"issue_type":"task","created_at":"2025-12-10T10:57:35.497489-05:00","updated_at":"2025-12-10T10:57:35.497489-05:00"}
{"id":"qmd-deh","title":"Refactor database introduce qmd collection *","description":"","status":"open","priority":2,"issue_type":"task","created_at":"2025-12-10T10:56:04.516137-05:00","updated_at":"2025-12-10T10:56:04.516137-05:00"}
{"id":"qmd-p1h","title":"Create collection add|remove","description":"","status":"open","priority":2,"issue_type":"task","created_at":"2025-12-10T10:57:00.717864-05:00","updated_at":"2025-12-10T10:57:00.717864-05:00"}

417
cli.test.ts Normal file
View File

@ -0,0 +1,417 @@
/**
* CLI Integration Tests
*
* Tests all qmd CLI commands using a temporary test database via INDEX_PATH.
* These tests spawn actual qmd processes to verify end-to-end functionality.
*/
import { describe, test, expect, beforeAll, afterAll, beforeEach } from "bun:test";
import { mkdtemp, rm, writeFile, mkdir } from "fs/promises";
import { tmpdir } from "os";
import { join } from "path";
// Shared test state. testDir, testDbPath and fixturesDir are assigned in beforeAll.
// testDir: temp root for the run; testDbPath: default database handed to runQmd;
// fixturesDir: default working directory for spawned qmd processes.
let testDir: string;
let testDbPath: string;
let fixturesDir: string;
// Incremented by getFreshDbPath() so each call yields a distinct database file name
let testCounter = 0; // Unique counter for each test run
// Get the directory where this test file lives (same as qmd.ts)
const qmdDir = import.meta.dir;
// Path of the CLI entry script that runQmd() passes to `bun`
const qmdScript = join(qmdDir, "qmd.ts");
/**
 * Spawns `bun qmd.ts <args>` against a test database and captures its output.
 *
 * @param args - Command-line arguments forwarded to the qmd script.
 * @param options - Optional overrides: `cwd` (defaults to the fixtures directory),
 *   `dbPath` (defaults to the shared test database) and extra `env` entries.
 *   `env` is spread last, so it can override INDEX_PATH and PWD set here.
 * @returns The captured stdout and stderr text plus the process exit code.
 */
async function runQmd(
  args: string[],
  options: { cwd?: string; env?: Record<string, string>; dbPath?: string } = {}
): Promise<{ stdout: string; stderr: string; exitCode: number }> {
  const workingDir = options.cwd || fixturesDir;
  const dbPath = options.dbPath || testDbPath;
  const proc = Bun.spawn(["bun", qmdScript, ...args], {
    cwd: workingDir,
    env: {
      ...process.env,
      INDEX_PATH: dbPath,
      PWD: workingDir, // Must explicitly set PWD since getPwd() checks this
      ...options.env,
    },
    stdout: "pipe",
    stderr: "pipe",
  });
  // Drain both pipes while waiting for exit. Reading stdout to completion before
  // touching stderr risks a stall if the child fills the stderr pipe first.
  const [stdout, stderr, exitCode] = await Promise.all([
    new Response(proc.stdout).text(),
    new Response(proc.stderr).text(),
    proc.exited,
  ]);
  return { stdout, stderr, exitCode };
}
// Returns a database path under testDir that no earlier call has returned,
// for suites that need an index isolated from the shared one.
function getFreshDbPath(): string {
  testCounter += 1;
  const fileName = `test-${testCounter}.sqlite`;
  return join(testDir, fileName);
}
// Build the temp directory tree and markdown fixtures once for the whole run
beforeAll(async () => {
  // Temp root holding the shared database and the fixtures folder
  testDir = await mkdtemp(join(tmpdir(), "qmd-test-"));
  testDbPath = join(testDir, "test.sqlite");
  fixturesDir = join(testDir, "fixtures");

  const notesDir = join(fixturesDir, "notes");
  const docsDir = join(fixturesDir, "docs");
  for (const dir of [fixturesDir, notesDir, docsDir]) {
    await mkdir(dir, { recursive: true });
  }

  // Fixture contents, keyed by destination path below
  const readmeText = `# Test Project
This is a test project for QMD CLI testing.
## Features
- Full-text search with BM25
- Vector similarity search
- Hybrid search with reranking
`;
  const meetingText = `# Team Meeting Notes
Date: 2024-01-15
## Attendees
- Alice
- Bob
- Charlie
## Discussion Topics
- Project timeline review
- Resource allocation
- Technical debt prioritization
## Action Items
1. Alice to update documentation
2. Bob to fix authentication bug
3. Charlie to review pull requests
`;
  const ideasText = `# Product Ideas
## Feature Requests
- Dark mode support
- Keyboard shortcuts
- Export to PDF
## Technical Improvements
- Improve search performance
- Add caching layer
- Optimize database queries
`;
  const apiText = `# API Documentation
## Endpoints
### GET /search
Search for documents.
Parameters:
- q: Search query (required)
- limit: Max results (default: 10)
### GET /document/:id
Retrieve a specific document.
### POST /index
Index new documents.
`;

  const fixtureFiles: Array<[string, string]> = [
    [join(fixturesDir, "README.md"), readmeText],
    [join(notesDir, "meeting.md"), meetingText],
    [join(notesDir, "ideas.md"), ideasText],
    [join(docsDir, "api.md"), apiText],
  ];
  // Written one at a time, in the same order as listed
  for (const [filePath, content] of fixtureFiles) {
    await writeFile(filePath, content);
  }
});
// Remove the temp tree once every test has finished
afterAll(async () => {
  // Nothing to delete if beforeAll never got as far as creating the directory
  if (!testDir) {
    return;
  }
  await rm(testDir, { recursive: true, force: true });
});
describe("CLI Help", () => {
  test("shows help with --help flag", async () => {
    const result = await runQmd(["--help"]);
    expect(result.exitCode).toBe(0);
    // The usage banner must mention the core commands
    for (const expected of ["Usage:", "qmd add", "qmd search"]) {
      expect(result.stdout).toContain(expected);
    }
  });

  test("shows help with no arguments", async () => {
    const result = await runQmd([]);
    // A bare invocation prints usage on stdout but exits with status 1
    expect(result.exitCode).toBe(1);
    expect(result.stdout).toContain("Usage:");
  });
});
describe("CLI Add Command", () => {
  test("adds files from current directory", async () => {
    const result = await runQmd(["add", "."]);
    expect(result.exitCode).toBe(0);
    for (const label of ["Collection:", "Indexed:"]) {
      expect(result.stdout).toContain(label);
    }
  });

  test("adds files with custom glob pattern", async () => {
    const pattern = "notes/*.md";
    const result = await runQmd(["add", pattern]);
    expect(result.exitCode).toBe(0);
    expect(result.stdout).toContain("Collection:");
    // The pattern (covering meeting.md and ideas.md in notes/) is echoed back
    expect(result.stdout).toContain(pattern);
  });

  test("adds files with --drop flag recreates collection", async () => {
    // Seed the collection so there is something to drop
    await runQmd(["add", "."]);
    // Drop and re-index in a single invocation
    const result = await runQmd(["add", "--drop", "."]);
    expect(result.exitCode).toBe(0);
    expect(result.stdout).toContain("Dropped collection:");
  });
});
describe("CLI Status Command", () => {
  // Index the fixtures before each test
  beforeEach(async () => {
    await runQmd(["add", "."]);
  });

  test("shows index status", async () => {
    const result = await runQmd(["status"]);
    expect(result.exitCode).toBe(0);
    // Status output is expected to list collection information
    expect(result.stdout).toContain("Collection");
  });
});
describe("CLI Search Command", () => {
  beforeEach(async () => {
    // Ensure we have indexed files
    await runQmd(["add", "."]);
  });

  test("searches for documents with BM25", async () => {
    const { stdout, exitCode } = await runQmd(["search", "meeting"]);
    expect(exitCode).toBe(0);
    // Should find meeting.md
    expect(stdout.toLowerCase()).toContain("meeting");
  });

  test("searches with limit option", async () => {
    // Only the exit status is checked; the output format is not asserted here
    const { exitCode } = await runQmd(["search", "-n", "1", "test"]);
    expect(exitCode).toBe(0);
  });

  test("searches with all results option", async () => {
    // Only the exit status is checked; the output format is not asserted here
    const { exitCode } = await runQmd(["search", "--all", "the"]);
    expect(exitCode).toBe(0);
  });

  test("returns no results message for non-matching query", async () => {
    const { stdout, exitCode } = await runQmd(["search", "xyznonexistent123"]);
    expect(exitCode).toBe(0);
    expect(stdout).toContain("No results");
  });

  test("requires query argument", async () => {
    const { stderr, exitCode } = await runQmd(["search"]);
    expect(exitCode).toBe(1);
    // Error message goes to stderr
    expect(stderr).toContain("Usage:");
  });
});
describe("CLI Get Command", () => {
  beforeEach(async () => {
    // Ensure we have indexed files
    await runQmd(["add", "."]);
  });

  test("retrieves document content by path", async () => {
    const { stdout, exitCode } = await runQmd(["get", "README.md"]);
    expect(exitCode).toBe(0);
    expect(stdout).toContain("Test Project");
  });

  test("retrieves document from subdirectory", async () => {
    const { stdout, exitCode } = await runQmd(["get", "notes/meeting.md"]);
    expect(exitCode).toBe(0);
    expect(stdout).toContain("Team Meeting");
  });

  test("handles non-existent file", async () => {
    // Only the failing exit status is asserted; output text is not checked
    const { exitCode } = await runQmd(["get", "nonexistent.md"]);
    expect(exitCode).toBe(1);
  });
});
describe("CLI Multi-Get Command", () => {
  // Index the fixtures before each test
  beforeEach(async () => {
    await runQmd(["add", "."]);
  });

  test("retrieves multiple documents by pattern", async () => {
    const result = await runQmd(["multi-get", "notes/*.md"]);
    expect(result.exitCode).toBe(0);
    // Content from both files under notes/ must be present
    for (const fragment of ["Meeting", "Ideas"]) {
      expect(result.stdout).toContain(fragment);
    }
  });

  test("retrieves documents by comma-separated paths", async () => {
    const pathList = "README.md,notes/meeting.md";
    const result = await runQmd(["multi-get", pathList]);
    expect(result.exitCode).toBe(0);
    for (const fragment of ["Test Project", "Team Meeting"]) {
      expect(result.stdout).toContain(fragment);
    }
  });
});
describe("CLI Update Command", () => {
  let isolatedDb: string;

  // Each test gets its own database, pre-populated with the fixtures
  beforeEach(async () => {
    isolatedDb = getFreshDbPath();
    await runQmd(["add", "."], { dbPath: isolatedDb });
  });

  test("updates all collections", async () => {
    const result = await runQmd(["update"], { dbPath: isolatedDb });
    expect(result.exitCode).toBe(0);
    expect(result.stdout).toContain("Updating");
  });
});
describe("CLI Add-Context Command", () => {
  beforeEach(async () => {
    // Ensure we have indexed files
    await runQmd(["add", "."]);
  });

  test("adds context to a path", async () => {
    // Only the exit status is asserted for the success path
    const { exitCode } = await runQmd([
      "add-context",
      "notes",
      "Personal notes and meeting logs",
    ]);
    expect(exitCode).toBe(0);
  });

  test("requires path and text arguments", async () => {
    const { stderr, exitCode } = await runQmd(["add-context"]);
    expect(exitCode).toBe(1);
    // Error message goes to stderr
    expect(stderr).toContain("Usage:");
  });
});
describe("CLI Cleanup Command", () => {
  beforeEach(async () => {
    // Ensure we have indexed files
    await runQmd(["add", "."]);
  });

  test("cleans up orphaned entries", async () => {
    // Only the exit status is asserted; cleanup output is not inspected
    const { exitCode } = await runQmd(["cleanup"]);
    expect(exitCode).toBe(0);
  });
});
describe("CLI Error Handling", () => {
  test("handles unknown command", async () => {
    const outcome = await runQmd(["unknowncommand"]);
    expect(outcome.exitCode).toBe(1);
    // The complaint is written to stderr
    expect(outcome.stderr).toContain("Unknown command");
  });

  test("uses INDEX_PATH environment variable", async () => {
    // Point the CLI at a database path that only this test uses
    const customDbPath = join(testDir, "custom.sqlite");
    const outcome = await runQmd(["add", "."], {
      env: { INDEX_PATH: customDbPath },
    });
    expect(outcome.exitCode).toBe(0);
    // Indexing must have created the database at the overridden location
    const created = await Bun.file(customDbPath).exists();
    expect(created).toBe(true);
  });
});
describe("CLI Output Formats", () => {
  // Index the fixtures before each test
  beforeEach(async () => {
    await runQmd(["add", "."]);
  });

  test("search with --json flag outputs JSON", async () => {
    const run = await runQmd(["search", "--json", "test"]);
    expect(run.exitCode).toBe(0);
    // JSON.parse throws on malformed output, which fails the test
    const payload = JSON.parse(run.stdout);
    expect(Array.isArray(payload)).toBe(true);
  });

  test("search with --files flag outputs file paths", async () => {
    const run = await runQmd(["search", "--files", "meeting"]);
    expect(run.exitCode).toBe(0);
    expect(run.stdout).toContain(".md");
  });

  test("search output includes snippets by default", async () => {
    const run = await runQmd(["search", "API"]);
    expect(run.exitCode).toBe(0);
    // The snippet check applies only when the search reported results
    const hasResults = !run.stdout.includes("No results");
    if (hasResults) {
      expect(run.stdout.toLowerCase()).toContain("api");
    }
  });
});
describe("CLI Search with Collection Filter", () => {
  let isolatedDb: string;

  // Each test gets its own database holding two collections: notes and docs
  beforeEach(async () => {
    isolatedDb = getFreshDbPath();
    for (const pattern of ["notes/*.md", "docs/*.md"]) {
      await runQmd(["add", pattern], { dbPath: isolatedDb });
    }
  });

  test("filters search by collection name", async () => {
    const searchArgs = ["search", "-c", "notes", "meeting"];
    const result = await runQmd(searchArgs, { dbPath: isolatedDb });
    expect(result.exitCode).toBe(0);
    // The notes collection contains meeting.md, so the term must appear
    expect(result.stdout.toLowerCase()).toContain("meeting");
  });
});

75
qmd.ts
View File

@ -5,6 +5,7 @@ import { parseArgs } from "util";
import * as sqliteVec from "sqlite-vec";
import {
getDb,
closeDb,
getDbPath,
getPwd,
getRealPath,
@ -116,7 +117,7 @@ function checkIndexHealth(db: Database): void {
// Check if most recent document update is older than 2 weeks
if (daysStale !== null && daysStale >= 14) {
process.stderr.write(`${c.dim}Tip: Index last updated ${daysStale} days ago. Run 'qmd update-all' to refresh.${c.reset}\n`);
process.stderr.write(`${c.dim}Tip: Index last updated ${daysStale} days ago. Run 'qmd update' to refresh.${c.reset}\n`);
}
}
@ -447,7 +448,7 @@ function showStatus(): void {
console.log(`\n${c.dim}No collections. Run 'qmd add .' to index markdown files.${c.reset}`);
}
db.close();
closeDb();
}
// Update display_paths for all documents that have empty display_path
@ -481,7 +482,7 @@ function updateDisplayPaths(db: Database): number {
return updated;
}
async function updateAllCollections(): Promise<void> {
async function updateCollections(): Promise<void> {
const db = getDb();
cleanupDuplicateCollections(db);
@ -492,7 +493,7 @@ async function updateAllCollections(): Promise<void> {
if (collections.length === 0) {
console.log(`${c.dim}No collections found. Run 'qmd add .' to index markdown files.${c.reset}`);
db.close();
closeDb();
return;
}
@ -502,8 +503,7 @@ async function updateAllCollections(): Promise<void> {
console.log(`${c.green}${c.reset} Updated ${pathsUpdated} display paths`);
}
db.close();
// Don't close db here - indexFiles will reuse it and close at the end
console.log(`${c.bold}Updating ${collections.length} collection(s)...${c.reset}\n`);
for (let i = 0; i < collections.length; i++) {
@ -544,7 +544,7 @@ async function addContext(pathArg: string, contextText: string): Promise<void> {
console.log(`${c.green}${c.reset} Added context for: ${shortPath(pathPrefix)}`);
console.log(`${c.dim}Context: ${contextText}${c.reset}`);
db.close();
closeDb();
}
function getDocument(filename: string, fromLine?: number, maxLines?: number): void {
@ -591,7 +591,7 @@ function getDocument(filename: string, fromLine?: number, maxLines?: number): vo
console.error(` ${s}`);
}
}
db.close();
closeDb();
process.exit(1);
}
@ -613,7 +613,7 @@ function getDocument(filename: string, fromLine?: number, maxLines?: number): vo
console.log(`Folder Context: ${context}\n---\n`);
}
console.log(output);
db.close();
closeDb();
}
// Multi-get: fetch multiple documents by glob pattern or comma-separated list
@ -652,7 +652,7 @@ function multiGet(pattern: string, maxLines?: number, maxBytes: number = DEFAULT
files = matchFilesByGlob(db, pattern);
if (files.length === 0) {
console.error(`No files matched pattern: ${pattern}`);
db.close();
closeDb();
process.exit(1);
}
}
@ -701,7 +701,7 @@ function multiGet(pattern: string, maxLines?: number, maxBytes: number = DEFAULT
});
}
db.close();
closeDb();
// Output based on format
if (format === "json") {
@ -800,8 +800,7 @@ async function dropCollection(globPattern: string): Promise<void> {
const collection = db.prepare(`SELECT id FROM collections WHERE pwd = ? AND glob_pattern = ?`).get(pwd, globPattern) as { id: number } | null;
if (!collection) {
console.log(`No collection found for ${pwd} with pattern ${globPattern}`);
db.close();
// No collection to drop - this is fine, we'll create one during indexing
return;
}
@ -814,8 +813,7 @@ async function dropCollection(globPattern: string): Promise<void> {
console.log(`Dropped collection: ${pwd} (${globPattern})`);
console.log(`Removed ${deleted.changes} documents`);
console.log(`(Vectors kept for potential reuse)`);
db.close();
// Don't close db - indexFiles will use it and close at the end
}
async function indexFiles(globPattern: string = DEFAULT_GLOB): Promise<void> {
@ -851,7 +849,7 @@ async function indexFiles(globPattern: string = DEFAULT_GLOB): Promise<void> {
if (total === 0) {
progress.clear();
console.log("No files found matching pattern.");
db.close();
closeDb();
return;
}
@ -953,7 +951,7 @@ async function indexFiles(globPattern: string = DEFAULT_GLOB): Promise<void> {
console.log(`\nRun 'qmd embed' to update embeddings (${needsEmbedding} unique hashes need vectors)`);
}
db.close();
closeDb();
}
function renderProgressBar(percent: number, width: number = 30): string {
@ -986,7 +984,7 @@ async function vectorIndex(model: string = DEFAULT_EMBED_MODEL, force: boolean =
if (hashesToEmbed.length === 0) {
console.log(`${c.green}✓ All content hashes already have embeddings.${c.reset}`);
db.close();
closeDb();
return;
}
@ -1021,7 +1019,7 @@ async function vectorIndex(model: string = DEFAULT_EMBED_MODEL, force: boolean =
if (allChunks.length === 0) {
console.log(`${c.green}✓ No non-empty documents to embed.${c.reset}`);
db.close();
closeDb();
return;
}
@ -1099,7 +1097,7 @@ async function vectorIndex(model: string = DEFAULT_EMBED_MODEL, force: boolean =
if (errors > 0) {
console.log(`${c.yellow}${errors} chunks failed${c.reset}`);
}
db.close();
closeDb();
}
// Sanitize a term for FTS5: remove punctuation except apostrophes
@ -1144,9 +1142,15 @@ function normalizeBM25(score: number): number {
return 1 / (1 + Math.exp(-(absScore - 5) / 3));
}
// Get collection ID by name (matches pwd suffix)
// Get collection ID by name (matches pwd or glob_pattern suffix)
function getCollectionIdByName(db: Database, name: string): number | null {
const result = db.prepare(`SELECT id FROM collections WHERE pwd LIKE ? ORDER BY LENGTH(pwd) DESC LIMIT 1`).get(`%${name}`) as { id: number } | null;
// Search both pwd and glob_pattern columns for the name
const result = db.prepare(`
SELECT id FROM collections
WHERE pwd LIKE ? OR glob_pattern LIKE ?
ORDER BY LENGTH(pwd) DESC
LIMIT 1
`).get(`%${name}%`, `%${name}%`) as { id: number } | null;
return result?.id || null;
}
@ -1467,7 +1471,7 @@ function search(query: string, opts: OutputOptions): void {
collectionId = getCollectionIdByName(db, opts.collection) ?? undefined;
if (collectionId === undefined) {
console.error(`Collection not found: ${opts.collection}`);
db.close();
closeDb();
process.exit(1);
}
}
@ -1482,7 +1486,7 @@ function search(query: string, opts: OutputOptions): void {
context: getContextForFile(db, r.file),
}));
db.close();
closeDb();
if (resultsWithContext.length === 0) {
console.log("No results found.");
@ -1500,7 +1504,7 @@ async function vectorSearch(query: string, opts: OutputOptions, model: string =
collectionId = getCollectionIdByName(db, opts.collection) ?? undefined;
if (collectionId === undefined) {
console.error(`Collection not found: ${opts.collection}`);
db.close();
closeDb();
process.exit(1);
}
}
@ -1508,7 +1512,7 @@ async function vectorSearch(query: string, opts: OutputOptions, model: string =
const tableExists = db.prepare(`SELECT name FROM sqlite_master WHERE type='table' AND name='vectors_vec'`).get();
if (!tableExists) {
console.error("Vector index not found. Run 'qmd embed' first to create embeddings.");
db.close();
closeDb();
return;
}
@ -1540,7 +1544,7 @@ async function vectorSearch(query: string, opts: OutputOptions, model: string =
.slice(0, opts.limit)
.map(r => ({ ...r, context: getContextForFile(db, r.file) }));
db.close();
closeDb();
if (results.length === 0) {
console.log("No results found.");
@ -1625,7 +1629,7 @@ async function querySearch(query: string, opts: OutputOptions, embedModel: strin
collectionId = getCollectionIdByName(db, opts.collection) ?? undefined;
if (collectionId === undefined) {
console.error(`Collection not found: ${opts.collection}`);
db.close();
closeDb();
process.exit(1);
}
}
@ -1665,7 +1669,7 @@ async function querySearch(query: string, opts: OutputOptions, embedModel: strin
if (candidates.length === 0) {
console.log("No results found.");
db.close();
closeDb();
return;
}
@ -1709,7 +1713,7 @@ async function querySearch(query: string, opts: OutputOptions, embedModel: strin
};
}).sort((a, b) => b.score - a.score);
db.close();
closeDb();
outputResults(finalResults, query, opts);
}
@ -1788,7 +1792,7 @@ function showHelp(): void {
console.log(" qmd get <file>[:line] [-l N] [--from N] - Get document (optionally from line, max N lines)");
console.log(" qmd multi-get <pattern> [-l N] [--max-bytes N] - Get multiple docs by glob or comma-separated list");
console.log(" qmd status - Show index status and collections");
console.log(" qmd update-all - Re-index all collections");
console.log(" qmd update - Re-index all collections");
console.log(" qmd embed [-f] - Create vector embeddings (chunks ~6KB each)");
console.log(" qmd cleanup - Remove cache and orphaned data, vacuum DB");
console.log(" qmd search <query> - Full-text search (BM25)");
@ -1842,9 +1846,8 @@ switch (cli.command) {
const globPattern = (!globArg || globArg === ".") ? DEFAULT_GLOB : globArg;
if (cli.values.drop) {
await dropCollection(globPattern);
} else {
await indexFiles(globPattern);
}
await indexFiles(globPattern);
break;
}
@ -1896,8 +1899,8 @@ switch (cli.command) {
showStatus();
break;
case "update-all":
await updateAllCollections();
case "update":
await updateCollections();
break;
case "embed":
@ -1984,7 +1987,7 @@ switch (cli.command) {
db.exec(`VACUUM`);
console.log(`${c.green}${c.reset} Database vacuumed`);
db.close();
closeDb();
break;
}

View File

@ -66,6 +66,10 @@ export function resolve(...paths: string[]): string {
}
export function getDefaultDbPath(indexName: string = "index"): string {
// Allow override via INDEX_PATH for testing
if (Bun.env.INDEX_PATH) {
return Bun.env.INDEX_PATH;
}
const cacheDir = Bun.env.XDG_CACHE_HOME || resolve(homedir(), ".cache");
const qmdCacheDir = resolve(cacheDir, "qmd");
try { Bun.spawnSync(["mkdir", "-p", qmdCacheDir]); } catch {}
@ -357,6 +361,14 @@ export function getDb(): Database {
return _legacyDb;
}
/**
 * @deprecated Use store.db.close() instead.
 * Closes the legacy database handle if one is open and clears the singleton reference.
 */
export function closeDb(): void {
  // No handle open: nothing to do
  if (!_legacyDb) {
    return;
  }
  _legacyDb.close();
  _legacyDb = null;
}
/** @deprecated Use store.ensureVecTable() instead */
export function ensureVecTable(db: Database, dimensions: number): void {
ensureVecTableInternal(db, dimensions);
@ -642,7 +654,13 @@ export function getContextForFile(db: Database, filepath: string): string | null
}
export function getCollectionIdByName(db: Database, name: string): number | null {
const result = db.prepare(`SELECT id FROM collections WHERE pwd LIKE ? ORDER BY LENGTH(pwd) DESC LIMIT 1`).get(`%${name}`) as { id: number } | null;
// Search both pwd and glob_pattern columns for the name
const result = db.prepare(`
SELECT id FROM collections
WHERE pwd LIKE ? OR glob_pattern LIKE ?
ORDER BY LENGTH(pwd) DESC
LIMIT 1
`).get(`%${name}%`, `%${name}%`) as { id: number } | null;
return result?.id || null;
}