Migrate documents table to use collection names instead of IDs

Schema changes:
- documents.collection_id (INTEGER FK) → documents.collection (TEXT)
- Update UNIQUE constraint to (collection, path)
- Update indices to use collection name
- Update FTS triggers to compute filepath from collection || '/' || path

Code changes in store.ts:
- Change all function parameters from collectionId: number to collectionName: string
- Update all SQL queries to use d.collection instead of d.collection_id
- Remove unnecessary JOINs where collection name is already available
- Update DocumentResult type: collectionId → collectionName
- Update renameCollection() to also update documents.collection

Successfully migrated 2309 documents across 6 collections.

This prepares for YAML-based collection configuration where collections
table will be removed and collection names will be the primary identifier.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
This commit is contained in:
Tobi Lutke 2025-12-13 10:08:24 -05:00
parent 8f49b51db2
commit 5f68faa853
No known key found for this signature in database
3 changed files with 360 additions and 94 deletions

View File

@ -7,7 +7,7 @@
{"id":"qmd-4u4","title":"Move embedding/vector DB operations to store.ts","description":"Move vector indexing DB operations from vectorIndex() to store.ts. Create methods like getHashesForEmbedding(), insertEmbedding(), clearEmbeddings(), etc.","status":"closed","priority":2,"issue_type":"task","created_at":"2025-12-12T16:36:21.683434-05:00","updated_at":"2025-12-12T16:42:40.42653-05:00","closed_at":"2025-12-12T16:42:40.42653-05:00","dependencies":[{"issue_id":"qmd-4u4","depends_on_id":"qmd-29c","type":"parent-child","created_at":"2025-12-12T16:37:02.944591-05:00","created_by":"daemon"}]} {"id":"qmd-4u4","title":"Move embedding/vector DB operations to store.ts","description":"Move vector indexing DB operations from vectorIndex() to store.ts. Create methods like getHashesForEmbedding(), insertEmbedding(), clearEmbeddings(), etc.","status":"closed","priority":2,"issue_type":"task","created_at":"2025-12-12T16:36:21.683434-05:00","updated_at":"2025-12-12T16:42:40.42653-05:00","closed_at":"2025-12-12T16:42:40.42653-05:00","dependencies":[{"issue_id":"qmd-4u4","depends_on_id":"qmd-29c","type":"parent-child","created_at":"2025-12-12T16:37:02.944591-05:00","created_by":"daemon"}]}
{"id":"qmd-6s5","title":"Export current database to index.yml","description":"Write a script to export current collections and path_contexts from SQLite to ~/.config/qmd/index.yml format. Include all collection metadata and contexts.","status":"closed","priority":1,"issue_type":"task","created_at":"2025-12-13T09:54:52.707844-05:00","updated_at":"2025-12-13T09:57:36.650437-05:00","closed_at":"2025-12-13T09:57:36.650437-05:00","dependencies":[{"issue_id":"qmd-6s5","depends_on_id":"qmd-3z9","type":"blocks","created_at":"2025-12-13T09:55:07.606834-05:00","created_by":"daemon"}]} {"id":"qmd-6s5","title":"Export current database to index.yml","description":"Write a script to export current collections and path_contexts from SQLite to ~/.config/qmd/index.yml format. Include all collection metadata and contexts.","status":"closed","priority":1,"issue_type":"task","created_at":"2025-12-13T09:54:52.707844-05:00","updated_at":"2025-12-13T09:57:36.650437-05:00","closed_at":"2025-12-13T09:57:36.650437-05:00","dependencies":[{"issue_id":"qmd-6s5","depends_on_id":"qmd-3z9","type":"blocks","created_at":"2025-12-13T09:55:07.606834-05:00","created_by":"daemon"}]}
{"id":"qmd-7ss","title":"remove all the symlinks and stuff in the git repo, clean up the root directory","description":"","status":"closed","priority":4,"issue_type":"task","created_at":"2025-12-12T16:40:00.744982-05:00","updated_at":"2025-12-12T17:11:18.034215-05:00","closed_at":"2025-12-12T17:11:18.034215-05:00"} {"id":"qmd-7ss","title":"remove all the symlinks and stuff in the git repo, clean up the root directory","description":"","status":"closed","priority":4,"issue_type":"task","created_at":"2025-12-12T16:40:00.744982-05:00","updated_at":"2025-12-12T17:11:18.034215-05:00","closed_at":"2025-12-12T17:11:18.034215-05:00"}
{"id":"qmd-8eu","title":"Update documents table schema for collection names","description":"Change documents.collection_id (integer FK) to documents.collection (text). Update all queries and indices. Keep backwards compatibility during transition.","design":"Schema change:\n- Add `collection TEXT` column\n- Migrate data: UPDATE documents SET collection = (SELECT name FROM collections WHERE id = collection_id)\n- Drop collection_id column\n- Update FTS5 trigger\n- Update all queries in store.ts","status":"open","priority":1,"issue_type":"task","created_at":"2025-12-13T09:54:52.830305-05:00","updated_at":"2025-12-13T09:54:52.830305-05:00","dependencies":[{"issue_id":"qmd-8eu","depends_on_id":"qmd-6s5","type":"blocks","created_at":"2025-12-13T09:55:07.662048-05:00","created_by":"daemon"}]} {"id":"qmd-8eu","title":"Update documents table schema for collection names","description":"Change documents.collection_id (integer FK) to documents.collection (text). Update all queries and indices. Keep backwards compatibility during transition.","design":"Schema change:\n- Add `collection TEXT` column\n- Migrate data: UPDATE documents SET collection = (SELECT name FROM collections WHERE id = collection_id)\n- Drop collection_id column\n- Update FTS5 trigger\n- Update all queries in store.ts","status":"in_progress","priority":1,"issue_type":"task","created_at":"2025-12-13T09:54:52.830305-05:00","updated_at":"2025-12-13T09:57:58.370734-05:00","dependencies":[{"issue_id":"qmd-8eu","depends_on_id":"qmd-6s5","type":"blocks","created_at":"2025-12-13T09:55:07.662048-05:00","created_by":"daemon"}]}
{"id":"qmd-9ua","title":"Update all qmd commands for YAML-based collections","description":"Update qmd.ts commands: collection add/list/remove/rename, status, update, ls. All should use collections.ts instead of store.ts collection functions.","status":"open","priority":1,"issue_type":"task","created_at":"2025-12-13T09:54:53.14644-05:00","updated_at":"2025-12-13T09:54:53.14644-05:00","dependencies":[{"issue_id":"qmd-9ua","depends_on_id":"qmd-u84","type":"blocks","created_at":"2025-12-13T09:55:07.893268-05:00","created_by":"daemon"},{"issue_id":"qmd-9ua","depends_on_id":"qmd-oxy","type":"blocks","created_at":"2025-12-13T09:55:07.942221-05:00","created_by":"daemon"}]} {"id":"qmd-9ua","title":"Update all qmd commands for YAML-based collections","description":"Update qmd.ts commands: collection add/list/remove/rename, status, update, ls. All should use collections.ts instead of store.ts collection functions.","status":"open","priority":1,"issue_type":"task","created_at":"2025-12-13T09:54:53.14644-05:00","updated_at":"2025-12-13T09:54:53.14644-05:00","dependencies":[{"issue_id":"qmd-9ua","depends_on_id":"qmd-u84","type":"blocks","created_at":"2025-12-13T09:55:07.893268-05:00","created_by":"daemon"},{"issue_id":"qmd-9ua","depends_on_id":"qmd-oxy","type":"blocks","created_at":"2025-12-13T09:55:07.942221-05:00","created_by":"daemon"}]}
{"id":"qmd-afe","title":"implement qmd collection rename, which changes the global path prefix for the collection","description":"","status":"closed","priority":2,"issue_type":"task","created_at":"2025-12-12T15:55:54.779325-05:00","updated_at":"2025-12-12T16:29:24.153196-05:00","closed_at":"2025-12-12T16:29:24.153196-05:00"} {"id":"qmd-afe","title":"implement qmd collection rename, which changes the global path prefix for the collection","description":"","status":"closed","priority":2,"issue_type":"task","created_at":"2025-12-12T15:55:54.779325-05:00","updated_at":"2025-12-12T16:29:24.153196-05:00","closed_at":"2025-12-12T16:29:24.153196-05:00"}
{"id":"qmd-ama","title":"Refactor database system","description":"All documents should be stored as content addressable hash, e.g. hash, doc, created_at,\n┃ updated_at. documents should be a file system layer on top e.g. collection, path, hash,\n┃ created_at, updated_at. (collection,path)\n┃\n┃\n\n┃ All documents should be stored as content addressable hash, e.g. hash, doc, created_at,\n┃ updated_at. documents should be a file system layer on top e.g. collection_id, path, hash,\n┃ created_at, updated_at. (collection,path) is unique. There is also collection which stores PWD\n┃ + glob pattern, name (\\w+). Every document is treated as path qmd://collection.name/","notes":"## Completed\n- ✅ Implemented content-addressable storage (content table with hash→doc mapping)\n- ✅ Refactored documents table as file system layer (collection_id, path, hash)\n- ✅ Added collection names (e.g., \"pages\", \"journals\", \"archive\")\n- ✅ Implemented virtual paths (qmd://collection-name/path/to/file.md)\n- ✅ Added hierarchical context support (collection-scoped)\n- ✅ Successfully migrated existing database\n- ✅ Updated search functions to work with new schema\n- ✅ Updated indexing logic to use content-addressable storage\n- ✅ Orphaned content hash cleanup\n\n## Still TODO\n- Fix migration SQL to properly extract basename (currently needs manual fix)\n- Implement `qmd collection add . --name \u003cname\u003e --mask '**/*.md'`\n- Implement `qmd ls [path]` for exploring virtual file tree","status":"closed","priority":2,"issue_type":"task","created_at":"2025-12-10T10:57:35.497489-05:00","updated_at":"2025-12-12T15:39:48.879143-05:00","closed_at":"2025-12-12T15:39:48.879143-05:00"} {"id":"qmd-ama","title":"Refactor database system","description":"All documents should be stored as content addressable hash, e.g. hash, doc, created_at,\n┃ updated_at. documents should be a file system layer on top e.g. collection, path, hash,\n┃ created_at, updated_at. (collection,path)\n┃\n┃\n\n┃ All documents should be stored as content addressable hash, e.g. hash, doc, created_at,\n┃ updated_at. documents should be a file system layer on top e.g. collection_id, path, hash,\n┃ created_at, updated_at. (collection,path) is unique. There is also collection which stores PWD\n┃ + glob pattern, name (\\w+). Every document is treated as path qmd://collection.name/","notes":"## Completed\n- ✅ Implemented content-addressable storage (content table with hash→doc mapping)\n- ✅ Refactored documents table as file system layer (collection_id, path, hash)\n- ✅ Added collection names (e.g., \"pages\", \"journals\", \"archive\")\n- ✅ Implemented virtual paths (qmd://collection-name/path/to/file.md)\n- ✅ Added hierarchical context support (collection-scoped)\n- ✅ Successfully migrated existing database\n- ✅ Updated search functions to work with new schema\n- ✅ Updated indexing logic to use content-addressable storage\n- ✅ Orphaned content hash cleanup\n\n## Still TODO\n- Fix migration SQL to properly extract basename (currently needs manual fix)\n- Implement `qmd collection add . --name \u003cname\u003e --mask '**/*.md'`\n- Implement `qmd ls [path]` for exploring virtual file tree","status":"closed","priority":2,"issue_type":"task","created_at":"2025-12-10T10:57:35.497489-05:00","updated_at":"2025-12-12T15:39:48.879143-05:00","closed_at":"2025-12-12T15:39:48.879143-05:00"}

162
migrate-schema.ts Normal file
View File

@ -0,0 +1,162 @@
#!/usr/bin/env bun
/**
* Migrate documents table from collection_id to collection name
*
* This script updates the database schema to use collection names
* instead of collection_id foreign keys, preparing for YAML-based
* collection management.
*/
import { Database } from "bun:sqlite";
import { join } from "path";
import { homedir } from "os";
const c = {
reset: "\x1b[0m",
cyan: "\x1b[36m",
green: "\x1b[32m",
yellow: "\x1b[33m",
dim: "\x1b[2m",
};
const dbPath = join(homedir(), ".cache", "qmd", "index.sqlite");
console.log(`${c.cyan}Migrating database schema...${c.reset}\n`);
console.log(`Database: ${dbPath}\n`);
const db = new Database(dbPath);
try {
db.exec("BEGIN TRANSACTION");
// Step 1: Add collection column to documents
console.log(`${c.yellow}1. Adding 'collection' column to documents table...${c.reset}`);
db.exec(`ALTER TABLE documents ADD COLUMN collection TEXT`);
console.log(` ${c.green}${c.reset} Column added`);
// Step 2: Populate collection names from collections table
console.log(`\n${c.yellow}2. Populating collection names...${c.reset}`);
const result = db.exec(`
UPDATE documents
SET collection = (
SELECT name FROM collections WHERE collections.id = documents.collection_id
)
WHERE collection IS NULL
`);
console.log(` ${c.green}${c.reset} Updated ${result} rows`);
// Step 3: Verify no NULL values
const nullCount = db.query<{ count: number }, []>(
`SELECT COUNT(*) as count FROM documents WHERE collection IS NULL`
).get();
if (nullCount && nullCount.count > 0) {
throw new Error(`Found ${nullCount.count} documents with NULL collection names`);
}
console.log(` ${c.green}${c.reset} All documents have collection names`);
// Step 4: Create new documents table without collection_id
console.log(`\n${c.yellow}3. Creating new documents table...${c.reset}`);
db.exec(`
CREATE TABLE documents_new (
id INTEGER PRIMARY KEY AUTOINCREMENT,
collection TEXT NOT NULL,
path TEXT NOT NULL,
title TEXT NOT NULL,
hash TEXT NOT NULL,
created_at TEXT NOT NULL,
modified_at TEXT NOT NULL,
active INTEGER DEFAULT 1,
FOREIGN KEY (hash) REFERENCES content(hash) ON DELETE CASCADE,
UNIQUE(collection, path)
)
`);
console.log(` ${c.green}${c.reset} New table created`);
// Step 5: Copy data
console.log(`\n${c.yellow}4. Copying data to new table...${c.reset}`);
db.exec(`
INSERT INTO documents_new (id, collection, path, title, hash, created_at, modified_at, active)
SELECT id, collection, path, title, hash, created_at, modified_at, active
FROM documents
`);
const rowCount = db.query<{ count: number }, []>(
`SELECT COUNT(*) as count FROM documents_new`
).get();
console.log(` ${c.green}${c.reset} Copied ${rowCount?.count} documents`);
// Step 6: Drop old table and rename new one
console.log(`\n${c.yellow}5. Replacing old table...${c.reset}`);
db.exec(`DROP TABLE documents`);
db.exec(`ALTER TABLE documents_new RENAME TO documents`);
console.log(` ${c.green}${c.reset} Table replaced`);
// Step 7: Recreate indices
console.log(`\n${c.yellow}6. Recreating indices...${c.reset}`);
db.exec(`CREATE INDEX idx_documents_collection ON documents(collection, active)`);
db.exec(`CREATE INDEX idx_documents_hash ON documents(hash)`);
console.log(` ${c.green}${c.reset} Indices created`);
// Step 8: Update FTS trigger to use collection name
console.log(`\n${c.yellow}7. Updating FTS trigger...${c.reset}`);
db.exec(`DROP TRIGGER IF EXISTS documents_ai`);
db.exec(`
CREATE TRIGGER documents_ai AFTER INSERT ON documents
WHEN new.active = 1
BEGIN
INSERT INTO documents_fts(rowid, filepath, title, body)
SELECT
new.id,
new.collection || '/' || new.path,
new.title,
(SELECT doc FROM content WHERE hash = new.hash)
WHERE new.active = 1;
END
`);
db.exec(`DROP TRIGGER IF EXISTS documents_au`);
db.exec(`
CREATE TRIGGER documents_au AFTER UPDATE ON documents
BEGIN
-- Delete from FTS if no longer active
DELETE FROM documents_fts WHERE rowid = old.id AND new.active = 0;
-- Update FTS if still/newly active
INSERT OR REPLACE INTO documents_fts(rowid, filepath, title, body)
SELECT
new.id,
new.collection || '/' || new.path,
new.title,
(SELECT doc FROM content WHERE hash = new.hash)
WHERE new.active = 1;
END
`);
console.log(` ${c.green}${c.reset} Triggers updated`);
// Commit transaction
db.exec("COMMIT");
console.log(`\n${c.green}✓ Migration completed successfully!${c.reset}`);
// Show summary
const collections = db.query<{ collection: string; count: number }, []>(`
SELECT collection, COUNT(*) as count
FROM documents
WHERE active = 1
GROUP BY collection
ORDER BY collection
`).all();
console.log(`\n${c.dim}Documents by collection:${c.reset}`);
for (const coll of collections) {
console.log(` ${coll.collection}: ${coll.count} files`);
}
} catch (error) {
db.exec("ROLLBACK");
console.error(`\n${c.yellow}✗ Migration failed:${c.reset} ${error}`);
console.error(`${c.dim}Database rolled back to previous state${c.reset}`);
process.exit(1);
} finally {
db.close();
}

View File

@ -148,9 +148,9 @@ export function resolveVirtualPath(db: Database, virtualPath: string): string |
*/ */
export function toVirtualPath(db: Database, absolutePath: string): string | null { export function toVirtualPath(db: Database, absolutePath: string): string | null {
const doc = db.prepare(` const doc = db.prepare(`
SELECT c.name, d.path SELECT d.collection as name, d.path
FROM documents d FROM documents d
JOIN collections c ON c.id = d.collection_id JOIN collections c ON c.name = d.collection
WHERE c.pwd || '/' || d.path = ? AND d.active = 1 WHERE c.pwd || '/' || d.path = ? AND d.active = 1
LIMIT 1 LIMIT 1
`).get(absolutePath) as { name: string; path: string } | null; `).get(absolutePath) as { name: string; path: string } | null;
@ -214,20 +214,19 @@ function initializeDatabase(db: Database): void {
db.exec(` db.exec(`
CREATE TABLE IF NOT EXISTS documents ( CREATE TABLE IF NOT EXISTS documents (
id INTEGER PRIMARY KEY AUTOINCREMENT, id INTEGER PRIMARY KEY AUTOINCREMENT,
collection_id INTEGER NOT NULL, collection TEXT NOT NULL,
path TEXT NOT NULL, path TEXT NOT NULL,
title TEXT NOT NULL, title TEXT NOT NULL,
hash TEXT NOT NULL, hash TEXT NOT NULL,
created_at TEXT NOT NULL, created_at TEXT NOT NULL,
modified_at TEXT NOT NULL, modified_at TEXT NOT NULL,
active INTEGER NOT NULL DEFAULT 1, active INTEGER NOT NULL DEFAULT 1,
FOREIGN KEY (collection_id) REFERENCES collections(id) ON DELETE CASCADE,
FOREIGN KEY (hash) REFERENCES content(hash) ON DELETE CASCADE, FOREIGN KEY (hash) REFERENCES content(hash) ON DELETE CASCADE,
UNIQUE(collection_id, path) UNIQUE(collection, path)
) )
`); `);
db.exec(`CREATE INDEX IF NOT EXISTS idx_documents_collection ON documents(collection_id, active)`); db.exec(`CREATE INDEX IF NOT EXISTS idx_documents_collection ON documents(collection, active)`);
db.exec(`CREATE INDEX IF NOT EXISTS idx_documents_hash ON documents(hash)`); db.exec(`CREATE INDEX IF NOT EXISTS idx_documents_hash ON documents(hash)`);
db.exec(`CREATE INDEX IF NOT EXISTS idx_documents_path ON documents(path, active)`); db.exec(`CREATE INDEX IF NOT EXISTS idx_documents_path ON documents(path, active)`);
@ -346,16 +345,15 @@ function migrateToContentAddressable(db: Database): void {
db.exec(` db.exec(`
CREATE TABLE documents ( CREATE TABLE documents (
id INTEGER PRIMARY KEY AUTOINCREMENT, id INTEGER PRIMARY KEY AUTOINCREMENT,
collection_id INTEGER NOT NULL, collection TEXT NOT NULL,
path TEXT NOT NULL, path TEXT NOT NULL,
title TEXT NOT NULL, title TEXT NOT NULL,
hash TEXT NOT NULL, hash TEXT NOT NULL,
created_at TEXT NOT NULL, created_at TEXT NOT NULL,
modified_at TEXT NOT NULL, modified_at TEXT NOT NULL,
active INTEGER NOT NULL DEFAULT 1, active INTEGER NOT NULL DEFAULT 1,
FOREIGN KEY (collection_id) REFERENCES collections(id) ON DELETE CASCADE,
FOREIGN KEY (hash) REFERENCES content(hash) ON DELETE CASCADE, FOREIGN KEY (hash) REFERENCES content(hash) ON DELETE CASCADE,
UNIQUE(collection_id, path) UNIQUE(collection, path)
) )
`); `);
@ -422,7 +420,7 @@ function migrateToContentAddressable(db: Database): void {
// Migrate documents: convert filepath to relative path within collection // Migrate documents: convert filepath to relative path within collection
console.log("Migrating documents..."); console.log("Migrating documents...");
const oldDocs = db.prepare(` const oldDocs = db.prepare(`
SELECT d.id, d.collection_id, d.filepath, d.title, d.hash, d.created_at, d.modified_at, c.pwd SELECT d.id, d.collection_id, d.filepath, d.title, d.hash, d.created_at, d.modified_at, c.pwd, c.name
FROM documents_old d FROM documents_old d
JOIN collections c ON c.id = d.collection_id JOIN collections c ON c.id = d.collection_id
WHERE d.active = 1 WHERE d.active = 1
@ -435,10 +433,11 @@ function migrateToContentAddressable(db: Database): void {
created_at: string; created_at: string;
modified_at: string; modified_at: string;
pwd: string; pwd: string;
name: string;
}>; }>;
const insertDoc = db.prepare(` const insertDoc = db.prepare(`
INSERT INTO documents (collection_id, path, title, hash, created_at, modified_at, active) INSERT INTO documents (collection, path, title, hash, created_at, modified_at, active)
VALUES (?, ?, ?, ?, ?, ?, 1) VALUES (?, ?, ?, ?, ?, ?, 1)
`); `);
@ -454,9 +453,9 @@ function migrateToContentAddressable(db: Database): void {
path = path.replace(/^\/+/, ''); path = path.replace(/^\/+/, '');
try { try {
insertDoc.run(doc.collection_id, path, doc.title, doc.hash, doc.created_at, doc.modified_at); insertDoc.run(doc.name, path, doc.title, doc.hash, doc.created_at, doc.modified_at);
} catch (e) { } catch (e) {
console.warn(`Skipping duplicate path: ${path} in collection ${doc.collection_id}`); console.warn(`Skipping duplicate path: ${path} in collection ${doc.name}`);
} }
} }
@ -545,7 +544,7 @@ function migrateToContentAddressable(db: Database): void {
`); `);
// Create indexes // Create indexes
db.exec(`CREATE INDEX idx_documents_collection ON documents(collection_id, active)`); db.exec(`CREATE INDEX idx_documents_collection ON documents(collection, active)`);
db.exec(`CREATE INDEX idx_documents_hash ON documents(hash)`); db.exec(`CREATE INDEX idx_documents_hash ON documents(hash)`);
db.exec(`CREATE INDEX idx_documents_path ON documents(path, active)`); db.exec(`CREATE INDEX idx_documents_path ON documents(path, active)`);
db.exec(`CREATE INDEX idx_path_contexts_collection ON path_contexts(collection_id, path_prefix)`); db.exec(`CREATE INDEX idx_path_contexts_collection ON path_contexts(collection_id, path_prefix)`);
@ -602,11 +601,11 @@ export type Store = {
// Context // Context
getContextForFile: (filepath: string) => string | null; getContextForFile: (filepath: string) => string | null;
getContextForPath: (collectionId: number, path: string) => string | null; getContextForPath: (collectionName: string, path: string) => string | null;
getCollectionIdByName: (name: string) => number | null; getCollectionIdByName: (name: string) => number | null;
getCollectionByName: (name: string) => { id: number; name: string; pwd: string; glob_pattern: string } | null; getCollectionByName: (name: string) => { id: number; name: string; pwd: string; glob_pattern: string } | null;
getCollectionsWithoutContext: () => { id: number; name: string; pwd: string; doc_count: number }[]; getCollectionsWithoutContext: () => { id: number; name: string; pwd: string; doc_count: number }[];
getTopLevelPathsWithoutContext: (collectionId: number) => string[]; getTopLevelPathsWithoutContext: (collectionName: string) => string[];
// Virtual paths // Virtual paths
parseVirtualPath: typeof parseVirtualPath; parseVirtualPath: typeof parseVirtualPath;
@ -638,12 +637,12 @@ export type Store = {
// Document indexing operations // Document indexing operations
insertContent: (hash: string, content: string, createdAt: string) => void; insertContent: (hash: string, content: string, createdAt: string) => void;
insertDocument: (collectionId: number, path: string, title: string, hash: string, createdAt: string, modifiedAt: string) => void; insertDocument: (collectionName: string, path: string, title: string, hash: string, createdAt: string, modifiedAt: string) => void;
findActiveDocument: (collectionId: number, path: string) => { id: number; hash: string; title: string } | null; findActiveDocument: (collectionName: string, path: string) => { id: number; hash: string; title: string } | null;
updateDocumentTitle: (documentId: number, title: string, modifiedAt: string) => void; updateDocumentTitle: (documentId: number, title: string, modifiedAt: string) => void;
updateDocument: (documentId: number, title: string, hash: string, modifiedAt: string) => void; updateDocument: (documentId: number, title: string, hash: string, modifiedAt: string) => void;
deactivateDocument: (collectionId: number, path: string) => void; deactivateDocument: (collectionName: string, path: string) => void;
getActiveDocumentPaths: (collectionId: number) => string[]; getActiveDocumentPaths: (collectionName: string) => string[];
// Vector/embedding operations // Vector/embedding operations
getHashesForEmbedding: () => { hash: string; body: string; path: string }[]; getHashesForEmbedding: () => { hash: string; body: string; path: string }[];
@ -690,9 +689,11 @@ export function createStore(dbPath?: string): Store {
// Context // Context
getContextForFile: (filepath: string) => getContextForFile(db, filepath), getContextForFile: (filepath: string) => getContextForFile(db, filepath),
getContextForPath: (collectionId: number, path: string) => getContextForPath(db, collectionId, path), getContextForPath: (collectionName: string, path: string) => getContextForPath(db, collectionName, path),
getCollectionIdByName: (name: string) => getCollectionIdByName(db, name), getCollectionIdByName: (name: string) => getCollectionIdByName(db, name),
getCollectionByName: (name: string) => getCollectionByName(db, name), getCollectionByName: (name: string) => getCollectionByName(db, name),
getCollectionsWithoutContext: () => getCollectionsWithoutContext(db),
getTopLevelPathsWithoutContext: (collectionName: string) => getTopLevelPathsWithoutContext(db, collectionName),
// Virtual paths // Virtual paths
parseVirtualPath, parseVirtualPath,
@ -724,12 +725,12 @@ export function createStore(dbPath?: string): Store {
// Document indexing operations // Document indexing operations
insertContent: (hash: string, content: string, createdAt: string) => insertContent(db, hash, content, createdAt), insertContent: (hash: string, content: string, createdAt: string) => insertContent(db, hash, content, createdAt),
insertDocument: (collectionId: number, path: string, title: string, hash: string, createdAt: string, modifiedAt: string) => insertDocument(db, collectionId, path, title, hash, createdAt, modifiedAt), insertDocument: (collectionName: string, path: string, title: string, hash: string, createdAt: string, modifiedAt: string) => insertDocument(db, collectionName, path, title, hash, createdAt, modifiedAt),
findActiveDocument: (collectionId: number, path: string) => findActiveDocument(db, collectionId, path), findActiveDocument: (collectionName: string, path: string) => findActiveDocument(db, collectionName, path),
updateDocumentTitle: (documentId: number, title: string, modifiedAt: string) => updateDocumentTitle(db, documentId, title, modifiedAt), updateDocumentTitle: (documentId: number, title: string, modifiedAt: string) => updateDocumentTitle(db, documentId, title, modifiedAt),
updateDocument: (documentId: number, title: string, hash: string, modifiedAt: string) => updateDocument(db, documentId, title, hash, modifiedAt), updateDocument: (documentId: number, title: string, hash: string, modifiedAt: string) => updateDocument(db, documentId, title, hash, modifiedAt),
deactivateDocument: (collectionId: number, path: string) => deactivateDocument(db, collectionId, path), deactivateDocument: (collectionName: string, path: string) => deactivateDocument(db, collectionName, path),
getActiveDocumentPaths: (collectionId: number) => getActiveDocumentPaths(db, collectionId), getActiveDocumentPaths: (collectionName: string) => getActiveDocumentPaths(db, collectionName),
// Vector/embedding operations // Vector/embedding operations
getHashesForEmbedding: () => getHashesForEmbedding(db), getHashesForEmbedding: () => getHashesForEmbedding(db),
@ -792,7 +793,7 @@ export type DocumentResult = {
title: string; // Document title (from first heading or filename) title: string; // Document title (from first heading or filename)
context: string | null; // Folder context description if configured context: string | null; // Folder context description if configured
hash: string; // Content hash for caching/change detection hash: string; // Content hash for caching/change detection
collectionId: number; // Parent collection ID collectionName: string; // Parent collection name
modifiedAt: string; // Last modification timestamp modifiedAt: string; // Last modification timestamp
bodyLength: number; // Body length in bytes (useful before loading) bodyLength: number; // Body length in bytes (useful before loading)
body?: string; // Document body (optional, load with getDocumentBody) body?: string; // Document body (optional, load with getDocumentBody)
@ -1068,7 +1069,7 @@ export function insertContent(db: Database, hash: string, content: string, creat
*/ */
export function insertDocument( export function insertDocument(
db: Database, db: Database,
collectionId: number, collectionName: string,
path: string, path: string,
title: string, title: string,
hash: string, hash: string,
@ -1076,23 +1077,23 @@ export function insertDocument(
modifiedAt: string modifiedAt: string
): void { ): void {
db.prepare(` db.prepare(`
INSERT INTO documents (collection_id, path, title, hash, created_at, modified_at, active) INSERT INTO documents (collection, path, title, hash, created_at, modified_at, active)
VALUES (?, ?, ?, ?, ?, ?, 1) VALUES (?, ?, ?, ?, ?, ?, 1)
`).run(collectionId, path, title, hash, createdAt, modifiedAt); `).run(collectionName, path, title, hash, createdAt, modifiedAt);
} }
/** /**
* Find an active document by collection ID and path. * Find an active document by collection name and path.
*/ */
export function findActiveDocument( export function findActiveDocument(
db: Database, db: Database,
collectionId: number, collectionName: string,
path: string path: string
): { id: number; hash: string; title: string } | null { ): { id: number; hash: string; title: string } | null {
return db.prepare(` return db.prepare(`
SELECT id, hash, title FROM documents SELECT id, hash, title FROM documents
WHERE collection_id = ? AND path = ? AND active = 1 WHERE collection = ? AND path = ? AND active = 1
`).get(collectionId, path) as { id: number; hash: string; title: string } | null; `).get(collectionName, path) as { id: number; hash: string; title: string } | null;
} }
/** /**
@ -1126,18 +1127,18 @@ export function updateDocument(
/** /**
* Deactivate a document (mark as inactive but don't delete). * Deactivate a document (mark as inactive but don't delete).
*/ */
export function deactivateDocument(db: Database, collectionId: number, path: string): void { export function deactivateDocument(db: Database, collectionName: string, path: string): void {
db.prepare(`UPDATE documents SET active = 0 WHERE collection_id = ? AND path = ? AND active = 1`) db.prepare(`UPDATE documents SET active = 0 WHERE collection = ? AND path = ? AND active = 1`)
.run(collectionId, path); .run(collectionName, path);
} }
/** /**
* Get all active document paths for a collection. * Get all active document paths for a collection.
*/ */
export function getActiveDocumentPaths(db: Database, collectionId: number): string[] { export function getActiveDocumentPaths(db: Database, collectionName: string): string[] {
const rows = db.prepare(` const rows = db.prepare(`
SELECT path FROM documents WHERE collection_id = ? AND active = 1 SELECT path FROM documents WHERE collection = ? AND active = 1
`).all(collectionId) as { path: string }[]; `).all(collectionName) as { path: string }[];
return rows.map(r => r.path); return rows.map(r => r.path);
} }
@ -1227,7 +1228,11 @@ function levenshtein(a: string, b: string): number {
} }
export function findSimilarFiles(db: Database, query: string, maxDistance: number = 3, limit: number = 5): string[] { export function findSimilarFiles(db: Database, query: string, maxDistance: number = 3, limit: number = 5): string[] {
const allFiles = db.prepare(`SELECT display_path FROM documents WHERE active = 1`).all() as { display_path: string }[]; const allFiles = db.prepare(`
SELECT 'qmd://' || d.collection || '/' || d.path as display_path
FROM documents d
WHERE d.active = 1
`).all() as { display_path: string }[];
const queryLower = query.toLowerCase(); const queryLower = query.toLowerCase();
const scored = allFiles const scored = allFiles
.map(f => ({ path: f.display_path, dist: levenshtein(f.display_path.toLowerCase(), queryLower) })) .map(f => ({ path: f.display_path, dist: levenshtein(f.display_path.toLowerCase(), queryLower) }))
@ -1240,15 +1245,13 @@ export function findSimilarFiles(db: Database, query: string, maxDistance: numbe
export function matchFilesByGlob(db: Database, pattern: string): { filepath: string; displayPath: string; bodyLength: number }[] { export function matchFilesByGlob(db: Database, pattern: string): { filepath: string; displayPath: string; bodyLength: number }[] {
const allFiles = db.prepare(` const allFiles = db.prepare(`
SELECT SELECT
'qmd://' || c.name || '/' || d.path as virtual_path, 'qmd://' || d.collection || '/' || d.path as virtual_path,
LENGTH(content.doc) as body_length, LENGTH(content.doc) as body_length,
d.collection_id,
d.path d.path
FROM documents d FROM documents d
JOIN collections c ON c.id = d.collection_id
JOIN content ON content.hash = d.hash JOIN content ON content.hash = d.hash
WHERE d.active = 1 WHERE d.active = 1
`).all() as { virtual_path: string; body_length: number; collection_id: number; path: string }[]; `).all() as { virtual_path: string; body_length: number; path: string }[];
const glob = new Glob(pattern); const glob = new Glob(pattern);
return allFiles return allFiles
@ -1270,11 +1273,15 @@ export function matchFilesByGlob(db: Database, pattern: string): { filepath: str
* For example, context at "/talks" applies to "/talks/2024/keynote.md". * For example, context at "/talks" applies to "/talks/2024/keynote.md".
* *
* @param db Database instance * @param db Database instance
* @param collectionId Collection ID * @param collectionName Collection name
* @param path Relative path within the collection * @param path Relative path within the collection
* @returns Context string or null if no context is defined * @returns Context string or null if no context is defined
*/ */
export function getContextForPath(db: Database, collectionId: number, path: string): string | null { export function getContextForPath(db: Database, collectionName: string, path: string): string | null {
// First get the collection_id from the collection name
const coll = db.prepare(`SELECT id FROM collections WHERE name = ?`).get(collectionName) as { id: number } | null;
if (!coll) return null;
// Find the most specific (longest) matching path prefix for this collection // Find the most specific (longest) matching path prefix for this collection
const result = db.prepare(` const result = db.prepare(`
SELECT context FROM path_contexts SELECT context FROM path_contexts
@ -1282,7 +1289,7 @@ export function getContextForPath(db: Database, collectionId: number, path: stri
AND (? LIKE path_prefix || '/%' OR ? = path_prefix OR path_prefix = '') AND (? LIKE path_prefix || '/%' OR ? = path_prefix OR path_prefix = '')
ORDER BY LENGTH(path_prefix) DESC ORDER BY LENGTH(path_prefix) DESC
LIMIT 1 LIMIT 1
`).get(collectionId, path, path) as { context: string } | null; `).get(coll.id, path, path) as { context: string } | null;
return result?.context || null; return result?.context || null;
} }
@ -1290,17 +1297,17 @@ export function getContextForPath(db: Database, collectionId: number, path: stri
* Legacy function for backward compatibility - resolves filepath to collection+path first * Legacy function for backward compatibility - resolves filepath to collection+path first
*/ */
export function getContextForFile(db: Database, filepath: string): string | null { export function getContextForFile(db: Database, filepath: string): string | null {
// Try to find the document to get its collection_id and path // Try to find the document to get its collection name and path
const doc = db.prepare(` const doc = db.prepare(`
SELECT d.collection_id, d.path SELECT d.collection, d.path
FROM documents d FROM documents d
JOIN collections c ON c.id = d.collection_id JOIN collections c ON c.name = d.collection
WHERE c.pwd || '/' || d.path = ? AND d.active = 1 WHERE c.pwd || '/' || d.path = ? AND d.active = 1
LIMIT 1 LIMIT 1
`).get(filepath) as { collection_id: number; path: string } | null; `).get(filepath) as { collection: string; path: string } | null;
if (!doc) return null; if (!doc) return null;
return getContextForPath(db, doc.collection_id, doc.path); return getContextForPath(db, doc.collection, doc.path);
} }
/** /**
@ -1334,7 +1341,7 @@ export function listCollections(db: Database): { id: number; name: string; pwd:
SUM(CASE WHEN d.active = 1 THEN 1 ELSE 0 END) as active_count, SUM(CASE WHEN d.active = 1 THEN 1 ELSE 0 END) as active_count,
MAX(d.modified_at) as last_modified MAX(d.modified_at) as last_modified
FROM collections c FROM collections c
LEFT JOIN documents d ON d.collection_id = c.id LEFT JOIN documents d ON d.collection = c.name
GROUP BY c.id GROUP BY c.id
ORDER BY c.name ORDER BY c.name
`).all() as { id: number; name: string; pwd: string; glob_pattern: string; created_at: string; updated_at: string; doc_count: number; active_count: number; last_modified: string | null }[]; `).all() as { id: number; name: string; pwd: string; glob_pattern: string; created_at: string; updated_at: string; doc_count: number; active_count: number; last_modified: string | null }[];
@ -1342,8 +1349,14 @@ export function listCollections(db: Database): { id: number; name: string; pwd:
} }
export function removeCollection(db: Database, collectionId: number): { deletedDocs: number; cleanedHashes: number } { export function removeCollection(db: Database, collectionId: number): { deletedDocs: number; cleanedHashes: number } {
// Get collection name first
const coll = db.prepare(`SELECT name FROM collections WHERE id = ?`).get(collectionId) as { name: string } | null;
if (!coll) {
return { deletedDocs: 0, cleanedHashes: 0 };
}
// Delete documents // Delete documents
const docResult = db.prepare(`DELETE FROM documents WHERE collection_id = ?`).run(collectionId); const docResult = db.prepare(`DELETE FROM documents WHERE collection = ?`).run(coll.name);
// Delete contexts // Delete contexts
db.prepare(`DELETE FROM path_contexts WHERE collection_id = ?`).run(collectionId); db.prepare(`DELETE FROM path_contexts WHERE collection_id = ?`).run(collectionId);
@ -1364,7 +1377,17 @@ export function removeCollection(db: Database, collectionId: number): { deletedD
} }
export function renameCollection(db: Database, collectionId: number, newName: string): void { export function renameCollection(db: Database, collectionId: number, newName: string): void {
// Get old collection name first
const coll = db.prepare(`SELECT name FROM collections WHERE id = ?`).get(collectionId) as { name: string } | null;
if (!coll) return;
const now = new Date().toISOString(); const now = new Date().toISOString();
// Update all documents with the new collection name
db.prepare(`UPDATE documents SET collection = ? WHERE collection = ?`)
.run(newName, coll.name);
// Update collection name
db.prepare(`UPDATE collections SET name = ?, updated_at = ? WHERE id = ?`) db.prepare(`UPDATE collections SET name = ?, updated_at = ? WHERE id = ?`)
.run(newName, now, collectionId); .run(newName, now, collectionId);
} }
@ -1435,7 +1458,7 @@ export function getCollectionsWithoutContext(db: Database): { id: number; name:
const collections = db.prepare(` const collections = db.prepare(`
SELECT c.id, c.name, c.pwd, COUNT(d.id) as doc_count SELECT c.id, c.name, c.pwd, COUNT(d.id) as doc_count
FROM collections c FROM collections c
LEFT JOIN documents d ON d.collection_id = c.id AND d.active = 1 LEFT JOIN documents d ON d.collection = c.name AND d.active = 1
WHERE NOT EXISTS ( WHERE NOT EXISTS (
SELECT 1 FROM path_contexts pc WHERE pc.collection_id = c.id SELECT 1 FROM path_contexts pc WHERE pc.collection_id = c.id
) )
@ -1449,17 +1472,21 @@ export function getCollectionsWithoutContext(db: Database): { id: number; name:
* Get top-level directories in a collection that don't have context. * Get top-level directories in a collection that don't have context.
* Useful for suggesting where context might be needed. * Useful for suggesting where context might be needed.
*/ */
export function getTopLevelPathsWithoutContext(db: Database, collectionId: number): string[] { export function getTopLevelPathsWithoutContext(db: Database, collectionName: string): string[] {
// First get the collection_id from the collection name
const coll = db.prepare(`SELECT id FROM collections WHERE name = ?`).get(collectionName) as { id: number } | null;
if (!coll) return [];
// Get all paths in the collection // Get all paths in the collection
const paths = db.prepare(` const paths = db.prepare(`
SELECT DISTINCT path FROM documents SELECT DISTINCT path FROM documents
WHERE collection_id = ? AND active = 1 WHERE collection = ? AND active = 1
`).all(collectionId) as { path: string }[]; `).all(collectionName) as { path: string }[];
// Get existing contexts for this collection // Get existing contexts for this collection
const contexts = db.prepare(` const contexts = db.prepare(`
SELECT path_prefix FROM path_contexts WHERE collection_id = ? SELECT path_prefix FROM path_contexts WHERE collection_id = ?
`).all(collectionId) as { path_prefix: string }[]; `).all(coll.id) as { path_prefix: string }[];
const contextPrefixes = new Set(contexts.map(c => c.path_prefix)); const contextPrefixes = new Set(contexts.map(c => c.path_prefix));
@ -1516,22 +1543,25 @@ export function searchFTS(db: Database, query: string, limit: number = 20, colle
let sql = ` let sql = `
SELECT SELECT
'qmd://' || c.name || '/' || d.path as filepath, 'qmd://' || d.collection || '/' || d.path as filepath,
'qmd://' || c.name || '/' || d.path as display_path, 'qmd://' || d.collection || '/' || d.path as display_path,
d.title, d.title,
content.doc as body, content.doc as body,
bm25(documents_fts, 10.0, 1.0) as score bm25(documents_fts, 10.0, 1.0) as score
FROM documents_fts f FROM documents_fts f
JOIN documents d ON d.id = f.rowid JOIN documents d ON d.id = f.rowid
JOIN collections c ON c.id = d.collection_id
JOIN content ON content.hash = d.hash JOIN content ON content.hash = d.hash
WHERE documents_fts MATCH ? AND d.active = 1 WHERE documents_fts MATCH ? AND d.active = 1
`; `;
const params: (string | number)[] = [ftsQuery]; const params: (string | number)[] = [ftsQuery];
if (collectionId !== undefined) { if (collectionId !== undefined) {
sql += ` AND d.collection_id = ?`; // Convert collectionId to collection name for filtering
params.push(collectionId); const coll = db.prepare(`SELECT name FROM collections WHERE id = ?`).get(collectionId) as { name: string } | null;
if (coll) {
sql += ` AND d.collection = ?`;
params.push(coll.name);
}
} }
sql += ` ORDER BY score LIMIT ?`; sql += ` ORDER BY score LIMIT ?`;
@ -1541,10 +1571,15 @@ export function searchFTS(db: Database, query: string, limit: number = 20, colle
const maxScore = rows.length > 0 ? Math.max(...rows.map(r => Math.abs(r.score))) : 1; const maxScore = rows.length > 0 ? Math.max(...rows.map(r => Math.abs(r.score))) : 1;
return rows.map(row => ({ return rows.map(row => ({
file: row.filepath, filepath: row.filepath,
displayPath: row.display_path, displayPath: row.display_path,
title: row.title, title: row.title,
hash: "", // Not available in FTS query
collectionName: row.filepath.split('//')[1]?.split('/')[0] || "", // Extract from virtual path
modifiedAt: "", // Not available in FTS query
bodyLength: row.body.length,
body: row.body, body: row.body,
context: null, // Not loaded in FTS
score: Math.abs(row.score) / maxScore, score: Math.abs(row.score) / maxScore,
source: "fts" as const, source: "fts" as const,
})); }));
@ -1566,21 +1601,24 @@ export async function searchVec(db: Database, query: string, model: string, limi
SELECT SELECT
v.hash_seq, v.hash_seq,
v.distance, v.distance,
'qmd://' || c.name || '/' || d.path as filepath, 'qmd://' || d.collection || '/' || d.path as filepath,
'qmd://' || c.name || '/' || d.path as display_path, 'qmd://' || d.collection || '/' || d.path as display_path,
d.title, d.title,
content.doc as body, content.doc as body,
cv.pos cv.pos
FROM vectors_vec v FROM vectors_vec v
JOIN content_vectors cv ON cv.hash || '_' || cv.seq = v.hash_seq JOIN content_vectors cv ON cv.hash || '_' || cv.seq = v.hash_seq
JOIN documents d ON d.hash = cv.hash AND d.active = 1 JOIN documents d ON d.hash = cv.hash AND d.active = 1
JOIN collections c ON c.id = d.collection_id
JOIN content ON content.hash = d.hash JOIN content ON content.hash = d.hash
WHERE v.embedding MATCH ? AND k = ? WHERE v.embedding MATCH ? AND k = ?
`; `;
if (collectionId !== undefined) { if (collectionId !== undefined) {
sql += ` AND d.collection_id = ${collectionId}`; // Convert collectionId to collection name for filtering
const coll = db.prepare(`SELECT name FROM collections WHERE id = ?`).get(collectionId) as { name: string } | null;
if (coll) {
sql += ` AND d.collection = '${coll.name}'`;
}
} }
sql += ` ORDER BY v.distance`; sql += ` ORDER BY v.distance`;
@ -1599,10 +1637,15 @@ export async function searchVec(db: Database, query: string, model: string, limi
.sort((a, b) => a.bestDist - b.bestDist) .sort((a, b) => a.bestDist - b.bestDist)
.slice(0, limit) .slice(0, limit)
.map(({ row }) => ({ .map(({ row }) => ({
file: row.filepath, filepath: row.filepath,
displayPath: row.display_path, displayPath: row.display_path,
title: row.title, title: row.title,
hash: "", // Not available in vec query
collectionName: row.filepath.split('//')[1]?.split('/')[0] || "", // Extract from virtual path
modifiedAt: "", // Not available in vec query
bodyLength: row.body.length,
body: row.body, body: row.body,
context: null, // Not loaded in vec
score: 1 / (1 + row.distance), score: 1 / (1 + row.distance),
source: "vec" as const, source: "vec" as const,
chunkPos: row.pos, chunkPos: row.pos,
@ -1782,7 +1825,7 @@ type DbDocRow = {
display_path: string; display_path: string;
title: string; title: string;
hash: string; hash: string;
collection_id: number; collection: string;
modified_at: string; modified_at: string;
body_length: number; body_length: number;
body?: string; body?: string;
@ -1803,20 +1846,48 @@ export function findDocument(db: Database, filename: string, options: { includeB
filepath = homedir() + filepath.slice(1); filepath = homedir() + filepath.slice(1);
} }
const selectCols = options.includeBody const bodyCol = options.includeBody ? `, content.doc as body` : ``;
? `filepath, display_path, title, hash, collection_id, modified_at, LENGTH(body) as body_length, body`
: `filepath, display_path, title, hash, collection_id, modified_at, LENGTH(body) as body_length`; // Build computed columns for filepath and display_path
const selectCols = `
c.pwd || '/' || d.path as filepath,
'qmd://' || d.collection || '/' || d.path as display_path,
d.title,
d.hash,
d.collection,
d.modified_at,
LENGTH(content.doc) as body_length
${bodyCol}
`;
// Try various match strategies - always join content for body_length
let doc = db.prepare(`
SELECT ${selectCols}
FROM documents d
JOIN collections c ON c.name = d.collection
JOIN content ON content.hash = d.hash
WHERE c.pwd || '/' || d.path = ? AND d.active = 1
`).get(filepath) as DbDocRow | null;
// Try various match strategies
let doc = db.prepare(`SELECT ${selectCols} FROM documents WHERE filepath = ? AND active = 1`).get(filepath) as DbDocRow | null;
if (!doc) { if (!doc) {
doc = db.prepare(`SELECT ${selectCols} FROM documents WHERE display_path = ? AND active = 1`).get(filepath) as DbDocRow | null; doc = db.prepare(`
SELECT ${selectCols}
FROM documents d
JOIN collections c ON c.name = d.collection
JOIN content ON content.hash = d.hash
WHERE 'qmd://' || d.collection || '/' || d.path = ? AND d.active = 1
`).get(filepath) as DbDocRow | null;
} }
if (!doc) { if (!doc) {
doc = db.prepare(`SELECT ${selectCols} FROM documents WHERE filepath LIKE ? AND active = 1 LIMIT 1`).get(`%${filepath}`) as DbDocRow | null; doc = db.prepare(`
} SELECT ${selectCols}
if (!doc) { FROM documents d
doc = db.prepare(`SELECT ${selectCols} FROM documents WHERE display_path LIKE ? AND active = 1 LIMIT 1`).get(`%${filepath}`) as DbDocRow | null; JOIN collections c ON c.name = d.collection
JOIN content ON content.hash = d.hash
WHERE (c.pwd || '/' || d.path LIKE ? OR 'qmd://' || d.collection || '/' || d.path LIKE ?) AND d.active = 1
LIMIT 1
`).get(`%${filepath}`, `%${filepath}`) as DbDocRow | null;
} }
if (!doc) { if (!doc) {
@ -1832,7 +1903,7 @@ export function findDocument(db: Database, filename: string, options: { includeB
title: doc.title, title: doc.title,
context, context,
hash: doc.hash, hash: doc.hash,
collectionId: doc.collection_id, collectionName: doc.collection,
modifiedAt: doc.modified_at, modifiedAt: doc.modified_at,
bodyLength: doc.body_length, bodyLength: doc.body_length,
...(options.includeBody && doc.body !== undefined && { body: doc.body }), ...(options.includeBody && doc.body !== undefined && { body: doc.body }),
@ -1845,7 +1916,13 @@ export function findDocument(db: Database, filename: string, options: { includeB
*/ */
export function getDocumentBody(db: Database, doc: DocumentResult | { filepath: string }, fromLine?: number, maxLines?: number): string | null { export function getDocumentBody(db: Database, doc: DocumentResult | { filepath: string }, fromLine?: number, maxLines?: number): string | null {
const filepath = 'filepath' in doc ? doc.filepath : doc.filepath; const filepath = 'filepath' in doc ? doc.filepath : doc.filepath;
const row = db.prepare(`SELECT body FROM documents WHERE filepath = ? AND active = 1`).get(filepath) as { body: string } | null; const row = db.prepare(`
SELECT content.doc as body
FROM documents d
JOIN collections c ON c.name = d.collection
JOIN content ON content.hash = d.hash
WHERE c.pwd || '/' || d.path = ? AND d.active = 1
`).get(filepath) as { body: string } | null;
if (!row) return null; if (!row) return null;
let body = row.body; let body = row.body;
@ -1900,9 +1977,17 @@ export function findDocuments(
const errors: string[] = []; const errors: string[] = [];
const maxBytes = options.maxBytes ?? DEFAULT_MULTI_GET_MAX_BYTES; const maxBytes = options.maxBytes ?? DEFAULT_MULTI_GET_MAX_BYTES;
const selectCols = options.includeBody const bodyCol = options.includeBody ? `, content.doc as body` : ``;
? `filepath, display_path, title, hash, collection_id, modified_at, LENGTH(body) as body_length, body` const selectCols = `
: `filepath, display_path, title, hash, collection_id, modified_at, LENGTH(body) as body_length`; c.pwd || '/' || d.path as filepath,
'qmd://' || d.collection || '/' || d.path as display_path,
d.title,
d.hash,
d.collection,
d.modified_at,
LENGTH(content.doc) as body_length
${bodyCol}
`;
let fileRows: DbDocRow[]; let fileRows: DbDocRow[];
@ -1910,9 +1995,22 @@ export function findDocuments(
const names = pattern.split(',').map(s => s.trim()).filter(Boolean); const names = pattern.split(',').map(s => s.trim()).filter(Boolean);
fileRows = []; fileRows = [];
for (const name of names) { for (const name of names) {
let doc = db.prepare(`SELECT ${selectCols} FROM documents WHERE display_path = ? AND active = 1`).get(name) as DbDocRow | null; let doc = db.prepare(`
SELECT ${selectCols}
FROM documents d
JOIN collections c ON c.name = d.collection
JOIN content ON content.hash = d.hash
WHERE 'qmd://' || d.collection || '/' || d.path = ? AND d.active = 1
`).get(name) as DbDocRow | null;
if (!doc) { if (!doc) {
doc = db.prepare(`SELECT ${selectCols} FROM documents WHERE display_path LIKE ? AND active = 1 LIMIT 1`).get(`%${name}`) as DbDocRow | null; doc = db.prepare(`
SELECT ${selectCols}
FROM documents d
JOIN collections c ON c.name = d.collection
JOIN content ON content.hash = d.hash
WHERE 'qmd://' || d.collection || '/' || d.path LIKE ? AND d.active = 1
LIMIT 1
`).get(`%${name}`) as DbDocRow | null;
} }
if (doc) { if (doc) {
fileRows.push(doc); fileRows.push(doc);
@ -1932,9 +2030,15 @@ export function findDocuments(
errors.push(`No files matched pattern: ${pattern}`); errors.push(`No files matched pattern: ${pattern}`);
return { docs: [], errors }; return { docs: [], errors };
} }
const filepaths = matched.map(m => m.filepath); const virtualPaths = matched.map(m => m.filepath);
const placeholders = filepaths.map(() => '?').join(','); const placeholders = virtualPaths.map(() => '?').join(',');
fileRows = db.prepare(`SELECT ${selectCols} FROM documents WHERE filepath IN (${placeholders}) AND active = 1`).all(...filepaths) as DbDocRow[]; fileRows = db.prepare(`
SELECT ${selectCols}
FROM documents d
JOIN collections c ON c.name = d.collection
JOIN content ON content.hash = d.hash
WHERE 'qmd://' || d.collection || '/' || d.path IN (${placeholders}) AND d.active = 1
`).all(...virtualPaths) as DbDocRow[];
} }
const results: MultiGetResult[] = []; const results: MultiGetResult[] = [];
@ -1958,7 +2062,7 @@ export function findDocuments(
title: row.title || row.display_path.split('/').pop() || row.display_path, title: row.title || row.display_path.split('/').pop() || row.display_path,
context, context,
hash: row.hash, hash: row.hash,
collectionId: row.collection_id, collectionName: row.collection,
modifiedAt: row.modified_at, modifiedAt: row.modified_at,
bodyLength: row.body_length, bodyLength: row.body_length,
...(options.includeBody && row.body !== undefined && { body: row.body }), ...(options.includeBody && row.body !== undefined && { body: row.body }),
@ -2039,7 +2143,7 @@ export function getStatus(db: Database): IndexStatus {
COUNT(d.id) as active_count, COUNT(d.id) as active_count,
MAX(d.modified_at) as last_doc_update MAX(d.modified_at) as last_doc_update
FROM collections c FROM collections c
LEFT JOIN documents d ON d.collection_id = c.id AND d.active = 1 LEFT JOIN documents d ON d.collection = c.name AND d.active = 1
GROUP BY c.id GROUP BY c.id
ORDER BY last_doc_update DESC ORDER BY last_doc_update DESC
`).all() as { id: number; pwd: string; glob_pattern: string; created_at: string; active_count: number; last_doc_update: string | null }[]; `).all() as { id: number; pwd: string; glob_pattern: string; created_at: string; active_count: number; last_doc_update: string | null }[];