From 99aee7190387483079358cf50b4cd152e607c2c3 Mon Sep 17 00:00:00 2001 From: Tobi Lutke Date: Fri, 12 Dec 2025 15:47:42 -0500 Subject: [PATCH] Update get and multi-get commands for virtual paths MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Update getDocument() to support qmd:// virtual paths and filesystem paths - Update multiGet() to handle virtual paths in patterns and comma-separated lists - Update matchFilesByGlob() in store.ts to return virtual paths - Remove duplicate getContextForFile() function from qmd.ts - Use collection-scoped getContextForPath() instead of legacy function - All get and multi-get tests now passing Closes qmd-vro 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Sonnet 4.5 --- .beads/.gitignore | 29 ++ .beads/.local_version | 1 + .beads/README.md | 81 +++++ .beads/config.yaml | 62 ++++ .beads/issues.jsonl | 7 +- .gitattributes | 3 + CLAUDE.md | 46 ++- archive | 1 + cli.test.ts | 99 ++++++ qmd.ts | 715 ++++++++++++++++++++++++++++++++---------- store.ts | 537 +++++++++++++++++++++++++++---- texts | 1 + 12 files changed, 1340 insertions(+), 242 deletions(-) create mode 100644 .beads/.gitignore create mode 100644 .beads/.local_version create mode 100644 .beads/README.md create mode 100644 .beads/config.yaml create mode 100644 .gitattributes create mode 120000 archive create mode 120000 texts diff --git a/.beads/.gitignore b/.beads/.gitignore new file mode 100644 index 0000000..f438450 --- /dev/null +++ b/.beads/.gitignore @@ -0,0 +1,29 @@ +# SQLite databases +*.db +*.db?* +*.db-journal +*.db-wal +*.db-shm + +# Daemon runtime files +daemon.lock +daemon.log +daemon.pid +bd.sock + +# Legacy database files +db.sqlite +bd.db + +# Merge artifacts (temporary files from 3-way merge) +beads.base.jsonl +beads.base.meta.json +beads.left.jsonl +beads.left.meta.json +beads.right.jsonl +beads.right.meta.json + +# Keep JSONL exports and config (source of truth for git) 
+!issues.jsonl +!metadata.json +!config.json diff --git a/.beads/.local_version b/.beads/.local_version new file mode 100644 index 0000000..ae6dd4e --- /dev/null +++ b/.beads/.local_version @@ -0,0 +1 @@ +0.29.0 diff --git a/.beads/README.md b/.beads/README.md new file mode 100644 index 0000000..50f281f --- /dev/null +++ b/.beads/README.md @@ -0,0 +1,81 @@ +# Beads - AI-Native Issue Tracking + +Welcome to Beads! This repository uses **Beads** for issue tracking - a modern, AI-native tool designed to live directly in your codebase alongside your code. + +## What is Beads? + +Beads is issue tracking that lives in your repo, making it perfect for AI coding agents and developers who want their issues close to their code. No web UI required - everything works through the CLI and integrates seamlessly with git. + +**Learn more:** [github.com/steveyegge/beads](https://github.com/steveyegge/beads) + +## Quick Start + +### Essential Commands + +```bash +# Create new issues +bd create "Add user authentication" + +# View all issues +bd list + +# View issue details +bd show <issue-id> + +# Update issue status +bd update <issue-id> --status in_progress +bd update <issue-id> --status done + +# Sync with git remote +bd sync +``` + +### Working with Issues + +Issues in Beads are: +- **Git-native**: Stored in `.beads/issues.jsonl` and synced like code +- **AI-friendly**: CLI-first design works perfectly with AI coding agents +- **Branch-aware**: Issues can follow your branch workflow +- **Always in sync**: Auto-syncs with your commits + +## Why Beads? 
+ +✨ **AI-Native Design** +- Built specifically for AI-assisted development workflows +- CLI-first interface works seamlessly with AI coding agents +- No context switching to web UIs + +🚀 **Developer Focused** +- Issues live in your repo, right next to your code +- Works offline, syncs when you push +- Fast, lightweight, and stays out of your way + +🔧 **Git Integration** +- Automatic sync with git commits +- Branch-aware issue tracking +- Intelligent JSONL merge resolution + +## Get Started with Beads + +Try Beads in your own projects: + +```bash +# Install Beads +curl -sSL https://raw.githubusercontent.com/steveyegge/beads/main/scripts/install.sh | bash + +# Initialize in your repo +bd init + +# Create your first issue +bd create "Try out Beads" +``` + +## Learn More + +- **Documentation**: [github.com/steveyegge/beads/docs](https://github.com/steveyegge/beads/tree/main/docs) +- **Quick Start Guide**: Run `bd quickstart` +- **Examples**: [github.com/steveyegge/beads/examples](https://github.com/steveyegge/beads/tree/main/examples) + +--- + +*Beads: Issue tracking that moves at the speed of thought* ⚡ diff --git a/.beads/config.yaml b/.beads/config.yaml new file mode 100644 index 0000000..f242785 --- /dev/null +++ b/.beads/config.yaml @@ -0,0 +1,62 @@ +# Beads Configuration File +# This file configures default behavior for all bd commands in this repository +# All settings can also be set via environment variables (BD_* prefix) +# or overridden with command-line flags + +# Issue prefix for this repository (used by bd init) +# If not set, bd init will auto-detect from directory name +# Example: issue-prefix: "myproject" creates issues like "myproject-1", "myproject-2", etc. 
+# issue-prefix: "" + +# Use no-db mode: load from JSONL, no SQLite, write back after each command +# When true, bd will use .beads/issues.jsonl as the source of truth +# instead of SQLite database +# no-db: false + +# Disable daemon for RPC communication (forces direct database access) +# no-daemon: false + +# Disable auto-flush of database to JSONL after mutations +# no-auto-flush: false + +# Disable auto-import from JSONL when it's newer than database +# no-auto-import: false + +# Enable JSON output by default +# json: false + +# Default actor for audit trails (overridden by BD_ACTOR or --actor) +# actor: "" + +# Path to database (overridden by BEADS_DB or --db) +# db: "" + +# Auto-start daemon if not running (can also use BEADS_AUTO_START_DAEMON) +# auto-start-daemon: true + +# Debounce interval for auto-flush (can also use BEADS_FLUSH_DEBOUNCE) +# flush-debounce: "5s" + +# Git branch for beads commits (bd sync will commit to this branch) +# IMPORTANT: Set this for team projects so all clones use the same sync branch. +# This setting persists across clones (unlike database config which is gitignored). +# Can also use BEADS_SYNC_BRANCH env var for local override. +# If not set, bd sync will require you to run 'bd config set sync.branch <branch>'. +# sync-branch: "beads-sync" + +# Multi-repo configuration (experimental - bd-307) +# Allows hydrating from multiple repositories and routing writes to the correct JSONL +# repos: +# primary: "." 
# Primary repo (where this database lives) +# additional: # Additional repos to hydrate from (read-only) +# - ~/beads-planning # Personal planning repo +# - ~/work-planning # Work planning repo + +# Integration settings (access with 'bd config get/set') +# These are stored in the database, not in this file: +# - jira.url +# - jira.project +# - linear.url +# - linear.api-key +# - github.org +# - github.repo diff --git a/.beads/issues.jsonl b/.beads/issues.jsonl index 703a230..88f7bf3 100644 --- a/.beads/issues.jsonl +++ b/.beads/issues.jsonl @@ -1,12 +1,13 @@ {"id":"qmd-4ru","title":"Update document retrieval for new schema","description":"Functions like getDocument, findDocument, getMultipleDocuments need to work with new schema (path instead of filepath, content joins, virtual paths).","status":"in_progress","priority":0,"issue_type":"task","created_at":"2025-12-12T15:29:53.911881-05:00","updated_at":"2025-12-12T15:30:10.835834-05:00","dependencies":[{"issue_id":"qmd-4ru","depends_on_id":"qmd-ama","type":"discovered-from","created_at":"2025-12-12T15:29:53.912607-05:00","created_by":"daemon"}]} -{"id":"qmd-ama","title":"Refactor database system","description":"All documents should be stored as content addressable hash, e.g. hash, doc, created_at,\n┃ updated_at. documents should be a file system layer on top e.g. collection, path, hash,\n┃ created_at, updated_at. (collection,path)\n┃\n┃\n\n┃ All documents should be stored as content addressable hash, e.g. hash, doc, created_at,\n┃ updated_at. documents should be a file system layer on top e.g. collection_id, path, hash,\n┃ created_at, updated_at. (collection,path) is unique. There is also collection which stores PWD\n┃ + glob pattern, name (\\w+). 
Every document is treated as path qmd://collection.name/","notes":"## Completed\n- ✅ Implemented content-addressable storage (content table with hash→doc mapping)\n- ✅ Refactored documents table as file system layer (collection_id, path, hash)\n- ✅ Added collection names (e.g., \"pages\", \"journals\", \"archive\")\n- ✅ Implemented virtual paths (qmd://collection-name/path/to/file.md)\n- ✅ Added hierarchical context support (collection-scoped)\n- ✅ Successfully migrated existing database\n- ✅ Updated search functions to work with new schema\n- ✅ Updated indexing logic to use content-addressable storage\n- ✅ Orphaned content hash cleanup\n\n## Still TODO\n- Fix migration SQL to properly extract basename (currently needs manual fix)\n- Implement `qmd collection add . --name \u003cname\u003e --mask '**/*.md'`\n- Implement `qmd ls [path]` for exploring virtual file tree","status":"in_progress","priority":2,"issue_type":"task","created_at":"2025-12-10T10:57:35.497489-05:00","updated_at":"2025-12-12T15:26:27.345436-05:00"} +{"id":"qmd-ama","title":"Refactor database system","description":"All documents should be stored as content addressable hash, e.g. hash, doc, created_at,\n┃ updated_at. documents should be a file system layer on top e.g. collection, path, hash,\n┃ created_at, updated_at. (collection,path)\n┃\n┃\n\n┃ All documents should be stored as content addressable hash, e.g. hash, doc, created_at,\n┃ updated_at. documents should be a file system layer on top e.g. collection_id, path, hash,\n┃ created_at, updated_at. (collection,path) is unique. There is also collection which stores PWD\n┃ + glob pattern, name (\\w+). 
Every document is treated as path qmd://collection.name/","notes":"## Completed\n- ✅ Implemented content-addressable storage (content table with hash→doc mapping)\n- ✅ Refactored documents table as file system layer (collection_id, path, hash)\n- ✅ Added collection names (e.g., \"pages\", \"journals\", \"archive\")\n- ✅ Implemented virtual paths (qmd://collection-name/path/to/file.md)\n- ✅ Added hierarchical context support (collection-scoped)\n- ✅ Successfully migrated existing database\n- ✅ Updated search functions to work with new schema\n- ✅ Updated indexing logic to use content-addressable storage\n- ✅ Orphaned content hash cleanup\n\n## Still TODO\n- Fix migration SQL to properly extract basename (currently needs manual fix)\n- Implement `qmd collection add . --name \u003cname\u003e --mask '**/*.md'`\n- Implement `qmd ls [path]` for exploring virtual file tree","status":"closed","priority":2,"issue_type":"task","created_at":"2025-12-10T10:57:35.497489-05:00","updated_at":"2025-12-12T15:39:48.879143-05:00","closed_at":"2025-12-12T15:39:48.879143-05:00"} {"id":"qmd-bx1","title":"Fix migration SQL for proper basename extraction","description":"The migration currently generates collection names incorrectly (uses full path instead of basename). 
Need to fix the SQL in migrateToContentAddressable to properly extract the directory basename.","status":"open","priority":1,"issue_type":"bug","created_at":"2025-12-12T15:29:53.757723-05:00","updated_at":"2025-12-12T15:29:53.757723-05:00","dependencies":[{"issue_id":"qmd-bx1","depends_on_id":"qmd-ama","type":"discovered-from","created_at":"2025-12-12T15:29:53.758524-05:00","created_by":"daemon"}]} {"id":"qmd-c0m","title":"Comprehensive CLI review and consistency pass","description":"Review entire CLI command structure:\n- Consistent naming (add vs create, remove vs delete)\n- Consistent flag usage (--name, --mask, etc)\n- Update help text for all commands\n- Ensure virtual paths work everywhere\n- Test all commands end-to-end","status":"open","priority":1,"issue_type":"task","created_at":"2025-12-12T15:29:38.083564-05:00","updated_at":"2025-12-12T15:29:38.083564-05:00"} {"id":"qmd-deh","title":"Refactor database introduce qmd collection *","description":"","status":"open","priority":2,"issue_type":"task","created_at":"2025-12-10T10:56:04.516137-05:00","updated_at":"2025-12-10T10:56:04.516137-05:00"} {"id":"qmd-dmi","title":"Implement 'qmd collection' commands","description":"Add explicit collection management:\n- qmd collection add . 
--name \u003cname\u003e --mask '**/*.md'\n- qmd collection list\n- qmd collection remove \u003cname\u003e\n\nThis gives users control over collection names and patterns.","status":"open","priority":1,"issue_type":"feature","created_at":"2025-12-12T15:29:53.810666-05:00","updated_at":"2025-12-12T15:29:53.810666-05:00","dependencies":[{"issue_id":"qmd-dmi","depends_on_id":"qmd-ama","type":"discovered-from","created_at":"2025-12-12T15:29:53.811294-05:00","created_by":"daemon"}]} {"id":"qmd-e2c","title":"Implement 'qmd ls' command","description":"Add command to explore virtual file tree:\n- qmd ls → list all collections\n- qmd ls \u003ccollection\u003e → list files in collection\n- qmd ls \u003ccollection\u003e/\u003cpath\u003e → list files under path\nOutput: flat list of qmd:// paths","status":"open","priority":1,"issue_type":"feature","created_at":"2025-12-12T15:29:53.859804-05:00","updated_at":"2025-12-12T15:29:53.859804-05:00","dependencies":[{"issue_id":"qmd-e2c","depends_on_id":"qmd-ama","type":"discovered-from","created_at":"2025-12-12T15:29:53.860535-05:00","created_by":"daemon"}]} +{"id":"qmd-j9z","title":"Add unit tests for content addressable hashes","description":"add same file from multiple places and verify that they both point at same hash. 
drop one collection and the content stays.","status":"open","priority":3,"issue_type":"task","created_at":"2025-12-12T15:39:15.459504-05:00","updated_at":"2025-12-12T15:39:15.459504-05:00"} {"id":"qmd-p1h","title":"Create collection add|remove","description":"","status":"open","priority":2,"issue_type":"task","created_at":"2025-12-10T10:57:00.717864-05:00","updated_at":"2025-12-10T10:57:00.717864-05:00"} {"id":"qmd-rhd","title":"Fix 'qmd status' output for new schema","description":"Update status to show collections by name, cleaner context display, virtual path examples.","status":"open","priority":2,"issue_type":"task","created_at":"2025-12-12T15:29:54.020596-05:00","updated_at":"2025-12-12T15:29:54.020596-05:00","dependencies":[{"issue_id":"qmd-rhd","depends_on_id":"qmd-ama","type":"discovered-from","created_at":"2025-12-12T15:29:54.021095-05:00","created_by":"daemon"}]} -{"id":"qmd-s1y","title":"Update 'qmd add-context' for collection scoping","description":"Update add-context to work with collection-scoped contexts using new path_contexts schema.","status":"open","priority":2,"issue_type":"task","created_at":"2025-12-12T15:29:54.076582-05:00","updated_at":"2025-12-12T15:29:54.076582-05:00"} -{"id":"qmd-vro","title":"Update 'qmd get' to support virtual paths","description":"Allow qmd get to accept both virtual paths (qmd://journals/...) 
and filesystem paths, plus fuzzy matching by filename.","status":"open","priority":0,"issue_type":"task","created_at":"2025-12-12T15:29:53.963113-05:00","updated_at":"2025-12-12T15:29:53.963113-05:00","dependencies":[{"issue_id":"qmd-vro","depends_on_id":"qmd-ama","type":"discovered-from","created_at":"2025-12-12T15:29:53.963641-05:00","created_by":"daemon"}]} +{"id":"qmd-s1y","title":"Update 'qmd add-context' for collection scoping","description":"Update add-context to work with collection-scoped contexts using new path_contexts schema.","notes":"Refactoring to:\n- qmd context add [path] \"text\" (defaults to current collection if in one)\n- qmd context list\n- qmd context rm \u003cpath\u003e\n- Support \"/\" for global/system context\n- Auto-detect collection from pwd","status":"closed","priority":2,"issue_type":"task","created_at":"2025-12-12T15:29:54.076582-05:00","updated_at":"2025-12-12T15:37:47.683263-05:00","closed_at":"2025-12-12T15:37:47.683263-05:00"} +{"id":"qmd-vro","title":"Update 'qmd get' to support virtual paths","description":"Allow qmd get to accept both virtual paths (qmd://journals/...) 
and filesystem paths, plus fuzzy matching by filename.","status":"closed","priority":0,"issue_type":"task","created_at":"2025-12-12T15:29:53.963113-05:00","updated_at":"2025-12-12T15:47:29.178955-05:00","closed_at":"2025-12-12T15:47:29.178955-05:00","dependencies":[{"issue_id":"qmd-vro","depends_on_id":"qmd-ama","type":"discovered-from","created_at":"2025-12-12T15:29:53.963641-05:00","created_by":"daemon"}]} {"id":"qmd-x19","title":"Update 'qmd add-context' for collection-scoped contexts","description":"Update add-context to work with collections:\n- qmd add-context \u003ccollection\u003e/\u003cpath\u003e \"context description\"\n- Support both virtual and filesystem paths\n- Update to use new path_contexts schema","status":"open","priority":2,"issue_type":"task","created_at":"2025-12-12T15:29:38.142575-05:00","updated_at":"2025-12-12T15:29:38.142575-05:00"} diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..807d598 --- /dev/null +++ b/.gitattributes @@ -0,0 +1,3 @@ + +# Use bd merge for beads JSONL files +.beads/issues.jsonl merge=beads diff --git a/CLAUDE.md b/CLAUDE.md index d54e928..37a7ee5 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -1,19 +1,47 @@ # QMD - Quick Markdown Search +**Note**: This project uses [bd (beads)](https://github.com/steveyegge/beads) for issue tracking. Use `bd` commands instead of markdown TODOs. See AGENTS.md for workflow details. + Use Bun instead of Node.js (`bun` not `node`, `bun install` not `npm install`). ## Commands ```sh -qmd add . # Index markdown files in current directory -qmd status # Show index status and collections -qmd update-all # Re-index all collections -qmd embed # Generate vector embeddings (requires Ollama) -qmd search # BM25 full-text search -qmd vsearch # Vector similarity search -qmd query # Hybrid search with reranking (best quality) -qmd get # Get document content (fuzzy matches if not found) -qmd multi-get # Get multiple docs by glob or comma-separated list +qmd add . 
# Index markdown files in current directory +qmd context add [path] "text" # Add context for path (defaults to current dir) +qmd context list # List all contexts +qmd context rm # Remove context +qmd status # Show index status and collections +qmd update # Re-index all collections +qmd embed # Generate vector embeddings (requires Ollama) +qmd search # BM25 full-text search +qmd vsearch # Vector similarity search +qmd query # Hybrid search with reranking (best quality) +qmd get # Get document content (fuzzy matches if not found) +qmd multi-get # Get multiple docs by glob or comma-separated list +``` + +## Context Management + +```sh +# Add context to current directory (auto-detects collection) +qmd context add "Description of these files" + +# Add context to a specific path +qmd context add /subfolder "Description for subfolder" + +# Add global context to all collections (system message) +qmd context add / "Always include this context" + +# Add context using virtual paths +qmd context add qmd://journals/2024 "Journal entries from 2024" + +# List all contexts +qmd context list + +# Remove context +qmd context rm qmd://journals/2024 +qmd context rm / # Remove global context ``` ## Options diff --git a/archive b/archive new file mode 120000 index 0000000..878d6bc --- /dev/null +++ b/archive @@ -0,0 +1 @@ +/Users/tobi/src/github.com/Shopify/archive/obsidian/archive/Context \ No newline at end of file diff --git a/cli.test.ts b/cli.test.ts index e9fd93c..7d48c02 100644 --- a/cli.test.ts +++ b/cli.test.ts @@ -415,3 +415,102 @@ describe("CLI Search with Collection Filter", () => { expect(stdout.toLowerCase()).toContain("meeting"); }); }); + +describe("CLI Context Management", () => { + let localDbPath: string; + + beforeEach(async () => { + // Use a fresh database for this test suite + localDbPath = getFreshDbPath(); + // Index some files first + await runQmd(["add", "."], { dbPath: localDbPath }); + }); + + test("add global context with /", async () => { + const { stdout, 
exitCode } = await runQmd([ + "context", + "add", + "/", + "Global system context", + ], { dbPath: localDbPath }); + expect(exitCode).toBe(0); + expect(stdout).toContain("✓ Added global context"); + expect(stdout).toContain("Global system context"); + }); + + test("list contexts", async () => { + // Add a global context first + await runQmd([ + "context", + "add", + "/", + "Test context", + ], { dbPath: localDbPath }); + + const { stdout, exitCode } = await runQmd([ + "context", + "list", + ], { dbPath: localDbPath }); + expect(exitCode).toBe(0); + expect(stdout).toContain("Configured Contexts"); + expect(stdout).toContain("Test context"); + }); + + test("add context to virtual path", async () => { + // Collection name should be "fixtures" (basename of the fixtures directory) + const { stdout, exitCode } = await runQmd([ + "context", + "add", + "qmd://fixtures/notes", + "Context for notes subdirectory", + ], { dbPath: localDbPath }); + expect(exitCode).toBe(0); + expect(stdout).toContain("✓ Added context for: qmd://fixtures/notes"); + }); + + test("remove global context", async () => { + // Add a global context first + await runQmd([ + "context", + "add", + "/", + "Global context to remove", + ], { dbPath: localDbPath }); + + const { stdout, exitCode } = await runQmd([ + "context", + "rm", + "/", + ], { dbPath: localDbPath }); + expect(exitCode).toBe(0); + expect(stdout).toContain("✓ Removed"); + }); + + test("remove virtual path context", async () => { + // Add a context first + await runQmd([ + "context", + "add", + "qmd://fixtures/notes", + "Context to remove", + ], { dbPath: localDbPath }); + + const { stdout, exitCode } = await runQmd([ + "context", + "rm", + "qmd://fixtures/notes", + ], { dbPath: localDbPath }); + expect(exitCode).toBe(0); + expect(stdout).toContain("✓ Removed context for: qmd://fixtures/notes"); + }); + + test("fails to remove non-existent context", async () => { + const { stdout, stderr, exitCode } = await runQmd([ + "context", + "rm", + 
"qmd://nonexistent/path", + ], { dbPath: localDbPath }); + expect(exitCode).toBe(1); + expect(stderr || stdout).toContain("not found"); + }); +}); diff --git a/qmd.ts b/qmd.ts index 2725b72..393586e 100755 --- a/qmd.ts +++ b/qmd.ts @@ -17,7 +17,9 @@ import { reciprocalRankFusion, extractSnippet, getContextForFile, + getContextForPath, getCollectionIdByName, + getCollectionByName, findSimilarFiles, matchFilesByGlob, getHashesNeedingEmbedding, @@ -35,6 +37,11 @@ import { getCachedResult, setCachedResult, getIndexHealth, + parseVirtualPath, + buildVirtualPath, + isVirtualPath, + resolveVirtualPath, + toVirtualPath, OLLAMA_URL, DEFAULT_EMBED_MODEL, DEFAULT_QUERY_MODEL, @@ -338,13 +345,35 @@ async function rerank(query: string, documents: { file: string; text: string }[] return results.sort((a, b) => b.score - a.score); } -function getOrCreateCollection(db: Database, pwd: string, globPattern: string): number { +function getOrCreateCollection(db: Database, pwd: string, globPattern: string, name?: string): number { const now = new Date().toISOString(); - // Use INSERT OR IGNORE to handle race conditions, then SELECT - db.prepare(`INSERT OR IGNORE INTO collections (pwd, glob_pattern, created_at) VALUES (?, ?, ?)`).run(pwd, globPattern, now); - const existing = db.prepare(`SELECT id FROM collections WHERE pwd = ? AND glob_pattern = ?`).get(pwd, globPattern) as { id: number }; - return existing.id; + // Generate collection name from pwd basename if not provided + if (!name) { + const parts = pwd.split('/').filter(Boolean); + name = parts[parts.length - 1] || 'root'; + } + + // Check if collection with this pwd+glob already exists + const existing = db.prepare(`SELECT id FROM collections WHERE pwd = ? 
AND glob_pattern = ?`).get(pwd, globPattern) as { id: number } | null; + if (existing) return existing.id; + + // Try to insert with generated name + try { + const result = db.prepare(`INSERT INTO collections (name, pwd, glob_pattern, created_at, updated_at) VALUES (?, ?, ?, ?, ?)`).run(name, pwd, globPattern, now, now); + return result.lastInsertRowid as number; + } catch (e) { + // Name collision - append a unique suffix + const allCollections = db.prepare(`SELECT name FROM collections WHERE name LIKE ?`).all(`${name}%`) as { name: string }[]; + let suffix = 2; + let uniqueName = `${name}-${suffix}`; + while (allCollections.some(c => c.name === uniqueName)) { + suffix++; + uniqueName = `${name}-${suffix}`; + } + const result = db.prepare(`INSERT INTO collections (name, pwd, glob_pattern, created_at, updated_at) VALUES (?, ?, ?, ?, ?)`).run(uniqueName, pwd, globPattern, now, now); + return result.lastInsertRowid as number; + } } function cleanupDuplicateCollections(db: Database): void { @@ -521,82 +550,320 @@ async function updateCollections(): Promise { console.log(`${c.green}✓ All collections updated.${c.reset}`); } -async function addContext(pathArg: string, contextText: string): Promise { +/** + * Detect which collection (if any) contains the given filesystem path. + * Returns { collectionId, collectionName, relativePath } or null if not in any collection. + */ +function detectCollectionFromPath(db: Database, fsPath: string): { collectionId: number; collectionName: string; relativePath: string } | null { + const realPath = getRealPath(fsPath); + + // Find collections that this path is under + const collections = db.prepare(` + SELECT id, name, pwd + FROM collections + WHERE ? LIKE pwd || '/%' OR ? 
= pwd + ORDER BY LENGTH(pwd) DESC + LIMIT 1 + `).get(realPath, realPath) as { id: number; name: string; pwd: string } | null; + + if (!collections) return null; + + // Calculate relative path + let relativePath = realPath; + if (relativePath.startsWith(collections.pwd + '/')) { + relativePath = relativePath.slice(collections.pwd.length + 1); + } else if (relativePath === collections.pwd) { + relativePath = ''; + } + + return { + collectionId: collections.id, + collectionName: collections.name, + relativePath + }; +} + +async function contextAdd(pathArg: string | undefined, contextText: string): Promise { const db = getDb(); const now = new Date().toISOString(); - // Resolve path - could be relative, absolute, or use ~ - let pathPrefix = pathArg; - if (pathPrefix === '.' || pathPrefix === './') { - pathPrefix = getPwd(); - } else if (pathPrefix.startsWith('~/')) { - pathPrefix = homedir() + pathPrefix.slice(1); - } else if (!pathPrefix.startsWith('/')) { - pathPrefix = resolve(getPwd(), pathPrefix); + // Handle "/" as global/root context (applies to all collections) + if (pathArg === '/') { + // Find all collections and add context to each + const collections = db.prepare(`SELECT id, name FROM collections`).all() as { id: number; name: string }[]; + for (const coll of collections) { + db.prepare(` + INSERT INTO path_contexts (collection_id, path_prefix, context, created_at) + VALUES (?, '', ?, ?) + ON CONFLICT(collection_id, path_prefix) DO UPDATE SET context = excluded.context + `).run(coll.id, contextText, now); + } + console.log(`${c.green}✓${c.reset} Added global context to ${collections.length} collection(s)`); + console.log(`${c.dim}Context: ${contextText}${c.reset}`); + closeDb(); + return; } - // Get realpath and normalize: remove trailing slash - pathPrefix = getRealPath(pathPrefix).replace(/\/$/, ''); + // Resolve path - defaults to current directory if not provided + let fsPath = pathArg || '.'; + if (fsPath === '.' 
|| fsPath === './') { + fsPath = getPwd(); + } else if (fsPath.startsWith('~/')) { + fsPath = homedir() + fsPath.slice(1); + } else if (!fsPath.startsWith('/') && !fsPath.startsWith('qmd://')) { + fsPath = resolve(getPwd(), fsPath); + } - // Insert or update - db.prepare(`INSERT INTO path_contexts (path_prefix, context, created_at) VALUES (?, ?, ?) - ON CONFLICT(path_prefix) DO UPDATE SET context = excluded.context`).run(pathPrefix, contextText, now); + // Handle virtual paths (qmd://collection/path) + if (isVirtualPath(fsPath)) { + const parsed = parseVirtualPath(fsPath); + if (!parsed) { + console.error(`${c.yellow}Invalid virtual path: ${fsPath}${c.reset}`); + process.exit(1); + } - console.log(`${c.green}✓${c.reset} Added context for: ${shortPath(pathPrefix)}`); + const coll = getCollectionByName(db, parsed.collectionName); + if (!coll) { + console.error(`${c.yellow}Collection not found: ${parsed.collectionName}${c.reset}`); + process.exit(1); + } + + db.prepare(` + INSERT INTO path_contexts (collection_id, path_prefix, context, created_at) + VALUES (?, ?, ?, ?) + ON CONFLICT(collection_id, path_prefix) DO UPDATE SET context = excluded.context + `).run(coll.id, parsed.path, contextText, now); + + console.log(`${c.green}✓${c.reset} Added context for: qmd://${parsed.collectionName}/${parsed.path || ''}`); + console.log(`${c.dim}Context: ${contextText}${c.reset}`); + closeDb(); + return; + } + + // Detect collection from filesystem path + const detected = detectCollectionFromPath(db, fsPath); + if (!detected) { + console.error(`${c.yellow}Path is not in any indexed collection: ${fsPath}${c.reset}`); + console.error(`${c.dim}Run 'qmd status' to see indexed collections${c.reset}`); + process.exit(1); + } + + db.prepare(` + INSERT INTO path_contexts (collection_id, path_prefix, context, created_at) + VALUES (?, ?, ?, ?) 
+ ON CONFLICT(collection_id, path_prefix) DO UPDATE SET context = excluded.context + `).run(detected.collectionId, detected.relativePath, contextText, now); + + const displayPath = detected.relativePath ? `qmd://${detected.collectionName}/${detected.relativePath}` : `qmd://${detected.collectionName}/`; + console.log(`${c.green}✓${c.reset} Added context for: ${displayPath}`); console.log(`${c.dim}Context: ${contextText}${c.reset}`); closeDb(); } +function contextList(): void { + const db = getDb(); + + const contexts = db.prepare(` + SELECT c.name as collection_name, pc.path_prefix, pc.context + FROM path_contexts pc + JOIN collections c ON c.id = pc.collection_id + ORDER BY c.name, LENGTH(pc.path_prefix) DESC, pc.path_prefix + `).all() as { collection_name: string; path_prefix: string; context: string }[]; + + if (contexts.length === 0) { + console.log(`${c.dim}No contexts configured. Use 'qmd context add' to add one.${c.reset}`); + closeDb(); + return; + } + + console.log(`\n${c.bold}Configured Contexts${c.reset}\n`); + + let lastCollection = ''; + for (const ctx of contexts) { + if (ctx.collection_name !== lastCollection) { + console.log(`${c.cyan}${ctx.collection_name}${c.reset}`); + lastCollection = ctx.collection_name; + } + + const path = ctx.path_prefix || '/'; + const displayPath = ctx.path_prefix ? 
` ${path}` : ' / (root)'; + console.log(`${displayPath}`); + console.log(` ${c.dim}${ctx.context}${c.reset}`); + } + + closeDb(); +} + +function contextRemove(pathArg: string): void { + const db = getDb(); + + if (pathArg === '/') { + // Remove all root contexts + const result = db.prepare(`DELETE FROM path_contexts WHERE path_prefix = ''`).run(); + console.log(`${c.green}✓${c.reset} Removed ${result.changes} global context(s)`); + closeDb(); + return; + } + + // Handle virtual paths + if (isVirtualPath(pathArg)) { + const parsed = parseVirtualPath(pathArg); + if (!parsed) { + console.error(`${c.yellow}Invalid virtual path: ${pathArg}${c.reset}`); + process.exit(1); + } + + const coll = getCollectionByName(db, parsed.collectionName); + if (!coll) { + console.error(`${c.yellow}Collection not found: ${parsed.collectionName}${c.reset}`); + process.exit(1); + } + + const result = db.prepare(` + DELETE FROM path_contexts + WHERE collection_id = ? AND path_prefix = ? + `).run(coll.id, parsed.path); + + if (result.changes === 0) { + console.error(`${c.yellow}No context found for: ${pathArg}${c.reset}`); + process.exit(1); + } + + console.log(`${c.green}✓${c.reset} Removed context for: ${pathArg}`); + closeDb(); + return; + } + + // Handle filesystem paths + let fsPath = pathArg; + if (fsPath === '.' || fsPath === './') { + fsPath = getPwd(); + } else if (fsPath.startsWith('~/')) { + fsPath = homedir() + fsPath.slice(1); + } else if (!fsPath.startsWith('/')) { + fsPath = resolve(getPwd(), fsPath); + } + + const detected = detectCollectionFromPath(db, fsPath); + if (!detected) { + console.error(`${c.yellow}Path is not in any indexed collection: ${fsPath}${c.reset}`); + process.exit(1); + } + + const result = db.prepare(` + DELETE FROM path_contexts + WHERE collection_id = ? AND path_prefix = ? 
+ `).run(detected.collectionId, detected.relativePath); + + if (result.changes === 0) { + console.error(`${c.yellow}No context found for: qmd://${detected.collectionName}/${detected.relativePath}${c.reset}`); + process.exit(1); + } + + console.log(`${c.green}✓${c.reset} Removed context for: qmd://${detected.collectionName}/${detected.relativePath}`); + closeDb(); +} + function getDocument(filename: string, fromLine?: number, maxLines?: number): void { const db = getDb(); // Parse :linenum suffix from filename (e.g., "file.md:100") - let filepath = filename; - const colonMatch = filepath.match(/:(\d+)$/); + let inputPath = filename; + const colonMatch = inputPath.match(/:(\d+)$/); if (colonMatch && !fromLine) { fromLine = parseInt(colonMatch[1], 10); - filepath = filepath.slice(0, -colonMatch[0].length); + inputPath = inputPath.slice(0, -colonMatch[0].length); } - // Expand ~ to home directory - if (filepath.startsWith('~/')) { - filepath = homedir() + filepath.slice(1); - } + let doc: { collectionId: number; collectionName: string; path: string; body: string } | null = null; + let virtualPath: string; - // Try exact match on filepath first - let doc = db.prepare(`SELECT filepath, body FROM documents WHERE filepath = ? AND active = 1`).get(filepath) as { filepath: string; body: string } | null; - - // Try exact match on display_path - if (!doc) { - doc = db.prepare(`SELECT filepath, body FROM documents WHERE display_path = ? AND active = 1`).get(filepath) as { filepath: string; body: string } | null; - } - - // Try matching by filename ending (allows partial paths) - if (!doc) { - doc = db.prepare(`SELECT filepath, body FROM documents WHERE filepath LIKE ? AND active = 1 LIMIT 1`).get(`%${filepath}`) as { filepath: string; body: string } | null; - } - - // Try matching by display_path ending - if (!doc) { - doc = db.prepare(`SELECT filepath, body FROM documents WHERE display_path LIKE ? 
AND active = 1 LIMIT 1`).get(`%${filepath}`) as { filepath: string; body: string } | null; - } - - if (!doc) { - // Suggest similar files using Levenshtein distance - const similar = findSimilarFiles(db, filepath, 5, 5); - console.error(`Document not found: ${filename}`); - if (similar.length > 0) { - console.error(`\nDid you mean one of these?`); - for (const s of similar) { - console.error(` ${s}`); - } + // Handle virtual paths (qmd://collection/path) + if (isVirtualPath(inputPath)) { + const parsed = parseVirtualPath(inputPath); + if (!parsed) { + console.error(`Invalid virtual path: ${inputPath}`); + closeDb(); + process.exit(1); } + + // Try exact match on collection + path + doc = db.prepare(` + SELECT c.id as collectionId, c.name as collectionName, d.path, content.doc as body + FROM documents d + JOIN collections c ON c.id = d.collection_id + JOIN content ON content.hash = d.hash + WHERE c.name = ? AND d.path = ? AND d.active = 1 + `).get(parsed.collectionName, parsed.path) as typeof doc; + + if (!doc) { + // Try fuzzy match by path ending + doc = db.prepare(` + SELECT c.id as collectionId, c.name as collectionName, d.path, content.doc as body + FROM documents d + JOIN collections c ON c.id = d.collection_id + JOIN content ON content.hash = d.hash + WHERE c.name = ? AND d.path LIKE ? 
AND d.active = 1 + LIMIT 1 + `).get(parsed.collectionName, `%${parsed.path}`) as typeof doc; + } + + virtualPath = inputPath; + } else { + // Handle filesystem paths + let fsPath = inputPath; + + // Expand ~ to home directory + if (fsPath.startsWith('~/')) { + fsPath = homedir() + fsPath.slice(1); + } else if (!fsPath.startsWith('/')) { + // Relative path - resolve from current directory + fsPath = resolve(getPwd(), fsPath); + } + fsPath = getRealPath(fsPath); + + // Try to detect which collection contains this path + const detected = detectCollectionFromPath(db, fsPath); + + if (detected) { + // Found collection - query by collection_id + relative path + doc = db.prepare(` + SELECT c.id as collectionId, c.name as collectionName, d.path, content.doc as body + FROM documents d + JOIN collections c ON c.id = d.collection_id + JOIN content ON content.hash = d.hash + WHERE c.id = ? AND d.path = ? AND d.active = 1 + `).get(detected.collectionId, detected.relativePath) as typeof doc; + } + + // Fuzzy match by filename (last component of path) + if (!doc) { + const filename = inputPath.split('/').pop() || inputPath; + doc = db.prepare(` + SELECT c.id as collectionId, c.name as collectionName, d.path, content.doc as body + FROM documents d + JOIN collections c ON c.id = d.collection_id + JOIN content ON content.hash = d.hash + WHERE d.path LIKE ? 
AND d.active = 1 + LIMIT 1 + `).get(`%${filename}`) as typeof doc; + } + + if (doc) { + virtualPath = buildVirtualPath(doc.collectionName, doc.path); + } else { + virtualPath = inputPath; + } + } + + if (!doc) { + console.error(`Document not found: ${filename}`); closeDb(); process.exit(1); } // Get context for this file - const context = getContextForFile(db, doc.filepath); + const context = getContextForPath(db, doc.collectionId, doc.path); let output = doc.body; @@ -623,33 +890,83 @@ function multiGet(pattern: string, maxLines?: number, maxBytes: number = DEFAULT // Check if it's a comma-separated list or a glob pattern const isCommaSeparated = pattern.includes(',') && !pattern.includes('*') && !pattern.includes('?'); - let files: { filepath: string; displayPath: string; bodyLength: number }[]; + let files: { filepath: string; displayPath: string; bodyLength: number; collectionId?: number; path?: string }[]; if (isCommaSeparated) { - // Comma-separated list of files + // Comma-separated list of files (can be virtual paths or relative paths) const names = pattern.split(',').map(s => s.trim()).filter(Boolean); files = []; for (const name of names) { - // Try exact match on display_path first - let doc = db.prepare(`SELECT filepath, display_path, LENGTH(body) as body_length FROM documents WHERE display_path = ? AND active = 1`).get(name) as { filepath: string; display_path: string; body_length: number } | null; - // Try suffix match - if (!doc) { - doc = db.prepare(`SELECT filepath, display_path, LENGTH(body) as body_length FROM documents WHERE display_path LIKE ? 
AND active = 1 LIMIT 1`).get(`%${name}`) as { filepath: string; display_path: string; body_length: number } | null; - } - if (doc) { - files.push({ filepath: doc.filepath, displayPath: doc.display_path, bodyLength: doc.body_length }); - } else { - // Suggest similar files - const similar = findSimilarFiles(db, name, 5, 3); - console.error(`File not found: ${name}`); - if (similar.length > 0) { - console.error(` Did you mean: ${similar.join(', ')}`); + let doc: { virtual_path: string; body_length: number; collection_id: number; path: string } | null = null; + + // Handle virtual paths + if (isVirtualPath(name)) { + const parsed = parseVirtualPath(name); + if (parsed) { + // Try exact match on collection + path + doc = db.prepare(` + SELECT + 'qmd://' || c.name || '/' || d.path as virtual_path, + LENGTH(content.doc) as body_length, + d.collection_id, + d.path + FROM documents d + JOIN collections c ON c.id = d.collection_id + JOIN content ON content.hash = d.hash + WHERE c.name = ? AND d.path = ? AND d.active = 1 + `).get(parsed.collectionName, parsed.path) as typeof doc; } + } else { + // Try exact match on path + doc = db.prepare(` + SELECT + 'qmd://' || c.name || '/' || d.path as virtual_path, + LENGTH(content.doc) as body_length, + d.collection_id, + d.path + FROM documents d + JOIN collections c ON c.id = d.collection_id + JOIN content ON content.hash = d.hash + WHERE d.path = ? AND d.active = 1 + LIMIT 1 + `).get(name) as typeof doc; + + // Try suffix match + if (!doc) { + doc = db.prepare(` + SELECT + 'qmd://' || c.name || '/' || d.path as virtual_path, + LENGTH(content.doc) as body_length, + d.collection_id, + d.path + FROM documents d + JOIN collections c ON c.id = d.collection_id + JOIN content ON content.hash = d.hash + WHERE d.path LIKE ? 
AND d.active = 1 + LIMIT 1 + `).get(`%${name}`) as typeof doc; + } + } + + if (doc) { + files.push({ + filepath: doc.virtual_path, + displayPath: doc.virtual_path, + bodyLength: doc.body_length, + collectionId: doc.collection_id, + path: doc.path + }); + } else { + console.error(`File not found: ${name}`); } } } else { - // Glob pattern on display_path - files = matchFilesByGlob(db, pattern); + // Glob pattern - matchFilesByGlob now returns virtual paths + files = matchFilesByGlob(db, pattern).map(f => ({ + ...f, + collectionId: undefined, // Will be fetched later if needed + path: undefined + })); if (files.length === 0) { console.error(`No files matched pattern: ${pattern}`); closeDb(); @@ -661,7 +978,23 @@ function multiGet(pattern: string, maxLines?: number, maxBytes: number = DEFAULT const results: { file: string; displayPath: string; title: string; body: string; context: string | null; skipped: boolean; skipReason?: string }[] = []; for (const file of files) { - const context = getContextForFile(db, file.filepath); + // Parse virtual path to get collection info if not already available + let collectionId = file.collectionId; + let path = file.path; + + if (!collectionId || !path) { + const parsed = parseVirtualPath(file.displayPath); + if (parsed) { + const coll = getCollectionByName(db, parsed.collectionName); + if (coll) { + collectionId = coll.id; + path = parsed.path; + } + } + } + + // Get context using collection-scoped function + const context = collectionId && path ? getContextForPath(db, collectionId, path) : null; // Check size limit if (file.bodyLength > maxBytes) { @@ -677,7 +1010,18 @@ function multiGet(pattern: string, maxLines?: number, maxBytes: number = DEFAULT continue; } - const doc = db.prepare(`SELECT body, title FROM documents WHERE filepath = ? 
AND active = 1`).get(file.filepath) as { body: string; title: string } | null; + // Fetch document content - use virtual path to query + const parsed = parseVirtualPath(file.displayPath); + if (!parsed) continue; + + const doc = db.prepare(` + SELECT content.doc as body, d.title + FROM documents d + JOIN collections c ON c.id = d.collection_id + JOIN content ON content.hash = d.hash + WHERE c.name = ? AND d.path = ? AND d.active = 1 + `).get(parsed.collectionName, parsed.path) as { body: string; title: string } | null; + if (!doc) continue; let body = doc.body; @@ -781,18 +1125,6 @@ function multiGet(pattern: string, maxLines?: number, maxBytes: number = DEFAULT } } -// Get context for a filepath (finds most specific matching path prefix) -function getContextForFile(db: Database, filepath: string): string | null { - // Find all matching prefixes and return the longest (most specific) one - const result = db.prepare(` - SELECT context FROM path_contexts - WHERE ? LIKE path_prefix || '%' - ORDER BY LENGTH(path_prefix) DESC - LIMIT 1 - `).get(filepath) as { context: string } | null; - return result?.context || null; -} - async function dropCollection(globPattern: string): Promise { const db = getDb(); const pwd = getPwd(); @@ -853,34 +1185,28 @@ async function indexFiles(globPattern: string = DEFAULT_GLOB): Promise { return; } - const insertStmt = db.prepare(`INSERT INTO documents (collection_id, name, title, hash, filepath, display_path, body, created_at, modified_at, active) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, 1)`); - const deactivateStmt = db.prepare(`UPDATE documents SET active = 0 WHERE collection_id = ? AND filepath = ? AND active = 1`); - const findActiveStmt = db.prepare(`SELECT id, hash, title, display_path FROM documents WHERE collection_id = ? AND filepath = ? AND active = 1`); - const findActiveAnyCollectionStmt = db.prepare(`SELECT id, collection_id, hash, title, display_path FROM documents WHERE filepath = ? 
AND active = 1`); + // Prepared statements for new schema + const insertContentStmt = db.prepare(`INSERT OR IGNORE INTO content (hash, doc, created_at) VALUES (?, ?, ?)`); + const insertDocStmt = db.prepare(`INSERT INTO documents (collection_id, path, title, hash, created_at, modified_at, active) VALUES (?, ?, ?, ?, ?, ?, 1)`); + const deactivateStmt = db.prepare(`UPDATE documents SET active = 0 WHERE collection_id = ? AND path = ? AND active = 1`); + const findActiveStmt = db.prepare(`SELECT id, hash, title FROM documents WHERE collection_id = ? AND path = ? AND active = 1`); const updateTitleStmt = db.prepare(`UPDATE documents SET title = ?, modified_at = ? WHERE id = ?`); - const updateDisplayPathStmt = db.prepare(`UPDATE documents SET display_path = ? WHERE id = ?`); - - // Collect all existing display_paths for uniqueness check - const existingDisplayPaths = new Set( - (db.prepare(`SELECT display_path FROM documents WHERE active = 1 AND display_path != ''`).all() as { display_path: string }[]) - .map(r => r.display_path) - ); let indexed = 0, updated = 0, unchanged = 0, processed = 0; - const seenFiles = new Set(); + const seenPaths = new Set(); const startTime = Date.now(); for (const relativeFile of files) { const filepath = getRealPath(resolve(pwd, relativeFile)); - seenFiles.add(filepath); + const path = relativeFile; // Use relative path as-is + seenPaths.add(path); const content = await Bun.file(filepath).text(); const hash = await hashContent(content); - const name = relativeFile.replace(/\.md$/, "").split("/").pop() || relativeFile; const title = extractTitle(content, relativeFile); - // First check if file exists in THIS collection - const existing = findActiveStmt.get(collectionId, filepath) as { id: number; hash: string; title: string; display_path: string } | null; + // Check if document exists in this collection with this path + const existing = findActiveStmt.get(collectionId, path) as { id: number; hash: string; title: string } | null; if 
(existing) { if (existing.hash === hash) { @@ -891,35 +1217,24 @@ async function indexFiles(globPattern: string = DEFAULT_GLOB): Promise { } else { unchanged++; } - // Update display_path if empty - if (!existing.display_path) { - const displayPath = computeDisplayPath(filepath, pwd, existingDisplayPaths); - updateDisplayPathStmt.run(displayPath, existing.id); - existingDisplayPaths.add(displayPath); - } } else { - // Content changed - deactivate old, insert new - existingDisplayPaths.delete(existing.display_path); - deactivateStmt.run(collectionId, filepath); + // Content changed - insert new content hash and update document + insertContentStmt.run(hash, content, now); + deactivateStmt.run(collectionId, path); updated++; const stat = await Bun.file(filepath).stat(); - const displayPath = computeDisplayPath(filepath, pwd, existingDisplayPaths); - insertStmt.run(collectionId, name, title, hash, filepath, displayPath, content, stat ? new Date(stat.birthtime).toISOString() : now, stat ? new Date(stat.mtime).toISOString() : now); - existingDisplayPaths.add(displayPath); + insertDocStmt.run(collectionId, path, title, hash, + stat ? new Date(stat.birthtime).toISOString() : now, + stat ? new Date(stat.mtime).toISOString() : now); } } else { - // Check if file exists in ANY collection (would violate unique constraint) - const existingAnywhere = findActiveAnyCollectionStmt.get(filepath) as { id: number; collection_id: number; hash: string; title: string; display_path: string } | null; - if (existingAnywhere) { - // File already indexed in another collection - skip it - unchanged++; - } else { - indexed++; - const stat = await Bun.file(filepath).stat(); - const displayPath = computeDisplayPath(filepath, pwd, existingDisplayPaths); - insertStmt.run(collectionId, name, title, hash, filepath, displayPath, content, stat ? new Date(stat.birthtime).toISOString() : now, stat ? 
new Date(stat.mtime).toISOString() : now); - existingDisplayPaths.add(displayPath); - } + // New document - insert content and document + indexed++; + insertContentStmt.run(hash, content, now); + const stat = await Bun.file(filepath).stat(); + insertDocStmt.run(collectionId, path, title, hash, + stat ? new Date(stat.birthtime).toISOString() : now, + stat ? new Date(stat.mtime).toISOString() : now); } processed++; @@ -932,20 +1247,30 @@ async function indexFiles(globPattern: string = DEFAULT_GLOB): Promise { } // Deactivate documents in this collection that no longer exist - const allActive = db.prepare(`SELECT filepath FROM documents WHERE collection_id = ? AND active = 1`).all(collectionId) as { filepath: string }[]; + const allActive = db.prepare(`SELECT path FROM documents WHERE collection_id = ? AND active = 1`).all(collectionId) as { path: string }[]; let removed = 0; for (const row of allActive) { - if (!seenFiles.has(row.filepath)) { - deactivateStmt.run(collectionId, row.filepath); + if (!seenPaths.has(row.path)) { + deactivateStmt.run(collectionId, row.path); removed++; } } + // Clean up orphaned content hashes (content not referenced by any document) + const cleanupResult = db.prepare(` + DELETE FROM content + WHERE hash NOT IN (SELECT DISTINCT hash FROM documents WHERE active = 1) + `).run(); + const orphanedContent = cleanupResult.changes; + // Check if vector index needs updating const needsEmbedding = getHashesNeedingEmbedding(db); progress.clear(); console.log(`\nIndexed: ${indexed} new, ${updated} updated, ${unchanged} unchanged, ${removed} removed`); + if (orphanedContent > 0) { + console.log(`Cleaned up ${orphanedContent} orphaned content hash(es)`); + } if (needsEmbedding > 0) { console.log(`\nRun 'qmd embed' to update embeddings (${needsEmbedding} unique hashes need vectors)`); @@ -1154,40 +1479,10 @@ function getCollectionIdByName(db: Database, name: string): number | null { return result?.id || null; } -function searchFTS(db: Database, query: 
string, limit: number = 20, collectionId?: number): SearchResult[] { - const ftsQuery = buildFTS5Query(query); - if (!ftsQuery) return []; +// searchFTS and searchVec are now imported from store.ts with updated schema - // BM25 weights: name=10, body=1 (title matches ranked higher) - let sql = ` - SELECT d.filepath, d.display_path, d.title, d.body, bm25(documents_fts, 10.0, 1.0) as score - FROM documents_fts f - JOIN documents d ON d.id = f.rowid - WHERE documents_fts MATCH ? AND d.active = 1 - `; - const params: (string | number)[] = [ftsQuery]; - - if (collectionId !== undefined) { - sql += ` AND d.collection_id = ?`; - params.push(collectionId); - } - - sql += ` ORDER BY score LIMIT ?`; - params.push(limit); - - const stmt = db.prepare(sql); - const results = stmt.all(...params) as { filepath: string; display_path: string; title: string; body: string; score: number }[]; - return results.map(r => ({ - file: r.filepath, - displayPath: r.display_path, - title: r.title, - body: r.body, - score: normalizeBM25(r.score), - source: "fts" as const, - })); -} - -async function searchVec(db: Database, query: string, model: string, limit: number = 20, collectionId?: number): Promise { +// Removed duplicate searchFTS and searchVec functions - using store.ts versions instead +async function REMOVED_searchVec(db: Database, query: string, model: string, limit: number = 20, collectionId?: number): Promise { const tableExists = db.prepare(`SELECT name FROM sqlite_master WHERE type='table' AND name='vectors_vec'`).get(); if (!tableExists) return []; @@ -1788,7 +2083,9 @@ function parseCLI() { function showHelp(): void { console.log("Usage:"); console.log(" qmd add [--drop] [glob] - Add/update collection from $PWD (default: **/*.md)"); - console.log(" qmd add-context - Add context description for files under path"); + console.log(" qmd context add [path] \"text\" - Add context for path (defaults to current dir)"); + console.log(" qmd context list - List all contexts"); + 
console.log(" qmd context rm - Remove context"); console.log(" qmd get [:line] [-l N] [--from N] - Get document (optionally from line, max N lines)"); console.log(" qmd multi-get [-l N] [--max-bytes N] - Get multiple docs by glob or comma-separated list"); console.log(" qmd status - Show index status and collections"); @@ -1851,24 +2148,96 @@ switch (cli.command) { break; } - case "add-context": { - // qmd add-context OR qmd add-context (uses .) - if (cli.args.length === 0) { - console.error("Usage: qmd add-context "); - console.error(" qmd add-context . \"Description of files in current directory\""); + case "context": { + const subcommand = cli.args[0]; + if (!subcommand) { + console.error("Usage: qmd context "); + console.error(""); + console.error("Commands:"); + console.error(" qmd context add [path] \"text\" - Add context (defaults to current dir)"); + console.error(" qmd context add / \"text\" - Add global context to all collections"); + console.error(" qmd context list - List all contexts"); + console.error(" qmd context rm - Remove context"); process.exit(1); } - let pathArg: string; + + switch (subcommand) { + case "add": { + if (cli.args.length < 2) { + console.error("Usage: qmd context add [path] \"text\""); + console.error("Examples:"); + console.error(" qmd context add \"Context for current directory\""); + console.error(" qmd context add . 
\"Context for current directory\""); + console.error(" qmd context add /subfolder \"Context for subfolder\""); + console.error(" qmd context add / \"Global context for all collections\""); + console.error(" qmd context add qmd://journals/2024 \"Context for 2024 journals\""); + process.exit(1); + } + + let pathArg: string | undefined; + let contextText: string; + + // Check if first arg looks like a path or if it's the context text + const firstArg = cli.args[1]; + const secondArg = cli.args[2]; + + if (secondArg) { + // Two args: path + context + pathArg = firstArg; + contextText = cli.args.slice(2).join(" "); + } else { + // One arg: context only (use current directory) + pathArg = undefined; + contextText = firstArg; + } + + await contextAdd(pathArg, contextText); + break; + } + + case "list": { + contextList(); + break; + } + + case "rm": + case "remove": { + if (cli.args.length < 2) { + console.error("Usage: qmd context rm "); + console.error("Examples:"); + console.error(" qmd context rm /"); + console.error(" qmd context rm qmd://journals/2024"); + process.exit(1); + } + contextRemove(cli.args[1]); + break; + } + + default: + console.error(`Unknown subcommand: ${subcommand}`); + console.error("Available: add, list, rm"); + process.exit(1); + } + break; + } + + // Legacy alias for backwards compatibility + case "add-context": { + console.error(`${c.yellow}Note: 'qmd add-context' is deprecated. 
Use 'qmd context add' instead.${c.reset}`); + if (cli.args.length === 0) { + console.error("Usage: qmd context add [path] \"text\""); + process.exit(1); + } + let pathArg: string | undefined; let contextText: string; if (cli.args.length === 1) { - // Single arg = context for current directory - pathArg = "."; + pathArg = undefined; contextText = cli.args[0]; } else { pathArg = cli.args[0]; contextText = cli.args.slice(1).join(" "); } - await addContext(pathArg, contextText); + await contextAdd(pathArg, contextText); break; } diff --git a/store.ts b/store.ts index 5c67022..ab275ad 100644 --- a/store.ts +++ b/store.ts @@ -90,6 +90,72 @@ export function getRealPath(path: string): string { return resolve(path); } +// ============================================================================= +// Virtual Path Utilities (qmd://) +// ============================================================================= + +export type VirtualPath = { + collectionName: string; + path: string; // relative path within collection +}; + +/** + * Parse a virtual path like "qmd://collection-name/path/to/file.md" + * into its components. + */ +export function parseVirtualPath(virtualPath: string): VirtualPath | null { + const match = virtualPath.match(/^qmd:\/\/([^\/]+)\/(.+)$/); + if (!match) return null; + return { + collectionName: match[1], + path: match[2], + }; +} + +/** + * Build a virtual path from collection name and relative path. + */ +export function buildVirtualPath(collectionName: string, path: string): string { + return `qmd://${collectionName}/${path}`; +} + +/** + * Check if a path is a virtual path (starts with qmd://). + */ +export function isVirtualPath(path: string): boolean { + return path.startsWith('qmd://'); +} + +/** + * Resolve a virtual path to absolute filesystem path. 
+ */ +export function resolveVirtualPath(db: Database, virtualPath: string): string | null { + const parsed = parseVirtualPath(virtualPath); + if (!parsed) return null; + + const coll = getCollectionByName(db, parsed.collectionName); + if (!coll) return null; + + return resolve(coll.pwd, parsed.path); +} + +/** + * Convert an absolute filesystem path to a virtual path. + * Returns null if the file is not in any indexed collection. + */ +export function toVirtualPath(db: Database, absolutePath: string): string | null { + const doc = db.prepare(` + SELECT c.name, d.path + FROM documents d + JOIN collections c ON c.id = d.collection_id + WHERE c.pwd || '/' || d.path = ? AND d.active = 1 + LIMIT 1 + `).get(absolutePath) as { name: string; path: string } | null; + + if (!doc) return null; + return buildVirtualPath(doc.name, doc.path); +} + // ============================================================================= // Database initialization // ============================================================================= @@ -107,29 +173,74 @@ if (process.platform === "darwin") { function initializeDatabase(db: Database): void { sqliteVec.load(db); db.exec("PRAGMA journal_mode = WAL"); + db.exec("PRAGMA foreign_keys = ON"); - // Collections table + // Check if we need to migrate from old schema + const tables = db.prepare(`SELECT name FROM sqlite_master WHERE type='table'`).all() as { name: string }[]; + const tableNames = tables.map(t => t.name); + const needsMigration = tableNames.includes('documents') && !tableNames.includes('content'); + + if (needsMigration) { + migrateToContentAddressable(db); + return; // Migration will call initializeDatabase again + } + + // Content-addressable storage - the source of truth for document content + db.exec(` + CREATE TABLE IF NOT EXISTS content ( + hash TEXT PRIMARY KEY, + doc TEXT NOT NULL, + created_at TEXT NOT NULL + ) + `); + + // Collections table with name field db.exec(` CREATE TABLE IF NOT EXISTS collections ( id 
INTEGER PRIMARY KEY AUTOINCREMENT, + name TEXT NOT NULL UNIQUE, pwd TEXT NOT NULL, glob_pattern TEXT NOT NULL, created_at TEXT NOT NULL, - context TEXT, + updated_at TEXT NOT NULL, UNIQUE(pwd, glob_pattern) ) `); - // Path-based context + // Documents table - file system layer mapping virtual paths to content hashes + db.exec(` + CREATE TABLE IF NOT EXISTS documents ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + collection_id INTEGER NOT NULL, + path TEXT NOT NULL, + title TEXT NOT NULL, + hash TEXT NOT NULL, + created_at TEXT NOT NULL, + modified_at TEXT NOT NULL, + active INTEGER NOT NULL DEFAULT 1, + FOREIGN KEY (collection_id) REFERENCES collections(id) ON DELETE CASCADE, + FOREIGN KEY (hash) REFERENCES content(hash) ON DELETE CASCADE, + UNIQUE(collection_id, path) + ) + `); + + db.exec(`CREATE INDEX IF NOT EXISTS idx_documents_collection ON documents(collection_id, active)`); + db.exec(`CREATE INDEX IF NOT EXISTS idx_documents_hash ON documents(hash)`); + db.exec(`CREATE INDEX IF NOT EXISTS idx_documents_path ON documents(path, active)`); + + // Path-based context (collection-scoped, hierarchical) db.exec(` CREATE TABLE IF NOT EXISTS path_contexts ( id INTEGER PRIMARY KEY AUTOINCREMENT, - path_prefix TEXT NOT NULL UNIQUE, + collection_id INTEGER NOT NULL, + path_prefix TEXT NOT NULL, context TEXT NOT NULL, - created_at TEXT NOT NULL + created_at TEXT NOT NULL, + FOREIGN KEY (collection_id) REFERENCES collections(id) ON DELETE CASCADE, + UNIQUE(collection_id, path_prefix) ) `); - db.exec(`CREATE INDEX IF NOT EXISTS idx_path_contexts_prefix ON path_contexts(path_prefix)`); + db.exec(`CREATE INDEX IF NOT EXISTS idx_path_contexts_collection ON path_contexts(collection_id, path_prefix)`); // Cache table for Ollama API calls db.exec(` @@ -140,33 +251,6 @@ function initializeDatabase(db: Database): void { ) `); - // Documents table - db.exec(` - CREATE TABLE IF NOT EXISTS documents ( - id INTEGER PRIMARY KEY AUTOINCREMENT, - collection_id INTEGER NOT NULL, - name TEXT 
NOT NULL, - title TEXT NOT NULL, - hash TEXT NOT NULL, - filepath TEXT NOT NULL, - display_path TEXT NOT NULL DEFAULT '', - body TEXT NOT NULL, - created_at TEXT NOT NULL, - modified_at TEXT NOT NULL, - active INTEGER NOT NULL DEFAULT 1, - FOREIGN KEY (collection_id) REFERENCES collections(id) - ) - `); - - // Migration: add display_path column if missing - const docInfo = db.prepare(`PRAGMA table_info(documents)`).all() as { name: string }[]; - const hasDisplayPath = docInfo.some(col => col.name === 'display_path'); - if (!hasDisplayPath) { - db.exec(`ALTER TABLE documents ADD COLUMN display_path TEXT NOT NULL DEFAULT ''`); - } - - db.exec(`CREATE UNIQUE INDEX IF NOT EXISTS idx_documents_display_path ON documents(display_path) WHERE display_path != '' AND active = 1`); - // Content vectors const cvInfo = db.prepare(`PRAGMA table_info(content_vectors)`).all() as { name: string }[]; const hasSeqColumn = cvInfo.some(col => col.name === 'seq'); @@ -185,39 +269,287 @@ function initializeDatabase(db: Database): void { ) `); - // FTS + // FTS - index path and content (joined from content table) db.exec(` CREATE VIRTUAL TABLE IF NOT EXISTS documents_fts USING fts5( - name, body, - content='documents', - content_rowid='id', + path, body, tokenize='porter unicode61' ) `); + // Triggers to keep FTS in sync db.exec(` CREATE TRIGGER IF NOT EXISTS documents_ai AFTER INSERT ON documents BEGIN - INSERT INTO documents_fts(rowid, name, body) VALUES (new.id, new.name, new.body); + INSERT INTO documents_fts(rowid, path, body) + SELECT new.id, new.path, c.doc + FROM content c + WHERE c.hash = new.hash; END `); db.exec(` CREATE TRIGGER IF NOT EXISTS documents_ad AFTER DELETE ON documents BEGIN - INSERT INTO documents_fts(documents_fts, rowid, name, body) VALUES('delete', old.id, old.name, old.body); + DELETE FROM documents_fts WHERE rowid = old.id; END `); db.exec(` CREATE TRIGGER IF NOT EXISTS documents_au AFTER UPDATE ON documents BEGIN - INSERT INTO documents_fts(documents_fts, 
/**
 * Derive a collection name from its working directory: the last non-empty
 * path segment (e.g. "/Users/me/notes/" -> "notes").
 *
 * @param pwd Directory the collection is rooted at
 * @returns The final path segment, or "" when there is none (e.g. "/")
 */
function migrationCollectionName(pwd: string): string {
  const segments = pwd.split('/').filter(segment => segment.length > 0);
  return segments[segments.length - 1] ?? '';
}

/**
 * Express `target` relative to `root`, honouring directory boundaries so that
 * "/a/notes-old/x.md" is NOT treated as living inside "/a/notes".
 *
 * @param target Absolute path to convert
 * @param root Directory the result should be relative to (trailing slashes ignored)
 * @returns The relative path ("" when both name the same directory), or null
 *          when `target` is not inside `root`
 */
function migrationRelativePath(target: string, root: string): string | null {
  const base = root.replace(/\/+$/, '');
  if (target === base || target === root) return '';
  if (!target.startsWith(base + '/')) return null;
  return target.slice(base.length + 1).replace(/^\/+/, '');
}

/**
 * Migrate a legacy database (documents carrying their own body and absolute
 * filepath) to the content-addressable schema: `content` keyed by hash,
 * named `collections`, collection-relative `documents.path`, and
 * collection-scoped `path_contexts`.
 *
 * The whole migration runs inside one transaction; on any failure it is rolled
 * back and the error is rethrown, leaving the legacy tables in place.
 *
 * @param db Database that still has the legacy `documents`, `collections`
 *           and `path_contexts` tables
 * @throws Rethrows whatever error aborted the migration, after ROLLBACK
 */
function migrateToContentAddressable(db: Database): void {
  console.log("Migrating database to content-addressable schema...");

  db.exec("BEGIN TRANSACTION");

  try {
    // Move the legacy tables out of the way and drop the FTS index + triggers
    // that were defined against the old column layout.
    db.exec("ALTER TABLE documents RENAME TO documents_old");
    db.exec("ALTER TABLE collections RENAME TO collections_old");
    db.exec("ALTER TABLE path_contexts RENAME TO path_contexts_old");
    db.exec("DROP TABLE IF EXISTS documents_fts");
    db.exec("DROP TRIGGER IF EXISTS documents_ai");
    db.exec("DROP TRIGGER IF EXISTS documents_ad");
    db.exec("DROP TRIGGER IF EXISTS documents_au");

    // Create new schema
    db.exec(`
      CREATE TABLE content (
        hash TEXT PRIMARY KEY,
        doc TEXT NOT NULL,
        created_at TEXT NOT NULL
      )
    `);

    db.exec(`
      CREATE TABLE collections (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        name TEXT NOT NULL UNIQUE,
        pwd TEXT NOT NULL,
        glob_pattern TEXT NOT NULL,
        created_at TEXT NOT NULL,
        updated_at TEXT NOT NULL,
        UNIQUE(pwd, glob_pattern)
      )
    `);

    db.exec(`
      CREATE TABLE documents (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        collection_id INTEGER NOT NULL,
        path TEXT NOT NULL,
        title TEXT NOT NULL,
        hash TEXT NOT NULL,
        created_at TEXT NOT NULL,
        modified_at TEXT NOT NULL,
        active INTEGER NOT NULL DEFAULT 1,
        FOREIGN KEY (collection_id) REFERENCES collections(id) ON DELETE CASCADE,
        FOREIGN KEY (hash) REFERENCES content(hash) ON DELETE CASCADE,
        UNIQUE(collection_id, path)
      )
    `);

    db.exec(`
      CREATE TABLE path_contexts (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        collection_id INTEGER NOT NULL,
        path_prefix TEXT NOT NULL,
        context TEXT NOT NULL,
        created_at TEXT NOT NULL,
        FOREIGN KEY (collection_id) REFERENCES collections(id) ON DELETE CASCADE,
        UNIQUE(collection_id, path_prefix)
      )
    `);

    // Migrate data: one content row per distinct hash among active documents.
    console.log("Migrating content...");
    db.exec(`
      INSERT INTO content (hash, doc, created_at)
      SELECT hash, body, MIN(created_at) as created_at
      FROM documents_old
      WHERE active = 1
      GROUP BY hash
    `);

    // Migrate collections, naming each after the basename of its pwd.
    // Names are computed here rather than in SQL because INSTR() only finds
    // the FIRST '/', and they are de-duplicated BEFORE insertion because
    // `name` is UNIQUE (a colliding bulk INSERT would abort the migration).
    console.log("Migrating collections...");
    const oldCollections = db.prepare(`
      SELECT id, pwd, glob_pattern, created_at
      FROM collections_old
      ORDER BY id
    `).all() as Array<{ id: number; pwd: string; glob_pattern: string; created_at: string }>;

    const insertCollection = db.prepare(`
      INSERT INTO collections (id, name, pwd, glob_pattern, created_at, updated_at)
      VALUES (?, ?, ?, ?, ?, ?)
    `);

    const usedNames = new Set<string>();
    for (const coll of oldCollections) {
      const baseName = migrationCollectionName(coll.pwd) || 'collection';
      // The first collection keeps the plain name; later ones get "-<id>".
      let name = usedNames.has(baseName) ? `${baseName}-${coll.id}` : baseName;
      for (let suffix = 2; usedNames.has(name); suffix++) {
        name = `${baseName}-${coll.id}-${suffix}`;
      }
      usedNames.add(name);
      insertCollection.run(coll.id, name, coll.pwd, coll.glob_pattern, coll.created_at, coll.created_at);
    }

    // Migrate documents: convert filepath to relative path within collection
    console.log("Migrating documents...");
    const oldDocs = db.prepare(`
      SELECT d.id, d.collection_id, d.filepath, d.title, d.hash, d.created_at, d.modified_at, c.pwd
      FROM documents_old d
      JOIN collections c ON c.id = d.collection_id
      WHERE d.active = 1
    `).all() as Array<{
      id: number;
      collection_id: number;
      filepath: string;
      title: string;
      hash: string;
      created_at: string;
      modified_at: string;
      pwd: string;
    }>;

    const insertDoc = db.prepare(`
      INSERT INTO documents (collection_id, path, title, hash, created_at, modified_at, active)
      VALUES (?, ?, ?, ?, ?, ?, 1)
    `);

    for (const doc of oldDocs) {
      // Files that are (unexpectedly) outside their collection root keep their
      // full path, minus leading slashes, instead of a mangled suffix.
      const path = migrationRelativePath(doc.filepath, doc.pwd) ?? doc.filepath.replace(/^\/+/, '');

      try {
        insertDoc.run(doc.collection_id, path, doc.title, doc.hash, doc.created_at, doc.modified_at);
      } catch (e: unknown) {
        // Best effort: typically a UNIQUE(collection_id, path) clash. Report
        // the actual reason so other failures are not mislabelled.
        const reason = e instanceof Error ? e.message : String(e);
        console.warn(`Skipping document ${path} in collection ${doc.collection_id}: ${reason}`);
      }
    }

    // Migrate path_contexts: attach each legacy (absolute) prefix to every
    // collection whose root contains it.
    console.log("Migrating path contexts...");
    const oldContexts = db.prepare(`SELECT * FROM path_contexts_old`).all() as Array<{
      path_prefix: string;
      context: string;
      created_at: string;
    }>;

    const insertContext = db.prepare(`
      INSERT INTO path_contexts (collection_id, path_prefix, context, created_at)
      VALUES (?, ?, ?, ?)
    `);

    const allCollections = db.prepare(`SELECT id, pwd FROM collections`).all() as Array<{ id: number; pwd: string }>;

    for (const ctx of oldContexts) {
      for (const coll of allCollections) {
        const relPath = migrationRelativePath(ctx.path_prefix, coll.pwd);
        if (relPath === null) continue; // prefix is not inside this collection

        try {
          insertContext.run(coll.id, relPath, ctx.context, ctx.created_at);
        } catch (e: unknown) {
          // Best effort: two legacy prefixes can normalise to the same relative path.
          const reason = e instanceof Error ? e.message : String(e);
          console.warn(`Skipping context "${relPath}" in collection ${coll.id}: ${reason}`);
        }
      }
    }

    // Drop old tables
    db.exec("DROP TABLE documents_old");
    db.exec("DROP TABLE collections_old");
    db.exec("DROP TABLE path_contexts_old");

    // Recreate FTS and triggers. The triggers are created only now, after the
    // bulk document insert, so the index is populated exactly once below.
    db.exec(`
      CREATE VIRTUAL TABLE documents_fts USING fts5(
        path, body,
        tokenize='porter unicode61'
      )
    `);

    db.exec(`
      CREATE TRIGGER documents_ai AFTER INSERT ON documents BEGIN
        INSERT INTO documents_fts(rowid, path, body)
        SELECT new.id, new.path, c.doc
        FROM content c
        WHERE c.hash = new.hash;
      END
    `);

    db.exec(`
      CREATE TRIGGER documents_ad AFTER DELETE ON documents BEGIN
        DELETE FROM documents_fts WHERE rowid = old.id;
      END
    `);

    db.exec(`
      CREATE TRIGGER documents_au AFTER UPDATE ON documents BEGIN
        UPDATE documents_fts
        SET path = new.path,
            body = (SELECT doc FROM content WHERE hash = new.hash)
        WHERE rowid = new.id;
      END
    `);

    // Populate FTS from migrated data
    console.log("Rebuilding full-text search index...");
    db.exec(`
      INSERT INTO documents_fts(rowid, path, body)
      SELECT d.id, d.path, c.doc
      FROM documents d
      JOIN content c ON c.hash = d.hash
      WHERE d.active = 1
    `);

    // Create indexes
    db.exec(`CREATE INDEX idx_documents_collection ON documents(collection_id, active)`);
    db.exec(`CREATE INDEX idx_documents_hash ON documents(hash)`);
    db.exec(`CREATE INDEX idx_documents_path ON documents(path, active)`);
    db.exec(`CREATE INDEX idx_path_contexts_collection ON path_contexts(collection_id, path_prefix)`);

    db.exec("COMMIT");
    console.log("Migration complete!");

  } catch (e) {
    db.exec("ROLLBACK");
    console.error("Migration failed:", e);
    throw e;
  }
}
/**
 * List active documents whose path matches a glob pattern.
 *
 * The pattern is tested against both the virtual path
 * (`qmd://<collection>/<path>`) and the collection-relative path, so either
 * form may be used.
 *
 * @param db Database instance
 * @param pattern Glob pattern
 * @returns One entry per match; `filepath` and `displayPath` both carry the
 *          virtual path, `bodyLength` is the length of the stored content
 */
export function matchFilesByGlob(db: Database, pattern: string): { filepath: string; displayPath: string; bodyLength: number }[] {
  const allFiles = db.prepare(`
    SELECT
      'qmd://' || c.name || '/' || d.path as virtual_path,
      LENGTH(content.doc) as body_length,
      d.path
    FROM documents d
    JOIN collections c ON c.id = d.collection_id
    JOIN content ON content.hash = d.hash
    WHERE d.active = 1
  `).all() as { virtual_path: string; body_length: number; path: string }[];

  const glob = new Glob(pattern);
  return allFiles
    .filter(f => glob.match(f.virtual_path) || glob.match(f.path))
    .map(f => ({
      filepath: f.virtual_path, // Use virtual path as filepath
      displayPath: f.virtual_path,
      bodyLength: f.body_length,
    }));
}

// =============================================================================
// Context
// =============================================================================

/**
 * Get context for a file path using hierarchical inheritance.
 * Contexts are collection-scoped and inherit from parent directories.
 * For example, context at "talks" applies to "talks/2024/keynote.md", and a
 * context with an empty prefix applies to the whole collection. The most
 * specific (longest) matching prefix wins.
 *
 * @param db Database instance
 * @param collectionId Collection ID
 * @param path Relative path within the collection
 * @returns Context string or null if no context is defined
 */
export function getContextForPath(db: Database, collectionId: number, path: string): string | null {
  // path_prefix is used as a LIKE pattern, so its own '%', '_' and '\' must be
  // escaped; otherwise a directory such as "my_notes" would also match
  // "myXnotes/...". LIKE keeps its default (ASCII case-insensitive) comparison.
  const result = db.prepare(`
    SELECT context FROM path_contexts
    WHERE collection_id = ?
      AND (
        ? LIKE replace(replace(replace(path_prefix, '\\', '\\\\'), '%', '\\%'), '_', '\\_') || '/%' ESCAPE '\\'
        OR ? = path_prefix
        OR path_prefix = ''
      )
    ORDER BY LENGTH(path_prefix) DESC
    LIMIT 1
  `).get(collectionId, path, path) as { context: string } | null;
  return result?.context || null;
}

/**
 * Legacy function for backward compatibility - resolves an absolute filepath
 * to its collection and relative path first, then delegates to
 * getContextForPath().
 *
 * @param db Database instance
 * @param filepath Absolute filesystem path of an indexed document
 * @returns Context string, or null if the file is not an active document or
 *          has no context
 */
export function getContextForFile(db: Database, filepath: string): string | null {
  // rtrim() so a collection root stored with a trailing slash still matches.
  const doc = db.prepare(`
    SELECT d.collection_id, d.path
    FROM documents d
    JOIN collections c ON c.id = d.collection_id
    WHERE rtrim(c.pwd, '/') || '/' || d.path = ? AND d.active = 1
    LIMIT 1
  `).get(filepath) as { collection_id: number; path: string } | null;

  if (!doc) return null;
  return getContextForPath(db, doc.collection_id, doc.path);
}

/**
 * Get collection ID by its name (exact match).
 *
 * @param db Database instance
 * @param name Collection name
 * @returns The collection's id, or null if no collection has that name
 */
export function getCollectionIdByName(db: Database, name: string): number | null {
  const result = db.prepare(`
    SELECT id FROM collections
    WHERE name = ?
    LIMIT 1
  `).get(name) as { id: number } | null;
  // `??` rather than `||`: only a missing row maps to null, never a numeric id.
  return result?.id ?? null;
}

/**
 * Get collection by name (exact match).
 *
 * @param db Database instance
 * @param name Collection name
 * @returns The collection row, or null if no collection has that name
 */
export function getCollectionByName(db: Database, name: string): { id: number; name: string; pwd: string; glob_pattern: string } | null {
  const result = db.prepare(`
    SELECT id, name, pwd, glob_pattern FROM collections
    WHERE name = ?
    LIMIT 1
  `).get(name) as { id: number; name: string; pwd: string; glob_pattern: string } | null | undefined;
  // Normalise a driver-level "no row" (null or undefined) to the declared null.
  return result ?? null;
}
`; diff --git a/texts b/texts new file mode 120000 index 0000000..c7dc311 --- /dev/null +++ b/texts @@ -0,0 +1 @@ +/Users/tobi/src/github.com/Shopify/archive/obsidian/archive/Articles/By Tobi/ \ No newline at end of file