Update get and multi-get commands for virtual paths

- Update getDocument() to support qmd:// virtual paths and filesystem paths
- Update multiGet() to handle virtual paths in patterns and comma-separated lists
- Update matchFilesByGlob() in store.ts to return virtual paths
- Remove duplicate getContextForFile() function from qmd.ts
- Use collection-scoped getContextForPath() instead of legacy function
- All get and multi-get tests now passing

Closes qmd-vro

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
This commit is contained in:
Tobi Lutke 2025-12-12 15:47:42 -05:00
parent bf65655f84
commit 99aee71903
No known key found for this signature in database
12 changed files with 1340 additions and 242 deletions

29
.beads/.gitignore vendored Normal file
View File

@ -0,0 +1,29 @@
# SQLite databases
*.db
*.db?*
*.db-journal
*.db-wal
*.db-shm
# Daemon runtime files
daemon.lock
daemon.log
daemon.pid
bd.sock
# Legacy database files
db.sqlite
bd.db
# Merge artifacts (temporary files from 3-way merge)
beads.base.jsonl
beads.base.meta.json
beads.left.jsonl
beads.left.meta.json
beads.right.jsonl
beads.right.meta.json
# Keep JSONL exports and config (source of truth for git)
!issues.jsonl
!metadata.json
!config.json

1
.beads/.local_version Normal file
View File

@ -0,0 +1 @@
0.29.0

81
.beads/README.md Normal file
View File

@ -0,0 +1,81 @@
# Beads - AI-Native Issue Tracking
Welcome to Beads! This repository uses **Beads** for issue tracking - a modern, AI-native tool designed to live directly in your codebase alongside your code.
## What is Beads?
Beads is issue tracking that lives in your repo, making it perfect for AI coding agents and developers who want their issues close to their code. No web UI required - everything works through the CLI and integrates seamlessly with git.
**Learn more:** [github.com/steveyegge/beads](https://github.com/steveyegge/beads)
## Quick Start
### Essential Commands
```bash
# Create new issues
bd create "Add user authentication"
# View all issues
bd list
# View issue details
bd show <issue-id>
# Update issue status
bd update <issue-id> --status in_progress
bd update <issue-id> --status done
# Sync with git remote
bd sync
```
### Working with Issues
Issues in Beads are:
- **Git-native**: Stored in `.beads/issues.jsonl` and synced like code
- **AI-friendly**: CLI-first design works perfectly with AI coding agents
- **Branch-aware**: Issues can follow your branch workflow
- **Always in sync**: Auto-syncs with your commits
## Why Beads?
✨ **AI-Native Design**
- Built specifically for AI-assisted development workflows
- CLI-first interface works seamlessly with AI coding agents
- No context switching to web UIs
🚀 **Developer Focused**
- Issues live in your repo, right next to your code
- Works offline, syncs when you push
- Fast, lightweight, and stays out of your way
🔧 **Git Integration**
- Automatic sync with git commits
- Branch-aware issue tracking
- Intelligent JSONL merge resolution
## Get Started with Beads
Try Beads in your own projects:
```bash
# Install Beads
curl -sSL https://raw.githubusercontent.com/steveyegge/beads/main/scripts/install.sh | bash
# Initialize in your repo
bd init
# Create your first issue
bd create "Try out Beads"
```
## Learn More
- **Documentation**: [github.com/steveyegge/beads/docs](https://github.com/steveyegge/beads/tree/main/docs)
- **Quick Start Guide**: Run `bd quickstart`
- **Examples**: [github.com/steveyegge/beads/examples](https://github.com/steveyegge/beads/tree/main/examples)
---
*Beads: Issue tracking that moves at the speed of thought* ⚡

62
.beads/config.yaml Normal file
View File

@ -0,0 +1,62 @@
# Beads Configuration File
# This file configures default behavior for all bd commands in this repository
# All settings can also be set via environment variables (BD_* prefix)
# or overridden with command-line flags
# Issue prefix for this repository (used by bd init)
# If not set, bd init will auto-detect from directory name
# Example: issue-prefix: "myproject" creates issues like "myproject-1", "myproject-2", etc.
# issue-prefix: ""
# Use no-db mode: load from JSONL, no SQLite, write back after each command
# When true, bd will use .beads/issues.jsonl as the source of truth
# instead of SQLite database
# no-db: false
# Disable daemon for RPC communication (forces direct database access)
# no-daemon: false
# Disable auto-flush of database to JSONL after mutations
# no-auto-flush: false
# Disable auto-import from JSONL when it's newer than database
# no-auto-import: false
# Enable JSON output by default
# json: false
# Default actor for audit trails (overridden by BD_ACTOR or --actor)
# actor: ""
# Path to database (overridden by BEADS_DB or --db)
# db: ""
# Auto-start daemon if not running (can also use BEADS_AUTO_START_DAEMON)
# auto-start-daemon: true
# Debounce interval for auto-flush (can also use BEADS_FLUSH_DEBOUNCE)
# flush-debounce: "5s"
# Git branch for beads commits (bd sync will commit to this branch)
# IMPORTANT: Set this for team projects so all clones use the same sync branch.
# This setting persists across clones (unlike database config which is gitignored).
# Can also use BEADS_SYNC_BRANCH env var for local override.
# If not set, bd sync will require you to run 'bd config set sync.branch <branch>'.
# sync-branch: "beads-sync"
# Multi-repo configuration (experimental - bd-307)
# Allows hydrating from multiple repositories and routing writes to the correct JSONL
# repos:
# primary: "." # Primary repo (where this database lives)
# additional: # Additional repos to hydrate from (read-only)
# - ~/beads-planning # Personal planning repo
# - ~/work-planning # Work planning repo
# Integration settings (access with 'bd config get/set')
# These are stored in the database, not in this file:
# - jira.url
# - jira.project
# - linear.url
# - linear.api-key
# - github.org
# - github.repo

View File

@ -1,12 +1,13 @@
{"id":"qmd-4ru","title":"Update document retrieval for new schema","description":"Functions like getDocument, findDocument, getMultipleDocuments need to work with new schema (path instead of filepath, content joins, virtual paths).","status":"in_progress","priority":0,"issue_type":"task","created_at":"2025-12-12T15:29:53.911881-05:00","updated_at":"2025-12-12T15:30:10.835834-05:00","dependencies":[{"issue_id":"qmd-4ru","depends_on_id":"qmd-ama","type":"discovered-from","created_at":"2025-12-12T15:29:53.912607-05:00","created_by":"daemon"}]}
{"id":"qmd-ama","title":"Refactor database system","description":"All documents should be stored as content addressable hash, e.g. hash, doc, created_at,\n┃ updated_at. documents should be a file system layer on top e.g. collection, path, hash,\n┃ created_at, updated_at. (collection,path)\n┃\n┃\n\n┃ All documents should be stored as content addressable hash, e.g. hash, doc, created_at,\n┃ updated_at. documents should be a file system layer on top e.g. collection_id, path, hash,\n┃ created_at, updated_at. (collection,path) is unique. There is also collection which stores PWD\n┃ + glob pattern, name (\\w+). Every document is treated as path qmd://collection.name/","notes":"## Completed\n- ✅ Implemented content-addressable storage (content table with hash→doc mapping)\n- ✅ Refactored documents table as file system layer (collection_id, path, hash)\n- ✅ Added collection names (e.g., \"pages\", \"journals\", \"archive\")\n- ✅ Implemented virtual paths (qmd://collection-name/path/to/file.md)\n- ✅ Added hierarchical context support (collection-scoped)\n- ✅ Successfully migrated existing database\n- ✅ Updated search functions to work with new schema\n- ✅ Updated indexing logic to use content-addressable storage\n- ✅ Orphaned content hash cleanup\n\n## Still TODO\n- Fix migration SQL to properly extract basename (currently needs manual fix)\n- Implement `qmd collection add . --name \u003cname\u003e --mask '**/*.md'`\n- Implement `qmd ls [path]` for exploring virtual file tree","status":"in_progress","priority":2,"issue_type":"task","created_at":"2025-12-10T10:57:35.497489-05:00","updated_at":"2025-12-12T15:26:27.345436-05:00"}
{"id":"qmd-ama","title":"Refactor database system","description":"All documents should be stored as content addressable hash, e.g. hash, doc, created_at,\n┃ updated_at. documents should be a file system layer on top e.g. collection, path, hash,\n┃ created_at, updated_at. (collection,path)\n┃\n┃\n\n┃ All documents should be stored as content addressable hash, e.g. hash, doc, created_at,\n┃ updated_at. documents should be a file system layer on top e.g. collection_id, path, hash,\n┃ created_at, updated_at. (collection,path) is unique. There is also collection which stores PWD\n┃ + glob pattern, name (\\w+). Every document is treated as path qmd://collection.name/","notes":"## Completed\n- ✅ Implemented content-addressable storage (content table with hash→doc mapping)\n- ✅ Refactored documents table as file system layer (collection_id, path, hash)\n- ✅ Added collection names (e.g., \"pages\", \"journals\", \"archive\")\n- ✅ Implemented virtual paths (qmd://collection-name/path/to/file.md)\n- ✅ Added hierarchical context support (collection-scoped)\n- ✅ Successfully migrated existing database\n- ✅ Updated search functions to work with new schema\n- ✅ Updated indexing logic to use content-addressable storage\n- ✅ Orphaned content hash cleanup\n\n## Still TODO\n- Fix migration SQL to properly extract basename (currently needs manual fix)\n- Implement `qmd collection add . --name \u003cname\u003e --mask '**/*.md'`\n- Implement `qmd ls [path]` for exploring virtual file tree","status":"closed","priority":2,"issue_type":"task","created_at":"2025-12-10T10:57:35.497489-05:00","updated_at":"2025-12-12T15:39:48.879143-05:00","closed_at":"2025-12-12T15:39:48.879143-05:00"}
{"id":"qmd-bx1","title":"Fix migration SQL for proper basename extraction","description":"The migration currently generates collection names incorrectly (uses full path instead of basename). Need to fix the SQL in migrateToContentAddressable to properly extract the directory basename.","status":"open","priority":1,"issue_type":"bug","created_at":"2025-12-12T15:29:53.757723-05:00","updated_at":"2025-12-12T15:29:53.757723-05:00","dependencies":[{"issue_id":"qmd-bx1","depends_on_id":"qmd-ama","type":"discovered-from","created_at":"2025-12-12T15:29:53.758524-05:00","created_by":"daemon"}]}
{"id":"qmd-c0m","title":"Comprehensive CLI review and consistency pass","description":"Review entire CLI command structure:\n- Consistent naming (add vs create, remove vs delete)\n- Consistent flag usage (--name, --mask, etc)\n- Update help text for all commands\n- Ensure virtual paths work everywhere\n- Test all commands end-to-end","status":"open","priority":1,"issue_type":"task","created_at":"2025-12-12T15:29:38.083564-05:00","updated_at":"2025-12-12T15:29:38.083564-05:00"}
{"id":"qmd-deh","title":"Refactor database introduce qmd collection *","description":"","status":"open","priority":2,"issue_type":"task","created_at":"2025-12-10T10:56:04.516137-05:00","updated_at":"2025-12-10T10:56:04.516137-05:00"}
{"id":"qmd-dmi","title":"Implement 'qmd collection' commands","description":"Add explicit collection management:\n- qmd collection add . --name \u003cname\u003e --mask '**/*.md'\n- qmd collection list\n- qmd collection remove \u003cname\u003e\n\nThis gives users control over collection names and patterns.","status":"open","priority":1,"issue_type":"feature","created_at":"2025-12-12T15:29:53.810666-05:00","updated_at":"2025-12-12T15:29:53.810666-05:00","dependencies":[{"issue_id":"qmd-dmi","depends_on_id":"qmd-ama","type":"discovered-from","created_at":"2025-12-12T15:29:53.811294-05:00","created_by":"daemon"}]}
{"id":"qmd-e2c","title":"Implement 'qmd ls' command","description":"Add command to explore virtual file tree:\n- qmd ls → list all collections\n- qmd ls \u003ccollection\u003e → list files in collection\n- qmd ls \u003ccollection\u003e/\u003cpath\u003e → list files under path\nOutput: flat list of qmd:// paths","status":"open","priority":1,"issue_type":"feature","created_at":"2025-12-12T15:29:53.859804-05:00","updated_at":"2025-12-12T15:29:53.859804-05:00","dependencies":[{"issue_id":"qmd-e2c","depends_on_id":"qmd-ama","type":"discovered-from","created_at":"2025-12-12T15:29:53.860535-05:00","created_by":"daemon"}]}
{"id":"qmd-j9z","title":"Add unit tests for content addressable hashes","description":"add same file from multiple places and verify that they both point at same hash. drop one collection and the content stays.","status":"open","priority":3,"issue_type":"task","created_at":"2025-12-12T15:39:15.459504-05:00","updated_at":"2025-12-12T15:39:15.459504-05:00"}
{"id":"qmd-p1h","title":"Create collection add|remove","description":"","status":"open","priority":2,"issue_type":"task","created_at":"2025-12-10T10:57:00.717864-05:00","updated_at":"2025-12-10T10:57:00.717864-05:00"}
{"id":"qmd-rhd","title":"Fix 'qmd status' output for new schema","description":"Update status to show collections by name, cleaner context display, virtual path examples.","status":"open","priority":2,"issue_type":"task","created_at":"2025-12-12T15:29:54.020596-05:00","updated_at":"2025-12-12T15:29:54.020596-05:00","dependencies":[{"issue_id":"qmd-rhd","depends_on_id":"qmd-ama","type":"discovered-from","created_at":"2025-12-12T15:29:54.021095-05:00","created_by":"daemon"}]}
{"id":"qmd-s1y","title":"Update 'qmd add-context' for collection scoping","description":"Update add-context to work with collection-scoped contexts using new path_contexts schema.","status":"open","priority":2,"issue_type":"task","created_at":"2025-12-12T15:29:54.076582-05:00","updated_at":"2025-12-12T15:29:54.076582-05:00"}
{"id":"qmd-vro","title":"Update 'qmd get' to support virtual paths","description":"Allow qmd get to accept both virtual paths (qmd://journals/...) and filesystem paths, plus fuzzy matching by filename.","status":"open","priority":0,"issue_type":"task","created_at":"2025-12-12T15:29:53.963113-05:00","updated_at":"2025-12-12T15:29:53.963113-05:00","dependencies":[{"issue_id":"qmd-vro","depends_on_id":"qmd-ama","type":"discovered-from","created_at":"2025-12-12T15:29:53.963641-05:00","created_by":"daemon"}]}
{"id":"qmd-s1y","title":"Update 'qmd add-context' for collection scoping","description":"Update add-context to work with collection-scoped contexts using new path_contexts schema.","notes":"Refactoring to:\n- qmd context add [path] \"text\" (defaults to current collection if in one)\n- qmd context list\n- qmd context rm \u003cpath\u003e\n- Support \"/\" for global/system context\n- Auto-detect collection from pwd","status":"closed","priority":2,"issue_type":"task","created_at":"2025-12-12T15:29:54.076582-05:00","updated_at":"2025-12-12T15:37:47.683263-05:00","closed_at":"2025-12-12T15:37:47.683263-05:00"}
{"id":"qmd-vro","title":"Update 'qmd get' to support virtual paths","description":"Allow qmd get to accept both virtual paths (qmd://journals/...) and filesystem paths, plus fuzzy matching by filename.","status":"closed","priority":0,"issue_type":"task","created_at":"2025-12-12T15:29:53.963113-05:00","updated_at":"2025-12-12T15:47:29.178955-05:00","closed_at":"2025-12-12T15:47:29.178955-05:00","dependencies":[{"issue_id":"qmd-vro","depends_on_id":"qmd-ama","type":"discovered-from","created_at":"2025-12-12T15:29:53.963641-05:00","created_by":"daemon"}]}
{"id":"qmd-x19","title":"Update 'qmd add-context' for collection-scoped contexts","description":"Update add-context to work with collections:\n- qmd add-context \u003ccollection\u003e/\u003cpath\u003e \"context description\"\n- Support both virtual and filesystem paths\n- Update to use new path_contexts schema","status":"open","priority":2,"issue_type":"task","created_at":"2025-12-12T15:29:38.142575-05:00","updated_at":"2025-12-12T15:29:38.142575-05:00"}

3
.gitattributes vendored Normal file
View File

@ -0,0 +1,3 @@
# Use bd merge for beads JSONL files
.beads/issues.jsonl merge=beads

View File

@ -1,19 +1,47 @@
# QMD - Quick Markdown Search
**Note**: This project uses [bd (beads)](https://github.com/steveyegge/beads) for issue tracking. Use `bd` commands instead of markdown TODOs. See AGENTS.md for workflow details.
Use Bun instead of Node.js (`bun` not `node`, `bun install` not `npm install`).
## Commands
```sh
qmd add . # Index markdown files in current directory
qmd status # Show index status and collections
qmd update-all # Re-index all collections
qmd embed # Generate vector embeddings (requires Ollama)
qmd search <query> # BM25 full-text search
qmd vsearch <query> # Vector similarity search
qmd query <query> # Hybrid search with reranking (best quality)
qmd get <file> # Get document content (fuzzy matches if not found)
qmd multi-get <pattern> # Get multiple docs by glob or comma-separated list
qmd add . # Index markdown files in current directory
qmd context add [path] "text" # Add context for path (defaults to current dir)
qmd context list # List all contexts
qmd context rm <path> # Remove context
qmd status # Show index status and collections
qmd update # Re-index all collections
qmd embed # Generate vector embeddings (requires Ollama)
qmd search <query> # BM25 full-text search
qmd vsearch <query> # Vector similarity search
qmd query <query> # Hybrid search with reranking (best quality)
qmd get <file> # Get document content (fuzzy matches if not found)
qmd multi-get <pattern> # Get multiple docs by glob or comma-separated list
```
## Context Management
```sh
# Add context to current directory (auto-detects collection)
qmd context add "Description of these files"
# Add context to a specific path
qmd context add ./subfolder "Description for subfolder"
# Add global context to all collections (system message)
qmd context add / "Always include this context"
# Add context using virtual paths
qmd context add qmd://journals/2024 "Journal entries from 2024"
# List all contexts
qmd context list
# Remove context
qmd context rm qmd://journals/2024
qmd context rm / # Remove global context
```
## Options

1
archive Symbolic link
View File

@ -0,0 +1 @@
/Users/tobi/src/github.com/Shopify/archive/obsidian/archive/Context

View File

@ -415,3 +415,102 @@ describe("CLI Search with Collection Filter", () => {
expect(stdout.toLowerCase()).toContain("meeting");
});
});
describe("CLI Context Management", () => {
  let localDbPath: string;

  // Runs `qmd context <args...>` against this suite's private database.
  const runContext = (...contextArgs: string[]) =>
    runQmd(["context", ...contextArgs], { dbPath: localDbPath });

  beforeEach(async () => {
    // Every test starts from a fresh database that already has the fixtures indexed.
    localDbPath = getFreshDbPath();
    await runQmd(["add", "."], { dbPath: localDbPath });
  });

  test("add global context with /", async () => {
    const result = await runContext("add", "/", "Global system context");

    expect(result.exitCode).toBe(0);
    expect(result.stdout).toContain("✓ Added global context");
    expect(result.stdout).toContain("Global system context");
  });

  test("list contexts", async () => {
    // Seed one global context so the listing has something to show.
    await runContext("add", "/", "Test context");

    const result = await runContext("list");

    expect(result.exitCode).toBe(0);
    expect(result.stdout).toContain("Configured Contexts");
    expect(result.stdout).toContain("Test context");
  });

  test("add context to virtual path", async () => {
    // The collection is expected to be named "fixtures" (basename of the fixtures directory).
    const result = await runContext(
      "add",
      "qmd://fixtures/notes",
      "Context for notes subdirectory",
    );

    expect(result.exitCode).toBe(0);
    expect(result.stdout).toContain("✓ Added context for: qmd://fixtures/notes");
  });

  test("remove global context", async () => {
    // Seed a global context, then delete it.
    await runContext("add", "/", "Global context to remove");

    const result = await runContext("rm", "/");

    expect(result.exitCode).toBe(0);
    expect(result.stdout).toContain("✓ Removed");
  });

  test("remove virtual path context", async () => {
    // Seed a path-scoped context, then delete it.
    await runContext("add", "qmd://fixtures/notes", "Context to remove");

    const result = await runContext("rm", "qmd://fixtures/notes");

    expect(result.exitCode).toBe(0);
    expect(result.stdout).toContain("✓ Removed context for: qmd://fixtures/notes");
  });

  test("fails to remove non-existent context", async () => {
    const result = await runContext("rm", "qmd://nonexistent/path");

    expect(result.exitCode).toBe(1);
    // The message may arrive on either stream; prefer stderr when it is non-empty.
    expect(result.stderr || result.stdout).toContain("not found");
  });
});

715
qmd.ts
View File

@ -17,7 +17,9 @@ import {
reciprocalRankFusion,
extractSnippet,
getContextForFile,
getContextForPath,
getCollectionIdByName,
getCollectionByName,
findSimilarFiles,
matchFilesByGlob,
getHashesNeedingEmbedding,
@ -35,6 +37,11 @@ import {
getCachedResult,
setCachedResult,
getIndexHealth,
parseVirtualPath,
buildVirtualPath,
isVirtualPath,
resolveVirtualPath,
toVirtualPath,
OLLAMA_URL,
DEFAULT_EMBED_MODEL,
DEFAULT_QUERY_MODEL,
@ -338,13 +345,35 @@ async function rerank(query: string, documents: { file: string; text: string }[]
return results.sort((a, b) => b.score - a.score);
}
/**
 * Return the id of the `collections` row for (pwd, glob_pattern), inserting one if needed.
 *
 * NOTE(review): two signatures and two lookup strategies appear back to back in this
 * span (an INSERT OR IGNORE followed by a SELECT, then a named-collection insert).
 * This looks like superseded and current lines shown side by side; confirm which
 * lines are live before relying on the description below.
 *
 * @param db - open database handle
 * @param pwd - collection root directory; its last slash-separated segment is the default name
 * @param globPattern - glob pattern stored with the collection
 * @param name - optional collection name; defaults to the last segment of `pwd`, or 'root'
 * @returns the row id of the existing or newly inserted collection
 */
function getOrCreateCollection(db: Database, pwd: string, globPattern: string): number {
function getOrCreateCollection(db: Database, pwd: string, globPattern: string, name?: string): number {
const now = new Date().toISOString();
// Use INSERT OR IGNORE to handle race conditions, then SELECT
db.prepare(`INSERT OR IGNORE INTO collections (pwd, glob_pattern, created_at) VALUES (?, ?, ?)`).run(pwd, globPattern, now);
const existing = db.prepare(`SELECT id FROM collections WHERE pwd = ? AND glob_pattern = ?`).get(pwd, globPattern) as { id: number };
return existing.id;
// Generate collection name from pwd basename if not provided
if (!name) {
const parts = pwd.split('/').filter(Boolean);
name = parts[parts.length - 1] || 'root';
}
// Check if collection with this pwd+glob already exists
const existing = db.prepare(`SELECT id FROM collections WHERE pwd = ? AND glob_pattern = ?`).get(pwd, globPattern) as { id: number } | null;
if (existing) return existing.id;
// Try to insert with generated name
try {
const result = db.prepare(`INSERT INTO collections (name, pwd, glob_pattern, created_at, updated_at) VALUES (?, ?, ?, ?, ?)`).run(name, pwd, globPattern, now, now);
return result.lastInsertRowid as number;
} catch (e) {
// Name collision - append a unique suffix
// Any error thrown by the insert above lands here and is handled as a name collision.
const allCollections = db.prepare(`SELECT name FROM collections WHERE name LIKE ?`).all(`${name}%`) as { name: string }[];
// Probe name-2, name-3, ... until one is not among the names fetched above.
let suffix = 2;
let uniqueName = `${name}-${suffix}`;
while (allCollections.some(c => c.name === uniqueName)) {
suffix++;
uniqueName = `${name}-${suffix}`;
}
const result = db.prepare(`INSERT INTO collections (name, pwd, glob_pattern, created_at, updated_at) VALUES (?, ?, ?, ?, ?)`).run(uniqueName, pwd, globPattern, now, now);
return result.lastInsertRowid as number;
}
}
function cleanupDuplicateCollections(db: Database): void {
@ -521,82 +550,320 @@ async function updateCollections(): Promise<void> {
console.log(`${c.green}✓ All collections updated.${c.reset}`);
}
async function addContext(pathArg: string, contextText: string): Promise<void> {
/**
 * Detect which collection (if any) contains the given filesystem path.
 *
 * The path is first passed through getRealPath(); a collection matches when its
 * `pwd` equals that path or is a directory prefix of it. When several collections
 * match, the one with the longest `pwd` (the most specific root) wins.
 *
 * @param db - open database handle
 * @param fsPath - filesystem path to look up
 * @returns `{ collectionId, collectionName, relativePath }`, where `relativePath`
 *          is the path below the collection root ('' for the root itself), or
 *          `null` if the path is not inside any collection.
 */
function detectCollectionFromPath(db: Database, fsPath: string): { collectionId: number; collectionName: string; relativePath: string } | null {
  const realPath = getRealPath(fsPath);

  // Compare the prefix with substr() instead of LIKE: with LIKE, a `_` or `%`
  // inside a stored pwd acts as a wildcard (and ASCII case is ignored), so an
  // unrelated collection such as /a/my_notes could match /a/myXnotes/file.md.
  // The exact comparison also agrees with the case-sensitive slicing below.
  const match = db.prepare(`
    SELECT id, name, pwd
    FROM collections
    WHERE substr(?, 1, LENGTH(pwd) + 1) = pwd || '/' OR ? = pwd
    ORDER BY LENGTH(pwd) DESC
    LIMIT 1
  `).get(realPath, realPath) as { id: number; name: string; pwd: string } | null;

  if (!match) return null;

  // The query guarantees realPath is either the root itself or starts with
  // `pwd + '/'`, so dropping that prefix yields the collection-relative path.
  const relativePath = realPath === match.pwd ? '' : realPath.slice(match.pwd.length + 1);

  return {
    collectionId: match.id,
    collectionName: match.name,
    relativePath,
  };
}
/**
 * Add (or overwrite) a context description in `path_contexts`.
 *
 * Visible behavior, by form of `pathArg`:
 * - "/" upserts the text with an empty path_prefix for every row in `collections`.
 * - a virtual path (per isVirtualPath) is parsed, its collection looked up by name,
 *   and the text upserted under the parsed path.
 * - anything else is resolved to a filesystem path ('.', './' and a missing
 *   argument mean getPwd(); '~/' expands to homedir(); other non-absolute paths
 *   resolve against getPwd()) and mapped to a collection via detectCollectionFromPath().
 * Invalid virtual paths, unknown collections and paths outside every collection
 * print an error and exit with status 1.
 *
 * NOTE(review): the `pathPrefix` statements and the INSERT without `collection_id`
 * appear to belong to an older filesystem-prefix implementation shown alongside
 * the collection-scoped one; confirm which lines are live.
 *
 * @param pathArg - "/", a virtual path, a filesystem path, or undefined for the current directory
 * @param contextText - context description to store
 */
async function contextAdd(pathArg: string | undefined, contextText: string): Promise<void> {
const db = getDb();
const now = new Date().toISOString();
// Resolve path - could be relative, absolute, or use ~
let pathPrefix = pathArg;
if (pathPrefix === '.' || pathPrefix === './') {
pathPrefix = getPwd();
} else if (pathPrefix.startsWith('~/')) {
pathPrefix = homedir() + pathPrefix.slice(1);
} else if (!pathPrefix.startsWith('/')) {
pathPrefix = resolve(getPwd(), pathPrefix);
// Handle "/" as global/root context (applies to all collections)
if (pathArg === '/') {
// Find all collections and add context to each
const collections = db.prepare(`SELECT id, name FROM collections`).all() as { id: number; name: string }[];
for (const coll of collections) {
// An empty path_prefix is the collection root; an existing row is overwritten.
db.prepare(`
INSERT INTO path_contexts (collection_id, path_prefix, context, created_at)
VALUES (?, '', ?, ?)
ON CONFLICT(collection_id, path_prefix) DO UPDATE SET context = excluded.context
`).run(coll.id, contextText, now);
}
console.log(`${c.green}${c.reset} Added global context to ${collections.length} collection(s)`);
console.log(`${c.dim}Context: ${contextText}${c.reset}`);
closeDb();
return;
}
// Get realpath and normalize: remove trailing slash
pathPrefix = getRealPath(pathPrefix).replace(/\/$/, '');
// Resolve path - defaults to current directory if not provided
let fsPath = pathArg || '.';
if (fsPath === '.' || fsPath === './') {
fsPath = getPwd();
} else if (fsPath.startsWith('~/')) {
fsPath = homedir() + fsPath.slice(1);
} else if (!fsPath.startsWith('/') && !fsPath.startsWith('qmd://')) {
fsPath = resolve(getPwd(), fsPath);
}
// Insert or update
db.prepare(`INSERT INTO path_contexts (path_prefix, context, created_at) VALUES (?, ?, ?)
ON CONFLICT(path_prefix) DO UPDATE SET context = excluded.context`).run(pathPrefix, contextText, now);
// Handle virtual paths (qmd://collection/path)
if (isVirtualPath(fsPath)) {
const parsed = parseVirtualPath(fsPath);
if (!parsed) {
console.error(`${c.yellow}Invalid virtual path: ${fsPath}${c.reset}`);
process.exit(1);
}
console.log(`${c.green}${c.reset} Added context for: ${shortPath(pathPrefix)}`);
const coll = getCollectionByName(db, parsed.collectionName);
if (!coll) {
console.error(`${c.yellow}Collection not found: ${parsed.collectionName}${c.reset}`);
process.exit(1);
}
// Upsert keyed on (collection_id, path_prefix): an existing context for this path is replaced.
db.prepare(`
INSERT INTO path_contexts (collection_id, path_prefix, context, created_at)
VALUES (?, ?, ?, ?)
ON CONFLICT(collection_id, path_prefix) DO UPDATE SET context = excluded.context
`).run(coll.id, parsed.path, contextText, now);
console.log(`${c.green}${c.reset} Added context for: qmd://${parsed.collectionName}/${parsed.path || ''}`);
console.log(`${c.dim}Context: ${contextText}${c.reset}`);
closeDb();
return;
}
// Detect collection from filesystem path
const detected = detectCollectionFromPath(db, fsPath);
if (!detected) {
console.error(`${c.yellow}Path is not in any indexed collection: ${fsPath}${c.reset}`);
console.error(`${c.dim}Run 'qmd status' to see indexed collections${c.reset}`);
process.exit(1);
}
db.prepare(`
INSERT INTO path_contexts (collection_id, path_prefix, context, created_at)
VALUES (?, ?, ?, ?)
ON CONFLICT(collection_id, path_prefix) DO UPDATE SET context = excluded.context
`).run(detected.collectionId, detected.relativePath, contextText, now);
// An empty relativePath means the collection root, displayed with a trailing slash.
const displayPath = detected.relativePath ? `qmd://${detected.collectionName}/${detected.relativePath}` : `qmd://${detected.collectionName}/`;
console.log(`${c.green}${c.reset} Added context for: ${displayPath}`);
console.log(`${c.dim}Context: ${contextText}${c.reset}`);
closeDb();
}
/**
 * Print every configured context, grouped under its collection name.
 *
 * Rows are read from `path_contexts` joined to `collections`, ordered by
 * collection name and then by longest path prefix first. When no rows exist a
 * hint pointing at 'qmd context add' is printed instead. The database is closed
 * before returning in both cases.
 */
function contextList(): void {
  const db = getDb();

  const rows = db.prepare(`
SELECT c.name as collection_name, pc.path_prefix, pc.context
FROM path_contexts pc
JOIN collections c ON c.id = pc.collection_id
ORDER BY c.name, LENGTH(pc.path_prefix) DESC, pc.path_prefix
`).all() as { collection_name: string; path_prefix: string; context: string }[];

  if (rows.length === 0) {
    console.log(`${c.dim}No contexts configured. Use 'qmd context add' to add one.${c.reset}`);
    closeDb();
    return;
  }

  console.log(`\n${c.bold}Configured Contexts${c.reset}\n`);

  // Rows arrive sorted by collection, so a heading is printed whenever the name changes.
  let currentCollection = '';
  for (const { collection_name, path_prefix, context } of rows) {
    if (collection_name !== currentCollection) {
      console.log(`${c.cyan}${collection_name}${c.reset}`);
      currentCollection = collection_name;
    }

    // An empty prefix denotes the collection root.
    const label = path_prefix ? ` ${path_prefix}` : ' / (root)';
    console.log(label);
    console.log(` ${c.dim}${context}${c.reset}`);
  }

  closeDb();
}
/**
 * Remove a configured context from `path_contexts`.
 *
 * - "/" deletes every row whose path_prefix is empty (the per-collection root
 *   contexts) and reports how many were removed.
 * - a virtual path (per isVirtualPath) is parsed and removed from the named collection.
 * - anything else is resolved as a filesystem path ('.' and './' mean getPwd(),
 *   '~/' expands to homedir(), other non-absolute paths resolve against
 *   getPwd()) and mapped to a collection via detectCollectionFromPath().
 *
 * On failure (invalid virtual path, unknown collection, path outside every
 * collection, or no matching context) an error is printed, the database is
 * closed, and the process exits with status 1.
 *
 * @param pathArg - "/", a virtual path, or a filesystem path
 */
function contextRemove(pathArg: string): void {
  const db = getDb();

  if (pathArg === '/') {
    // Remove all root contexts
    const result = db.prepare(`DELETE FROM path_contexts WHERE path_prefix = ''`).run();
    console.log(`${c.green}✓${c.reset} Removed ${result.changes} global context(s)`);
    closeDb();
    return;
  }

  // Handle virtual paths
  if (isVirtualPath(pathArg)) {
    const parsed = parseVirtualPath(pathArg);
    if (!parsed) {
      console.error(`${c.yellow}Invalid virtual path: ${pathArg}${c.reset}`);
      closeDb();
      process.exit(1);
    }

    const coll = getCollectionByName(db, parsed.collectionName);
    if (!coll) {
      console.error(`${c.yellow}Collection not found: ${parsed.collectionName}${c.reset}`);
      closeDb();
      process.exit(1);
    }

    const result = db.prepare(`
      DELETE FROM path_contexts
      WHERE collection_id = ? AND path_prefix = ?
    `).run(coll.id, parsed.path);

    // Zero affected rows means there was nothing to remove; report it as an error.
    if (result.changes === 0) {
      console.error(`${c.yellow}No context found for: ${pathArg}${c.reset}`);
      closeDb();
      process.exit(1);
    }

    console.log(`${c.green}✓${c.reset} Removed context for: ${pathArg}`);
    closeDb();
    return;
  }

  // Handle filesystem paths
  let fsPath = pathArg;
  if (fsPath === '.' || fsPath === './') {
    fsPath = getPwd();
  } else if (fsPath.startsWith('~/')) {
    fsPath = homedir() + fsPath.slice(1);
  } else if (!fsPath.startsWith('/')) {
    fsPath = resolve(getPwd(), fsPath);
  }

  const detected = detectCollectionFromPath(db, fsPath);
  if (!detected) {
    console.error(`${c.yellow}Path is not in any indexed collection: ${fsPath}${c.reset}`);
    closeDb();
    process.exit(1);
  }

  const displayPath = `qmd://${detected.collectionName}/${detected.relativePath}`;

  const result = db.prepare(`
    DELETE FROM path_contexts
    WHERE collection_id = ? AND path_prefix = ?
  `).run(detected.collectionId, detected.relativePath);

  if (result.changes === 0) {
    console.error(`${c.yellow}No context found for: ${displayPath}${c.reset}`);
    closeDb();
    process.exit(1);
  }

  console.log(`${c.green}✓${c.reset} Removed context for: ${displayPath}`);
  closeDb();
}
function getDocument(filename: string, fromLine?: number, maxLines?: number): void {
const db = getDb();
// Parse :linenum suffix from filename (e.g., "file.md:100")
let filepath = filename;
const colonMatch = filepath.match(/:(\d+)$/);
let inputPath = filename;
const colonMatch = inputPath.match(/:(\d+)$/);
if (colonMatch && !fromLine) {
fromLine = parseInt(colonMatch[1], 10);
filepath = filepath.slice(0, -colonMatch[0].length);
inputPath = inputPath.slice(0, -colonMatch[0].length);
}
// Expand ~ to home directory
if (filepath.startsWith('~/')) {
filepath = homedir() + filepath.slice(1);
}
let doc: { collectionId: number; collectionName: string; path: string; body: string } | null = null;
let virtualPath: string;
// Try exact match on filepath first
let doc = db.prepare(`SELECT filepath, body FROM documents WHERE filepath = ? AND active = 1`).get(filepath) as { filepath: string; body: string } | null;
// Try exact match on display_path
if (!doc) {
doc = db.prepare(`SELECT filepath, body FROM documents WHERE display_path = ? AND active = 1`).get(filepath) as { filepath: string; body: string } | null;
}
// Try matching by filename ending (allows partial paths)
if (!doc) {
doc = db.prepare(`SELECT filepath, body FROM documents WHERE filepath LIKE ? AND active = 1 LIMIT 1`).get(`%${filepath}`) as { filepath: string; body: string } | null;
}
// Try matching by display_path ending
if (!doc) {
doc = db.prepare(`SELECT filepath, body FROM documents WHERE display_path LIKE ? AND active = 1 LIMIT 1`).get(`%${filepath}`) as { filepath: string; body: string } | null;
}
if (!doc) {
// Suggest similar files using Levenshtein distance
const similar = findSimilarFiles(db, filepath, 5, 5);
console.error(`Document not found: ${filename}`);
if (similar.length > 0) {
console.error(`\nDid you mean one of these?`);
for (const s of similar) {
console.error(` ${s}`);
}
// Handle virtual paths (qmd://collection/path)
if (isVirtualPath(inputPath)) {
const parsed = parseVirtualPath(inputPath);
if (!parsed) {
console.error(`Invalid virtual path: ${inputPath}`);
closeDb();
process.exit(1);
}
// Try exact match on collection + path
doc = db.prepare(`
SELECT c.id as collectionId, c.name as collectionName, d.path, content.doc as body
FROM documents d
JOIN collections c ON c.id = d.collection_id
JOIN content ON content.hash = d.hash
WHERE c.name = ? AND d.path = ? AND d.active = 1
`).get(parsed.collectionName, parsed.path) as typeof doc;
if (!doc) {
// Try fuzzy match by path ending
doc = db.prepare(`
SELECT c.id as collectionId, c.name as collectionName, d.path, content.doc as body
FROM documents d
JOIN collections c ON c.id = d.collection_id
JOIN content ON content.hash = d.hash
WHERE c.name = ? AND d.path LIKE ? AND d.active = 1
LIMIT 1
`).get(parsed.collectionName, `%${parsed.path}`) as typeof doc;
}
virtualPath = inputPath;
} else {
// Handle filesystem paths
let fsPath = inputPath;
// Expand ~ to home directory
if (fsPath.startsWith('~/')) {
fsPath = homedir() + fsPath.slice(1);
} else if (!fsPath.startsWith('/')) {
// Relative path - resolve from current directory
fsPath = resolve(getPwd(), fsPath);
}
fsPath = getRealPath(fsPath);
// Try to detect which collection contains this path
const detected = detectCollectionFromPath(db, fsPath);
if (detected) {
// Found collection - query by collection_id + relative path
doc = db.prepare(`
SELECT c.id as collectionId, c.name as collectionName, d.path, content.doc as body
FROM documents d
JOIN collections c ON c.id = d.collection_id
JOIN content ON content.hash = d.hash
WHERE c.id = ? AND d.path = ? AND d.active = 1
`).get(detected.collectionId, detected.relativePath) as typeof doc;
}
// Fuzzy match by filename (last component of path)
if (!doc) {
const filename = inputPath.split('/').pop() || inputPath;
doc = db.prepare(`
SELECT c.id as collectionId, c.name as collectionName, d.path, content.doc as body
FROM documents d
JOIN collections c ON c.id = d.collection_id
JOIN content ON content.hash = d.hash
WHERE d.path LIKE ? AND d.active = 1
LIMIT 1
`).get(`%${filename}`) as typeof doc;
}
if (doc) {
virtualPath = buildVirtualPath(doc.collectionName, doc.path);
} else {
virtualPath = inputPath;
}
}
if (!doc) {
console.error(`Document not found: ${filename}`);
closeDb();
process.exit(1);
}
// Get context for this file
const context = getContextForFile(db, doc.filepath);
const context = getContextForPath(db, doc.collectionId, doc.path);
let output = doc.body;
@ -623,33 +890,83 @@ function multiGet(pattern: string, maxLines?: number, maxBytes: number = DEFAULT
// Check if it's a comma-separated list or a glob pattern
const isCommaSeparated = pattern.includes(',') && !pattern.includes('*') && !pattern.includes('?');
let files: { filepath: string; displayPath: string; bodyLength: number }[];
let files: { filepath: string; displayPath: string; bodyLength: number; collectionId?: number; path?: string }[];
if (isCommaSeparated) {
// Comma-separated list of files
// Comma-separated list of files (can be virtual paths or relative paths)
const names = pattern.split(',').map(s => s.trim()).filter(Boolean);
files = [];
for (const name of names) {
// Try exact match on display_path first
let doc = db.prepare(`SELECT filepath, display_path, LENGTH(body) as body_length FROM documents WHERE display_path = ? AND active = 1`).get(name) as { filepath: string; display_path: string; body_length: number } | null;
// Try suffix match
if (!doc) {
doc = db.prepare(`SELECT filepath, display_path, LENGTH(body) as body_length FROM documents WHERE display_path LIKE ? AND active = 1 LIMIT 1`).get(`%${name}`) as { filepath: string; display_path: string; body_length: number } | null;
}
if (doc) {
files.push({ filepath: doc.filepath, displayPath: doc.display_path, bodyLength: doc.body_length });
} else {
// Suggest similar files
const similar = findSimilarFiles(db, name, 5, 3);
console.error(`File not found: ${name}`);
if (similar.length > 0) {
console.error(` Did you mean: ${similar.join(', ')}`);
let doc: { virtual_path: string; body_length: number; collection_id: number; path: string } | null = null;
// Handle virtual paths
if (isVirtualPath(name)) {
const parsed = parseVirtualPath(name);
if (parsed) {
// Try exact match on collection + path
doc = db.prepare(`
SELECT
'qmd://' || c.name || '/' || d.path as virtual_path,
LENGTH(content.doc) as body_length,
d.collection_id,
d.path
FROM documents d
JOIN collections c ON c.id = d.collection_id
JOIN content ON content.hash = d.hash
WHERE c.name = ? AND d.path = ? AND d.active = 1
`).get(parsed.collectionName, parsed.path) as typeof doc;
}
} else {
// Try exact match on path
doc = db.prepare(`
SELECT
'qmd://' || c.name || '/' || d.path as virtual_path,
LENGTH(content.doc) as body_length,
d.collection_id,
d.path
FROM documents d
JOIN collections c ON c.id = d.collection_id
JOIN content ON content.hash = d.hash
WHERE d.path = ? AND d.active = 1
LIMIT 1
`).get(name) as typeof doc;
// Try suffix match
if (!doc) {
doc = db.prepare(`
SELECT
'qmd://' || c.name || '/' || d.path as virtual_path,
LENGTH(content.doc) as body_length,
d.collection_id,
d.path
FROM documents d
JOIN collections c ON c.id = d.collection_id
JOIN content ON content.hash = d.hash
WHERE d.path LIKE ? AND d.active = 1
LIMIT 1
`).get(`%${name}`) as typeof doc;
}
}
if (doc) {
files.push({
filepath: doc.virtual_path,
displayPath: doc.virtual_path,
bodyLength: doc.body_length,
collectionId: doc.collection_id,
path: doc.path
});
} else {
console.error(`File not found: ${name}`);
}
}
} else {
// Glob pattern on display_path
files = matchFilesByGlob(db, pattern);
// Glob pattern - matchFilesByGlob now returns virtual paths
files = matchFilesByGlob(db, pattern).map(f => ({
...f,
collectionId: undefined, // Will be fetched later if needed
path: undefined
}));
if (files.length === 0) {
console.error(`No files matched pattern: ${pattern}`);
closeDb();
@ -661,7 +978,23 @@ function multiGet(pattern: string, maxLines?: number, maxBytes: number = DEFAULT
const results: { file: string; displayPath: string; title: string; body: string; context: string | null; skipped: boolean; skipReason?: string }[] = [];
for (const file of files) {
const context = getContextForFile(db, file.filepath);
// Parse virtual path to get collection info if not already available
let collectionId = file.collectionId;
let path = file.path;
if (!collectionId || !path) {
const parsed = parseVirtualPath(file.displayPath);
if (parsed) {
const coll = getCollectionByName(db, parsed.collectionName);
if (coll) {
collectionId = coll.id;
path = parsed.path;
}
}
}
// Get context using collection-scoped function
const context = collectionId && path ? getContextForPath(db, collectionId, path) : null;
// Check size limit
if (file.bodyLength > maxBytes) {
@ -677,7 +1010,18 @@ function multiGet(pattern: string, maxLines?: number, maxBytes: number = DEFAULT
continue;
}
const doc = db.prepare(`SELECT body, title FROM documents WHERE filepath = ? AND active = 1`).get(file.filepath) as { body: string; title: string } | null;
// Fetch document content - use virtual path to query
const parsed = parseVirtualPath(file.displayPath);
if (!parsed) continue;
const doc = db.prepare(`
SELECT content.doc as body, d.title
FROM documents d
JOIN collections c ON c.id = d.collection_id
JOIN content ON content.hash = d.hash
WHERE c.name = ? AND d.path = ? AND d.active = 1
`).get(parsed.collectionName, parsed.path) as { body: string; title: string } | null;
if (!doc) continue;
let body = doc.body;
@ -781,18 +1125,6 @@ function multiGet(pattern: string, maxLines?: number, maxBytes: number = DEFAULT
}
}
/**
 * Look up the context text for a file path.
 *
 * Every row of `path_contexts` whose `path_prefix` is a leading substring of
 * `filepath` is a candidate; the longest (most specific) prefix wins.
 *
 * The prefix test is a literal comparison rather than `? LIKE path_prefix || '%'`:
 * with LIKE, any `%` or `_` stored inside a prefix acts as a wildcard, so a
 * prefix such as `/home/my_docs` would also match `/home/myXdocs/...`.
 * `COLLATE NOCASE` keeps the ASCII case-insensitivity that LIKE has by default.
 *
 * @param db - open database handle
 * @param filepath - path to find a context for
 * @returns the context of the most specific matching prefix, or `null` when
 *          no prefix matches (an empty context string is also reported as `null`)
 */
function getContextForFile(db: Database, filepath: string): string | null {
  const row = db.prepare(`
    SELECT context FROM path_contexts
    WHERE substr(?, 1, length(path_prefix)) = path_prefix COLLATE NOCASE
    ORDER BY LENGTH(path_prefix) DESC
    LIMIT 1
  `).get(filepath) as { context: string } | null;
  return row?.context || null;
}
async function dropCollection(globPattern: string): Promise<void> {
const db = getDb();
const pwd = getPwd();
@ -853,34 +1185,28 @@ async function indexFiles(globPattern: string = DEFAULT_GLOB): Promise<void> {
return;
}
const insertStmt = db.prepare(`INSERT INTO documents (collection_id, name, title, hash, filepath, display_path, body, created_at, modified_at, active) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, 1)`);
const deactivateStmt = db.prepare(`UPDATE documents SET active = 0 WHERE collection_id = ? AND filepath = ? AND active = 1`);
const findActiveStmt = db.prepare(`SELECT id, hash, title, display_path FROM documents WHERE collection_id = ? AND filepath = ? AND active = 1`);
const findActiveAnyCollectionStmt = db.prepare(`SELECT id, collection_id, hash, title, display_path FROM documents WHERE filepath = ? AND active = 1`);
// Prepared statements for new schema
const insertContentStmt = db.prepare(`INSERT OR IGNORE INTO content (hash, doc, created_at) VALUES (?, ?, ?)`);
const insertDocStmt = db.prepare(`INSERT INTO documents (collection_id, path, title, hash, created_at, modified_at, active) VALUES (?, ?, ?, ?, ?, ?, 1)`);
const deactivateStmt = db.prepare(`UPDATE documents SET active = 0 WHERE collection_id = ? AND path = ? AND active = 1`);
const findActiveStmt = db.prepare(`SELECT id, hash, title FROM documents WHERE collection_id = ? AND path = ? AND active = 1`);
const updateTitleStmt = db.prepare(`UPDATE documents SET title = ?, modified_at = ? WHERE id = ?`);
const updateDisplayPathStmt = db.prepare(`UPDATE documents SET display_path = ? WHERE id = ?`);
// Collect all existing display_paths for uniqueness check
const existingDisplayPaths = new Set<string>(
(db.prepare(`SELECT display_path FROM documents WHERE active = 1 AND display_path != ''`).all() as { display_path: string }[])
.map(r => r.display_path)
);
let indexed = 0, updated = 0, unchanged = 0, processed = 0;
const seenFiles = new Set<string>();
const seenPaths = new Set<string>();
const startTime = Date.now();
for (const relativeFile of files) {
const filepath = getRealPath(resolve(pwd, relativeFile));
seenFiles.add(filepath);
const path = relativeFile; // Use relative path as-is
seenPaths.add(path);
const content = await Bun.file(filepath).text();
const hash = await hashContent(content);
const name = relativeFile.replace(/\.md$/, "").split("/").pop() || relativeFile;
const title = extractTitle(content, relativeFile);
// First check if file exists in THIS collection
const existing = findActiveStmt.get(collectionId, filepath) as { id: number; hash: string; title: string; display_path: string } | null;
// Check if document exists in this collection with this path
const existing = findActiveStmt.get(collectionId, path) as { id: number; hash: string; title: string } | null;
if (existing) {
if (existing.hash === hash) {
@ -891,35 +1217,24 @@ async function indexFiles(globPattern: string = DEFAULT_GLOB): Promise<void> {
} else {
unchanged++;
}
// Update display_path if empty
if (!existing.display_path) {
const displayPath = computeDisplayPath(filepath, pwd, existingDisplayPaths);
updateDisplayPathStmt.run(displayPath, existing.id);
existingDisplayPaths.add(displayPath);
}
} else {
// Content changed - deactivate old, insert new
existingDisplayPaths.delete(existing.display_path);
deactivateStmt.run(collectionId, filepath);
// Content changed - insert new content hash and update document
insertContentStmt.run(hash, content, now);
deactivateStmt.run(collectionId, path);
updated++;
const stat = await Bun.file(filepath).stat();
const displayPath = computeDisplayPath(filepath, pwd, existingDisplayPaths);
insertStmt.run(collectionId, name, title, hash, filepath, displayPath, content, stat ? new Date(stat.birthtime).toISOString() : now, stat ? new Date(stat.mtime).toISOString() : now);
existingDisplayPaths.add(displayPath);
insertDocStmt.run(collectionId, path, title, hash,
stat ? new Date(stat.birthtime).toISOString() : now,
stat ? new Date(stat.mtime).toISOString() : now);
}
} else {
// Check if file exists in ANY collection (would violate unique constraint)
const existingAnywhere = findActiveAnyCollectionStmt.get(filepath) as { id: number; collection_id: number; hash: string; title: string; display_path: string } | null;
if (existingAnywhere) {
// File already indexed in another collection - skip it
unchanged++;
} else {
indexed++;
const stat = await Bun.file(filepath).stat();
const displayPath = computeDisplayPath(filepath, pwd, existingDisplayPaths);
insertStmt.run(collectionId, name, title, hash, filepath, displayPath, content, stat ? new Date(stat.birthtime).toISOString() : now, stat ? new Date(stat.mtime).toISOString() : now);
existingDisplayPaths.add(displayPath);
}
// New document - insert content and document
indexed++;
insertContentStmt.run(hash, content, now);
const stat = await Bun.file(filepath).stat();
insertDocStmt.run(collectionId, path, title, hash,
stat ? new Date(stat.birthtime).toISOString() : now,
stat ? new Date(stat.mtime).toISOString() : now);
}
processed++;
@ -932,20 +1247,30 @@ async function indexFiles(globPattern: string = DEFAULT_GLOB): Promise<void> {
}
// Deactivate documents in this collection that no longer exist
const allActive = db.prepare(`SELECT filepath FROM documents WHERE collection_id = ? AND active = 1`).all(collectionId) as { filepath: string }[];
const allActive = db.prepare(`SELECT path FROM documents WHERE collection_id = ? AND active = 1`).all(collectionId) as { path: string }[];
let removed = 0;
for (const row of allActive) {
if (!seenFiles.has(row.filepath)) {
deactivateStmt.run(collectionId, row.filepath);
if (!seenPaths.has(row.path)) {
deactivateStmt.run(collectionId, row.path);
removed++;
}
}
// Clean up orphaned content hashes (content not referenced by any document)
const cleanupResult = db.prepare(`
DELETE FROM content
WHERE hash NOT IN (SELECT DISTINCT hash FROM documents WHERE active = 1)
`).run();
const orphanedContent = cleanupResult.changes;
// Check if vector index needs updating
const needsEmbedding = getHashesNeedingEmbedding(db);
progress.clear();
console.log(`\nIndexed: ${indexed} new, ${updated} updated, ${unchanged} unchanged, ${removed} removed`);
if (orphanedContent > 0) {
console.log(`Cleaned up ${orphanedContent} orphaned content hash(es)`);
}
if (needsEmbedding > 0) {
console.log(`\nRun 'qmd embed' to update embeddings (${needsEmbedding} unique hashes need vectors)`);
@ -1154,40 +1479,10 @@ function getCollectionIdByName(db: Database, name: string): number | null {
return result?.id || null;
}
/**
 * Full-text search over active documents through the `documents_fts` table.
 *
 * @param db - open database handle
 * @param query - raw user query; converted with `buildFTS5Query`
 * @param limit - maximum number of rows to return (default 20)
 * @param collectionId - when given, only documents of that collection are searched
 * @returns matches tagged with source "fts", scores passed through `normalizeBM25`;
 *          an empty array when `buildFTS5Query` yields nothing usable
 */
function searchFTS(db: Database, query: string, limit: number = 20, collectionId?: number): SearchResult[] {
  type FtsRow = { filepath: string; display_path: string; title: string; body: string; score: number };

  const matchExpr = buildFTS5Query(query);
  if (!matchExpr) return [];

  // SQL fragments are concatenated in order; bindings are pushed in lockstep.
  // BM25 weights: name=10, body=1 (title matches ranked higher)
  const fragments: string[] = [`
SELECT d.filepath, d.display_path, d.title, d.body, bm25(documents_fts, 10.0, 1.0) as score
FROM documents_fts f
JOIN documents d ON d.id = f.rowid
WHERE documents_fts MATCH ? AND d.active = 1
`];
  const bindings: (string | number)[] = [matchExpr];

  if (collectionId !== undefined) {
    fragments.push(` AND d.collection_id = ?`);
    bindings.push(collectionId);
  }

  fragments.push(` ORDER BY score LIMIT ?`);
  bindings.push(limit);

  const rows = db.prepare(fragments.join("")).all(...bindings) as FtsRow[];

  return rows.map(({ filepath, display_path, title, body, score }) => ({
    file: filepath,
    displayPath: display_path,
    title,
    body,
    score: normalizeBM25(score),
    source: "fts" as const,
  }));
}
async function searchVec(db: Database, query: string, model: string, limit: number = 20, collectionId?: number): Promise<SearchResult[]> {
// Removed duplicate searchFTS and searchVec functions - using store.ts versions instead
async function REMOVED_searchVec(db: Database, query: string, model: string, limit: number = 20, collectionId?: number): Promise<SearchResult[]> {
const tableExists = db.prepare(`SELECT name FROM sqlite_master WHERE type='table' AND name='vectors_vec'`).get();
if (!tableExists) return [];
@ -1788,7 +2083,9 @@ function parseCLI() {
function showHelp(): void {
console.log("Usage:");
console.log(" qmd add [--drop] [glob] - Add/update collection from $PWD (default: **/*.md)");
console.log(" qmd add-context <path> <text> - Add context description for files under path");
console.log(" qmd context add [path] \"text\" - Add context for path (defaults to current dir)");
console.log(" qmd context list - List all contexts");
console.log(" qmd context rm <path> - Remove context");
console.log(" qmd get <file>[:line] [-l N] [--from N] - Get document (optionally from line, max N lines)");
console.log(" qmd multi-get <pattern> [-l N] [--max-bytes N] - Get multiple docs by glob or comma-separated list");
console.log(" qmd status - Show index status and collections");
@ -1851,24 +2148,96 @@ switch (cli.command) {
break;
}
case "add-context": {
// qmd add-context <path> <context> OR qmd add-context <context> (uses .)
if (cli.args.length === 0) {
console.error("Usage: qmd add-context <path> <context>");
console.error(" qmd add-context . \"Description of files in current directory\"");
case "context": {
const subcommand = cli.args[0];
if (!subcommand) {
console.error("Usage: qmd context <add|list|rm>");
console.error("");
console.error("Commands:");
console.error(" qmd context add [path] \"text\" - Add context (defaults to current dir)");
console.error(" qmd context add / \"text\" - Add global context to all collections");
console.error(" qmd context list - List all contexts");
console.error(" qmd context rm <path> - Remove context");
process.exit(1);
}
let pathArg: string;
switch (subcommand) {
case "add": {
if (cli.args.length < 2) {
console.error("Usage: qmd context add [path] \"text\"");
console.error("Examples:");
console.error(" qmd context add \"Context for current directory\"");
console.error(" qmd context add . \"Context for current directory\"");
console.error(" qmd context add /subfolder \"Context for subfolder\"");
console.error(" qmd context add / \"Global context for all collections\"");
console.error(" qmd context add qmd://journals/2024 \"Context for 2024 journals\"");
process.exit(1);
}
let pathArg: string | undefined;
let contextText: string;
// Check if first arg looks like a path or if it's the context text
const firstArg = cli.args[1];
const secondArg = cli.args[2];
if (secondArg) {
// Two args: path + context
pathArg = firstArg;
contextText = cli.args.slice(2).join(" ");
} else {
// One arg: context only (use current directory)
pathArg = undefined;
contextText = firstArg;
}
await contextAdd(pathArg, contextText);
break;
}
case "list": {
contextList();
break;
}
case "rm":
case "remove": {
if (cli.args.length < 2) {
console.error("Usage: qmd context rm <path>");
console.error("Examples:");
console.error(" qmd context rm /");
console.error(" qmd context rm qmd://journals/2024");
process.exit(1);
}
contextRemove(cli.args[1]);
break;
}
default:
console.error(`Unknown subcommand: ${subcommand}`);
console.error("Available: add, list, rm");
process.exit(1);
}
break;
}
// Legacy alias for backwards compatibility
case "add-context": {
console.error(`${c.yellow}Note: 'qmd add-context' is deprecated. Use 'qmd context add' instead.${c.reset}`);
if (cli.args.length === 0) {
console.error("Usage: qmd context add [path] \"text\"");
process.exit(1);
}
let pathArg: string | undefined;
let contextText: string;
if (cli.args.length === 1) {
// Single arg = context for current directory
pathArg = ".";
pathArg = undefined;
contextText = cli.args[0];
} else {
pathArg = cli.args[0];
contextText = cli.args.slice(1).join(" ");
}
await addContext(pathArg, contextText);
await contextAdd(pathArg, contextText);
break;
}

537
store.ts
View File

@ -90,6 +90,72 @@ export function getRealPath(path: string): string {
return resolve(path);
}
// =============================================================================
// Virtual Path Utilities (qmd://)
// =============================================================================
/** The two components of a `qmd://` virtual path. */
export type VirtualPath = {
  /** Segment that directly follows `qmd://`. */
  collectionName: string;
  /** Everything after the collection segment: the path relative to the collection. */
  path: string;
};

/**
 * Split a virtual path such as "qmd://collection-name/path/to/file.md"
 * into its collection name and collection-relative path.
 *
 * @param virtualPath - candidate string to parse
 * @returns the parsed components, or `null` when the input lacks the `qmd://`
 *          scheme, a non-empty collection segment, or a non-empty path after it
 */
export function parseVirtualPath(virtualPath: string): VirtualPath | null {
  const parts = /^qmd:\/\/([^\/]+)\/(.+)$/.exec(virtualPath);
  if (parts === null) {
    return null;
  }
  const [, collectionName, path] = parts;
  return { collectionName, path };
}
/**
 * Compose a `qmd://` virtual path.
 *
 * @param collectionName - collection segment placed right after the scheme
 * @param path - path relative to the collection
 * @returns `qmd://<collectionName>/<path>`; the inputs are inserted verbatim
 */
export function buildVirtualPath(collectionName: string, path: string): string {
  const scheme = 'qmd://';
  return scheme + collectionName + '/' + path;
}
/**
 * Tell whether a string uses the `qmd://` scheme.
 *
 * Only the prefix is inspected; the remainder is not validated.
 *
 * @param path - string to test
 * @returns `true` when `path` begins with `qmd://` (case-sensitive)
 */
export function isVirtualPath(path: string): boolean {
  const scheme = 'qmd://';
  return path.slice(0, scheme.length) === scheme;
}
/**
 * Map a `qmd://` virtual path onto an absolute filesystem path.
 *
 * The collection is looked up by name and its `pwd` is combined with the
 * collection-relative path via `resolve`.
 *
 * @param db - open database handle used for the collection lookup
 * @param virtualPath - path of the form `qmd://<collection>/<relative path>`
 * @returns the resolved path, or `null` when the input does not parse as a
 *          virtual path or names a collection that cannot be found
 */
export function resolveVirtualPath(db: Database, virtualPath: string): string | null {
  const location = parseVirtualPath(virtualPath);
  // Only hit the database once the path has parsed successfully.
  const collection = location ? getCollectionByName(db, location.collectionName) : null;
  if (!location || !collection) {
    return null;
  }
  return resolve(collection.pwd, location.path);
}
/**
 * Translate an absolute filesystem path into its `qmd://` virtual path.
 *
 * An active document matches when its collection's `pwd`, a `/`, and the
 * document's relative path concatenate to exactly `absolutePath`; the first
 * such row is used.
 *
 * @param db - open database handle
 * @param absolutePath - absolute path of the file to look up
 * @returns the virtual path, or `null` if the file is not in any indexed collection
 */
export function toVirtualPath(db: Database, absolutePath: string): string | null {
  // SQL text is kept verbatim.
  const lookup = db.prepare(`
SELECT c.name, d.path
FROM documents d
JOIN collections c ON c.id = d.collection_id
WHERE c.pwd || '/' || d.path = ? AND d.active = 1
LIMIT 1
`);
  const row = lookup.get(absolutePath) as { name: string; path: string } | null;
  return row ? buildVirtualPath(row.name, row.path) : null;
}
// =============================================================================
// Database initialization
// =============================================================================
@ -107,29 +173,74 @@ if (process.platform === "darwin") {
function initializeDatabase(db: Database): void {
sqliteVec.load(db);
db.exec("PRAGMA journal_mode = WAL");
db.exec("PRAGMA foreign_keys = ON");
// Collections table
// Check if we need to migrate from old schema
const tables = db.prepare(`SELECT name FROM sqlite_master WHERE type='table'`).all() as { name: string }[];
const tableNames = tables.map(t => t.name);
const needsMigration = tableNames.includes('documents') && !tableNames.includes('content');
if (needsMigration) {
migrateToContentAddressable(db);
return; // Migration will call initializeDatabase again
}
// Content-addressable storage - the source of truth for document content
db.exec(`
CREATE TABLE IF NOT EXISTS content (
hash TEXT PRIMARY KEY,
doc TEXT NOT NULL,
created_at TEXT NOT NULL
)
`);
// Collections table with name field
db.exec(`
CREATE TABLE IF NOT EXISTS collections (
id INTEGER PRIMARY KEY AUTOINCREMENT,
name TEXT NOT NULL UNIQUE,
pwd TEXT NOT NULL,
glob_pattern TEXT NOT NULL,
created_at TEXT NOT NULL,
context TEXT,
updated_at TEXT NOT NULL,
UNIQUE(pwd, glob_pattern)
)
`);
// Path-based context
// Documents table - file system layer mapping virtual paths to content hashes
db.exec(`
CREATE TABLE IF NOT EXISTS documents (
id INTEGER PRIMARY KEY AUTOINCREMENT,
collection_id INTEGER NOT NULL,
path TEXT NOT NULL,
title TEXT NOT NULL,
hash TEXT NOT NULL,
created_at TEXT NOT NULL,
modified_at TEXT NOT NULL,
active INTEGER NOT NULL DEFAULT 1,
FOREIGN KEY (collection_id) REFERENCES collections(id) ON DELETE CASCADE,
FOREIGN KEY (hash) REFERENCES content(hash) ON DELETE CASCADE,
UNIQUE(collection_id, path)
)
`);
db.exec(`CREATE INDEX IF NOT EXISTS idx_documents_collection ON documents(collection_id, active)`);
db.exec(`CREATE INDEX IF NOT EXISTS idx_documents_hash ON documents(hash)`);
db.exec(`CREATE INDEX IF NOT EXISTS idx_documents_path ON documents(path, active)`);
// Path-based context (collection-scoped, hierarchical)
db.exec(`
CREATE TABLE IF NOT EXISTS path_contexts (
id INTEGER PRIMARY KEY AUTOINCREMENT,
path_prefix TEXT NOT NULL UNIQUE,
collection_id INTEGER NOT NULL,
path_prefix TEXT NOT NULL,
context TEXT NOT NULL,
created_at TEXT NOT NULL
created_at TEXT NOT NULL,
FOREIGN KEY (collection_id) REFERENCES collections(id) ON DELETE CASCADE,
UNIQUE(collection_id, path_prefix)
)
`);
db.exec(`CREATE INDEX IF NOT EXISTS idx_path_contexts_prefix ON path_contexts(path_prefix)`);
db.exec(`CREATE INDEX IF NOT EXISTS idx_path_contexts_collection ON path_contexts(collection_id, path_prefix)`);
// Cache table for Ollama API calls
db.exec(`
@ -140,33 +251,6 @@ function initializeDatabase(db: Database): void {
)
`);
// Documents table
db.exec(`
CREATE TABLE IF NOT EXISTS documents (
id INTEGER PRIMARY KEY AUTOINCREMENT,
collection_id INTEGER NOT NULL,
name TEXT NOT NULL,
title TEXT NOT NULL,
hash TEXT NOT NULL,
filepath TEXT NOT NULL,
display_path TEXT NOT NULL DEFAULT '',
body TEXT NOT NULL,
created_at TEXT NOT NULL,
modified_at TEXT NOT NULL,
active INTEGER NOT NULL DEFAULT 1,
FOREIGN KEY (collection_id) REFERENCES collections(id)
)
`);
// Migration: add display_path column if missing
const docInfo = db.prepare(`PRAGMA table_info(documents)`).all() as { name: string }[];
const hasDisplayPath = docInfo.some(col => col.name === 'display_path');
if (!hasDisplayPath) {
db.exec(`ALTER TABLE documents ADD COLUMN display_path TEXT NOT NULL DEFAULT ''`);
}
db.exec(`CREATE UNIQUE INDEX IF NOT EXISTS idx_documents_display_path ON documents(display_path) WHERE display_path != '' AND active = 1`);
// Content vectors
const cvInfo = db.prepare(`PRAGMA table_info(content_vectors)`).all() as { name: string }[];
const hasSeqColumn = cvInfo.some(col => col.name === 'seq');
@ -185,39 +269,287 @@ function initializeDatabase(db: Database): void {
)
`);
// FTS
// FTS - index path and content (joined from content table)
db.exec(`
CREATE VIRTUAL TABLE IF NOT EXISTS documents_fts USING fts5(
name, body,
content='documents',
content_rowid='id',
path, body,
tokenize='porter unicode61'
)
`);
// Triggers to keep FTS in sync
db.exec(`
CREATE TRIGGER IF NOT EXISTS documents_ai AFTER INSERT ON documents BEGIN
INSERT INTO documents_fts(rowid, name, body) VALUES (new.id, new.name, new.body);
INSERT INTO documents_fts(rowid, path, body)
SELECT new.id, new.path, c.doc
FROM content c
WHERE c.hash = new.hash;
END
`);
db.exec(`
CREATE TRIGGER IF NOT EXISTS documents_ad AFTER DELETE ON documents BEGIN
INSERT INTO documents_fts(documents_fts, rowid, name, body) VALUES('delete', old.id, old.name, old.body);
DELETE FROM documents_fts WHERE rowid = old.id;
END
`);
db.exec(`
CREATE TRIGGER IF NOT EXISTS documents_au AFTER UPDATE ON documents BEGIN
INSERT INTO documents_fts(documents_fts, rowid, name, body) VALUES('delete', old.id, old.name, old.body);
INSERT INTO documents_fts(rowid, name, body) VALUES (new.id, new.name, new.body);
UPDATE documents_fts
SET path = new.path,
body = (SELECT doc FROM content WHERE hash = new.hash)
WHERE rowid = new.id;
END
`);
}
db.exec(`CREATE INDEX IF NOT EXISTS idx_documents_collection ON documents(collection_id, active)`);
db.exec(`CREATE INDEX IF NOT EXISTS idx_documents_hash ON documents(hash)`);
db.exec(`CREATE INDEX IF NOT EXISTS idx_documents_filepath ON documents(filepath, active)`);
db.exec(`CREATE UNIQUE INDEX IF NOT EXISTS idx_documents_filepath_active ON documents(filepath) WHERE active = 1`);
function migrateToContentAddressable(db: Database): void {
console.log("Migrating database to content-addressable schema...");
// Start transaction
db.exec("BEGIN TRANSACTION");
try {
// Rename old tables
db.exec("ALTER TABLE documents RENAME TO documents_old");
db.exec("ALTER TABLE collections RENAME TO collections_old");
db.exec("ALTER TABLE path_contexts RENAME TO path_contexts_old");
db.exec("DROP TABLE IF EXISTS documents_fts");
db.exec("DROP TRIGGER IF EXISTS documents_ai");
db.exec("DROP TRIGGER IF EXISTS documents_ad");
db.exec("DROP TRIGGER IF EXISTS documents_au");
// Create new schema
db.exec(`
CREATE TABLE content (
hash TEXT PRIMARY KEY,
doc TEXT NOT NULL,
created_at TEXT NOT NULL
)
`);
db.exec(`
CREATE TABLE collections (
id INTEGER PRIMARY KEY AUTOINCREMENT,
name TEXT NOT NULL UNIQUE,
pwd TEXT NOT NULL,
glob_pattern TEXT NOT NULL,
created_at TEXT NOT NULL,
updated_at TEXT NOT NULL,
UNIQUE(pwd, glob_pattern)
)
`);
db.exec(`
CREATE TABLE documents (
id INTEGER PRIMARY KEY AUTOINCREMENT,
collection_id INTEGER NOT NULL,
path TEXT NOT NULL,
title TEXT NOT NULL,
hash TEXT NOT NULL,
created_at TEXT NOT NULL,
modified_at TEXT NOT NULL,
active INTEGER NOT NULL DEFAULT 1,
FOREIGN KEY (collection_id) REFERENCES collections(id) ON DELETE CASCADE,
FOREIGN KEY (hash) REFERENCES content(hash) ON DELETE CASCADE,
UNIQUE(collection_id, path)
)
`);
db.exec(`
CREATE TABLE path_contexts (
id INTEGER PRIMARY KEY AUTOINCREMENT,
collection_id INTEGER NOT NULL,
path_prefix TEXT NOT NULL,
context TEXT NOT NULL,
created_at TEXT NOT NULL,
FOREIGN KEY (collection_id) REFERENCES collections(id) ON DELETE CASCADE,
UNIQUE(collection_id, path_prefix)
)
`);
// Migrate data: Extract unique content hashes
console.log("Migrating content...");
db.exec(`
INSERT INTO content (hash, doc, created_at)
SELECT hash, body, MIN(created_at) as created_at
FROM documents_old
WHERE active = 1
GROUP BY hash
`);
// Migrate collections: generate names from pwd basename
console.log("Migrating collections...");
db.exec(`
INSERT INTO collections (id, name, pwd, glob_pattern, created_at, updated_at)
SELECT
id,
CASE
WHEN INSTR(RTRIM(pwd, '/'), '/') > 0
THEN SUBSTR(RTRIM(pwd, '/'), INSTR(RTRIM(pwd, '/'), '/') + 1)
ELSE RTRIM(pwd, '/')
END as name,
pwd,
glob_pattern,
created_at,
created_at as updated_at
FROM collections_old
`);
// Handle duplicate collection names by appending collection_id
const duplicates = db.prepare(`
SELECT name, COUNT(*) as cnt
FROM collections
GROUP BY name
HAVING cnt > 1
`).all() as { name: string; cnt: number }[];
for (const dup of duplicates) {
const rows = db.prepare(`SELECT id FROM collections WHERE name = ? ORDER BY id`).all(dup.name) as { id: number }[];
for (let i = 1; i < rows.length; i++) {
db.prepare(`UPDATE collections SET name = ? WHERE id = ?`).run(`${dup.name}-${rows[i].id}`, rows[i].id);
}
}
// Migrate documents: convert filepath to relative path within collection
console.log("Migrating documents...");
const oldDocs = db.prepare(`
SELECT d.id, d.collection_id, d.filepath, d.title, d.hash, d.created_at, d.modified_at, c.pwd
FROM documents_old d
JOIN collections c ON c.id = d.collection_id
WHERE d.active = 1
`).all() as Array<{
id: number;
collection_id: number;
filepath: string;
title: string;
hash: string;
created_at: string;
modified_at: string;
pwd: string;
}>;
const insertDoc = db.prepare(`
INSERT INTO documents (collection_id, path, title, hash, created_at, modified_at, active)
VALUES (?, ?, ?, ?, ?, ?, 1)
`);
for (const doc of oldDocs) {
// Convert absolute filepath to relative path within collection
let path = doc.filepath;
if (path.startsWith(doc.pwd + '/')) {
path = path.slice(doc.pwd.length + 1);
} else if (path.startsWith(doc.pwd)) {
path = path.slice(doc.pwd.length);
}
// Remove leading slash if present
path = path.replace(/^\/+/, '');
try {
insertDoc.run(doc.collection_id, path, doc.title, doc.hash, doc.created_at, doc.modified_at);
} catch (e) {
console.warn(`Skipping duplicate path: ${path} in collection ${doc.collection_id}`);
}
}
// Migrate path_contexts: associate with collections based on path prefix
console.log("Migrating path contexts...");
const oldContexts = db.prepare(`SELECT * FROM path_contexts_old`).all() as Array<{
path_prefix: string;
context: string;
created_at: string;
}>;
const insertContext = db.prepare(`
INSERT INTO path_contexts (collection_id, path_prefix, context, created_at)
VALUES (?, ?, ?, ?)
`);
const allCollections = db.prepare(`SELECT id, pwd FROM collections`).all() as Array<{ id: number; pwd: string }>;
for (const ctx of oldContexts) {
// Find collection(s) that match this path prefix
for (const coll of allCollections) {
if (ctx.path_prefix.startsWith(coll.pwd)) {
// Convert absolute path_prefix to relative within collection
let relPath = ctx.path_prefix;
if (relPath.startsWith(coll.pwd + '/')) {
relPath = relPath.slice(coll.pwd.length + 1);
} else if (relPath.startsWith(coll.pwd)) {
relPath = relPath.slice(coll.pwd.length);
}
relPath = relPath.replace(/^\/+/, '');
try {
insertContext.run(coll.id, relPath, ctx.context, ctx.created_at);
} catch (e) {
// Ignore duplicates
}
}
}
}
// Drop old tables
db.exec("DROP TABLE documents_old");
db.exec("DROP TABLE collections_old");
db.exec("DROP TABLE path_contexts_old");
// Recreate FTS and triggers
db.exec(`
CREATE VIRTUAL TABLE documents_fts USING fts5(
path, body,
tokenize='porter unicode61'
)
`);
db.exec(`
CREATE TRIGGER documents_ai AFTER INSERT ON documents BEGIN
INSERT INTO documents_fts(rowid, path, body)
SELECT new.id, new.path, c.doc
FROM content c
WHERE c.hash = new.hash;
END
`);
db.exec(`
CREATE TRIGGER documents_ad AFTER DELETE ON documents BEGIN
DELETE FROM documents_fts WHERE rowid = old.id;
END
`);
db.exec(`
CREATE TRIGGER documents_au AFTER UPDATE ON documents BEGIN
UPDATE documents_fts
SET path = new.path,
body = (SELECT doc FROM content WHERE hash = new.hash)
WHERE rowid = new.id;
END
`);
// Populate FTS from migrated data
console.log("Rebuilding full-text search index...");
db.exec(`
INSERT INTO documents_fts(rowid, path, body)
SELECT d.id, d.path, c.doc
FROM documents d
JOIN content c ON c.hash = d.hash
WHERE d.active = 1
`);
// Create indexes
db.exec(`CREATE INDEX idx_documents_collection ON documents(collection_id, active)`);
db.exec(`CREATE INDEX idx_documents_hash ON documents(hash)`);
db.exec(`CREATE INDEX idx_documents_path ON documents(path, active)`);
db.exec(`CREATE INDEX idx_path_contexts_collection ON path_contexts(collection_id, path_prefix)`);
db.exec("COMMIT");
console.log("Migration complete!");
} catch (e) {
db.exec("ROLLBACK");
console.error("Migration failed:", e);
throw e;
}
}
function ensureVecTableInternal(db: Database, dimensions: number): void {
@ -254,7 +586,16 @@ export type Store = {
// Context
getContextForFile: (filepath: string) => string | null;
getContextForPath: (collectionId: number, path: string) => string | null;
getCollectionIdByName: (name: string) => number | null;
getCollectionByName: (name: string) => { id: number; name: string; pwd: string; glob_pattern: string } | null;
// Virtual paths
parseVirtualPath: typeof parseVirtualPath;
buildVirtualPath: typeof buildVirtualPath;
isVirtualPath: typeof isVirtualPath;
resolveVirtualPath: (virtualPath: string) => string | null;
toVirtualPath: (absolutePath: string) => string | null;
// Search
searchFTS: (query: string, limit?: number, collectionId?: number) => SearchResult[];
@ -309,7 +650,16 @@ export function createStore(dbPath?: string): Store {
// Context
getContextForFile: (filepath: string) => getContextForFile(db, filepath),
getContextForPath: (collectionId: number, path: string) => getContextForPath(db, collectionId, path),
getCollectionIdByName: (name: string) => getCollectionIdByName(db, name),
getCollectionByName: (name: string) => getCollectionByName(db, name),
// Virtual paths
parseVirtualPath,
buildVirtualPath,
isVirtualPath,
resolveVirtualPath: (virtualPath: string) => resolveVirtualPath(db, virtualPath),
toVirtualPath: (absolutePath: string) => toVirtualPath(db, absolutePath),
// Search
searchFTS: (query: string, limit?: number, collectionId?: number) => searchFTS(db, query, limit, collectionId),
@ -632,38 +982,95 @@ export function findSimilarFiles(db: Database, query: string, maxDistance: numbe
}
/**
 * List every active document whose path matches a glob pattern.
 *
 * Each document is tested twice: once by its virtual path
 * (`qmd://<collection name>/<path>`) and once by its bare path as stored in
 * `documents.path`. A document is returned when either form matches, so both
 * `qmd://notes/**` and `*.md` style patterns work.
 *
 * @param db Database handle to query
 * @param pattern Glob pattern to test against each document
 * @returns One entry per matching document. `filepath` and `displayPath` both
 *          carry the virtual path; `bodyLength` is `LENGTH(content.doc)`.
 */
export function matchFilesByGlob(db: Database, pattern: string): { filepath: string; displayPath: string; bodyLength: number }[] {
  // Body text lives in `content` (joined by hash), not on the document row.
  const allFiles = db.prepare(`
SELECT
'qmd://' || c.name || '/' || d.path as virtual_path,
LENGTH(content.doc) as body_length,
d.collection_id,
d.path
FROM documents d
JOIN collections c ON c.id = d.collection_id
JOIN content ON content.hash = d.hash
WHERE d.active = 1
`).all() as { virtual_path: string; body_length: number; collection_id: number; path: string }[];

  const glob = new Glob(pattern);

  return allFiles
    .filter(f => glob.match(f.virtual_path) || glob.match(f.path))
    .map(f => ({
      filepath: f.virtual_path, // Use virtual path as filepath
      displayPath: f.virtual_path,
      bodyLength: f.body_length
    }));
}
// =============================================================================
// Context
// =============================================================================
/**
 * Get context for a path using hierarchical inheritance.
 * Contexts are collection-scoped and inherit from parent directories.
 * For example, context at "talks" applies to "talks/2024/keynote.md".
 *
 * A stored prefix applies when the path equals it, when the path starts with
 * the prefix followed by "/", or when the prefix is the empty string
 * (a collection-wide context). The longest applicable prefix wins.
 *
 * NOTE(review): the directory match uses SQL LIKE, so any `%` or `_` inside a
 * stored prefix acts as a wildcard, and SQLite's default LIKE ignores ASCII
 * case — confirm that is acceptable for the paths stored here.
 *
 * @param db Database instance
 * @param collectionId Collection ID
 * @param path Relative path within the collection
 * @returns Context string, or null if no context applies (an empty stored
 *          context is also reported as null)
 */
export function getContextForPath(db: Database, collectionId: number, path: string): string | null {
  // Find the most specific (longest) matching path prefix for this collection
  const result = db.prepare(`
SELECT context FROM path_contexts
WHERE collection_id = ?
AND (? LIKE path_prefix || '/%' OR ? = path_prefix OR path_prefix = '')
ORDER BY LENGTH(path_prefix) DESC
LIMIT 1
`).get(collectionId, path, path) as { context: string } | null;
  // `||` on purpose: an empty context string is treated the same as none.
  return result?.context || null;
}
/**
 * Backward-compatible context lookup keyed by a full file path.
 *
 * Finds the active document whose collection `pwd`, a "/" and its stored
 * `path` join to exactly `filepath`, then hands that document's collection id
 * and path to `getContextForPath`.
 *
 * @param db Database instance
 * @param filepath Full path of the file, compared against `pwd || '/' || path`
 * @returns The resolved context, or null when no active document matches
 */
export function getContextForFile(db: Database, filepath: string): string | null {
  type LocatedDocument = { collection_id: number; path: string };

  // Map the file path back to the (collection, path) pair it was indexed under.
  const locate = db.prepare(`
SELECT d.collection_id, d.path
FROM documents d
JOIN collections c ON c.id = d.collection_id
WHERE c.pwd || '/' || d.path = ? AND d.active = 1
LIMIT 1
`);
  const located = locate.get(filepath) as LocatedDocument | null;

  return located
    ? getContextForPath(db, located.collection_id, located.path)
    : null;
}
/**
 * Get collection ID by its name (exact match).
 *
 * @param db Database instance
 * @param name Collection name, compared for equality against `collections.name`
 * @returns The collection's id, or null when no collection has that name
 */
export function getCollectionIdByName(db: Database, name: string): number | null {
  // Exact match on the name column; no fuzzy matching on pwd or glob_pattern.
  const result = db.prepare(`
SELECT id FROM collections
WHERE name = ?
LIMIT 1
`).get(name) as { id: number } | null;
  // `??` rather than `||` so a legitimate id of 0 is not reported as "not found".
  return result?.id ?? null;
}
/**
 * Fetch a collection row by its name (exact match).
 *
 * @param db Database instance
 * @param name Collection name, compared for equality against `collections.name`
 * @returns The row's `id`, `name`, `pwd` and `glob_pattern` as returned by the
 *          query; whatever the statement yields for "no row" is passed through
 */
export function getCollectionByName(db: Database, name: string): { id: number; name: string; pwd: string; glob_pattern: string } | null {
  type CollectionRow = { id: number; name: string; pwd: string; glob_pattern: string };

  const byName = db.prepare(`
SELECT id, name, pwd, glob_pattern FROM collections
WHERE name = ?
LIMIT 1
`);

  return byName.get(name) as CollectionRow | null;
}
// =============================================================================
// FTS Search
// =============================================================================
@ -686,9 +1093,16 @@ export function searchFTS(db: Database, query: string, limit: number = 20, colle
if (!ftsQuery) return [];
let sql = `
SELECT d.filepath, d.display_path, d.title, d.body, bm25(documents_fts, 10.0, 1.0) as score
SELECT
'qmd://' || c.name || '/' || d.path as filepath,
'qmd://' || c.name || '/' || d.path as display_path,
d.title,
content.doc as body,
bm25(documents_fts, 10.0, 1.0) as score
FROM documents_fts f
JOIN documents d ON d.id = f.rowid
JOIN collections c ON c.id = d.collection_id
JOIN content ON content.hash = d.hash
WHERE documents_fts MATCH ? AND d.active = 1
`;
const params: (string | number)[] = [ftsQuery];
@ -727,10 +1141,19 @@ export async function searchVec(db: Database, query: string, model: string, limi
// sqlite-vec requires "k = ?" for KNN queries
let sql = `
SELECT v.hash_seq, v.distance, d.filepath, d.display_path, d.title, d.body, cv.pos
SELECT
v.hash_seq,
v.distance,
'qmd://' || c.name || '/' || d.path as filepath,
'qmd://' || c.name || '/' || d.path as display_path,
d.title,
content.doc as body,
cv.pos
FROM vectors_vec v
JOIN content_vectors cv ON cv.hash || '_' || cv.seq = v.hash_seq
JOIN documents d ON d.hash = cv.hash AND d.active = 1
JOIN collections c ON c.id = d.collection_id
JOIN content ON content.hash = d.hash
WHERE v.embedding MATCH ? AND k = ?
`;

1
texts Symbolic link
View File

@ -0,0 +1 @@
/Users/tobi/src/github.com/Shopify/archive/obsidian/archive/Articles/By Tobi/