diff --git a/CHANGELOG.md b/CHANGELOG.md index 1558305..2ec30c8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,10 +2,19 @@ ## [Unreleased] +### Changes + +- CLI: add `qmd sync` for SSH/rsync-based QMD source-file and YAML-config + synchronization between a local machine and remote QMD host. The sync path + uses resumable rsync transfers, conflict-copy preservation, and keeps SQLite + indexes out of the transport so each side can re-index independently. +- CLI: add `qmd sync --update` to refresh local and remote indexes after a + successful sync, with optional `--embed` for explicit embedding refreshes. + ### Fixes - Embedding: default to an external OpenAI-compatible embeddings API - (`nvidia/llama-3.2-nv-embedqa-1b-v2`) and require + (`nvidia/llama-nemotron-embed-1b-v2`) and require `QMD_ENABLE_LOCAL_MODELS=1` for local node-llama-cpp embedding, reranking, and query expansion models. - Embedding: use approximate token counts in external embedding mode so diff --git a/README.md b/README.md index a51d8e9..f8e2ee7 100644 --- a/README.md +++ b/README.md @@ -489,7 +489,7 @@ by default. Configure it with: ```sh export NVIDIA_API_KEY="..." 
export QMD_EMBED_API_BASE_URL="https://integrate.api.nvidia.com/v1" -export QMD_EMBED_MODEL="nvidia/llama-3.2-nv-embedqa-1b-v2" +export QMD_EMBED_MODEL="nvidia/llama-nemotron-embed-1b-v2" ``` QMD reads `NVIDIA_API_KEY` when `QMD_EMBED_API_KEY` is not set and sends @@ -936,7 +936,7 @@ Query ──► LLM Expansion ──► [Original, Variant 1, Variant 2] Models are configured in `src/llm.ts`: ```typescript -const DEFAULT_EMBED_MODEL = "nvidia/llama-3.2-nv-embedqa-1b-v2"; +const DEFAULT_EMBED_MODEL = "nvidia/llama-nemotron-embed-1b-v2"; const DEFAULT_RERANK_MODEL = "hf:ggml-org/Qwen3-Reranker-0.6B-Q8_0-GGUF/qwen3-reranker-0.6b-q8_0.gguf"; const DEFAULT_GENERATE_MODEL = "hf:tobil/qmd-query-expansion-1.7B-gguf/qmd-query-expansion-1.7B-q4_k_m.gguf"; ``` @@ -945,7 +945,7 @@ YAML configuration can override those defaults; see `example-index.yml` for a co ```yaml models: - embed: nvidia/llama-3.2-nv-embedqa-1b-v2 + embed: nvidia/llama-nemotron-embed-1b-v2 # Optional local models, used only when QMD_ENABLE_LOCAL_MODELS=1: # rerank: hf:ggml-org/Qwen3-Reranker-0.6B-Q8_0-GGUF/qwen3-reranker-0.6b-q8_0.gguf # generate: hf:tobil/qmd-query-expansion-1.7B-gguf/qmd-query-expansion-1.7B-q4_k_m.gguf diff --git a/example-index.yml b/example-index.yml index 62f8279..27428e7 100644 --- a/example-index.yml +++ b/example-index.yml @@ -13,7 +13,7 @@ global_context: "If you see a relevant [[WikiWord]], you can search for that Wik # Set NVIDIA_API_KEY, QMD_EMBED_API_KEY, or OPENAI_API_KEY in the environment for API auth. # Local GGUF models are disabled unless QMD_ENABLE_LOCAL_MODELS=1 is set. 
models: - embed: nvidia/llama-3.2-nv-embedqa-1b-v2 + embed: nvidia/llama-nemotron-embed-1b-v2 # Optional local embedding model instead of the external API: # embed: hf:Qwen/Qwen3-Embedding-0.6B-GGUF/Qwen3-Embedding-0.6B-Q8_0.gguf # Optional local rerank/generation models: diff --git a/src/cli/qmd.ts b/src/cli/qmd.ts index 3ee10aa..20a293e 100755 --- a/src/cli/qmd.ts +++ b/src/cli/qmd.ts @@ -2524,6 +2524,15 @@ function parseCLI() { http: { type: "boolean" }, daemon: { type: "boolean" }, port: { type: "string" }, + // Sync options + host: { type: "string" }, + "remote-user": { type: "string" }, + "remote-qmd-user": { type: "string" }, + "remote-home": { type: "string" }, + "dry-run": { type: "boolean" }, + delete: { type: "boolean" }, + update: { type: "boolean" }, + embed: { type: "boolean" }, }, allowPositionals: true, strict: false, // Allow unknown options to pass through @@ -2714,6 +2723,7 @@ function showHelp(): void { console.log(" qmd multi-get - Batch fetch via glob or comma-separated list"); console.log(" qmd skill show/install - Show or install the packaged QMD skill"); console.log(" qmd mcp - Start the MCP server (stdio transport for AI agents)"); + console.log(" qmd sync [--dry-run] - Secure two-way sync with a remote QMD host"); console.log(" qmd bench - Run search quality benchmarks against a fixture file"); console.log(""); console.log("Collections & context:"); @@ -2728,6 +2738,7 @@ function showHelp(): void { console.log(" --max-docs-per-batch - Cap docs loaded into memory per embedding batch"); console.log(" --max-batch-mb - Cap UTF-8 MB loaded into memory per embedding batch"); console.log(" qmd cleanup - Clear caches, vacuum DB"); + console.log(" qmd sync --dry-run - Preview SSH/rsync sync against the default remote"); console.log(""); console.log("Query syntax (qmd query):"); console.log(" QMD queries are either a single expand query (no prefix) or a multi-line"); @@ -2774,6 +2785,16 @@ function showHelp(): void { console.log(" --index - Use a 
named index (default: index)"); console.log(" QMD_EDITOR_URI - Editor link template for clickable TTY search output"); console.log(""); + console.log("Sync options:"); + console.log(" --host - SSH target (default root@xworkmate-bridge.svc.plus)"); + console.log(" --remote-qmd-user - Remote QMD owner (default ubuntu)"); + console.log(" --remote-home - Remote QMD home (default /home/ubuntu)"); + console.log(" --dry-run - Preview without writing files"); + console.log(" --delete - Allow rsync deletes after conflict filtering"); + console.log(" --update - Run qmd update locally and remotely after successful sync"); + console.log(" --embed - With --update, run qmd embed locally and remotely"); + console.log(" --yes - Reserved for non-interactive apply flows"); + console.log(""); console.log("Search options:"); console.log(" -n - Max results (default 5, or 20 for --files/--json)"); console.log(" --all - Return all matches (pair with --min-score)"); @@ -2847,6 +2868,31 @@ if (isMain) { process.exit(0); } + if (cli.values.help && cli.command === "sync") { + console.log("Usage: qmd sync [options]"); + console.log(""); + console.log("Synchronize QMD source files and YAML config with a remote QMD host."); + console.log("SQLite indexes are intentionally not synced; run qmd update/embed manually after sync."); + console.log(""); + console.log("Options:"); + console.log(" --host SSH target (default root@xworkmate-bridge.svc.plus)"); + console.log(" --remote-qmd-user Remote QMD owner (default ubuntu)"); + console.log(" --remote-home Remote QMD home (default /home/ubuntu)"); + console.log(" -c, --collection Limit sync to one or more collection names; skips config sync"); + console.log(" --dry-run Preview rsync changes without writing files"); + console.log(" --delete Allow deletes after conflict filtering"); + console.log(" --update Run qmd update locally and remotely after successful sync"); + console.log(" --embed With --update, run qmd embed locally and remotely"); + 
console.log(" --json Print a machine-readable summary"); + console.log(""); + console.log("Examples:"); + console.log(" qmd sync --dry-run"); + console.log(" qmd sync --dry-run --update"); + console.log(" qmd sync --host root@xworkmate-bridge.svc.plus --remote-qmd-user ubuntu"); + console.log(" qmd sync --collection openclaw-workspace"); + process.exit(0); + } + if (!cli.command || cli.values.help) { showHelp(); process.exit(cli.values.help ? 0 : 1); @@ -3123,6 +3169,42 @@ if (isMain) { } break; + case "sync": { + try { + const { runQmdSync, formatSyncSummary } = await import("../sync.js"); + const summary = await runQmdSync({ + host: cli.values.host as string | undefined, + remoteUser: cli.values["remote-user"] as string | undefined, + remoteQmdUser: cli.values["remote-qmd-user"] as string | undefined, + remoteHome: cli.values["remote-home"] as string | undefined, + collection: Array.isArray(cli.opts.collection) + ? cli.opts.collection + : cli.opts.collection + ? [cli.opts.collection] + : undefined, + dryRun: Boolean(cli.values["dry-run"]), + delete: Boolean(cli.values.delete), + update: Boolean(cli.values.update), + embed: Boolean(cli.values.embed), + yes: Boolean(cli.values.yes), + json: cli.opts.format === "json", + localQmdCommand: [process.execPath, fileURLToPath(import.meta.url)], + }); + if (cli.opts.format === "json") { + console.log(JSON.stringify(summary, null, 2)); + } else { + console.log(formatSyncSummary(summary)); + } + if (summary.failed) { + process.exit(1); + } + } catch (error) { + console.error(error instanceof Error ? error.message : String(error)); + process.exit(1); + } + break; + } + case "pull": { const refresh = cli.values.refresh === undefined ? false : Boolean(cli.values.refresh); const isLocalModelUri = (uri: string) => diff --git a/src/llm.ts b/src/llm.ts index e8be977..bf49ed2 100644 --- a/src/llm.ts +++ b/src/llm.ts @@ -193,7 +193,7 @@ export type RerankDocument = { // Embeddings use NVIDIA's OpenAI-compatible API by default. 
// Set QMD_ENABLE_LOCAL_MODELS=1 before using any local node-llama-cpp GGUF models. -const DEFAULT_EMBED_MODEL = "nvidia/llama-3.2-nv-embedqa-1b-v2"; +const DEFAULT_EMBED_MODEL = "nvidia/llama-nemotron-embed-1b-v2"; const DEFAULT_RERANK_MODEL = "hf:ggml-org/Qwen3-Reranker-0.6B-Q8_0-GGUF/qwen3-reranker-0.6b-q8_0.gguf"; // const DEFAULT_GENERATE_MODEL = "hf:ggml-org/Qwen3-0.6B-GGUF/Qwen3-0.6B-Q8_0.gguf"; const DEFAULT_GENERATE_MODEL = "hf:tobil/qmd-query-expansion-1.7B-gguf/qmd-query-expansion-1.7B-q4_k_m.gguf"; diff --git a/src/store.ts b/src/store.ts index b0f4ee3..80b94a3 100644 --- a/src/store.ts +++ b/src/store.ts @@ -27,6 +27,8 @@ import { formatDocForEmbedding, withLLMSessionForLlm, DEFAULT_EMBED_MODEL_URI, + DEFAULT_RERANK_MODEL_URI, + DEFAULT_GENERATE_MODEL_URI, localModelsEnabled, type RerankDocument, type ILLMSession, @@ -44,8 +46,8 @@ import type { const HOME = process.env.HOME || process.env.USERPROFILE || "/tmp"; export const DEFAULT_EMBED_MODEL = DEFAULT_EMBED_MODEL_URI; -export const DEFAULT_RERANK_MODEL = "ExpedientFalcon/qwen3-reranker:0.6b-q8_0"; -export const DEFAULT_QUERY_MODEL = "Qwen/Qwen3-1.7B"; +export const DEFAULT_RERANK_MODEL = DEFAULT_RERANK_MODEL_URI; +export const DEFAULT_QUERY_MODEL = DEFAULT_GENERATE_MODEL_URI; export const DEFAULT_GLOB = "**/*.md"; export const DEFAULT_MULTI_GET_MAX_BYTES = 10 * 1024; // 10KB export const DEFAULT_EMBED_MAX_DOCS_PER_BATCH = 64; diff --git a/src/sync.ts b/src/sync.ts new file mode 100644 index 0000000..f5683f8 --- /dev/null +++ b/src/sync.ts @@ -0,0 +1,868 @@ +import { execFile } from "child_process"; +import { mkdtemp, mkdir, rm, writeFile } from "fs/promises"; +import { existsSync, mkdirSync, rmSync, writeFileSync } from "fs"; +import { join, dirname } from "path"; +import { homedir, hostname, tmpdir } from "os"; +import YAML from "yaml"; +import type { CollectionConfig } from "./collections.js"; +import { getConfigPath, loadConfig } from "./collections.js"; + +export type SyncOptions = { + host?: 
string; + remoteUser?: string; + remoteQmdUser?: string; + remoteHome?: string; + collection?: string[]; + dryRun?: boolean; + delete?: boolean; + update?: boolean; + embed?: boolean; + yes?: boolean; + json?: boolean; + localQmdCommand?: string[]; + runCommand?: CommandRunner; +}; + +export type CommandRunner = ( + command: string, + args: string[], + options?: { cwd?: string } +) => Promise; + +export type CommandResult = { + stdout: string; + stderr: string; + exitCode: number; +}; + +export type SyncCollectionPlan = { + name: string; + direction: "bidirectional" | "download-mirror" | "upload-mirror"; + localPath: string; + remotePath: string; + pattern?: string; + localConfigured: boolean; + remoteConfigured: boolean; +}; + +export type SyncDependencyStatus = { + qmdVersion?: string; + rsync: boolean; + flock: boolean; + warnings: string[]; +}; + +export type SyncRsyncResult = { + label: string; + phase: "preflight" | "apply"; + direction: "download" | "upload"; + source: string; + destination: string; + itemized: string[]; + skipped: boolean; + reason?: string; +}; + +export type PostSyncResult = { + side: "local" | "remote"; + action: "update" | "embed"; + command: string[]; + skipped: boolean; + reason?: string; + exitCode?: number; + stdout?: string; + stderr?: string; +}; + +export type SyncConflict = { + collection: string; + path: string; + localConflictPath: string; + remoteConflictPath: string; +}; + +export type SyncSummary = { + dryRun: boolean; + host: string; + remoteQmdUser: string; + localConfigPath: string; + remoteConfigPath: string; + collections: SyncCollectionPlan[]; + dependencies: SyncDependencyStatus; + rsync: SyncRsyncResult[]; + conflicts: SyncConflict[]; + postSync: PostSyncResult[]; + failed: boolean; + warnings: string[]; + nextSteps: string[]; +}; + +const DEFAULT_HOST = "root@xworkmate-bridge.svc.plus"; +const DEFAULT_REMOTE_QMD_USER = "ubuntu"; +const DEFAULT_REMOTE_HOME = "/home/ubuntu"; +const REMOTE_CONFIG_RELATIVE = 
".config/qmd/index.yml"; +const REMOTE_CACHE_RELATIVE = ".cache/qmd"; + +export const QMD_SYNC_EXCLUDES = [ + ".git/", + "node_modules/", + "vendor/", + "dist/", + "build/", + ".cache/", + ".qmd-rsync-partial/", + ".qmd-rsync-tmp/", + "*.sqlite", + "*.sqlite-wal", + "*.sqlite-shm", + "models/", +]; + +export function getDefaultSyncOptions(options: SyncOptions = {}): Required> & { + collection?: string[]; + localQmdCommand: string[]; + runCommand: CommandRunner; +} { + return { + host: options.host || DEFAULT_HOST, + remoteUser: options.remoteUser || "", + remoteQmdUser: options.remoteQmdUser || DEFAULT_REMOTE_QMD_USER, + remoteHome: options.remoteHome || DEFAULT_REMOTE_HOME, + collection: options.collection, + dryRun: Boolean(options.dryRun), + delete: Boolean(options.delete), + update: Boolean(options.update), + embed: Boolean(options.embed), + yes: Boolean(options.yes), + json: Boolean(options.json), + localQmdCommand: options.localQmdCommand?.length ? options.localQmdCommand : ["qmd"], + runCommand: options.runCommand || defaultRunCommand, + }; +} + +export function shellQuote(value: string): string { + return `'${value.replace(/'/g, `'\\''`)}'`; +} + +export function remoteUserCommand(user: string, command: string): string { + return `sudo -u ${shellQuote(user)} sh -lc ${shellQuote(command)}`; +} + +export function remoteRsyncPath(user: string): string { + return `sudo -u ${shellQuote(user)} rsync`; +} + +export function getLocalSyncDataRoot(host: string): string { + const base = process.env.XDG_DATA_HOME || join(homedir(), ".local", "share"); + return join(base, "qmd", "sync", sanitizePathSegment(host)); +} + +export function getRemoteSyncDataRoot(remoteHome: string, localHost: string = hostname()): string { + return `${remoteHome}/.local/share/qmd/sync/${sanitizePathSegment(localHost)}`; +} + +export function sanitizePathSegment(value: string): string { + const sanitized = value.replace(/[^a-zA-Z0-9._-]+/g, "_").replace(/^_+|_+$/g, ""); + return sanitized || 
"default"; +} + +export function parseConfigYaml(raw: string, label: string): CollectionConfig { + try { + const parsed = YAML.parse(raw || "collections: {}\n") as CollectionConfig | null; + return { ...parsed, collections: parsed?.collections || {} }; + } catch (error) { + throw new Error(`Failed to parse ${label}: ${error instanceof Error ? error.message : String(error)}`); + } +} + +export function buildCollectionPlans(params: { + localConfig: CollectionConfig; + remoteConfig: CollectionConfig; + host: string; + remoteHome: string; + collectionNames?: string[]; +}): SyncCollectionPlan[] { + const localCollections = params.localConfig.collections || {}; + const remoteCollections = params.remoteConfig.collections || {}; + const names = params.collectionNames?.length + ? params.collectionNames + : Array.from(new Set([...Object.keys(localCollections), ...Object.keys(remoteCollections)])).sort(); + + const localMirrorRoot = getLocalSyncDataRoot(params.host); + const remoteMirrorRoot = getRemoteSyncDataRoot(params.remoteHome); + + return names.map((name) => { + const local = localCollections[name]; + const remote = remoteCollections[name]; + if (local && remote) { + return { + name, + direction: "bidirectional", + localPath: local.path, + remotePath: remote.path, + pattern: local.pattern || remote.pattern, + localConfigured: true, + remoteConfigured: true, + }; + } + if (remote) { + return { + name, + direction: "download-mirror", + localPath: join(localMirrorRoot, name), + remotePath: remote.path, + pattern: remote.pattern, + localConfigured: false, + remoteConfigured: true, + }; + } + if (local) { + return { + name, + direction: "upload-mirror", + localPath: local.path, + remotePath: `${remoteMirrorRoot}/${name}`, + pattern: local.pattern, + localConfigured: true, + remoteConfigured: false, + }; + } + return { + name, + direction: "bidirectional", + localPath: join(localMirrorRoot, name), + remotePath: `${remoteMirrorRoot}/${name}`, + pattern: undefined, + 
localConfigured: false, + remoteConfigured: false, + }; + }); +} + +export function includePatternsForCollection(pattern?: string): string[] { + if (!pattern) return []; + if (pattern === "**/*.md") return ["*/", "*.md"]; + return []; +} + +export function buildRsyncArgs(params: { // Builds the rsync argv for one transfer; filter rules are emitted as excludes, exclude-from, includes, then the catch-all exclude + source: string; + destination: string; + remoteQmdUser: string; + dryRun?: boolean; + delete?: boolean; + excludes?: string[]; + includes?: string[]; + excludeFrom?: string; + preserveFilePath?: boolean; + tempDir?: string; +}): string[] { + const args = [ + "-az", + "--itemize-changes", + "--partial", + "--partial-dir=.qmd-rsync-partial", + "--delay-updates", + "--rsync-path", + remoteRsyncPath(params.remoteQmdUser), + ]; + + if (!params.dryRun) { + args.push("--temp-dir", params.tempDir || ".qmd-rsync-tmp"); + } + if (params.dryRun) args.push("--dry-run"); + if (params.delete) args.push("--delete"); + for (const pattern of params.excludes || QMD_SYNC_EXCLUDES) { // rsync acts on the first matching filter rule, so excludes must come before any include + args.push("--exclude", pattern); + } + if (params.excludeFrom) { // the conflict list must also outrank the includes, otherwise "*.md" would re-admit conflicting files + args.push("--exclude-from", params.excludeFrom); + } + for (const pattern of params.includes || []) { + args.push("--include", pattern); + } + if (params.includes?.length) { // catch-all goes last: drop everything the includes did not admit + args.push("--exclude", "*"); + } + args.push( + formatRsyncEndpoint(params.preserveFilePath ? params.source : ensureTrailingSlash(params.source)), + formatRsyncEndpoint(params.preserveFilePath ?
params.destination : ensureTrailingSlash(params.destination)), + ); + return args; +} + +export function parseRsyncItemized(stdout: string): string[] { + return stdout.split(/\r?\n/) + .map(line => line.trim()) + .filter(Boolean) + .filter(line => !line.endsWith("/")) + .map(line => { + const match = line.match(/^.{11}\s+(.+)$/); + return match?.[1] || ""; + }) + .filter(path => path.length > 0) + .filter(path => !path.startsWith(".qmd-rsync-")); +} + +export function detectConflicts(collection: string, downloadPaths: string[], uploadPaths: string[], timestamp: string): SyncConflict[] { + const uploads = new Set(uploadPaths); + return downloadPaths + .filter(path => uploads.has(path)) + .map(path => ({ + collection, + path, + localConflictPath: `${path}.conflict.remote.${timestamp}`, + remoteConflictPath: `${path}.conflict.local.${timestamp}`, + })); +} + +export function formatSyncSummary(summary: SyncSummary): string { + const lines: string[] = []; + lines.push("QMD Sync"); + lines.push(""); + lines.push(`Host: ${summary.host}`); + lines.push(`Remote user: ${summary.remoteQmdUser}`); + lines.push(`Local config: ${summary.localConfigPath}`); + lines.push(`Remote config: ${summary.remoteConfigPath}`); + lines.push(`Mode: ${summary.dryRun ? "dry-run" : "apply"}`); + lines.push(""); + lines.push("Collections:"); + for (const plan of summary.collections) { + lines.push(` ${plan.name}: ${plan.direction}`); + lines.push(` local: ${plan.localPath}${plan.localConfigured ? "" : " (mirror)"}`); + lines.push(` remote: ${plan.remotePath}${plan.remoteConfigured ? 
"" : " (mirror)"}`); + } + if (summary.warnings.length > 0 || summary.dependencies.warnings.length > 0) { + lines.push(""); + lines.push("Warnings:"); + for (const warning of [...summary.dependencies.warnings, ...summary.warnings]) { + lines.push(` ${warning}`); + } + } + lines.push(""); + lines.push("Rsync:"); + for (const result of summary.rsync) { + const count = result.itemized.length; + const suffix = result.skipped ? ` skipped (${result.reason})` : `${count} item(s)`; + lines.push(` ${result.label} ${result.phase} ${result.direction}: ${suffix}`); + } + if (summary.postSync.length > 0) { + lines.push(""); + lines.push("Post-sync:"); + for (const result of summary.postSync) { + const command = result.command.join(" "); + if (result.skipped) { + lines.push(` ${result.side} ${result.action}: skipped (${result.reason})`); + } else { + lines.push(` ${result.side} ${result.action}: exit ${result.exitCode ?? 0} (${command})`); + } + } + } + lines.push(""); + lines.push(`Conflicts: ${summary.conflicts.length}`); + for (const conflict of summary.conflicts) { + lines.push(` ${conflict.collection}/${conflict.path}`); + lines.push(` local copy: ${conflict.localConflictPath}`); + lines.push(` remote copy: ${conflict.remoteConflictPath}`); + } + lines.push(""); + lines.push("Next steps:"); + for (const step of summary.nextSteps) { + lines.push(` ${step}`); + } + return `${lines.join("\n")}\n`; +} + +export async function runQmdSync(options: SyncOptions = {}): Promise { + const opts = getDefaultSyncOptions(options); + const localConfig = loadConfig(); + const localConfigPath = getConfigPath(); + const remoteConfigPath = `${opts.remoteHome}/${REMOTE_CONFIG_RELATIVE}`; + const remoteCachePath = `${opts.remoteHome}/${REMOTE_CACHE_RELATIVE}`; + const warnings: string[] = []; + + const localLockDir = getLocalLockDir(); + acquireLocalLock(localLockDir); + let tempDir: string | undefined; + try { + const dependencies = await probeRemote(opts.runCommand, opts.host, 
opts.remoteQmdUser, opts.remoteHome); + const remoteConfigRaw = await readRemoteConfig(opts.runCommand, opts.host, opts.remoteQmdUser, remoteConfigPath); + const remoteConfig = parseConfigYaml(remoteConfigRaw, remoteConfigPath); + const plans = buildCollectionPlans({ + localConfig, + remoteConfig, + host: opts.host, + remoteHome: opts.remoteHome, + collectionNames: opts.collection, + }); + + const missing = plans.filter(plan => !plan.localConfigured && !plan.remoteConfigured).map(plan => plan.name); + for (const name of missing) { + warnings.push(`collection not found locally or remotely: ${name}`); + } + + const summary: SyncSummary = { + dryRun: opts.dryRun, + host: opts.host, + remoteQmdUser: opts.remoteQmdUser, + localConfigPath, + remoteConfigPath, + collections: plans, + dependencies, + rsync: [], + conflicts: [], + postSync: [], + failed: false, + warnings, + nextSteps: [], + }; + + tempDir = await mkdtemp(join(tmpdir(), "qmd-sync-")); + const excludeFrom = join(tempDir, "conflicts.exclude"); + + if (!opts.dryRun) { + for (const plan of plans) { + mkdirSync(plan.localPath, { recursive: true }); + mkdirSync(join(plan.localPath, ".qmd-rsync-tmp"), { recursive: true }); + } + mkdirSync(join(dirname(localConfigPath), ".qmd-rsync-tmp"), { recursive: true }); + await ensureRemoteDirs(opts.runCommand, opts.host, opts.remoteQmdUser, [ + dirname(remoteConfigPath), + remoteCachePath, + ...plans.map(plan => plan.remotePath), + `${dirname(remoteConfigPath)}/.qmd-rsync-tmp`, + ...plans.map(plan => `${plan.remotePath}/.qmd-rsync-tmp`), + ]); + await runRemoteLockProbe(opts.runCommand, opts.host, opts.remoteQmdUser, `${remoteCachePath}/sync.lock`); + } + + const configPlan: SyncCollectionPlan = { + name: "config", + direction: "bidirectional", + localPath: dirname(localConfigPath), + remotePath: dirname(remoteConfigPath), + pattern: undefined, + localConfigured: true, + remoteConfigured: true, + }; + const allPlans = [ + ...(opts.collection?.length ? 
[] : [configPlan]), + ...plans.filter(plan => plan.localConfigured || plan.remoteConfigured), + ]; + + for (const plan of allPlans) { + const preflight = await dryRunPair(opts, plan); + summary.rsync.push(...preflight.results); + const conflicts = detectConflicts(plan.name, preflight.downloadPaths, preflight.uploadPaths, timestampForConflict()); + summary.conflicts.push(...conflicts); + await writeFile(excludeFrom, conflicts.map(c => `/${c.path}`).join("\n")); // leading "/" anchors each rule at the transfer root; an unanchored "a.md" would also exclude "sub/a.md" + + if (!opts.dryRun && conflicts.length > 0) { + await syncConflictCopies(opts, plan, conflicts); + } + if (!opts.dryRun) { + const applyResults = await applyPair(opts, plan, excludeFrom); + summary.rsync.push(...applyResults); + } + } + + const failedApply = summary.rsync.some(result => result.phase === "apply" && result.skipped); + if (failedApply) { + summary.failed = true; + summary.postSync.push(...plannedPostSync(opts, "sync failed; update/embed not run")); + } else { + summary.postSync.push(...await runPostSync(opts)); + if (summary.postSync.some(result => !result.skipped && (result.exitCode ??
0) !== 0)) { + summary.failed = true; + } + } + + summary.nextSteps = buildNextSteps(opts, summary); + return summary; + } finally { + releaseLocalLock(localLockDir); + if (tempDir) await rm(tempDir, { recursive: true, force: true }); + } +} + +async function dryRunPair(opts: ReturnType, plan: SyncCollectionPlan): Promise<{ + results: SyncRsyncResult[]; + downloadPaths: string[]; + uploadPaths: string[]; +}> { + const download = await runRsync(opts, { + label: plan.name, + phase: "preflight", + direction: "download", + source: `${opts.host}:${plan.remotePath}`, + destination: plan.localPath, + dryRun: true, + pattern: plan.pattern, + }); + const upload = await runRsync(opts, { + label: plan.name, + phase: "preflight", + direction: "upload", + source: plan.localPath, + destination: `${opts.host}:${plan.remotePath}`, + dryRun: true, + pattern: plan.pattern, + }); + return { + results: [download, upload], + downloadPaths: download.itemized, + uploadPaths: upload.itemized, + }; +} + +async function applyPair(opts: ReturnType, plan: SyncCollectionPlan, excludeFrom: string): Promise { + const download = await runRsync(opts, { + label: plan.name, + phase: "apply", + direction: "download", + source: `${opts.host}:${plan.remotePath}`, + destination: plan.localPath, + dryRun: false, + excludeFrom, + pattern: plan.pattern, + }); + const upload = await runRsync(opts, { + label: plan.name, + phase: "apply", + direction: "upload", + source: plan.localPath, + destination: `${opts.host}:${plan.remotePath}`, + dryRun: false, + excludeFrom, + pattern: plan.pattern, + }); + return [download, upload]; +} + +async function runRsync(opts: ReturnType, params: { + label: string; + phase: "preflight" | "apply"; + direction: "download" | "upload"; + source: string; + destination: string; + dryRun: boolean; + excludeFrom?: string; + pattern?: string; +}): Promise { + const args = buildRsyncArgs({ + source: params.source, + destination: params.destination, + remoteQmdUser: opts.remoteQmdUser, 
+ dryRun: params.dryRun, + delete: opts.delete, + excludeFrom: params.excludeFrom, + tempDir: params.direction === "download" + ? `${params.destination.replace(/\/$/, "")}/.qmd-rsync-tmp` + : `${stripRemotePrefix(params.destination).replace(/\/$/, "")}/.qmd-rsync-tmp`, + includes: includePatternsForCollection(params.label === "config" ? undefined : params.pattern), + }); + if (isMissingLocalSource(params.source)) { + return { + label: params.label, + phase: params.phase, + direction: params.direction, + source: params.source, + destination: params.destination, + itemized: [], + skipped: true, + reason: `local source does not exist: ${params.source}`, + }; + } + const result = await opts.runCommand("rsync", args); + if (result.exitCode !== 0) { + return { + label: params.label, + phase: params.phase, + direction: params.direction, + source: params.source, + destination: params.destination, + itemized: [], + skipped: true, + reason: (result.stderr || result.stdout || `rsync exited ${result.exitCode}`).trim(), + }; + } + return { + label: params.label, + phase: params.phase, + direction: params.direction, + source: params.source, + destination: params.destination, + itemized: parseRsyncItemized(result.stdout), + skipped: false, + }; +} + +async function syncConflictCopies(opts: ReturnType, plan: SyncCollectionPlan, conflicts: SyncConflict[]): Promise { + for (const conflict of conflicts) { + const localSource = join(plan.localPath, conflict.path); + const localConflictDestination = join(plan.localPath, conflict.localConflictPath); + const remoteSource = `${opts.host}:${plan.remotePath}/${conflict.path}`; + const remoteConflictDestination = `${opts.host}:${plan.remotePath}/${conflict.remoteConflictPath}`; + mkdirSync(dirname(localConflictDestination), { recursive: true }); + mkdirSync(join(dirname(localConflictDestination), ".qmd-rsync-tmp"), { recursive: true }); + await opts.runCommand("rsync", buildRsyncArgs({ + source: remoteSource, + destination: 
localConflictDestination, + remoteQmdUser: opts.remoteQmdUser, + dryRun: false, + delete: false, + preserveFilePath: true, + tempDir: `${dirname(localConflictDestination)}/.qmd-rsync-tmp`, + })); + await ensureRemoteDirs(opts.runCommand, opts.host, opts.remoteQmdUser, [remoteDirname(`${plan.remotePath}/${conflict.remoteConflictPath}`)]); + await ensureRemoteDirs(opts.runCommand, opts.host, opts.remoteQmdUser, [`${remoteDirname(`${plan.remotePath}/${conflict.remoteConflictPath}`)}/.qmd-rsync-tmp`]); + await opts.runCommand("rsync", buildRsyncArgs({ + source: localSource, + destination: remoteConflictDestination, + remoteQmdUser: opts.remoteQmdUser, + dryRun: false, + delete: false, + preserveFilePath: true, + tempDir: `${remoteDirname(`${plan.remotePath}/${conflict.remoteConflictPath}`)}/.qmd-rsync-tmp`, + })); + } +} + +export function buildPostSyncCommands(opts: ReturnType): PostSyncResult[] { + if (!opts.update) { + if (opts.embed) { + return [{ + side: "local", + action: "embed", + command: [...opts.localQmdCommand, "embed"], + skipped: true, + reason: "--embed requires --update", + }]; + } + return []; + } + + const localUpdate = [...opts.localQmdCommand, "update"]; + const remoteUpdateCommand = "qmd update"; + const results: PostSyncResult[] = [ + { + side: "local", + action: "update", + command: localUpdate, + skipped: opts.dryRun, + ...(opts.dryRun ? { reason: "dry-run" } : {}), + }, + { + side: "remote", + action: "update", + command: ["ssh", opts.host, remoteUserCommand(opts.remoteQmdUser, remoteUpdateCommand)], + skipped: opts.dryRun, + ...(opts.dryRun ? { reason: "dry-run" } : {}), + }, + ]; + + if (opts.embed) { + results.push( + { + side: "local", + action: "embed", + command: [...opts.localQmdCommand, "embed"], + skipped: opts.dryRun, + ...(opts.dryRun ? 
{ reason: "dry-run" } : {}), + }, + { + side: "remote", + action: "embed", + command: ["ssh", opts.host, remoteUserCommand(opts.remoteQmdUser, "qmd embed")], + skipped: opts.dryRun, + ...(opts.dryRun ? { reason: "dry-run" } : {}), + }, + ); + } + + return results; +} + +function plannedPostSync(opts: ReturnType, reason: string): PostSyncResult[] { + return buildPostSyncCommands(opts).map(result => ({ + ...result, + skipped: true, + reason: result.reason || reason, + })); +} + +async function runPostSync(opts: ReturnType): Promise { + const planned = buildPostSyncCommands(opts); + const results: PostSyncResult[] = []; + + for (const step of planned) { + if (step.skipped) { + results.push(step); + continue; + } + const [command, ...args] = step.command; + if (!command) { + results.push({ ...step, skipped: true, reason: "empty command" }); + continue; + } + const result = await opts.runCommand(command, args); + const completed: PostSyncResult = { + ...step, + exitCode: result.exitCode, + stdout: truncateOutput(result.stdout), + stderr: truncateOutput(result.stderr), + }; + results.push(completed); + if (result.exitCode !== 0) { + break; + } + } + + if (results.length < planned.length) { + for (const step of planned.slice(results.length)) { + results.push({ ...step, skipped: true, reason: "previous post-sync step failed" }); + } + } + + return results; +} + +function buildNextSteps(opts: ReturnType, summary: SyncSummary): string[] { + if (summary.failed) { + return ["Review failed rsync or post-sync steps before rerunning qmd sync."]; + } + if (opts.dryRun && opts.update) { + return ["Dry-run only; run qmd sync --update without --dry-run to refresh indexes on both sides."]; + } + if (opts.update && opts.embed) { + return ["Indexes and embeddings were refreshed on both sides."]; + } + if (opts.update) { + return ["Indexes were refreshed on both sides.", "Run qmd embed manually when vector embeddings should be refreshed."]; + } + return [ + "Run qmd update manually on 
each side after reviewing synced files.", + "Run qmd embed manually when vector embeddings should be refreshed.", + ]; +} + +function truncateOutput(value: string): string { + const trimmed = value.trim(); + if (trimmed.length <= 4000) return trimmed; + return `${trimmed.slice(0, 4000)}\n... truncated ...`; +} + +async function probeRemote(runCommand: CommandRunner, host: string, remoteQmdUser: string, remoteHome: string): Promise { + const command = [ + "set -eu", + "command -v rsync >/dev/null 2>&1 && echo rsync=1 || echo rsync=0", + "command -v flock >/dev/null 2>&1 && echo flock=1 || echo flock=0", + "command -v qmd >/dev/null 2>&1 && qmd --version 2>/dev/null || true", + `test -d ${shellQuote(remoteHome)} || echo missing_home=1`, + ].join("; "); + const result = await runCommand("ssh", [host, remoteUserCommand(remoteQmdUser, command)]); + if (result.exitCode !== 0) { + throw new Error(`Remote probe failed: ${(result.stderr || result.stdout).trim()}`); + } + const lines = result.stdout.split(/\r?\n/).map(line => line.trim()).filter(Boolean); + const warnings: string[] = []; + const rsync = lines.includes("rsync=1"); + const flock = lines.includes("flock=1"); + const qmdVersion = lines.find(line => line.startsWith("qmd ")); + if (!rsync) warnings.push("remote rsync is missing"); + if (!flock) warnings.push("remote flock is missing"); + if (!qmdVersion) warnings.push("remote qmd version could not be detected"); + if (lines.includes("missing_home=1")) warnings.push(`remote home does not exist: ${remoteHome}`); + return { qmdVersion, rsync, flock, warnings }; +} + +async function readRemoteConfig(runCommand: CommandRunner, host: string, remoteQmdUser: string, remoteConfigPath: string): Promise { + const command = `test -f ${shellQuote(remoteConfigPath)} && cat ${shellQuote(remoteConfigPath)} || printf 'collections: {}\\n'`; + const result = await runCommand("ssh", [host, remoteUserCommand(remoteQmdUser, command)]); + if (result.exitCode !== 0) { + throw new 
Error(`Remote config read failed: ${(result.stderr || result.stdout).trim()}`); + } + return result.stdout; +} + +async function ensureRemoteDirs(runCommand: CommandRunner, host: string, remoteQmdUser: string, paths: string[]): Promise { + const unique = Array.from(new Set(paths.filter(Boolean))); + if (unique.length === 0) return; + const command = `mkdir -p ${unique.map(shellQuote).join(" ")}`; + const result = await runCommand("ssh", [host, remoteUserCommand(remoteQmdUser, command)]); + if (result.exitCode !== 0) { + throw new Error(`Remote directory creation failed: ${(result.stderr || result.stdout).trim()}`); + } +} + +async function runRemoteLockProbe(runCommand: CommandRunner, host: string, remoteQmdUser: string, lockPath: string): Promise { + const command = `mkdir -p ${shellQuote(dirname(lockPath))} && (flock -n 9 || exit 75) 9>${shellQuote(lockPath)}`; + const result = await runCommand("ssh", [host, remoteUserCommand(remoteQmdUser, command)]); + if (result.exitCode !== 0) { + throw new Error(`Remote sync lock is busy or unavailable: ${(result.stderr || result.stdout).trim()}`); + } +} + +function getLocalLockDir(): string { + const cacheRoot = process.env.XDG_CACHE_HOME || join(homedir(), ".cache"); + return join(cacheRoot, "qmd", "sync.lock.d"); +} + +function acquireLocalLock(lockDir: string): void { + mkdirSync(dirname(lockDir), { recursive: true }); + if (existsSync(lockDir)) { + throw new Error(`Local sync lock is busy: ${lockDir}`); + } + mkdirSync(lockDir); + writeFileSync(join(lockDir, "owner"), `${process.pid}\n`); +} + +function releaseLocalLock(lockDir: string): void { + rmSync(lockDir, { recursive: true, force: true }); +} + +function defaultRunCommand(command: string, args: string[], options?: { cwd?: string }): Promise { + return new Promise((resolve) => { + execFile(command, args, { cwd: options?.cwd, maxBuffer: 50 * 1024 * 1024 }, (error, stdout, stderr) => { + const code = typeof (error as NodeJS.ErrnoException | null)?.code === "number" 
+ ? Number((error as NodeJS.ErrnoException).code) + : error + ? 1 + : 0; + resolve({ stdout: String(stdout || ""), stderr: String(stderr || ""), exitCode: code }); + }); + }); +} + +function ensureTrailingSlash(path: string): string { + return path.endsWith("/") ? path : `${path}/`; +} + +function formatRsyncEndpoint(endpoint: string): string { + const colon = endpoint.indexOf(":"); + if (colon <= 0) return endpoint; + const host = endpoint.slice(0, colon); + const path = endpoint.slice(colon + 1); + if (!path || path.startsWith("'")) return endpoint; + if (/^[A-Za-z0-9_@%+=:,./-]+$/.test(path)) return endpoint; + return `${host}:${shellQuote(path)}`; +} + +function timestampForConflict(): string { + return new Date().toISOString().replace(/[-:]/g, "").replace(/\..+$/, "Z"); +} + +function remoteDirname(path: string): string { + const idx = path.lastIndexOf("/"); + return idx <= 0 ? "/" : path.slice(0, idx); +} + +function stripRemotePrefix(path: string): string { + const idx = path.indexOf(":"); + return idx >= 0 ? 
path.slice(idx + 1) : path; +} + +function isMissingLocalSource(path: string): boolean { + if (path.includes(":")) return false; + return !existsSync(path); +} diff --git a/test/llm.test.ts b/test/llm.test.ts index 8ee11a2..d27a8bf 100644 --- a/test/llm.test.ts +++ b/test/llm.test.ts @@ -150,7 +150,7 @@ describe("LlamaCpp expand context size config", () => { }); describe("LlamaCpp model resolution (config > env > default)", () => { - const HARDCODED_EMBED = "nvidia/llama-3.2-nv-embedqa-1b-v2"; + const HARDCODED_EMBED = "nvidia/llama-nemotron-embed-1b-v2"; const HARDCODED_RERANK = "hf:ggml-org/Qwen3-Reranker-0.6B-Q8_0-GGUF/qwen3-reranker-0.6b-q8_0.gguf"; const HARDCODED_GENERATE = "hf:tobil/qmd-query-expansion-1.7B-gguf/qmd-query-expansion-1.7B-q4_k_m.gguf"; @@ -204,7 +204,7 @@ describe("LlamaCpp model resolution (config > env > default)", () => { const fetchMock = vi.spyOn(globalThis, "fetch").mockResolvedValue({ ok: true, json: async () => ({ - model: "nvidia/llama-3.2-nv-embedqa-1b-v2", + model: "nvidia/llama-nemotron-embed-1b-v2", data: [{ index: 0, embedding: [0.1, 0.2, 0.3] }], }), } as Response); @@ -217,13 +217,13 @@ describe("LlamaCpp model resolution (config > env > default)", () => { })); const [, init] = fetchMock.mock.calls[0]!; expect(JSON.parse((init as RequestInit).body as string)).toEqual({ - model: "nvidia/llama-3.2-nv-embedqa-1b-v2", + model: "nvidia/llama-nemotron-embed-1b-v2", input: ["hello"], input_type: "passage", }); expect(result).toEqual({ embedding: [0.1, 0.2, 0.3], - model: "nvidia/llama-3.2-nv-embedqa-1b-v2", + model: "nvidia/llama-nemotron-embed-1b-v2", }); } finally { fetchMock.mockRestore(); @@ -248,17 +248,17 @@ describe("LlamaCpp model resolution (config > env > default)", () => { const fetchMock = vi.spyOn(globalThis, "fetch").mockResolvedValue({ ok: true, json: async () => ({ - model: "nvidia/llama-3.2-nv-embedqa-1b-v2", + model: "nvidia/llama-nemotron-embed-1b-v2", data: [{ index: 0, embedding: [0.1, 0.2, 0.3] }], }), } as 
Response); try { - const llm = new LlamaCpp({ embedModel: "nvidia/llama-3.2-nv-embedqa-1b-v2" }); + const llm = new LlamaCpp({ embedModel: "nvidia/llama-nemotron-embed-1b-v2" }); await llm.embed("hello", { isQuery: true }); const [, init] = fetchMock.mock.calls[0]!; expect(JSON.parse((init as RequestInit).body as string)).toEqual({ - model: "nvidia/llama-3.2-nv-embedqa-1b-v2", + model: "nvidia/llama-nemotron-embed-1b-v2", input: ["hello"], input_type: "query", }); @@ -323,7 +323,7 @@ describe("LlamaCpp model resolution (config > env > default)", () => { }); test("external embedding token counting does not load a local tokenizer", async () => { - const llm = new LlamaCpp({ embedModel: "nvidia/llama-3.2-nv-embedqa-1b-v2" }) as any; + const llm = new LlamaCpp({ embedModel: "nvidia/llama-nemotron-embed-1b-v2" }) as any; llm.ensureEmbedContext = vi.fn(async () => { throw new Error("should not load local tokenizer"); }); @@ -439,15 +439,26 @@ describe("LlamaCpp.getDeviceInfo", () => { describe.skipIf(!!process.env.CI)("LlamaCpp Integration", () => { const LOCAL_EMBED_MODEL = "hf:ggml-org/embeddinggemma-300M-GGUF/embeddinggemma-300M-Q8_0.gguf"; const llm = new LlamaCpp({ embedModel: LOCAL_EMBED_MODEL }); + const prevEnableLocalModels = process.env.QMD_ENABLE_LOCAL_MODELS; + const it = (name: string, fn: () => Promise | void) => test(name, fn, 30000); + + beforeAll(() => { + process.env.QMD_ENABLE_LOCAL_MODELS = "1"; + }); afterAll(async () => { // Ensure native resources are released to avoid ggml-metal asserts on process exit. 
await llm.dispose(); await disposeDefaultLlamaCpp(); + if (prevEnableLocalModels === undefined) { + delete process.env.QMD_ENABLE_LOCAL_MODELS; + } else { + process.env.QMD_ENABLE_LOCAL_MODELS = prevEnableLocalModels; + } }); describe("embed", () => { - test("returns embedding with correct dimensions", async () => { + it("returns embedding with correct dimensions", async () => { const result = await llm.embed("Hello world"); expect(result).not.toBeNull(); @@ -457,7 +468,7 @@ describe.skipIf(!!process.env.CI)("LlamaCpp Integration", () => { expect(result!.embedding.length).toBe(768); }); - test("returns consistent embeddings for same input", async () => { + it("returns consistent embeddings for same input", async () => { const result1 = await llm.embed("test text"); const result2 = await llm.embed("test text"); @@ -470,7 +481,7 @@ describe.skipIf(!!process.env.CI)("LlamaCpp Integration", () => { } }); - test("returns different embeddings for different inputs", async () => { + it("returns different embeddings for different inputs", async () => { const result1 = await llm.embed("cats are great"); const result2 = await llm.embed("database optimization"); @@ -495,7 +506,7 @@ describe.skipIf(!!process.env.CI)("LlamaCpp Integration", () => { }); describe("embedBatch", () => { - test("returns embeddings for multiple texts", async () => { + it("returns embeddings for multiple texts", async () => { const texts = ["Hello world", "Test text", "Another document"]; const results = await llm.embedBatch(texts); @@ -506,7 +517,7 @@ describe.skipIf(!!process.env.CI)("LlamaCpp Integration", () => { } }); - test("returns same results as individual embed calls", async () => { + it("returns same results as individual embed calls", async () => { const texts = ["cats are great", "dogs are awesome"]; // Get batch embeddings @@ -525,12 +536,12 @@ describe.skipIf(!!process.env.CI)("LlamaCpp Integration", () => { } }); - test("handles empty array", async () => { + it("handles empty array", 
async () => { const results = await llm.embedBatch([]); expect(results).toHaveLength(0); }); - test("batch is faster than sequential", async () => { + it("batch is faster than sequential", async () => { const texts = Array(10).fill(null).map((_, i) => `Document number ${i} with content`); // Time batch @@ -550,7 +561,7 @@ describe.skipIf(!!process.env.CI)("LlamaCpp Integration", () => { expect(batchTime).toBeLessThanOrEqual(seqTime * 3); }); - test("handles concurrent embedBatch calls on fresh instance without race condition", async () => { + it("handles concurrent embedBatch calls on fresh instance without race condition", async () => { // This test verifies the fix for a race condition where concurrent calls to // ensureEmbedContext() could create multiple contexts. Without the promise guard, // each concurrent embedBatch call sees embedContext === null and creates its own @@ -614,7 +625,7 @@ describe.skipIf(!!process.env.CI)("LlamaCpp Integration", () => { }); describe("rerank", () => { - test("scores capital of France question correctly", async () => { + it("scores capital of France question correctly", async () => { const query = "What is the capital of France?"; const documents: RerankDocument[] = [ { file: "butterflies.txt", text: "Butterflies indeed fly through the garden." }, @@ -638,7 +649,7 @@ describe.skipIf(!!process.env.CI)("LlamaCpp Integration", () => { expect(result.results[2]!.score).toBeLessThan(0.6); }); - test("scores authentication query correctly", async () => { + it("scores authentication query correctly", async () => { const query = "How do I configure authentication?"; const documents: RerankDocument[] = [ { file: "weather.md", text: "The weather today is sunny with mild temperatures." 
}, @@ -662,7 +673,7 @@ describe.skipIf(!!process.env.CI)("LlamaCpp Integration", () => { expect(bottomTwo).toContain("pizza.md"); }); - test("handles programming queries correctly", async () => { + it("handles programming queries correctly", async () => { const query = "How do I handle errors in JavaScript?"; const documents: RerankDocument[] = [ { file: "cooking.md", text: "To make a good pasta, boil water and add salt." }, @@ -681,18 +692,18 @@ describe.skipIf(!!process.env.CI)("LlamaCpp Integration", () => { expect(result.results[2]!.file).toBe("cooking.md"); }); - test("handles empty document list", async () => { + it("handles empty document list", async () => { const result = await llm.rerank("test query", []); expect(result.results).toHaveLength(0); }); - test("handles single document", async () => { + it("handles single document", async () => { const result = await llm.rerank("test", [{ file: "doc.md", text: "content" }]); expect(result.results).toHaveLength(1); expect(result.results[0]!.file).toBe("doc.md"); }); - test("preserves original file paths", async () => { + it("preserves original file paths", async () => { const documents: RerankDocument[] = [ { file: "path/to/doc1.md", text: "content one" }, { file: "another/path/doc2.md", text: "content two" }, @@ -704,7 +715,7 @@ describe.skipIf(!!process.env.CI)("LlamaCpp Integration", () => { expect(files).toEqual(["another/path/doc2.md", "path/to/doc1.md"]); }); - test("returns scores between 0 and 1", async () => { + it("returns scores between 0 and 1", async () => { const documents: RerankDocument[] = [ { file: "a.md", text: "The quick brown fox jumps over the lazy dog." }, { file: "b.md", text: "Machine learning algorithms process data efficiently." 
}, @@ -719,7 +730,7 @@ describe.skipIf(!!process.env.CI)("LlamaCpp Integration", () => { } }); - test("batch reranks multiple documents efficiently", async () => { + it("batch reranks multiple documents efficiently", async () => { // Create 10 documents to verify batch processing works const documents: RerankDocument[] = Array(10) .fill(null) @@ -744,7 +755,7 @@ describe.skipIf(!!process.env.CI)("LlamaCpp Integration", () => { console.log(`Batch rerank of 10 docs took ${elapsed}ms`); }); - test("uses fewer active rerank contexts for small batches", async () => { + it("uses fewer active rerank contexts for small batches", async () => { const freshLlm = new LlamaCpp({}); const calls: number[] = []; const fakeModel = { @@ -772,7 +783,7 @@ describe.skipIf(!!process.env.CI)("LlamaCpp Integration", () => { expect(calls).toEqual([0, 1]); }); - test("truncates and reranks document exceeding 2048 token context size", async () => { + it("truncates and reranks document exceeding 2048 token context size", async () => { // The reranker context is created with contextSize=2048. Documents that // exceed the token budget (contextSize - template overhead - query tokens) // should be silently truncated rather than crashing. 
@@ -813,7 +824,7 @@ describe.skipIf(!!process.env.CI)("LlamaCpp Integration", () => { }); describe("expandQuery", () => { - test("returns query expansions with correct types", async () => { + it("returns query expansions with correct types", async () => { const result = await llm.expandQuery("test query"); // Result is Queryable[] containing lex, vec, and/or hyde entries @@ -826,7 +837,7 @@ describe.skipIf(!!process.env.CI)("LlamaCpp Integration", () => { } }, 30000); // 30s timeout for model loading - test("can exclude lexical queries", async () => { + it("can exclude lexical queries", async () => { const result = await llm.expandQuery("authentication setup", { includeLexical: false }); // Should not contain any 'lex' type entries @@ -841,8 +852,23 @@ describe.skipIf(!!process.env.CI)("LlamaCpp Integration", () => { // ============================================================================= describe.skipIf(!!process.env.CI)("LLM Session Management", () => { + const prevEnableLocalModels = process.env.QMD_ENABLE_LOCAL_MODELS; + const it = (name: string, fn: () => Promise | void) => test(name, fn, 30000); + + beforeAll(() => { + process.env.QMD_ENABLE_LOCAL_MODELS = "1"; + }); + + afterAll(() => { + if (prevEnableLocalModels === undefined) { + delete process.env.QMD_ENABLE_LOCAL_MODELS; + } else { + process.env.QMD_ENABLE_LOCAL_MODELS = prevEnableLocalModels; + } + }); + describe("withLLMSession", () => { - test("session provides access to LLM operations", async () => { + it("session provides access to LLM operations", async () => { const result = await withLLMSession(async (session) => { expect(session.isValid).toBe(true); const embedding = await session.embed("test text"); @@ -853,7 +879,7 @@ describe.skipIf(!!process.env.CI)("LLM Session Management", () => { expect(result).toBe("success"); }); - test("session is invalid after release", async () => { + it("session is invalid after release", async () => { let capturedSession: ILLMSession | null = null; await 
withLLMSession(async (session) => { @@ -866,7 +892,7 @@ describe.skipIf(!!process.env.CI)("LLM Session Management", () => { expect(capturedSession!.isValid).toBe(false); }); - test("session prevents idle unload during operations", async () => { + it("session prevents idle unload during operations", async () => { await withLLMSession(async (session) => { // While inside a session, canUnloadLLM should return false expect(canUnloadLLM()).toBe(false); @@ -882,7 +908,7 @@ describe.skipIf(!!process.env.CI)("LLM Session Management", () => { expect(canUnloadLLM()).toBe(true); }); - test("nested sessions increment ref count", async () => { + it("nested sessions increment ref count", async () => { await withLLMSession(async (outerSession) => { expect(canUnloadLLM()).toBe(false); @@ -901,7 +927,7 @@ describe.skipIf(!!process.env.CI)("LLM Session Management", () => { expect(canUnloadLLM()).toBe(true); }); - test("session embedBatch works correctly", async () => { + it("session embedBatch works correctly", async () => { await withLLMSession(async (session) => { const texts = ["Hello world", "Test text", "Another document"]; const results = await session.embedBatch(texts); @@ -914,7 +940,7 @@ describe.skipIf(!!process.env.CI)("LLM Session Management", () => { }); }); - test("session rerank works correctly", async () => { + it("session rerank works correctly", async () => { await withLLMSession(async (session) => { const documents: RerankDocument[] = [ { file: "a.txt", text: "The capital of France is Paris." 
}, @@ -929,7 +955,7 @@ describe.skipIf(!!process.env.CI)("LLM Session Management", () => { }); }); - test("max duration aborts session after timeout", async () => { + it("max duration aborts session after timeout", async () => { let aborted = false; try { @@ -951,7 +977,7 @@ describe.skipIf(!!process.env.CI)("LLM Session Management", () => { expect(aborted).toBe(true); }, 5000); - test("external abort signal propagates to session", async () => { + it("external abort signal propagates to session", async () => { const abortController = new AbortController(); let sessionAborted = false; @@ -979,14 +1005,14 @@ describe.skipIf(!!process.env.CI)("LLM Session Management", () => { expect(sessionAborted).toBe(true); }, 5000); - test("session provides abort signal for monitoring", async () => { + it("session provides abort signal for monitoring", async () => { await withLLMSession(async (session) => { expect(session.signal).toBeInstanceOf(AbortSignal); expect(session.signal.aborted).toBe(false); }); }); - test("returns value from callback", async () => { + it("returns value from callback", async () => { const result = await withLLMSession(async (session) => { await session.embed("test"); return { status: "complete", count: 42 }; @@ -995,7 +1021,7 @@ describe.skipIf(!!process.env.CI)("LLM Session Management", () => { expect(result).toEqual({ status: "complete", count: 42 }); }); - test("propagates errors from callback", async () => { + it("propagates errors from callback", async () => { const customError = new Error("Custom test error"); await expect( diff --git a/test/sync.test.ts b/test/sync.test.ts new file mode 100644 index 0000000..591d417 --- /dev/null +++ b/test/sync.test.ts @@ -0,0 +1,364 @@ +import { afterEach, describe, expect, test } from "vitest"; +import { mkdtemp, mkdir, rm, writeFile } from "fs/promises"; +import { tmpdir } from "os"; +import { join } from "path"; +import { + buildPostSyncCommands, + buildCollectionPlans, + buildRsyncArgs, + 
getDefaultSyncOptions, + detectConflicts, + includePatternsForCollection, + parseConfigYaml, + parseRsyncItemized, + remoteRsyncPath, + runQmdSync, + shellQuote, + type CommandRunner, +} from "../src/sync.js"; + +const originalEnv = { ...process.env }; + +afterEach(() => { + process.env = { ...originalEnv }; +}); + +describe("qmd sync config and collection planning", () => { + test("parses empty or missing collection config", () => { + expect(parseConfigYaml("", "empty")).toEqual({ collections: {} }); + expect(parseConfigYaml("global_context: hello\n", "ctx")).toEqual({ + global_context: "hello", + collections: {}, + }); + }); + + test("builds bidirectional and one-sided mirror plans", () => { + const plans = buildCollectionPlans({ + host: "root@example.com", + remoteHome: "/home/ubuntu", + localConfig: { + collections: { + docs: { path: "/local/docs", pattern: "**/*.md" }, + localOnly: { path: "/local/only", pattern: "**/*.md" }, + }, + }, + remoteConfig: { + collections: { + docs: { path: "/remote/docs", pattern: "**/*.md" }, + remoteOnly: { path: "/remote/only", pattern: "**/*.md" }, + }, + }, + }); + + expect(plans.find(p => p.name === "docs")).toMatchObject({ + direction: "bidirectional", + localPath: "/local/docs", + remotePath: "/remote/docs", + pattern: "**/*.md", + localConfigured: true, + remoteConfigured: true, + }); + expect(plans.find(p => p.name === "remoteOnly")).toMatchObject({ + direction: "download-mirror", + remotePath: "/remote/only", + localConfigured: false, + remoteConfigured: true, + }); + expect(plans.find(p => p.name === "localOnly")).toMatchObject({ + direction: "upload-mirror", + localPath: "/local/only", + localConfigured: true, + remoteConfigured: false, + }); + }); +}); + +describe("qmd sync collection masks", () => { + test("maps markdown collection masks to rsync includes", () => { + expect(includePatternsForCollection("**/*.md")).toEqual(["*/", "*.md"]); + expect(includePatternsForCollection("**/*.txt")).toEqual([]); + }); + + 
test("uses include rules before exclude-all for markdown collections", () => { + const args = buildRsyncArgs({ + source: "/local/docs", + destination: "root@example.com:/remote/docs", + remoteQmdUser: "ubuntu", + includes: includePatternsForCollection("**/*.md"), + dryRun: true, + }); + + expect(args).toContain("--include"); + expect(args).toContain("*/"); + expect(args).toContain("*.md"); + const excludeAllIndex = args.findIndex((arg, index) => arg === "--exclude" && args[index + 1] === "*"); + const includeMdIndex = args.findIndex((arg) => arg === "*.md"); + expect(includeMdIndex).toBeGreaterThan(-1); + expect(excludeAllIndex).toBeGreaterThan(includeMdIndex); + }); +}); + +describe("qmd sync rsync command generation", () => { + test("quotes remote rsync path under the QMD user", () => { + expect(remoteRsyncPath("ubuntu")).toBe("sudo -u 'ubuntu' rsync"); + expect(shellQuote("a'b")).toBe("'a'\\''b'"); + }); + + test("uses resumable rsync options and remote user switching in dry-run", () => { + const args = buildRsyncArgs({ + source: "/local/docs", + destination: "root@example.com:/remote/docs", + remoteQmdUser: "ubuntu", + dryRun: true, + delete: true, + excludeFrom: "/tmp/conflicts", + }); + + expect(args).toContain("--dry-run"); + expect(args).toContain("--delete"); + expect(args).toContain("--partial"); + expect(args).toContain("--partial-dir=.qmd-rsync-partial"); + expect(args).toContain("--delay-updates"); + expect(args).not.toContain("--temp-dir"); + expect(args).toContain("--rsync-path"); + expect(args).toContain("sudo -u 'ubuntu' rsync"); + expect(args).toContain("--exclude-from"); + expect(args).toContain("/tmp/conflicts"); + expect(args.at(-2)).toBe("/local/docs/"); + expect(args.at(-1)).toBe("root@example.com:/remote/docs/"); + }); + + test("uses an explicit temp directory for apply mode", () => { + const args = buildRsyncArgs({ + source: "/local/docs", + destination: "root@example.com:/remote/docs", + remoteQmdUser: "ubuntu", + tempDir: 
"/remote/docs/.qmd-rsync-tmp", + }); + + expect(args).toContain("--temp-dir"); + expect(args).toContain("/remote/docs/.qmd-rsync-tmp"); + }); + + test("preserves exact file paths for conflict copies", () => { + const args = buildRsyncArgs({ + source: "/local/docs/file.md", + destination: "root@example.com:/remote/docs/file.md.conflict.local.20260525Z", + remoteQmdUser: "ubuntu", + preserveFilePath: true, + }); + + expect(args.at(-2)).toBe("/local/docs/file.md"); + expect(args.at(-1)).toBe("root@example.com:/remote/docs/file.md.conflict.local.20260525Z"); + }); + + test("shell-quotes remote endpoints with spaces without requiring modern rsync -s", () => { + const args = buildRsyncArgs({ + source: "/local/Obsidian Vault", + destination: "root@example.com:/remote/Obsidian Vault", + remoteQmdUser: "ubuntu", + dryRun: true, + }); + + expect(args).not.toContain("-s"); + expect(args.at(-2)).toBe("/local/Obsidian Vault/"); + expect(args.at(-1)).toBe("root@example.com:'/remote/Obsidian Vault/'"); + }); +}); + +describe("qmd sync dry-run parsing and conflicts", () => { + test("parses rsync itemize output into relative paths", () => { + const output = [ + ">f.st...... notes/a.md", + "cd+++++++++ new-dir/", + ">f+++++++++ new-dir/b.md", + ">f.st...... 
.qmd-rsync-partial/tmp", + "", + ].join("\n"); + + expect(parseRsyncItemized(output)).toEqual(["notes/a.md", "new-dir/b.md"]); + }); + + test("detects two-way modified paths and names conflict copies", () => { + const conflicts = detectConflicts( + "docs", + ["a.md", "same.md"], + ["same.md", "b.md"], + "20260525T010203Z", + ); + + expect(conflicts).toEqual([{ + collection: "docs", + path: "same.md", + localConflictPath: "same.md.conflict.remote.20260525T010203Z", + remoteConflictPath: "same.md.conflict.local.20260525T010203Z", + }]); + }); +}); + +describe("qmd sync update freshness", () => { + test("builds local and remote post-sync commands with sudo remote user", () => { + const opts = getDefaultSyncOptions({ + host: "root@example.com", + remoteQmdUser: "ubuntu", + update: true, + embed: true, + localQmdCommand: ["bun", "src/cli/qmd.ts"], + }); + + expect(buildPostSyncCommands(opts).map(step => ({ + side: step.side, + action: step.action, + command: step.command, + skipped: step.skipped, + }))).toEqual([ + { side: "local", action: "update", command: ["bun", "src/cli/qmd.ts", "update"], skipped: false }, + { side: "remote", action: "update", command: ["ssh", "root@example.com", "sudo -u 'ubuntu' sh -lc 'qmd update'"], skipped: false }, + { side: "local", action: "embed", command: ["bun", "src/cli/qmd.ts", "embed"], skipped: false }, + { side: "remote", action: "embed", command: ["ssh", "root@example.com", "sudo -u 'ubuntu' sh -lc 'qmd embed'"], skipped: false }, + ]); + }); + + test("dry-run --update plans update commands without executing them", async () => { + const env = await createSyncTestEnv(); + const calls: Array<{ command: string; args: string[] }> = []; + const summary = await runQmdSync({ + host: "root@example.com", + remoteQmdUser: "ubuntu", + remoteHome: "/home/ubuntu", + dryRun: true, + update: true, + localQmdCommand: ["qmd-test"], + runCommand: fakeRunner(calls), + }); + + expect(summary.failed).toBe(false); + 
expect(summary.postSync).toHaveLength(2); + expect(summary.postSync.every(step => step.skipped && step.reason === "dry-run")).toBe(true); + expect(calls.some(call => call.command === "qmd-test")).toBe(false); + expect(calls.some(call => call.command === "ssh" && call.args.join(" ").includes("qmd update"))).toBe(false); + await env.cleanup(); + }); + + test("--collection limits sync to collection paths and skips config apply", async () => { + const env = await createSyncTestEnv(); + const calls: Array<{ command: string; args: string[] }> = []; + const summary = await runQmdSync({ + host: "root@example.com", + remoteQmdUser: "ubuntu", + remoteHome: "/home/ubuntu", + collection: ["docs"], + runCommand: fakeRunner(calls), + }); + + expect(summary.rsync.map(result => result.label)).toEqual(["docs", "docs", "docs", "docs"]); + expect(calls + .filter(call => call.command === "rsync") + .some(call => call.args.join(" ").includes(".config/qmd"))).toBe(false); + await env.cleanup(); + }); + + test("apply rsync failure marks sync failed and skips update/embed", async () => { + const env = await createSyncTestEnv(); + const calls: Array<{ command: string; args: string[] }> = []; + const summary = await runQmdSync({ + host: "root@example.com", + remoteQmdUser: "ubuntu", + remoteHome: "/home/ubuntu", + update: true, + embed: true, + localQmdCommand: ["qmd-test"], + runCommand: fakeRunner(calls, { failApplyRsync: true }), + }); + + expect(summary.failed).toBe(true); + expect(summary.postSync).toHaveLength(4); + expect(summary.postSync.every(step => step.skipped && step.reason === "sync failed; update/embed not run")).toBe(true); + expect(calls.some(call => call.command === "qmd-test")).toBe(false); + await env.cleanup(); + }); + + test("successful apply runs local and remote update before embed", async () => { + const env = await createSyncTestEnv(); + const calls: Array<{ command: string; args: string[] }> = []; + const summary = await runQmdSync({ + host: "root@example.com", + 
remoteQmdUser: "ubuntu", + remoteHome: "/home/ubuntu", + update: true, + embed: true, + localQmdCommand: ["qmd-test"], + runCommand: fakeRunner(calls), + }); + + expect(summary.failed).toBe(false); + expect(summary.postSync.map(step => `${step.side}:${step.action}:${step.exitCode}`)).toEqual([ + "local:update:0", + "remote:update:0", + "local:embed:0", + "remote:embed:0", + ]); + const executed = calls + .filter(call => call.command === "qmd-test" || call.args.join(" ").includes("qmd update") || call.args.join(" ").includes("qmd embed")) + .map(call => [call.command, ...call.args].join(" ")); + expect(executed).toEqual([ + "qmd-test update", + "ssh root@example.com sudo -u 'ubuntu' sh -lc 'qmd update'", + "qmd-test embed", + "ssh root@example.com sudo -u 'ubuntu' sh -lc 'qmd embed'", + ]); + await env.cleanup(); + }); +}); + +async function createSyncTestEnv(): Promise<{ cleanup: () => Promise }> { + const root = await mkdtemp(join(tmpdir(), "qmd-sync-test-")); + const configDir = join(root, "config"); + const cacheDir = join(root, "cache"); + const dataDir = join(root, "data"); + const docsDir = join(root, "docs"); + await mkdir(configDir, { recursive: true }); + await mkdir(cacheDir, { recursive: true }); + await mkdir(dataDir, { recursive: true }); + await mkdir(docsDir, { recursive: true }); + await writeFile(join(docsDir, "local.md"), "# Local\n"); + await writeFile(join(configDir, "index.yml"), `collections:\n docs:\n path: ${JSON.stringify(docsDir)}\n pattern: "**/*.md"\n`); + process.env.QMD_CONFIG_DIR = configDir; + process.env.XDG_CACHE_HOME = cacheDir; + process.env.XDG_DATA_HOME = dataDir; + return { + cleanup: async () => { + await rm(root, { recursive: true, force: true }); + }, + }; +} + +function fakeRunner( + calls: Array<{ command: string; args: string[] }>, + options: { failApplyRsync?: boolean } = {}, +): CommandRunner { + return async (command, args) => { + calls.push({ command, args }); + if (command === "ssh") { + const remoteCommand = 
args.join(" "); + if (remoteCommand.includes("command -v rsync")) { + return { exitCode: 0, stdout: "rsync=1\nflock=1\nqmd 2.1.0\n", stderr: "" }; + } + if (remoteCommand.includes("cat") && remoteCommand.includes("index.yml")) { + return { exitCode: 0, stdout: "collections:\n docs:\n path: /remote/docs\n pattern: \"**/*.md\"\n", stderr: "" }; + } + return { exitCode: 0, stdout: "remote ok\n", stderr: "" }; + } + if (command === "rsync") { + const isDryRun = args.includes("--dry-run"); + if (!isDryRun && options.failApplyRsync) { + return { exitCode: 23, stdout: "", stderr: "rsync failed" }; + } + return { exitCode: 0, stdout: "", stderr: "" }; + } + if (command === "qmd-test") { + return { exitCode: 0, stdout: `${args[0]} ok\n`, stderr: "" }; + } + return { exitCode: 0, stdout: "", stderr: "" }; + }; +}