From d9348f43a09b7518dd814d74f80c623195c1b430 Mon Sep 17 00:00:00 2001 From: Tobi Lutke Date: Tue, 19 May 2026 14:27:33 -0400 Subject: [PATCH] feat: add local init and doctor diagnostics --- src/cli/qmd.ts | 157 ++++++++++++++++++++------ src/collections.ts | 3 +- src/llm.ts | 193 +++++++++++++++++++++++++------- test/cli.test.ts | 146 +++++++++++++++++++++++- test/collections-config.test.ts | 15 ++- test/llm.test.ts | 57 ++++++++++ 6 files changed, 488 insertions(+), 83 deletions(-) diff --git a/src/cli/qmd.ts b/src/cli/qmd.ts index 11e8e7c..8851fdb 100755 --- a/src/cli/qmd.ts +++ b/src/cli/qmd.ts @@ -5,7 +5,7 @@ import { execSync, spawn as nodeSpawn } from "child_process"; import { fileURLToPath } from "url"; import { basename, dirname, join as pathJoin, relative as relativePath, resolve as pathResolve } from "path"; import { parseArgs } from "util"; -import { readFileSync, readdirSync, realpathSync, statSync, existsSync, unlinkSync, writeFileSync, openSync, readSync, closeSync, mkdirSync, lstatSync, rmSync, symlinkSync, readlinkSync, copyFileSync } from "fs"; +import { readFileSync, readdirSync, realpathSync, statSync, existsSync, unlinkSync, writeFileSync, openSync, closeSync, mkdirSync, lstatSync, rmSync, symlinkSync, readlinkSync, copyFileSync } from "fs"; import { createInterface } from "readline/promises"; import { getPwd, @@ -81,7 +81,7 @@ import { type ReindexResult, type ChunkStrategy, } from "../store.js"; -import { disposeDefaultLlamaCpp, getDefaultLlamaCpp, setDefaultLlamaCpp, LlamaCpp, withLLMSession, pullModels, DEFAULT_MODEL_CACHE_DIR, resolveEmbedModel, resolveGenerateModel, resolveRerankModel, resolveModels } from "../llm.js"; +import { disposeDefaultLlamaCpp, getDefaultLlamaCpp, setDefaultLlamaCpp, LlamaCpp, withLLMSession, pullModels, DEFAULT_MODEL_CACHE_DIR, resolveEmbedModel, resolveGenerateModel, resolveRerankModel, resolveModels, inspectGgufFile } from "../llm.js"; import { formatSearchResults, formatDocuments, @@ -107,6 +107,8 @@ import { getLocalDbPath, getConfigPath, configExists, + type CollectionConfig, + type ModelsConfig, } from "../collections.js"; // NOTE: enableProductionMode() is intentionally NOT called at module scope here. @@ -393,6 +395,47 @@ function formatBytes(bytes: number): string { return `${(bytes / (1024 * 1024 * 1024)).toFixed(1)} GB`; } +function sameDirectory(a: string, b: string): boolean { + try { + return realpathSync(a) === realpathSync(b); + } catch { + return pathResolve(a) === pathResolve(b); + } +} + +function initLocalIndex(): void { + const cwd = getPwd(); + if (sameDirectory(cwd, homedir())) { + throw new Error("Refusing to initialize a local index in $HOME. The global index is automatically created; run `qmd collection add ` for the global index, or run `qmd init` inside a project folder."); + } + + const qmdDir = pathJoin(cwd, ".qmd"); + const ymlPath = pathJoin(qmdDir, "index.yml"); + const yamlPath = pathJoin(qmdDir, "index.yaml"); + const configPath = existsSync(yamlPath) ? yamlPath : ymlPath; + const dbPath = pathJoin(qmdDir, "index.sqlite"); + + mkdirSync(qmdDir, { recursive: true }); + setConfigSource({ configPath }); + storeDbPathOverride = dbPath; + closeDb(); + + if (!existsSync(configPath)) { + saveConfig({ + collections: {}, + models: resolveModels(), + }); + } else { + ensureModelsConfiguredForCli(); + } + + const localStore = createStore(dbPath); + syncConfigToDb(localStore.db, loadConfig()); + localStore.close(); + + console.log("ready to go with new local index"); +} + function isForceCpuEnabled(): boolean { const value = process.env.QMD_FORCE_CPU; return !!value && !["false", "off", "none", "disable", "disabled", "0"].includes(value.trim().toLowerCase()); @@ -3183,6 +3226,7 @@ function showHelp(): void { console.log(" qmd ls [collection[/path]] - Inspect indexed files"); console.log(""); console.log("Maintenance:"); + console.log(" qmd init - Create a project-local .qmd index"); console.log(" qmd status - View index + collection health"); console.log(" qmd update [--pull] - Re-index collections (optionally git pull first)"); console.log(" qmd embed [-f] [-c ] - Generate/refresh vector embeddings"); @@ -3314,35 +3358,35 @@ function cosineDistance(a: ArrayLike, b: ArrayLike): number { return 1 - (dot / (Math.sqrt(normA) * Math.sqrt(normB))); } -function isGgufFile(path: string): boolean { - if (!existsSync(path)) return false; - let fd: number | null = null; - try { - fd = openSync(path, "r"); - const header = Buffer.alloc(4); - readSync(fd, header, 0, 4, 0); - return header.toString("utf-8") === "GGUF"; - } catch { - return false; - } finally { - if (fd !== null) closeSync(fd); - } +type CachedModelInspection = { + path: string | null; + invalid: string[]; +}; + +function formatModelDiagnosticPath(path: string): string { + return sanitizeDiagnosticMessage(path); } -function findCachedModelPath(model: string): string | null { +function findCachedModelInspection(model: string): CachedModelInspection { + const invalid: string[] = []; if (model.startsWith("hf:")) { const filename = model.split("/").pop(); - if (!filename || !existsSync(DEFAULT_MODEL_CACHE_DIR)) return null; + if (!filename || !existsSync(DEFAULT_MODEL_CACHE_DIR)) return { path: null, invalid }; const entries = readdirSync(DEFAULT_MODEL_CACHE_DIR, { withFileTypes: true }); for (const entry of entries) { if (!entry.isFile() || !entry.name.includes(filename)) continue; const candidate = pathJoin(DEFAULT_MODEL_CACHE_DIR, entry.name); - if (isGgufFile(candidate)) return candidate; + const inspection = inspectGgufFile(candidate); + if (inspection.valid) return { path: candidate, invalid }; + invalid.push(`${formatModelDiagnosticPath(candidate)}: ${inspection.details}`); } - return null; + return { path: null, invalid }; } - return existsSync(model) && isGgufFile(model) ? model : null; + const inspection = inspectGgufFile(model); + if (inspection.valid) return { path: model, invalid }; + if (inspection.exists) invalid.push(`${formatModelDiagnosticPath(model)}: ${inspection.details}`); + return { path: null, invalid }; } type EnvOverride = { @@ -3356,8 +3400,7 @@ function envValueForDisplay(value: string): string { return sanitized.length > 96 ? `${sanitized.slice(0, 93)}...` : sanitized; } -function collectEnvironmentOverrides(activeModels: { embed: string; generate: string; rerank: string }): EnvOverride[] { - const configModels = loadConfig().models ?? {}; +function collectEnvironmentOverrides(activeModels: { embed: string; generate: string; rerank: string }, configModels: ModelsConfig = {}): EnvOverride[] { const overrides: EnvOverride[] = []; const add = (name: string, consequence: string) => { const raw = process.env[name]?.trim(); @@ -3401,8 +3444,33 @@ function collectEnvironmentOverrides(activeModels: { embed: string; generate: st return overrides; } -function checkEnvironmentOverrides(activeModels: { embed: string; generate: string; rerank: string }): void { - const overrides = collectEnvironmentOverrides(activeModels); +type DoctorConfigCheck = { + config: CollectionConfig | null; + valid: boolean; +}; + +function checkDoctorIndexConfig(nextSteps: string[]): DoctorConfigCheck { + try { + const config = loadConfig(); + const collectionCount = Object.keys(config.collections ?? {}).length; + if (collectionCount === 0) { + doctorCheck("index config", false, "no collections configured. Next: `qmd collection add .`"); + nextSteps.push("Run `qmd collection add . --name ` from the folder you want to index, or edit .qmd/index.yml manually."); + } else { + doctorCheck("index config", true, `${formatCount(collectionCount)} ${collectionCount === 1 ? "collection" : "collections"} configured`); + } + return { config, valid: true }; + } catch (error) { + const message = error instanceof Error ? sanitizeDiagnosticMessage(error.message) : sanitizeDiagnosticMessage(String(error)); + const configPath = getConfigPath(); + doctorCheck("index config", false, `invalid index.yml at ${configPath}: ${message}. Next: fix the YAML and rerun \`qmd doctor\``); + nextSteps.push(`Fix invalid YAML in ${configPath}, then rerun \`qmd doctor\`.`); + return { config: null, valid: false }; + } +} + +function checkEnvironmentOverrides(activeModels: { embed: string; generate: string; rerank: string }, configModels: ModelsConfig = {}): void { + const overrides = collectEnvironmentOverrides(activeModels, configModels); if (overrides.length === 0) { doctorCheck("environment overrides", true, "none"); return; @@ -3414,8 +3482,7 @@ function checkEnvironmentOverrides(activeModels: { embed: string; generate: stri } } -function checkModelDefaults(activeModels: { embed: string; generate: string; rerank: string }, _nextSteps: string[]): void { - const configModels = loadConfig().models ?? {}; +function checkModelDefaults(activeModels: { embed: string; generate: string; rerank: string }, configModels: ModelsConfig = {}): void { const checks = [ { role: "embedding", key: "embed", active: activeModels.embed, configured: configModels.embed, defaultModel: DEFAULT_EMBED_MODEL, envName: "QMD_EMBED_MODEL", envValue: process.env.QMD_EMBED_MODEL }, { role: "generation", key: "generate", active: activeModels.generate, configured: configModels.generate, defaultModel: DEFAULT_QUERY_MODEL, envName: "QMD_GENERATE_MODEL", envValue: process.env.QMD_GENERATE_MODEL }, @@ -3455,20 +3522,33 @@ function checkModelCache(activeModels: { embed: string; generate: string; rerank const missing: string[] = []; const cached: string[] = []; + const invalid: string[] = []; for (const [model, roles] of unique) { const label = `${roles.join("+")}: ${model}`; - const path = findCachedModelPath(model); - if (path) { + const inspection = findCachedModelInspection(model); + invalid.push(...inspection.invalid.map(detail => `${label} (${detail})`)); + if (inspection.path) { cached.push(label); } else { missing.push(label); } } - if (missing.length === 0) { - doctorCheck("model cache", true, `${cached.length} active ${cached.length === 1 ? "model is" : "models are"} downloaded`); + if (missing.length === 0 && invalid.length === 0) { + doctorCheck("model cache", true, `${cached.length} active ${cached.length === 1 ? "model is" : "models are"} downloaded and valid GGUF`); + return; + } + + const parts: string[] = []; + if (invalid.length > 0) parts.push(`invalid ${invalid.length}: ${invalid.join("; ")}`); + if (missing.length > 0) parts.push(`missing ${missing.length}/${unique.size}: ${missing.join("; ")}`); + const next = invalid.length > 0 + ? "Next: run `qmd pull --refresh` (or remove the bad cached file)" + : "Next: run `qmd pull`"; + doctorCheck("model cache", false, `${parts.join("; ")}. ${next}`); + if (invalid.length > 0) { + nextSteps.push("Run `qmd pull --refresh` to replace invalid cached model files, or delete the listed file and rerun `qmd pull`."); } else { - doctorCheck("model cache", false, `missing ${missing.length}/${unique.size}: ${missing.join("; ")}. Next: run \`qmd pull\``); nextSteps.push("Run `qmd pull` to download missing embedding/generation/reranking models before `qmd embed` or `qmd query`."); } } @@ -3624,8 +3704,10 @@ async function showDoctor(): Promise { doctorCheck("sqlite-vec", false, error instanceof Error ? error.message : String(error)); } - checkEnvironmentOverrides(activeModels); - checkModelDefaults(activeModels, nextSteps); + const configCheck = checkDoctorIndexConfig(nextSteps); + const configModels = configCheck.config?.models ?? {}; + checkEnvironmentOverrides(activeModels, configModels); + checkModelDefaults(activeModels, configModels); checkModelCache(activeModels, nextSteps); await runDoctorDeviceChecks(nextSteps); @@ -4015,6 +4097,15 @@ if (isMain) { break; } + case "init": + try { + initLocalIndex(); + } catch (error) { + console.error(error instanceof Error ? error.message : String(error)); + process.exit(1); + } + break; + case "status": await showStatus(); break; diff --git a/src/collections.ts b/src/collections.ts index b3da4a4..6950493 100644 --- a/src/collections.ts +++ b/src/collections.ts @@ -187,7 +187,8 @@ export function loadConfig(): CollectionConfig { try { const content = readFileSync(configPath, "utf-8"); - const config = YAML.parse(content) as CollectionConfig; + const parsed = YAML.parse(content) as CollectionConfig | null | undefined; + const config = parsed ?? { collections: {} }; // Ensure collections object exists if (!config.collections) { diff --git a/src/llm.ts b/src/llm.ts index 3047b20..656895a 100644 --- a/src/llm.ts +++ b/src/llm.ts @@ -32,6 +32,7 @@ export function setNodeLlamaCppModuleForTest(module: NodeLlamaCppModule | null): nodeLlamaCppImport = module ? Promise.resolve(module) : null; failedGpuInitModes.clear(); noGpuAccelerationWarningShown = false; + cpuForcedPrebuiltFallbackWarningShown = false; } type StdoutWrite = typeof process.stdout.write; @@ -324,37 +325,106 @@ async function getRemoteEtag(ref: HfRef): Promise { const GGUF_MAGIC = Buffer.from("GGUF"); +export type GgufFileInspection = { + exists: boolean; + valid: boolean; + kind: "missing" | "gguf" | "html" | "invalid"; + sizeBytes?: number; + magic?: string; + details: string; +}; + +function formatModelFileSize(sizeBytes: number): string { + return `${(sizeBytes / 1024).toFixed(0)} KB`; +} + +function printableMagic(header: Buffer): string { + const text = header.toString("utf-8"); + return /^[\x20-\x7e]{1,4}$/.test(text) ? text : `0x${header.toString("hex")}`; +} + +/** + * Inspect a potential GGUF model file without mutating it. + * Used by doctor for early diagnostics and by runtime validation before load. + */ +export function inspectGgufFile(filePath: string): GgufFileInspection { + if (!existsSync(filePath)) { + return { exists: false, valid: false, kind: "missing", details: "file does not exist" }; + } + + let sizeBytes = 0; + try { + sizeBytes = statSync(filePath).size; + const fd = openSync(filePath, "r"); + const sniff = Buffer.alloc(512); + try { + readSync(fd, sniff, 0, 512, 0); + } finally { + closeSync(fd); + } + + const header = sniff.subarray(0, 4); + if (header.equals(GGUF_MAGIC)) { + return { + exists: true, + valid: true, + kind: "gguf", + sizeBytes, + magic: "GGUF", + details: `valid GGUF (${formatModelFileSize(sizeBytes)})`, + }; + } + + const magic = printableMagic(header); + const text = sniff.toString("utf-8").toLowerCase(); + const isHtml = text.includes("(); let noGpuAccelerationWarningShown = false; +let cpuForcedPrebuiltFallbackWarningShown = false; + +function isCpuModeRequested(): boolean { + return resolveLlamaGpuMode() === false; +} export class LlamaCpp implements LLM { private readonly _ciMode = !!process.env.CI; @@ -765,22 +840,44 @@ export class LlamaCpp implements LLM { const gpuMode = resolveLlamaGpuMode(); const { getLlama, LlamaLogLevel } = await loadNodeLlamaCpp(); - const loadLlama = async (gpu: LlamaGpuMode) => + const loadLlama = async (gpu: LlamaGpuMode, sourceBuildAllowed = allowBuild) => await withNativeStdoutRedirectedToStderr(() => getLlama({ - build: allowBuild ? "autoAttempt" : "never", + // Prefer packaged prebuilt bindings before compiling llama.cpp locally. + // "autoAttempt" can try to compile a missing requested backend before + // falling back to another prebuilt backend; "auto" uses prebuilt/local + // binaries first and only builds when none are usable. + build: sourceBuildAllowed ? "auto" : "never", logLevel: LlamaLogLevel.error, gpu, - skipDownload: !allowBuild, + progressLogs: false, + skipDownload: !sourceBuildAllowed, })); + const loadCpuCompatibleLlama = async () => { + try { + return await loadLlama(false, false); + } catch (err) { + // Some platforms, notably Apple Silicon, ship a Metal prebuilt but no + // CPU-only prebuilt. Do a fast no-build lookup for an actual CPU + // binding first; if it does not exist, use the packaged auto/Metal + // binding and disable model offloading via gpuLayers: 0. + if (!cpuForcedPrebuiltFallbackWarningShown) { + cpuForcedPrebuiltFallbackWarningShown = true; + process.stderr.write( + `QMD Warning: CPU-only llama.cpp prebuilt not available (${err instanceof Error ? err.message : String(err)}); using packaged backend with GPU offloading disabled.\n` + ); + } + return await loadLlama("auto", false); + } + }; let llama: Llama; - if (gpuMode === false || failedGpuInitModes.has(gpuMode)) { - if (gpuMode !== false && failedGpuInitModes.has(gpuMode)) { - process.stderr.write( - `QMD Warning: skipping previously failed GPU init${gpuMode === "auto" ? "" : ` for QMD_LLAMA_GPU=${gpuMode}`}, using CPU.\n` - ); - } - llama = await loadLlama(false); + if (gpuMode === false) { + llama = await loadCpuCompatibleLlama(); + } else if (failedGpuInitModes.has(gpuMode)) { + process.stderr.write( + `QMD Warning: skipping previously failed GPU init${gpuMode === "auto" ? "" : ` for QMD_LLAMA_GPU=${gpuMode}`}, using CPU.\n` + ); + llama = await loadCpuCompatibleLlama(); } else { try { llama = await loadLlama(gpuMode); @@ -792,7 +889,7 @@ export class LlamaCpp implements LLM { process.stderr.write( `QMD Warning: GPU init failed${gpuMode === "auto" ? "" : ` for QMD_LLAMA_GPU=${gpuMode}`} (${err instanceof Error ? err.message : String(err)}), falling back to CPU.\n` ); - llama = await loadLlama(false); + llama = await loadCpuCompatibleLlama(); } } @@ -807,6 +904,17 @@ export class LlamaCpp implements LLM { return this.llama; } + private isCpuOffloadForced(): boolean { + return isCpuModeRequested(); + } + + private modelLoadOptions(modelPath: string): { modelPath: string; gpuLayers?: number } { + return { + modelPath, + ...(this.isCpuOffloadForced() ? { gpuLayers: 0 } : {}), + }; + } + /** * Resolve a model URI to a local path, downloading if needed. * Validates the downloaded file is actually a GGUF model (not an HTML error page @@ -835,7 +943,7 @@ export class LlamaCpp implements LLM { this.embedModelLoadPromise = (async () => { const llama = await this.ensureLlama(); const modelPath = await this.resolveModel(this.embedModelUri); - const model = await llama.loadModel({ modelPath }); + const model = await llama.loadModel(this.modelLoadOptions(modelPath)); this.embedModel = model; // Model loading counts as activity - ping to keep alive this.touchActivity(); @@ -861,7 +969,7 @@ export class LlamaCpp implements LLM { private async computeParallelism(perContextMB: number): Promise { const llama = await this.ensureLlama(); - if (llama.gpu) { + if (!this.isCpuOffloadForced() && llama.gpu) { try { const vram = await llama.getVramState(); const freeMB = vram.free / (1024 * 1024); @@ -886,7 +994,7 @@ export class LlamaCpp implements LLM { */ private async threadsPerContext(parallelism: number): Promise { const llama = await this.ensureLlama(); - if (llama.gpu) return 0; // GPU: let the library decide + if (!this.isCpuOffloadForced() && llama.gpu) return 0; // GPU: let the library decide const cores = llama.cpuMathCores || 4; return Math.max(1, Math.floor(cores / parallelism)); } @@ -954,7 +1062,7 @@ export class LlamaCpp implements LLM { this.generateModelLoadPromise = (async () => { const llama = await this.ensureLlama(); const modelPath = await this.resolveModel(this.generateModelUri); - const model = await llama.loadModel({ modelPath }); + const model = await llama.loadModel(this.modelLoadOptions(modelPath)); this.generateModel = model; return model; })(); @@ -986,7 +1094,7 @@ export class LlamaCpp implements LLM { this.rerankModelLoadPromise = (async () => { const llama = await this.ensureLlama(); const modelPath = await this.resolveModel(this.rerankModelUri); - const model = await llama.loadModel({ modelPath }); + const model = await llama.loadModel(this.modelLoadOptions(modelPath)); this.rerankModel = model; // Model loading counts as activity - ping to keep alive this.touchActivity(); @@ -1489,17 +1597,18 @@ export class LlamaCpp implements LLM { cpuCores: number; }> { const llama = await this.ensureLlama(options.allowBuild ?? true); - const gpuDevices = await llama.getGpuDeviceNames(); + const cpuForced = this.isCpuOffloadForced(); + const gpuDevices = cpuForced ? [] : await llama.getGpuDeviceNames(); let vram: { total: number; used: number; free: number } | undefined; - if (llama.gpu) { + if (!cpuForced && llama.gpu) { try { const state = await llama.getVramState(); vram = { total: state.total, used: state.used, free: state.free }; } catch { /* no vram info */ } } return { - gpu: llama.gpu, - gpuOffloading: llama.supportsGpuOffloading, + gpu: cpuForced ? false : llama.gpu, + gpuOffloading: !cpuForced && llama.supportsGpuOffloading, gpuDevices, vram, cpuCores: llama.cpuMathCores, diff --git a/test/cli.test.ts b/test/cli.test.ts index 740f447..0d723af 100644 --- a/test/cli.test.ts +++ b/test/cli.test.ts @@ -16,6 +16,7 @@ import { setTimeout as sleep } from "timers/promises"; import { buildEditorUri, termLink, resolveEmbedModelForCli } from "../src/cli/qmd.ts"; import { openDatabase } from "../src/db.ts"; import { DEFAULT_EMBED_MODEL_URI, DEFAULT_GENERATE_MODEL_URI, DEFAULT_RERANK_MODEL_URI } from "../src/llm.ts"; +import { setConfigSource } from "../src/collections.ts"; // Test fixtures directory and database path let testDir: string; @@ -311,13 +312,15 @@ describe("CLI Skills", () => { }); describe("CLI Embed", () => { - test("prefers QMD_EMBED_MODEL for qmd embed", () => { + test("prefers QMD_EMBED_MODEL for qmd embed when the index has no model pin", () => { const prev = process.env.QMD_EMBED_MODEL; process.env.QMD_EMBED_MODEL = "hf:env/embed-model.gguf"; + setConfigSource({ config: { collections: {} } }); try { expect(resolveEmbedModelForCli()).toBe("hf:env/embed-model.gguf"); } finally { + setConfigSource(); if (prev === undefined) delete process.env.QMD_EMBED_MODEL; else process.env.QMD_EMBED_MODEL = prev; } @@ -326,10 +329,12 @@ describe("CLI Embed", () => { test("falls back to the default embed model when QMD_EMBED_MODEL is unset", () => { const prev = process.env.QMD_EMBED_MODEL; delete process.env.QMD_EMBED_MODEL; + setConfigSource({ config: { collections: {} } }); try { expect(resolveEmbedModelForCli()).toBe(DEFAULT_EMBED_MODEL_URI); } finally { + setConfigSource(); if (prev === undefined) delete process.env.QMD_EMBED_MODEL; else process.env.QMD_EMBED_MODEL = prev; } @@ -429,6 +434,36 @@ describe("CLI Skill Commands", () => { }); }); +describe("CLI Init Command", () => { + test("creates a project-local .qmd index", async () => { + const projectDir = join(testDir, "init-project"); + await mkdir(projectDir, { recursive: true }); + + const { stdout, exitCode } = await runQmd(["init"], { cwd: projectDir }); + expect(exitCode).toBe(0); + expect(stdout.trim()).toBe("ready to go with new local index"); + expect(existsSync(join(projectDir, ".qmd", "index.yml"))).toBe(true); + expect(existsSync(join(projectDir, ".qmd", "index.sqlite"))).toBe(true); + const configText = readFileSync(join(projectDir, ".qmd", "index.yml"), "utf-8"); + expect(configText).toContain("collections: {}"); + expect(configText).toContain("models:"); + }); + + test("refuses to initialize in HOME", async () => { + const fakeHome = join(testDir, "init-home"); + await mkdir(fakeHome, { recursive: true }); + + const { stderr, exitCode } = await runQmd(["init"], { + cwd: fakeHome, + env: { HOME: fakeHome }, + }); + expect(exitCode).toBe(1); + expect(stderr).toContain("Refusing to initialize a local index in $HOME"); + expect(stderr).toContain("global index is automatically created"); + expect(existsSync(join(fakeHome, ".qmd", "index.yml"))).toBe(false); + }); +}); + describe("CLI Add Command", () => { test("adds files from current directory", async () => { const { stdout, exitCode } = await runQmd(["collection", "add", "."]); @@ -491,7 +526,28 @@ describe("CLI Status Command", () => { expect(configText).toContain(DEFAULT_EMBED_MODEL_URI); expect(configText).toContain(DEFAULT_GENERATE_MODEL_URI); expect(configText).toContain(DEFAULT_RERANK_MODEL_URI); - }); + }, 20000); + + test("qmd doctor warns when no collections are configured", async () => { + const env = await createIsolatedTestEnv("doctor-no-collections"); + const { stdout, exitCode } = await runQmd(["doctor"], { dbPath: env.dbPath, configDir: env.configDir }); + expect(exitCode).toBe(0); + expect(stdout).toContain("index config"); + expect(stdout).toContain("no collections configured"); + expect(stdout).toContain("qmd collection add ."); + }, 20000); + + test("qmd doctor reports invalid index.yml without crashing", async () => { + const env = await createIsolatedTestEnv("doctor-invalid-config"); + await writeFile(join(env.configDir, "index.yml"), "collections:\n bad: [unterminated\n"); + + const { stdout, exitCode } = await runQmd(["doctor"], { dbPath: env.dbPath, configDir: env.configDir }); + expect(exitCode).toBe(0); + expect(stdout).toContain("index config"); + expect(stdout).toContain("invalid index.yml at"); + expect(stdout).toContain(join(env.configDir, "index.yml")); + expect(stdout).toContain("fix the YAML"); + }, 20000); test("qmd doctor warns when configured models differ from code defaults", async () => { const env = await createIsolatedTestEnv("doctor-custom-models"); @@ -504,7 +560,32 @@ describe("CLI Status Command", () => { expect(stdout).toContain("index hf:example/custom-embed/custom.gguf"); expect(stdout).toContain("might be ok"); expect(stdout).toContain("qmd pull"); - }); + }, 20000); + + test("qmd doctor identifies cached non-GGUF model files", async () => { + const env = await createIsolatedTestEnv("doctor-invalid-model-cache"); + const model = "hf:example/custom-model/custom.gguf"; + await writeFile(join(env.configDir, "index.yml"), `collections: {}\nmodels:\n embed: ${model}\n generate: ${model}\n rerank: ${model}\n`); + const cacheRoot = join(env.configDir, "cache"); + const modelCacheDir = join(cacheRoot, "qmd", "models"); + await mkdir(modelCacheDir, { recursive: true }); + const badModelPath = join(modelCacheDir, "custom.gguf"); + await writeFile(badModelPath, "blocked"); + + const { stdout, exitCode } = await runQmd(["doctor"], { + dbPath: env.dbPath, + configDir: env.configDir, + env: { + XDG_CACHE_HOME: cacheRoot, + QMD_DOCTOR_DEVICE_PROBE: "0", + }, + }); + expect(exitCode).toBe(0); + expect(stdout).toContain("model cache"); + expect(stdout).toContain("invalid 1"); + expect(stdout).toContain("HTML page, not a GGUF model"); + expect(stdout).toContain("qmd pull --refresh"); + }, 20000); test("qmd doctor says when models are overridden by env", async () => { const env = await createIsolatedTestEnv("doctor-env-models"); @@ -523,7 +604,60 @@ describe("CLI Status Command", () => { expect(stdout).toContain("environment overrides"); expect(stdout).toContain(`QMD_EMBED_MODEL=${customEmbed}`); expect(stdout).toContain("sets the active embed model"); - }); + }, 20000); + + test("qmd doctor shows CPU-forced device mode with QMD_FORCE_CPU=1", async () => { + const env = await createIsolatedTestEnv("doctor-force-cpu"); + const { stdout, exitCode } = await runQmd(["doctor"], { + dbPath: env.dbPath, + configDir: env.configDir, + env: { + QMD_FORCE_CPU: "1", + QMD_DOCTOR_DEVICE_PROBE: "0", + }, + }); + expect(exitCode).toBe(0); + expect(stdout).toContain("QMD_FORCE_CPU=1"); + expect(stdout).toContain("forces llama.cpp to bypass GPU backends"); + expect(stdout).toContain("device mode: CPU forced (QMD_FORCE_CPU)"); + }, 20000); + + test("qmd doctor lists known environment overrides and consequences", async () => { + const env = await createIsolatedTestEnv("doctor-env-overrides"); + const overrides = { + XDG_CACHE_HOME: join(env.configDir, "cache"), + QMD_DOCTOR_DEVICE_PROBE: "0", + QMD_STATUS_DEVICE_PROBE: "1", + QMD_FORCE_CPU: "1", + QMD_LLAMA_GPU: "metal", + QMD_EMBED_PARALLELISM: "2", + QMD_EXPAND_CONTEXT_SIZE: "4096", + QMD_RERANK_CONTEXT_SIZE: "8192", + QMD_EMBED_CONTEXT_SIZE: "1024", + QMD_EDITOR_URI: "vscode://file/{file}:{line}:{col}", + QMD_SKILLS_DIR: "/tmp/qmd-skills", + QMD_DISABLE_DARWIN_QUERY_JSON_SAFE_EXIT: "1", + NO_COLOR: "1", + CI: "1", + HF_ENDPOINT: "https://hf-mirror.com", + WSL_DISTRO_NAME: "Ubuntu", + WSL_INTEROP: "1", + }; + + const { stdout, exitCode } = await runQmd(["doctor"], { + dbPath: env.dbPath, + configDir: env.configDir, + env: overrides, + }); + expect(exitCode).toBe(0); + for (const name of Object.keys(overrides)) { + expect(stdout).toContain(name); + } + expect(stdout).toContain("forces llama.cpp to bypass GPU backends"); + expect(stdout).toContain("moves the default index cache"); + expect(stdout).toContain("disables real LLM operations"); + expect(stdout).toContain("changes Hugging Face download endpoint"); + }, 20000); test("qmd doctor flags mixed embedding fingerprints", async () => { const db = openDatabase(testDbPath); @@ -538,7 +672,7 @@ describe("CLI Status Command", () => { expect(exitCode).toBe(0); expect(stdout).toContain("embedding fingerprints"); expect(stdout).toContain("stale1"); - }); + }, 20000); test("shows index status", async () => { const { stdout, exitCode } = await runQmd(["status"]); @@ -1620,7 +1754,7 @@ describe("status and collection list hide filesystem paths", () => { const lines = stdout.split('\n').filter(l => !l.includes('Index:')); const pathLines = lines.filter(l => l.includes('/Users/') || l.includes('/home/') || l.includes('/tmp/')); expect(pathLines.length).toBe(0); - }); + }, 20000); test("collection list does not show full filesystem paths", async () => { const { stdout, exitCode } = await runQmd(["collection", "list"], { dbPath: localDbPath, configDir: localConfigDir }); diff --git a/test/collections-config.test.ts b/test/collections-config.test.ts index 3dd926b..ead770e 100644 --- a/test/collections-config.test.ts +++ b/test/collections-config.test.ts @@ -6,9 +6,11 @@ */ import { describe, test, expect, beforeEach, afterEach } from "vitest"; +import { mkdtemp, rm, writeFile } from "fs/promises"; +import { tmpdir } from "os"; import { join } from "path"; import { qmdHomedir } from "../src/paths.js"; -import { getConfigPath, setConfigIndexName } from "../src/collections.js"; +import { getConfigPath, loadConfig, setConfigIndexName } from "../src/collections.js"; // Save/restore env vars around each test let savedEnv: Record; @@ -82,4 +84,15 @@ describe("getConfigDir via getConfigPath", () => { setConfigIndexName("myindex"); expect(getConfigPath()).toBe(join("/xdg/config", "qmd", "myindex.yml")); }); + + test("loadConfig treats an empty YAML file as an empty config", async () => { + const dir = await mkdtemp(join(tmpdir(), "qmd-empty-config-")); + try { + process.env.QMD_CONFIG_DIR = dir; + await writeFile(join(dir, "index.yml"), ""); + expect(loadConfig()).toEqual({ collections: {} }); + } finally { + await rm(dir, { recursive: true, force: true }); + } + }); }); diff --git a/test/llm.test.ts b/test/llm.test.ts index 0ab1281..b6ee3ab 100644 --- a/test/llm.test.ts +++ b/test/llm.test.ts @@ -273,6 +273,63 @@ describe("native llama stdout containment", () => { else process.env.QMD_FORCE_CPU = prevForceCpu; } }); + + test("embeds hello world with QMD_FORCE_CPU=1 without throwing", async () => { + const prevGpu = process.env.QMD_LLAMA_GPU; + const prevForceCpu = process.env.QMD_FORCE_CPU; + process.env.QMD_FORCE_CPU = "1"; + process.env.QMD_LLAMA_GPU = "metal"; + + const getEmbeddingFor = vi.fn(async (text: string) => ({ + vector: new Float32Array([0.1, 0.2, 0.3]), + text, + })); + const createEmbeddingContext = vi.fn(async () => ({ + getEmbeddingFor, + dispose: vi.fn(async () => {}), + })); + const loadModel = vi.fn(async () => ({ + trainContextSize: 2048, + tokenize: (text: string) => Array.from(text), + detokenize: (tokens: string[]) => tokens.join(""), + createEmbeddingContext, + dispose: vi.fn(async () => {}), + })); + const getLlama = vi.fn(async (options: Record) => ({ + gpu: false, + cpuMathCores: 4, + loadModel, + dispose: vi.fn(async () => {}), + }) as any); + + setNodeLlamaCppModuleForTest({ + LlamaLogLevel: { error: "error" }, + resolveModelFile: vi.fn(async () => "/tmp/nonexistent-model.gguf"), + LlamaChatSession: vi.fn() as any, + getLlama, + }); + + const stderrSpy = vi.spyOn(process.stderr, "write").mockReturnValue(true); + const llm = new LlamaCpp(); + try { + const result = await llm.embed("hello world"); + expect(result).toEqual({ + embedding: [0.10000000149011612, 0.20000000298023224, 0.30000001192092896], + model: llm.embedModelName, + }); + expect(getLlama).toHaveBeenCalledWith(expect.objectContaining({ gpu: false, build: "never" })); + expect(loadModel).toHaveBeenCalledWith(expect.objectContaining({ gpuLayers: 0 })); + expect(getEmbeddingFor).toHaveBeenCalledWith("hello world"); + } finally { + await llm.dispose(); + stderrSpy.mockRestore(); + setNodeLlamaCppModuleForTest(null); + if (prevGpu === undefined) delete process.env.QMD_LLAMA_GPU; + else process.env.QMD_LLAMA_GPU = prevGpu; + if (prevForceCpu === undefined) delete process.env.QMD_FORCE_CPU; + else process.env.QMD_FORCE_CPU = prevForceCpu; + } + }); }); describe("LLM context parallelism safety", () => {