feat: add local init and doctor diagnostics

2026-05-19 14:27:33 -04:00 · 2026-05-19 14:27:33 -04:00 · d9348f43a0
commit d9348f43a0
parent 5cda3cf54c
6 changed files with 488 additions and 83 deletions
--- a/src/cli/qmd.ts
+++ b/src/cli/qmd.ts
@ -5,7 +5,7 @@ import { execSync, spawn as nodeSpawn } from "child_process";
 import { fileURLToPath } from "url";
 import { basename, dirname, join as pathJoin, relative as relativePath, resolve as pathResolve } from "path";
 import { parseArgs } from "util";
-import { readFileSync, readdirSync, realpathSync, statSync, existsSync, unlinkSync, writeFileSync, openSync, readSync, closeSync, mkdirSync, lstatSync, rmSync, symlinkSync, readlinkSync, copyFileSync } from "fs";
+import { readFileSync, readdirSync, realpathSync, statSync, existsSync, unlinkSync, writeFileSync, openSync, closeSync, mkdirSync, lstatSync, rmSync, symlinkSync, readlinkSync, copyFileSync } from "fs";
 import { createInterface } from "readline/promises";
 import {
  getPwd,
@ -81,7 +81,7 @@ import {
  type ReindexResult,
  type ChunkStrategy,
 } from "../store.js";
-import { disposeDefaultLlamaCpp, getDefaultLlamaCpp, setDefaultLlamaCpp, LlamaCpp, withLLMSession, pullModels, DEFAULT_MODEL_CACHE_DIR, resolveEmbedModel, resolveGenerateModel, resolveRerankModel, resolveModels } from "../llm.js";
+import { disposeDefaultLlamaCpp, getDefaultLlamaCpp, setDefaultLlamaCpp, LlamaCpp, withLLMSession, pullModels, DEFAULT_MODEL_CACHE_DIR, resolveEmbedModel, resolveGenerateModel, resolveRerankModel, resolveModels, inspectGgufFile } from "../llm.js";
 import {
  formatSearchResults,
  formatDocuments,
@ -107,6 +107,8 @@ import {
  getLocalDbPath,
  getConfigPath,
  configExists,
+  type CollectionConfig,
+  type ModelsConfig,
 } from "../collections.js";

 // NOTE: enableProductionMode() is intentionally NOT called at module scope here.
@ -393,6 +395,47 @@ function formatBytes(bytes: number): string {
  return `${(bytes / (1024 * 1024 * 1024)).toFixed(1)} GB`;
 }

+/**
+ * Report whether two paths name the same directory.
+ *
+ * The real paths are compared when both can be resolved; if either
+ * `realpathSync` call throws, the comparison falls back to the lexically
+ * resolved absolute paths.
+ */
+function sameDirectory(a: string, b: string): boolean {
+  let realA: string;
+  let realB: string;
+  try {
+    realA = realpathSync(a);
+    realB = realpathSync(b);
+  } catch {
+    return pathResolve(a) === pathResolve(b);
+  }
+  return realA === realB;
+}
+
+/**
+ * Set up a project-local index under `<cwd>/.qmd`.
+ *
+ * Steps, in order: refuse when the working directory is the home directory,
+ * create the `.qmd` folder, point the config source and the store DB override
+ * at it, write a config (no collections, resolved models) when none exists or
+ * otherwise make sure models are configured, sync the config into the SQLite
+ * store, and print a confirmation line.
+ *
+ * @throws Error when the working directory is `$HOME`.
+ */
+function initLocalIndex(): void {
+  const projectDir = getPwd();
+  if (sameDirectory(projectDir, homedir())) {
+    throw new Error("Refusing to initialize a local index in $HOME. The global index is automatically created; run `qmd collection add <path>` for the global index, or run `qmd init` inside a project folder.");
+  }
+
+  const localDir = pathJoin(projectDir, ".qmd");
+  const dbPath = pathJoin(localDir, "index.sqlite");
+  // An existing index.yaml takes precedence; index.yml is used otherwise.
+  const yamlCandidate = pathJoin(localDir, "index.yaml");
+  const configPath = existsSync(yamlCandidate) ? yamlCandidate : pathJoin(localDir, "index.yml");
+
+  mkdirSync(localDir, { recursive: true });
+  setConfigSource({ configPath });
+  storeDbPathOverride = dbPath;
+  closeDb();
+
+  if (existsSync(configPath)) {
+    ensureModelsConfiguredForCli();
+  } else {
+    saveConfig({ collections: {}, models: resolveModels() });
+  }
+
+  const store = createStore(dbPath);
+  syncConfigToDb(store.db, loadConfig());
+  store.close();
+
+  console.log("ready to go with new local index");
+}
+
 function isForceCpuEnabled(): boolean {
  const value = process.env.QMD_FORCE_CPU;
  return !!value && !["false", "off", "none", "disable", "disabled", "0"].includes(value.trim().toLowerCase());
@ -3183,6 +3226,7 @@ function showHelp(): void {
  console.log("  qmd ls [collection[/path]]                   - Inspect indexed files");
  console.log("");
  console.log("Maintenance:");
+  console.log("  qmd init                      - Create a project-local .qmd index");
  console.log("  qmd status                    - View index + collection health");
  console.log("  qmd update [--pull]           - Re-index collections (optionally git pull first)");
  console.log("  qmd embed [-f] [-c <name>]    - Generate/refresh vector embeddings");
@ -3314,35 +3358,35 @@ function cosineDistance(a: ArrayLike<number>, b: ArrayLike<number>): number {
  return 1 - (dot / (Math.sqrt(normA) * Math.sqrt(normB)));
 }

-function isGgufFile(path: string): boolean {
-  if (!existsSync(path)) return false;
-  let fd: number | null = null;
-  try {
-    fd = openSync(path, "r");
-    const header = Buffer.alloc(4);
-    readSync(fd, header, 0, 4, 0);
-    return header.toString("utf-8") === "GGUF";
-  } catch {
-    return false;
-  } finally {
-    if (fd !== null) closeSync(fd);
-  }
+/** Result of looking for a usable cached copy of one model. */
+type CachedModelInspection = {
+  /** Path of a file that passed GGUF inspection, or null when none did. */
+  path: string | null;
+  /** `<path>: <details>` descriptions of candidate files that were rejected. */
+  invalid: string[];
+};
+
+/** Render a model file path for doctor output via the diagnostic sanitizer. */
+function formatModelDiagnosticPath(path: string): string {
+  const displayPath = sanitizeDiagnosticMessage(path);
+  return displayPath;
 }

-function findCachedModelPath(model: string): string | null {
+/**
+ * Locate a cached GGUF file for `model` and describe rejected candidates.
+ *
+ * For `hf:` URIs the last path segment is used as a filename and every regular
+ * file in DEFAULT_MODEL_CACHE_DIR whose name contains it is inspected. Any
+ * other value is treated as a local file path and inspected directly.
+ *
+ * @param model - `hf:` model URI or local file path
+ * @returns the first valid file (with an empty `invalid` list), or `path: null`
+ *   together with one entry per existing-but-rejected candidate
+ */
+function findCachedModelInspection(model: string): CachedModelInspection {
+  const invalid: string[] = [];
   if (model.startsWith("hf:")) {
     const filename = model.split("/").pop();
-    if (!filename || !existsSync(DEFAULT_MODEL_CACHE_DIR)) return null;
+    if (!filename || !existsSync(DEFAULT_MODEL_CACHE_DIR)) return { path: null, invalid };
     const entries = readdirSync(DEFAULT_MODEL_CACHE_DIR, { withFileTypes: true });
     for (const entry of entries) {
       if (!entry.isFile() || !entry.name.includes(filename)) continue;
       const candidate = pathJoin(DEFAULT_MODEL_CACHE_DIR, entry.name);
-      if (isGgufFile(candidate)) return candidate;
+      const inspection = inspectGgufFile(candidate);
+      // A usable copy exists, so look-alikes rejected earlier in the listing
+      // are dropped. Keeping them would make the diagnostic depend on the
+      // order in which readdirSync returns entries, and the substring match
+      // above can also pick up files that belong to a different model.
+      if (inspection.valid) return { path: candidate, invalid: [] };
+      invalid.push(`${formatModelDiagnosticPath(candidate)}: ${inspection.details}`);
     }
-    return null;
+    return { path: null, invalid };
   }

-  return existsSync(model) && isGgufFile(model) ? model : null;
+  const inspection = inspectGgufFile(model);
+  if (inspection.valid) return { path: model, invalid };
+  // A missing local file is simply "not cached"; only existing files are flagged.
+  if (inspection.exists) invalid.push(`${formatModelDiagnosticPath(model)}: ${inspection.details}`);
+  return { path: null, invalid };
 }

 type EnvOverride = {
@ -3356,8 +3400,7 @@ function envValueForDisplay(value: string): string {
  return sanitized.length > 96 ? `${sanitized.slice(0, 93)}...` : sanitized;
 }

-function collectEnvironmentOverrides(activeModels: { embed: string; generate: string; rerank: string }): EnvOverride[] {
-  const configModels = loadConfig().models ?? {};
+function collectEnvironmentOverrides(activeModels: { embed: string; generate: string; rerank: string }, configModels: ModelsConfig = {}): EnvOverride[] {
  const overrides: EnvOverride[] = [];
  const add = (name: string, consequence: string) => {
    const raw = process.env[name]?.trim();
@ -3401,8 +3444,33 @@ function collectEnvironmentOverrides(activeModels: { embed: string; generate: st
  return overrides;
 }

-function checkEnvironmentOverrides(activeModels: { embed: string; generate: string; rerank: string }): void {
-  const overrides = collectEnvironmentOverrides(activeModels);
+/** Outcome of the doctor's index-config check. */
+type DoctorConfigCheck = {
+  /** The loaded config, or null when loading it threw. */
+  config: CollectionConfig | null;
+  /** False only when loading (or reporting on) the config threw. */
+  valid: boolean;
+};
+
+/**
+ * Load the index config for `qmd doctor` and report on it.
+ *
+ * Emits one "index config" doctor line: a pass with the collection count, a
+ * failure when no collections are configured, or a failure carrying the
+ * sanitized error text when loading throws. Follow-up hints are appended to
+ * `nextSteps` for the two failure cases.
+ *
+ * @param nextSteps - mutable list of follow-up instructions shown to the user
+ * @returns the loaded config and whether it could be loaded
+ */
+function checkDoctorIndexConfig(nextSteps: string[]): DoctorConfigCheck {
+  try {
+    const config = loadConfig();
+    const total = Object.keys(config.collections ?? {}).length;
+    if (total > 0) {
+      const noun = total === 1 ? "collection" : "collections";
+      doctorCheck("index config", true, `${formatCount(total)} ${noun} configured`);
+    } else {
+      doctorCheck("index config", false, "no collections configured. Next: `qmd collection add .`");
+      nextSteps.push("Run `qmd collection add . --name <name>` from the folder you want to index, or edit .qmd/index.yml manually.");
+    }
+    return { config, valid: true };
+  } catch (error) {
+    const rawMessage = error instanceof Error ? error.message : String(error);
+    const message = sanitizeDiagnosticMessage(rawMessage);
+    const configPath = getConfigPath();
+    doctorCheck("index config", false, `invalid index.yml at ${configPath}: ${message}. Next: fix the YAML and rerun \`qmd doctor\``);
+    nextSteps.push(`Fix invalid YAML in ${configPath}, then rerun \`qmd doctor\`.`);
+    return { config: null, valid: false };
+  }
+}
+
+function checkEnvironmentOverrides(activeModels: { embed: string; generate: string; rerank: string }, configModels: ModelsConfig = {}): void {
+  const overrides = collectEnvironmentOverrides(activeModels, configModels);
  if (overrides.length === 0) {
    doctorCheck("environment overrides", true, "none");
    return;
@ -3414,8 +3482,7 @@ function checkEnvironmentOverrides(activeModels: { embed: string; generate: stri
  }
 }

-function checkModelDefaults(activeModels: { embed: string; generate: string; rerank: string }, _nextSteps: string[]): void {
-  const configModels = loadConfig().models ?? {};
+function checkModelDefaults(activeModels: { embed: string; generate: string; rerank: string }, configModels: ModelsConfig = {}): void {
  const checks = [
    { role: "embedding", key: "embed", active: activeModels.embed, configured: configModels.embed, defaultModel: DEFAULT_EMBED_MODEL, envName: "QMD_EMBED_MODEL", envValue: process.env.QMD_EMBED_MODEL },
    { role: "generation", key: "generate", active: activeModels.generate, configured: configModels.generate, defaultModel: DEFAULT_QUERY_MODEL, envName: "QMD_GENERATE_MODEL", envValue: process.env.QMD_GENERATE_MODEL },
@ -3455,20 +3522,33 @@ function checkModelCache(activeModels: { embed: string; generate: string; rerank

  const missing: string[] = [];
  const cached: string[] = [];
+  const invalid: string[] = [];
  for (const [model, roles] of unique) {
    const label = `${roles.join("+")}: ${model}`;
-    const path = findCachedModelPath(model);
-    if (path) {
+    const inspection = findCachedModelInspection(model);
+    invalid.push(...inspection.invalid.map(detail => `${label} (${detail})`));
+    if (inspection.path) {
      cached.push(label);
    } else {
      missing.push(label);
    }
  }

-  if (missing.length === 0) {
-    doctorCheck("model cache", true, `${cached.length} active ${cached.length === 1 ? "model is" : "models are"} downloaded`);
+  if (missing.length === 0 && invalid.length === 0) {
+    doctorCheck("model cache", true, `${cached.length} active ${cached.length === 1 ? "model is" : "models are"} downloaded and valid GGUF`);
+    return;
+  }
+
+  const parts: string[] = [];
+  if (invalid.length > 0) parts.push(`invalid ${invalid.length}: ${invalid.join("; ")}`);
+  if (missing.length > 0) parts.push(`missing ${missing.length}/${unique.size}: ${missing.join("; ")}`);
+  const next = invalid.length > 0
+    ? "Next: run `qmd pull --refresh` (or remove the bad cached file)"
+    : "Next: run `qmd pull`";
+  doctorCheck("model cache", false, `${parts.join("; ")}. ${next}`);
+  if (invalid.length > 0) {
+    nextSteps.push("Run `qmd pull --refresh` to replace invalid cached model files, or delete the listed file and rerun `qmd pull`.");
  } else {
-    doctorCheck("model cache", false, `missing ${missing.length}/${unique.size}: ${missing.join("; ")}. Next: run \`qmd pull\``);
    nextSteps.push("Run `qmd pull` to download missing embedding/generation/reranking models before `qmd embed` or `qmd query`.");
  }
 }
@ -3624,8 +3704,10 @@ async function showDoctor(): Promise<void> {
    doctorCheck("sqlite-vec", false, error instanceof Error ? error.message : String(error));
  }

-  checkEnvironmentOverrides(activeModels);
-  checkModelDefaults(activeModels, nextSteps);
+  const configCheck = checkDoctorIndexConfig(nextSteps);
+  const configModels = configCheck.config?.models ?? {};
+  checkEnvironmentOverrides(activeModels, configModels);
+  checkModelDefaults(activeModels, configModels);
  checkModelCache(activeModels, nextSteps);

  await runDoctorDeviceChecks(nextSteps);
@ -4015,6 +4097,15 @@ if (isMain) {
      break;
    }

+    case "init":
+      try {
+        initLocalIndex();
+      } catch (error) {
+        console.error(error instanceof Error ? error.message : String(error));
+        process.exit(1);
+      }
+      break;
+
    case "status":
      await showStatus();
      break;
--- a/src/collections.ts
+++ b/src/collections.ts
@ -187,7 +187,8 @@ export function loadConfig(): CollectionConfig {

  try {
    const content = readFileSync(configPath, "utf-8");
-    const config = YAML.parse(content) as CollectionConfig;
+    const parsed = YAML.parse(content) as CollectionConfig | null | undefined;
+    const config = parsed ?? { collections: {} };

    // Ensure collections object exists
    if (!config.collections) {
--- a/src/llm.ts
+++ b/src/llm.ts
@ -32,6 +32,7 @@ export function setNodeLlamaCppModuleForTest(module: NodeLlamaCppModule | null):
  nodeLlamaCppImport = module ? Promise.resolve(module) : null;
  failedGpuInitModes.clear();
  noGpuAccelerationWarningShown = false;
+  cpuForcedPrebuiltFallbackWarningShown = false;
 }

 type StdoutWrite = typeof process.stdout.write;
@ -324,37 +325,106 @@ async function getRemoteEtag(ref: HfRef): Promise<string | null> {

 const GGUF_MAGIC = Buffer.from("GGUF");

+/** Classification of a file that is expected to be a GGUF model. */
+export type GgufFileInspection = {
+  /** False only when the path does not exist. */
+  exists: boolean;
+  /** True when the file starts with the GGUF magic bytes. */
+  valid: boolean;
+  kind: "missing" | "gguf" | "html" | "invalid";
+  /** File size in bytes, when it was obtained. */
+  sizeBytes?: number;
+  /** Printable form of the leading bytes, when they were read. */
+  magic?: string;
+  /** Human-readable explanation of the classification. */
+  details: string;
+};
+
+/** Format a byte count as a whole number of kilobytes, e.g. `12 KB`. */
+function formatModelFileSize(sizeBytes: number): string {
+  const kilobytes = sizeBytes / 1024;
+  return `${kilobytes.toFixed(0)} KB`;
+}
+
+/**
+ * Render header bytes for display: as text when they decode to one to four
+ * printable ASCII characters, otherwise as a `0x`-prefixed hex string.
+ */
+function printableMagic(header: Buffer): string {
+  const decoded = header.toString("utf-8");
+  if (/^[\x20-\x7e]{1,4}$/.test(decoded)) {
+    return decoded;
+  }
+  return `0x${header.toString("hex")}`;
+}
+
+/**
+ * Inspect a potential GGUF model file without mutating it.
+ * Used by doctor for early diagnostics and by runtime validation before load.
+ *
+ * At most the first 512 bytes are read. The file counts as GGUF when its first
+ * four bytes equal the GGUF magic, and as HTML when the sniffed bytes contain
+ * `<!doctype` or `<html` (case-insensitive). Read failures are reported as
+ * kind "invalid" rather than thrown.
+ *
+ * @param filePath - path of the file to classify
+ * @returns the classification, including size and leading bytes when available
+ */
+export function inspectGgufFile(filePath: string): GgufFileInspection {
+  if (!existsSync(filePath)) {
+    return { exists: false, valid: false, kind: "missing", details: "file does not exist" };
+  }
+
+  let sizeBytes = 0;
+  try {
+    sizeBytes = statSync(filePath).size;
+    const fd = openSync(filePath, "r");
+    const buffer = Buffer.alloc(512);
+    let bytesRead = 0;
+    try {
+      bytesRead = readSync(fd, buffer, 0, 512, 0);
+    } finally {
+      closeSync(fd);
+    }
+
+    // Only the bytes actually read belong to the file; the remainder of the
+    // buffer is zero padding and must not be reported as file content.
+    const sniff = buffer.subarray(0, bytesRead);
+    const header = sniff.subarray(0, 4);
+    if (header.equals(GGUF_MAGIC)) {
+      return {
+        exists: true,
+        valid: true,
+        kind: "gguf",
+        sizeBytes,
+        magic: "GGUF",
+        details: `valid GGUF (${formatModelFileSize(sizeBytes)})`,
+      };
+    }
+
+    if (bytesRead === 0) {
+      // Nothing to sniff (e.g. a truncated download): say so instead of
+      // printing a magic value made of padding bytes.
+      return {
+        exists: true,
+        valid: false,
+        kind: "invalid",
+        sizeBytes,
+        magic: "",
+        details: `empty file, not a GGUF model (${formatModelFileSize(sizeBytes)})`,
+      };
+    }
+
+    const magic = printableMagic(header);
+    const text = sniff.toString("utf-8").toLowerCase();
+    const isHtml = text.includes("<!doctype") || text.includes("<html");
+    if (isHtml) {
+      return {
+        exists: true,
+        valid: false,
+        kind: "html",
+        sizeBytes,
+        magic,
+        details: `HTML page, not a GGUF model (${formatModelFileSize(sizeBytes)}); likely proxy/firewall/captive portal response`,
+      };
+    }
+
+    return {
+      exists: true,
+      valid: false,
+      kind: "invalid",
+      sizeBytes,
+      magic,
+      details: `not valid GGUF (expected magic "GGUF", got "${magic}", ${formatModelFileSize(sizeBytes)})`,
+    };
+  } catch (error) {
+    // stat/open/read failed (permissions, directory, I/O error): the path
+    // exists but its content could not be examined.
+    return {
+      exists: true,
+      valid: false,
+      kind: "invalid",
+      sizeBytes,
+      details: `cannot read model file: ${error instanceof Error ? error.message : String(error)}`,
+    };
+  }
+}
+
 /**
 * Validate that a file is actually a GGUF model, not an HTML error page
 * from a proxy, firewall, or failed download.
 * Throws a descriptive error if the file is not valid GGUF.
 */
 function validateGgufFile(filePath: string, modelUri: string): void {
-  if (!existsSync(filePath)) return; // let downstream handle missing files
-
-  // Read header + sniff bytes in one go, then close immediately
-  const fd = openSync(filePath, "r");
-  const sniff = Buffer.alloc(512);
-  try {
-    readSync(fd, sniff, 0, 512, 0);
-  } finally {
-    closeSync(fd);
-  }
-
-  const header = sniff.subarray(0, 4);
-  if (header.equals(GGUF_MAGIC)) return; // valid GGUF
-
-  const text = sniff.toString("utf-8").toLowerCase();
-  const isHtml = text.includes("<!doctype") || text.includes("<html");
-  const got = header.toString("utf-8");
-  const sizeKB = (statSync(filePath).size / 1024).toFixed(0);
+  const inspection = inspectGgufFile(filePath);
+  if (!inspection.exists || inspection.valid) return; // let downstream handle missing files

  // Remove the bad file so the next attempt re-downloads
-  unlinkSync(filePath);
+  try {
+    unlinkSync(filePath);
+  } catch { /* best effort */ }

-  if (isHtml) {
+  if (inspection.kind === "html") {
    throw new Error(
-      `Downloaded model file is an HTML page, not a GGUF model (${sizeKB} KB).\n` +
+      `Downloaded model file is an HTML page, not a GGUF model (${formatModelFileSize(inspection.sizeBytes ?? 0)}).\n` +
      `Something is intercepting the download from huggingface.co (a proxy, firewall, or captive portal).\n\n` +
      `Model: ${modelUri}\n` +
      `Path:  ${filePath}\n\n` +
@ -367,7 +437,7 @@ function validateGgufFile(filePath: string, modelUri: string): void {
  }

  throw new Error(
-    `Model file is not valid GGUF (expected magic "GGUF", got "${got}", file is ${sizeKB} KB).\n` +
+    `Model file is not valid GGUF (expected magic "GGUF", got "${inspection.magic ?? "unknown"}", file is ${formatModelFileSize(inspection.sizeBytes ?? 0)}).\n` +
    `Model: ${modelUri}\n` +
    `Path:  ${filePath}\n\n` +
    `The file has been removed. Run the command again to re-download.`
@ -607,6 +677,11 @@ function resolveExpandContextSize(configValue?: number): number {

 const failedGpuInitModes = new Set<LlamaGpuMode>();
 let noGpuAccelerationWarningShown = false;
+// Flips to true once the CPU-prebuilt fallback warning has been written, so it is printed only once.
+let cpuForcedPrebuiltFallbackWarningShown = false;
+
+/** True when the resolved llama GPU mode is `false` (GPU use turned off). */
+function isCpuModeRequested(): boolean {
+  const gpuMode = resolveLlamaGpuMode();
+  return gpuMode === false;
+}

 export class LlamaCpp implements LLM {
  private readonly _ciMode = !!process.env.CI;
@ -765,22 +840,44 @@ export class LlamaCpp implements LLM {
      const gpuMode = resolveLlamaGpuMode();

      const { getLlama, LlamaLogLevel } = await loadNodeLlamaCpp();
-      const loadLlama = async (gpu: LlamaGpuMode) =>
+      const loadLlama = async (gpu: LlamaGpuMode, sourceBuildAllowed = allowBuild) =>
        await withNativeStdoutRedirectedToStderr(() => getLlama({
-          build: allowBuild ? "autoAttempt" : "never",
+          // Prefer packaged prebuilt bindings before compiling llama.cpp locally.
+          // "autoAttempt" can try to compile a missing requested backend before
+          // falling back to another prebuilt backend; "auto" uses prebuilt/local
+          // binaries first and only builds when none are usable.
+          build: sourceBuildAllowed ? "auto" : "never",
          logLevel: LlamaLogLevel.error,
          gpu,
-          skipDownload: !allowBuild,
+          progressLogs: false,
+          skipDownload: !sourceBuildAllowed,
        }));
+      const loadCpuCompatibleLlama = async () => {
+        try {
+          return await loadLlama(false, false);
+        } catch (err) {
+          // Some platforms, notably Apple Silicon, ship a Metal prebuilt but no
+          // CPU-only prebuilt. Do a fast no-build lookup for an actual CPU
+          // binding first; if it does not exist, use the packaged auto/Metal
+          // binding and disable model offloading via gpuLayers: 0.
+          if (!cpuForcedPrebuiltFallbackWarningShown) {
+            cpuForcedPrebuiltFallbackWarningShown = true;
+            process.stderr.write(
+              `QMD Warning: CPU-only llama.cpp prebuilt not available (${err instanceof Error ? err.message : String(err)}); using packaged backend with GPU offloading disabled.\n`
+            );
+          }
+          return await loadLlama("auto", false);
+        }
+      };

      let llama: Llama;
-      if (gpuMode === false || failedGpuInitModes.has(gpuMode)) {
-        if (gpuMode !== false && failedGpuInitModes.has(gpuMode)) {
-          process.stderr.write(
-            `QMD Warning: skipping previously failed GPU init${gpuMode === "auto" ? "" : ` for QMD_LLAMA_GPU=${gpuMode}`}, using CPU.\n`
-          );
-        }
-        llama = await loadLlama(false);
+      if (gpuMode === false) {
+        llama = await loadCpuCompatibleLlama();
+      } else if (failedGpuInitModes.has(gpuMode)) {
+        process.stderr.write(
+          `QMD Warning: skipping previously failed GPU init${gpuMode === "auto" ? "" : ` for QMD_LLAMA_GPU=${gpuMode}`}, using CPU.\n`
+        );
+        llama = await loadCpuCompatibleLlama();
      } else {
        try {
          llama = await loadLlama(gpuMode);
@ -792,7 +889,7 @@ export class LlamaCpp implements LLM {
          process.stderr.write(
            `QMD Warning: GPU init failed${gpuMode === "auto" ? "" : ` for QMD_LLAMA_GPU=${gpuMode}`} (${err instanceof Error ? err.message : String(err)}), falling back to CPU.\n`
          );
-          llama = await loadLlama(false);
+          llama = await loadCpuCompatibleLlama();
        }
      }

@ -807,6 +904,17 @@ export class LlamaCpp implements LLM {
    return this.llama;
  }

+  /** Whether CPU mode was requested, in which case GPU offloading is to be avoided. */
+  private isCpuOffloadForced(): boolean {
+    const cpuRequested = isCpuModeRequested();
+    return cpuRequested;
+  }
+
+  /**
+   * Build the options object passed to `llama.loadModel`.
+   * `gpuLayers: 0` is added only when CPU offload is forced; otherwise the key is left out.
+   */
+  private modelLoadOptions(modelPath: string): { modelPath: string; gpuLayers?: number } {
+    const options: { modelPath: string; gpuLayers?: number } = { modelPath };
+    if (this.isCpuOffloadForced()) {
+      options.gpuLayers = 0;
+    }
+    return options;
+  }
+
  /**
   * Resolve a model URI to a local path, downloading if needed.
   * Validates the downloaded file is actually a GGUF model (not an HTML error page
@ -835,7 +943,7 @@ export class LlamaCpp implements LLM {
    this.embedModelLoadPromise = (async () => {
      const llama = await this.ensureLlama();
      const modelPath = await this.resolveModel(this.embedModelUri);
-      const model = await llama.loadModel({ modelPath });
+      const model = await llama.loadModel(this.modelLoadOptions(modelPath));
      this.embedModel = model;
      // Model loading counts as activity - ping to keep alive
      this.touchActivity();
@ -861,7 +969,7 @@ export class LlamaCpp implements LLM {
  private async computeParallelism(perContextMB: number): Promise<number> {
    const llama = await this.ensureLlama();

-    if (llama.gpu) {
+    if (!this.isCpuOffloadForced() && llama.gpu) {
      try {
        const vram = await llama.getVramState();
        const freeMB = vram.free / (1024 * 1024);
@ -886,7 +994,7 @@ export class LlamaCpp implements LLM {
   */
  private async threadsPerContext(parallelism: number): Promise<number> {
    const llama = await this.ensureLlama();
-    if (llama.gpu) return 0; // GPU: let the library decide
+    if (!this.isCpuOffloadForced() && llama.gpu) return 0; // GPU: let the library decide
    const cores = llama.cpuMathCores || 4;
    return Math.max(1, Math.floor(cores / parallelism));
  }
@ -954,7 +1062,7 @@ export class LlamaCpp implements LLM {
      this.generateModelLoadPromise = (async () => {
        const llama = await this.ensureLlama();
        const modelPath = await this.resolveModel(this.generateModelUri);
-        const model = await llama.loadModel({ modelPath });
+        const model = await llama.loadModel(this.modelLoadOptions(modelPath));
        this.generateModel = model;
        return model;
      })();
@ -986,7 +1094,7 @@ export class LlamaCpp implements LLM {
    this.rerankModelLoadPromise = (async () => {
      const llama = await this.ensureLlama();
      const modelPath = await this.resolveModel(this.rerankModelUri);
-      const model = await llama.loadModel({ modelPath });
+      const model = await llama.loadModel(this.modelLoadOptions(modelPath));
      this.rerankModel = model;
      // Model loading counts as activity - ping to keep alive
      this.touchActivity();
@ -1489,17 +1597,18 @@ export class LlamaCpp implements LLM {
    cpuCores: number;
  }> {
    const llama = await this.ensureLlama(options.allowBuild ?? true);
-    const gpuDevices = await llama.getGpuDeviceNames();
+    const cpuForced = this.isCpuOffloadForced();
+    const gpuDevices = cpuForced ? [] : await llama.getGpuDeviceNames();
    let vram: { total: number; used: number; free: number } | undefined;
-    if (llama.gpu) {
+    if (!cpuForced && llama.gpu) {
      try {
        const state = await llama.getVramState();
        vram = { total: state.total, used: state.used, free: state.free };
      } catch { /* no vram info */ }
    }
    return {
-      gpu: llama.gpu,
-      gpuOffloading: llama.supportsGpuOffloading,
+      gpu: cpuForced ? false : llama.gpu,
+      gpuOffloading: !cpuForced && llama.supportsGpuOffloading,
      gpuDevices,
      vram,
      cpuCores: llama.cpuMathCores,
--- a/test/cli.test.ts
+++ b/test/cli.test.ts
@ -16,6 +16,7 @@ import { setTimeout as sleep } from "timers/promises";
 import { buildEditorUri, termLink, resolveEmbedModelForCli } from "../src/cli/qmd.ts";
 import { openDatabase } from "../src/db.ts";
 import { DEFAULT_EMBED_MODEL_URI, DEFAULT_GENERATE_MODEL_URI, DEFAULT_RERANK_MODEL_URI } from "../src/llm.ts";
+import { setConfigSource } from "../src/collections.ts";

 // Test fixtures directory and database path
 let testDir: string;
@ -311,13 +312,15 @@ describe("CLI Skills", () => {
 });

 describe("CLI Embed", () => {
-  test("prefers QMD_EMBED_MODEL for qmd embed", () => {
+  test("prefers QMD_EMBED_MODEL for qmd embed when the index has no model pin", () => {
    const prev = process.env.QMD_EMBED_MODEL;
    process.env.QMD_EMBED_MODEL = "hf:env/embed-model.gguf";
+    setConfigSource({ config: { collections: {} } });

    try {
      expect(resolveEmbedModelForCli()).toBe("hf:env/embed-model.gguf");
    } finally {
+      setConfigSource();
      if (prev === undefined) delete process.env.QMD_EMBED_MODEL;
      else process.env.QMD_EMBED_MODEL = prev;
    }
@ -326,10 +329,12 @@ describe("CLI Embed", () => {
  test("falls back to the default embed model when QMD_EMBED_MODEL is unset", () => {
    const prev = process.env.QMD_EMBED_MODEL;
    delete process.env.QMD_EMBED_MODEL;
+    setConfigSource({ config: { collections: {} } });

    try {
      expect(resolveEmbedModelForCli()).toBe(DEFAULT_EMBED_MODEL_URI);
    } finally {
+      setConfigSource();
      if (prev === undefined) delete process.env.QMD_EMBED_MODEL;
      else process.env.QMD_EMBED_MODEL = prev;
    }
@ -429,6 +434,36 @@ describe("CLI Skill Commands", () => {
  });
 });

+// Covers `qmd init`: creating a project-local index and refusing to do so in the home directory.
+describe("CLI Init Command", () => {
+  test("creates a project-local .qmd index", async () => {
+    const projectDir = join(testDir, "init-project");
+    await mkdir(projectDir, { recursive: true });
+
+    const { stdout, exitCode } = await runQmd(["init"], { cwd: projectDir });
+    expect(exitCode).toBe(0);
+    expect(stdout.trim()).toBe("ready to go with new local index");
+    expect(existsSync(join(projectDir, ".qmd", "index.yml"))).toBe(true);
+    expect(existsSync(join(projectDir, ".qmd", "index.sqlite"))).toBe(true);
+    const configText = readFileSync(join(projectDir, ".qmd", "index.yml"), "utf-8");
+    expect(configText).toContain("collections: {}");
+    expect(configText).toContain("models:");
+  });
+
+  test("refuses to initialize in HOME", async () => {
+    const fakeHome = join(testDir, "init-home");
+    await mkdir(fakeHome, { recursive: true });
+
+    const { stderr, exitCode } = await runQmd(["init"], {
+      cwd: fakeHome,
+      // Node's os.homedir() reads HOME on POSIX but USERPROFILE on Windows, so
+      // both are redirected (assumes the CLI's homedir() is os.homedir() — TODO confirm).
+      env: { HOME: fakeHome, USERPROFILE: fakeHome },
+    });
+    expect(exitCode).toBe(1);
+    expect(stderr).toContain("Refusing to initialize a local index in $HOME");
+    expect(stderr).toContain("global index is automatically created");
+    expect(existsSync(join(fakeHome, ".qmd", "index.yml"))).toBe(false);
+  });
+});
+
 describe("CLI Add Command", () => {
  test("adds files from current directory", async () => {
    const { stdout, exitCode } = await runQmd(["collection", "add", "."]);
@ -491,7 +526,28 @@ describe("CLI Status Command", () => {
    expect(configText).toContain(DEFAULT_EMBED_MODEL_URI);
    expect(configText).toContain(DEFAULT_GENERATE_MODEL_URI);
    expect(configText).toContain(DEFAULT_RERANK_MODEL_URI);
-  });
+  }, 20000);
+
+  // With a fresh isolated environment the doctor should exit 0 and point the user at `qmd collection add .`.
+  test("qmd doctor warns when no collections are configured", async () => {
+    const { dbPath, configDir } = await createIsolatedTestEnv("doctor-no-collections");
+    const result = await runQmd(["doctor"], { dbPath, configDir });
+    expect(result.exitCode).toBe(0);
+    for (const fragment of ["index config", "no collections configured", "qmd collection add ."]) {
+      expect(result.stdout).toContain(fragment);
+    }
+  }, 20000);
+
+  // A syntactically broken index.yml must be reported as a failed check, not crash the command.
+  test("qmd doctor reports invalid index.yml without crashing", async () => {
+    const { dbPath, configDir } = await createIsolatedTestEnv("doctor-invalid-config");
+    const configFile = join(configDir, "index.yml");
+    await writeFile(configFile, "collections:\n  bad: [unterminated\n");
+
+    const result = await runQmd(["doctor"], { dbPath, configDir });
+    expect(result.exitCode).toBe(0);
+    for (const fragment of ["index config", "invalid index.yml at", configFile, "fix the YAML"]) {
+      expect(result.stdout).toContain(fragment);
+    }
+  }, 20000);

  test("qmd doctor warns when configured models differ from code defaults", async () => {
    const env = await createIsolatedTestEnv("doctor-custom-models");
@ -504,7 +560,32 @@ describe("CLI Status Command", () => {
    expect(stdout).toContain("index hf:example/custom-embed/custom.gguf");
    expect(stdout).toContain("might be ok");
    expect(stdout).toContain("qmd pull");
-  });
+  }, 20000);
+
+  // Plants an HTML page where the cached model should be and expects the doctor to call it out.
+  test("qmd doctor identifies cached non-GGUF model files", async () => {
+    const { dbPath, configDir } = await createIsolatedTestEnv("doctor-invalid-model-cache");
+    const model = "hf:example/custom-model/custom.gguf";
+    const configYaml = `collections: {}\nmodels:\n  embed: ${model}\n  generate: ${model}\n  rerank: ${model}\n`;
+    await writeFile(join(configDir, "index.yml"), configYaml);
+
+    // The model cache is looked up under <XDG_CACHE_HOME>/qmd/models in this test.
+    const cacheRoot = join(configDir, "cache");
+    const modelCacheDir = join(cacheRoot, "qmd", "models");
+    await mkdir(modelCacheDir, { recursive: true });
+    await writeFile(join(modelCacheDir, "custom.gguf"), "<!doctype html><html>blocked</html>");
+
+    const result = await runQmd(["doctor"], {
+      dbPath,
+      configDir,
+      env: {
+        XDG_CACHE_HOME: cacheRoot,
+        QMD_DOCTOR_DEVICE_PROBE: "0",
+      },
+    });
+    expect(result.exitCode).toBe(0);
+    for (const fragment of ["model cache", "invalid 1", "HTML page, not a GGUF model", "qmd pull --refresh"]) {
+      expect(result.stdout).toContain(fragment);
+    }
+  }, 20000);

  test("qmd doctor says when models are overridden by env", async () => {
    const env = await createIsolatedTestEnv("doctor-env-models");
@ -523,7 +604,60 @@ describe("CLI Status Command", () => {
    expect(stdout).toContain("environment overrides");
    expect(stdout).toContain(`QMD_EMBED_MODEL=${customEmbed}`);
    expect(stdout).toContain("sets the active embed model");
-  });
+  }, 20000);
+
+  test("qmd doctor shows CPU-forced device mode with QMD_FORCE_CPU=1", async () => {
+    // Doctor must echo the variable, explain its effect, and report the CPU-forced device mode.
+    const sandbox = await createIsolatedTestEnv("doctor-force-cpu");
+    const childEnv = { QMD_FORCE_CPU: "1", QMD_DOCTOR_DEVICE_PROBE: "0" };
+    const result = await runQmd(["doctor"], { dbPath: sandbox.dbPath, configDir: sandbox.configDir, env: childEnv });
+    expect(result.exitCode).toBe(0);
+    const expectedFragments = [
+      "QMD_FORCE_CPU=1",
+      "forces llama.cpp to bypass GPU backends",
+      "device mode: CPU forced (QMD_FORCE_CPU)",
+    ];
+    for (const fragment of expectedFragments) {
+      expect(result.stdout).toContain(fragment);
+    }
+  }, 20000);
+
+  test("qmd doctor lists known environment overrides and consequences", async () => {
+    // Runs doctor with every listed variable set and checks the report mentions each one.
+    const sandbox = await createIsolatedTestEnv("doctor-env-overrides");
+    const envOverrides = {
+      XDG_CACHE_HOME: join(sandbox.configDir, "cache"),
+      QMD_DOCTOR_DEVICE_PROBE: "0",
+      QMD_STATUS_DEVICE_PROBE: "1",
+      QMD_FORCE_CPU: "1",
+      QMD_LLAMA_GPU: "metal",
+      QMD_EMBED_PARALLELISM: "2",
+      QMD_EXPAND_CONTEXT_SIZE: "4096",
+      QMD_RERANK_CONTEXT_SIZE: "8192",
+      QMD_EMBED_CONTEXT_SIZE: "1024",
+      QMD_EDITOR_URI: "vscode://file/{file}:{line}:{col}",
+      QMD_SKILLS_DIR: "/tmp/qmd-skills",
+      QMD_DISABLE_DARWIN_QUERY_JSON_SAFE_EXIT: "1",
+      NO_COLOR: "1",
+      CI: "1",
+      HF_ENDPOINT: "https://hf-mirror.com",
+      WSL_DISTRO_NAME: "Ubuntu",
+      WSL_INTEROP: "1",
+    };
+    const result = await runQmd(["doctor"], { dbPath: sandbox.dbPath, configDir: sandbox.configDir, env: envOverrides });
+    expect(result.exitCode).toBe(0);
+    // Every override name must appear in the output; a few consequence phrases are spot-checked.
+    Object.keys(envOverrides).forEach((name) => expect(result.stdout).toContain(name));
+    const consequences = [
+      "forces llama.cpp to bypass GPU backends",
+      "moves the default index cache",
+      "disables real LLM operations",
+      "changes Hugging Face download endpoint",
+    ];
+    for (const phrase of consequences) {
+      expect(result.stdout).toContain(phrase);
+    }
+  }, 20000);

  test("qmd doctor flags mixed embedding fingerprints", async () => {
    const db = openDatabase(testDbPath);
@ -538,7 +672,7 @@ describe("CLI Status Command", () => {
    expect(exitCode).toBe(0);
    expect(stdout).toContain("embedding fingerprints");
    expect(stdout).toContain("stale1");
-  });
+  }, 20000);

  test("shows index status", async () => {
    const { stdout, exitCode } = await runQmd(["status"]);
@ -1620,7 +1754,7 @@ describe("status and collection list hide filesystem paths", () => {
    const lines = stdout.split('\n').filter(l => !l.includes('Index:'));
    const pathLines = lines.filter(l => l.includes('/Users/') || l.includes('/home/') || l.includes('/tmp/'));
    expect(pathLines.length).toBe(0);
-  });
+  }, 20000);

  test("collection list does not show full filesystem paths", async () => {
    const { stdout, exitCode } = await runQmd(["collection", "list"], { dbPath: localDbPath, configDir: localConfigDir });
--- a/test/collections-config.test.ts
+++ b/test/collections-config.test.ts
@ -6,9 +6,11 @@
 */

 import { describe, test, expect, beforeEach, afterEach } from "vitest";
+import { mkdtemp, rm, writeFile } from "fs/promises";
+import { tmpdir } from "os";
 import { join } from "path";
 import { qmdHomedir } from "../src/paths.js";
-import { getConfigPath, setConfigIndexName } from "../src/collections.js";
+import { getConfigPath, loadConfig, setConfigIndexName } from "../src/collections.js";

 // Save/restore env vars around each test
 let savedEnv: Record<string, string | undefined>;
@ -82,4 +84,15 @@ describe("getConfigDir via getConfigPath", () => {
    setConfigIndexName("myindex");
    expect(getConfigPath()).toBe(join("/xdg/config", "qmd", "myindex.yml"));
  });
+
+  test("loadConfig treats an empty YAML file as an empty config", async () => {
+    const scratchDir = await mkdtemp(join(tmpdir(), "qmd-empty-config-")); // throwaway config dir
+    try {
+      process.env.QMD_CONFIG_DIR = scratchDir;
+      await writeFile(join(scratchDir, "index.yml"), ""); // zero-byte config file
+      expect(loadConfig()).toEqual({ collections: {} }); // must yield no collections, not throw
+    } finally {
+      await rm(scratchDir, { recursive: true, force: true }); // remove the dir even on failure
+    }
+  });
 });
--- a/test/llm.test.ts
+++ b/test/llm.test.ts
@ -273,6 +273,63 @@ describe("native llama stdout containment", () => {
      else process.env.QMD_FORCE_CPU = prevForceCpu;
    }
  });
+
+  test("embeds hello world with QMD_FORCE_CPU=1 without throwing", async () => {
+    const prevGpu = process.env.QMD_LLAMA_GPU;
+    const prevForceCpu = process.env.QMD_FORCE_CPU;
+    // Stub node-llama-cpp bottom-up: embedding context -> model -> llama instance.
+    const getEmbeddingFor = vi.fn(async (text: string) => ({
+      vector: new Float32Array([0.1, 0.2, 0.3]),
+      text,
+    }));
+    const createEmbeddingContext = vi.fn(async () => ({
+      getEmbeddingFor,
+      dispose: vi.fn(async () => {}),
+    }));
+    const loadModel = vi.fn(async () => ({
+      trainContextSize: 2048,
+      tokenize: (text: string) => Array.from(text),
+      detokenize: (tokens: string[]) => tokens.join(""),
+      createEmbeddingContext,
+      dispose: vi.fn(async () => {}),
+    }));
+    const getLlama = vi.fn(async (options: Record<string, unknown>) => ({
+      gpu: false,
+      cpuMathCores: 4,
+      loadModel,
+      dispose: vi.fn(async () => {}),
+    }) as any);
+    // Mutate env vars, the injected module and stderr only inside the try, so the finally
+    // block undoes them even when setup (for example the LlamaCpp constructor) throws.
+    let stderrSpy: { mockRestore: () => void } | undefined;
+    let llm: LlamaCpp | undefined;
+    try {
+      process.env.QMD_FORCE_CPU = "1";
+      process.env.QMD_LLAMA_GPU = "metal"; // conflicting GPU request; assertions below expect CPU
+      setNodeLlamaCppModuleForTest({
+        LlamaLogLevel: { error: "error" },
+        resolveModelFile: vi.fn(async () => "/tmp/nonexistent-model.gguf"),
+        LlamaChatSession: vi.fn() as any,
+        getLlama,
+      });
+      stderrSpy = vi.spyOn(process.stderr, "write").mockReturnValue(true);
+      llm = new LlamaCpp();
+      const result = await llm.embed("hello world");
+      const expected = [0.10000000149011612, 0.20000000298023224, 0.30000001192092896]; // float32 -> double
+      expect(result).toEqual({ embedding: expected, model: llm.embedModelName });
+      expect(getLlama).toHaveBeenCalledWith(expect.objectContaining({ gpu: false, build: "never" }));
+      expect(loadModel).toHaveBeenCalledWith(expect.objectContaining({ gpuLayers: 0 }));
+      expect(getEmbeddingFor).toHaveBeenCalledWith("hello world");
+    } finally {
+      await llm?.dispose(); // undefined when the constructor itself threw
+      stderrSpy?.mockRestore();
+      setNodeLlamaCppModuleForTest(null);
+      if (prevGpu === undefined) delete process.env.QMD_LLAMA_GPU;
+      else process.env.QMD_LLAMA_GPU = prevGpu;
+      if (prevForceCpu === undefined) delete process.env.QMD_FORCE_CPU;
+      else process.env.QMD_FORCE_CPU = prevForceCpu;
+    }
+  });
 });

 describe("LLM context parallelism safety", () => {