From d9348f43a09b7518dd814d74f80c623195c1b430 Mon Sep 17 00:00:00 2001
From: Tobi Lutke <tobi@shopify.com>
Date: Tue, 19 May 2026 14:27:33 -0400
Subject: [PATCH] feat: add local init and doctor diagnostics

---
 src/cli/qmd.ts                  | 157 ++++++++++++++++++++------
 src/collections.ts              |   3 +-
 src/llm.ts                      | 193 +++++++++++++++++++++++++-------
 test/cli.test.ts                | 146 +++++++++++++++++++++++-
 test/collections-config.test.ts |  15 ++-
 test/llm.test.ts                |  57 ++++++++++
 6 files changed, 488 insertions(+), 83 deletions(-)
diff --git a/src/cli/qmd.ts b/src/cli/qmd.ts
index 11e8e7c..8851fdb 100755
--- a/src/cli/qmd.ts
+++ b/src/cli/qmd.ts
@@ -5,7 +5,7 @@ import { execSync, spawn as nodeSpawn } from "child_process";
 import { fileURLToPath } from "url";
 import { basename, dirname, join as pathJoin, relative as relativePath, resolve as pathResolve } from "path";
 import { parseArgs } from "util";
-import { readFileSync, readdirSync, realpathSync, statSync, existsSync, unlinkSync, writeFileSync, openSync, readSync, closeSync, mkdirSync, lstatSync, rmSync, symlinkSync, readlinkSync, copyFileSync } from "fs";
+import { readFileSync, readdirSync, realpathSync, statSync, existsSync, unlinkSync, writeFileSync, openSync, closeSync, mkdirSync, lstatSync, rmSync, symlinkSync, readlinkSync, copyFileSync } from "fs";
 import { createInterface } from "readline/promises";
 import {
   getPwd,
@@ -81,7 +81,7 @@ import {
   type ReindexResult,
   type ChunkStrategy,
 } from "../store.js";
-import { disposeDefaultLlamaCpp, getDefaultLlamaCpp, setDefaultLlamaCpp, LlamaCpp, withLLMSession, pullModels, DEFAULT_MODEL_CACHE_DIR, resolveEmbedModel, resolveGenerateModel, resolveRerankModel, resolveModels } from "../llm.js";
+import { disposeDefaultLlamaCpp, getDefaultLlamaCpp, setDefaultLlamaCpp, LlamaCpp, withLLMSession, pullModels, DEFAULT_MODEL_CACHE_DIR, resolveEmbedModel, resolveGenerateModel, resolveRerankModel, resolveModels, inspectGgufFile } from "../llm.js";
 import {
   formatSearchResults,
   formatDocuments,
@@ -107,6 +107,8 @@ import {
   getLocalDbPath,
   getConfigPath,
   configExists,
+  type CollectionConfig,
+  type ModelsConfig,
 } from "../collections.js";
 
 // NOTE: enableProductionMode() is intentionally NOT called at module scope here.
@@ -393,6 +395,47 @@ function formatBytes(bytes: number): string {
   return `${(bytes / (1024 * 1024 * 1024)).toFixed(1)} GB`;
 }
 
+function sameDirectory(a: string, b: string): boolean {
+  // Compare symlink-resolved paths; if either cannot be resolved, compare lexically resolved paths instead.
+  let same: boolean;
+  try { same = realpathSync(a) === realpathSync(b); }
+  catch { same = pathResolve(a) === pathResolve(b); }
+  return same;
+}
+
+function initLocalIndex(): void { // `qmd init`: set up a project-local index under <cwd>/.qmd; throws when cwd is the home directory
+  const cwd = getPwd();
+  if (sameDirectory(cwd, homedir())) {
+    throw new Error("Refusing to initialize a local index in $HOME. The global index is automatically created; run `qmd collection add <path>` for the global index, or run `qmd init` inside a project folder.");
+  }
+  // Derive the local paths; an existing index.yaml is preferred, otherwise index.yml is used.
+  const qmdDir = pathJoin(cwd, ".qmd");
+  const ymlPath = pathJoin(qmdDir, "index.yml");
+  const yamlPath = pathJoin(qmdDir, "index.yaml");
+  const configPath = existsSync(yamlPath) ? yamlPath : ymlPath;
+  const dbPath = pathJoin(qmdDir, "index.sqlite");
+  // Create .qmd, then set the config source and DB path override before the config/DB calls below run.
+  mkdirSync(qmdDir, { recursive: true });
+  setConfigSource({ configPath });
+  storeDbPathOverride = dbPath;
+  closeDb();
+  // No config file yet: write one with no collections and the resolved models. Otherwise defer to ensureModelsConfiguredForCli().
+  if (!existsSync(configPath)) {
+    saveConfig({
+      collections: {},
+      models: resolveModels(),
+    });
+  } else {
+    ensureModelsConfiguredForCli();
+  }
+  // Open the store at dbPath, sync the freshly loaded config into its database, then close it.
+  const localStore = createStore(dbPath);
+  syncConfigToDb(localStore.db, loadConfig());
+  localStore.close();
+  // NOTE(review): this message is also printed when .qmd already existed, although it says "new".
+  console.log("ready to go with new local index");
+}
+
 function isForceCpuEnabled(): boolean {
   const value = process.env.QMD_FORCE_CPU;
   return !!value && !["false", "off", "none", "disable", "disabled", "0"].includes(value.trim().toLowerCase());
@@ -3183,6 +3226,7 @@ function showHelp(): void {
   console.log("  qmd ls [collection[/path]]                   - Inspect indexed files");
   console.log("");
   console.log("Maintenance:");
+  console.log("  qmd init                      - Create a project-local .qmd index");
   console.log("  qmd status                    - View index + collection health");
   console.log("  qmd update [--pull]           - Re-index collections (optionally git pull first)");
   console.log("  qmd embed [-f] [-c <name>]    - Generate/refresh vector embeddings");
@@ -3314,35 +3358,35 @@ function cosineDistance(a: ArrayLike<number>, b: ArrayLike<number>): number {
   return 1 - (dot / (Math.sqrt(normA) * Math.sqrt(normB)));
 }
 
-function isGgufFile(path: string): boolean {
-  if (!existsSync(path)) return false;
-  let fd: number | null = null;
-  try {
-    fd = openSync(path, "r");
-    const header = Buffer.alloc(4);
-    readSync(fd, header, 0, 4, 0);
-    return header.toString("utf-8") === "GGUF";
-  } catch {
-    return false;
-  } finally {
-    if (fd !== null) closeSync(fd);
-  }
+type CachedModelInspection = { // result of looking up one model reference in the local cache
+  path: string | null; // first candidate that passed GGUF inspection, or null when none did
+  invalid: string[]; // "<path>: <details>" entries for existing candidates that failed inspection
+};
+
+function formatModelDiagnosticPath(path: string): string { // path text for diagnostics, passed through sanitizeDiagnosticMessage
+  return sanitizeDiagnosticMessage(path);
 }
 
-function findCachedModelPath(model: string): string | null {
+function findCachedModelInspection(model: string): CachedModelInspection { // usable cached file (if any) plus every candidate that failed GGUF inspection
+  const invalid: string[] = [];
   if (model.startsWith("hf:")) {
     const filename = model.split("/").pop();
-    if (!filename || !existsSync(DEFAULT_MODEL_CACHE_DIR)) return null;
+    if (!filename || !existsSync(DEFAULT_MODEL_CACHE_DIR)) return { path: null, invalid };
     const entries = readdirSync(DEFAULT_MODEL_CACHE_DIR, { withFileTypes: true });
     for (const entry of entries) {
       if (!entry.isFile() || !entry.name.includes(filename)) continue;
       const candidate = pathJoin(DEFAULT_MODEL_CACHE_DIR, entry.name);
-      if (isGgufFile(candidate)) return candidate;
+      const inspection = inspectGgufFile(candidate);
+      if (inspection.valid) return { path: candidate, invalid }; // first valid match wins; bad candidates seen earlier are still returned in `invalid`
+      invalid.push(`${formatModelDiagnosticPath(candidate)}: ${inspection.details}`);
     }
-    return null;
+    return { path: null, invalid };
   }
 
-  return existsSync(model) && isGgufFile(model) ? model : null;
+  const inspection = inspectGgufFile(model); // anything not starting with "hf:" is inspected directly as a file path
+  if (inspection.valid) return { path: model, invalid };
+  if (inspection.exists) invalid.push(`${formatModelDiagnosticPath(model)}: ${inspection.details}`); // a missing file is not listed as invalid
+  return { path: null, invalid };
 }
 
 type EnvOverride = {
@@ -3356,8 +3400,7 @@ function envValueForDisplay(value: string): string {
   return sanitized.length > 96 ? `${sanitized.slice(0, 93)}...` : sanitized;
 }
 
-function collectEnvironmentOverrides(activeModels: { embed: string; generate: string; rerank: string }): EnvOverride[] {
-  const configModels = loadConfig().models ?? {};
+function collectEnvironmentOverrides(activeModels: { embed: string; generate: string; rerank: string }, configModels: ModelsConfig = {}): EnvOverride[] {
   const overrides: EnvOverride[] = [];
   const add = (name: string, consequence: string) => {
     const raw = process.env[name]?.trim();
@@ -3401,8 +3444,33 @@ function collectEnvironmentOverrides(activeModels: { embed: string; generate: st
   return overrides;
 }
 
-function checkEnvironmentOverrides(activeModels: { embed: string; generate: string; rerank: string }): void {
-  const overrides = collectEnvironmentOverrides(activeModels);
+type DoctorConfigCheck = { // outcome of loading the index config during `qmd doctor`
+  config: CollectionConfig | null; // the loaded config, or null when loading threw
+  valid: boolean; // false only when loading threw; an empty collection list is still valid
+};
+
+function checkDoctorIndexConfig(nextSteps: string[]): DoctorConfigCheck { // emits the "index config" check; a loadConfig() failure becomes a failed check
+  try {
+    const loaded = loadConfig();
+    const total = Object.keys(loaded.collections ?? {}).length;
+    if (total > 0) {
+      doctorCheck("index config", true, `${formatCount(total)} ${total === 1 ? "collection" : "collections"} configured`);
+    } else {
+      doctorCheck("index config", false, "no collections configured. Next: `qmd collection add .`");
+      nextSteps.push("Run `qmd collection add . --name <name>` from the folder you want to index, or edit .qmd/index.yml manually.");
+    }
+    return { config: loaded, valid: true };
+  } catch (error) {
+    const message = sanitizeDiagnosticMessage(error instanceof Error ? error.message : String(error));
+    const configPath = getConfigPath();
+    doctorCheck("index config", false, `invalid index.yml at ${configPath}: ${message}. Next: fix the YAML and rerun \`qmd doctor\``);
+    nextSteps.push(`Fix invalid YAML in ${configPath}, then rerun \`qmd doctor\`.`);
+    return { config: null, valid: false };
+  }
+}
+
+function checkEnvironmentOverrides(activeModels: { embed: string; generate: string; rerank: string }, configModels: ModelsConfig = {}): void {
+  const overrides = collectEnvironmentOverrides(activeModels, configModels);
   if (overrides.length === 0) {
     doctorCheck("environment overrides", true, "none");
     return;
@@ -3414,8 +3482,7 @@ function checkEnvironmentOverrides(activeModels: { embed: string; generate: stri
   }
 }
 
-function checkModelDefaults(activeModels: { embed: string; generate: string; rerank: string }, _nextSteps: string[]): void {
-  const configModels = loadConfig().models ?? {};
+function checkModelDefaults(activeModels: { embed: string; generate: string; rerank: string }, configModels: ModelsConfig = {}): void {
   const checks = [
     { role: "embedding", key: "embed", active: activeModels.embed, configured: configModels.embed, defaultModel: DEFAULT_EMBED_MODEL, envName: "QMD_EMBED_MODEL", envValue: process.env.QMD_EMBED_MODEL },
     { role: "generation", key: "generate", active: activeModels.generate, configured: configModels.generate, defaultModel: DEFAULT_QUERY_MODEL, envName: "QMD_GENERATE_MODEL", envValue: process.env.QMD_GENERATE_MODEL },
@@ -3455,20 +3522,33 @@ function checkModelCache(activeModels: { embed: string; generate: string; rerank
 
   const missing: string[] = [];
   const cached: string[] = [];
+  const invalid: string[] = [];
   for (const [model, roles] of unique) {
     const label = `${roles.join("+")}: ${model}`;
-    const path = findCachedModelPath(model);
-    if (path) {
+    const inspection = findCachedModelInspection(model);
+    invalid.push(...inspection.invalid.map(detail => `${label} (${detail})`));
+    if (inspection.path) {
       cached.push(label);
     } else {
       missing.push(label);
     }
   }
 
-  if (missing.length === 0) {
-    doctorCheck("model cache", true, `${cached.length} active ${cached.length === 1 ? "model is" : "models are"} downloaded`);
+  if (missing.length === 0 && invalid.length === 0) {
+    doctorCheck("model cache", true, `${cached.length} active ${cached.length === 1 ? "model is" : "models are"} downloaded and valid GGUF`);
+    return;
+  }
+
+  const parts: string[] = [];
+  if (invalid.length > 0) parts.push(`invalid ${invalid.length}: ${invalid.join("; ")}`);
+  if (missing.length > 0) parts.push(`missing ${missing.length}/${unique.size}: ${missing.join("; ")}`);
+  const next = invalid.length > 0
+    ? "Next: run `qmd pull --refresh` (or remove the bad cached file)"
+    : "Next: run `qmd pull`";
+  doctorCheck("model cache", false, `${parts.join("; ")}. ${next}`);
+  if (invalid.length > 0) {
+    nextSteps.push("Run `qmd pull --refresh` to replace invalid cached model files, or delete the listed file and rerun `qmd pull`.");
   } else {
-    doctorCheck("model cache", false, `missing ${missing.length}/${unique.size}: ${missing.join("; ")}. Next: run \`qmd pull\``);
     nextSteps.push("Run `qmd pull` to download missing embedding/generation/reranking models before `qmd embed` or `qmd query`.");
   }
 }
@@ -3624,8 +3704,10 @@ async function showDoctor(): Promise<void> {
     doctorCheck("sqlite-vec", false, error instanceof Error ? error.message : String(error));
   }
 
-  checkEnvironmentOverrides(activeModels);
-  checkModelDefaults(activeModels, nextSteps);
+  const configCheck = checkDoctorIndexConfig(nextSteps);
+  const configModels = configCheck.config?.models ?? {};
+  checkEnvironmentOverrides(activeModels, configModels);
+  checkModelDefaults(activeModels, configModels);
   checkModelCache(activeModels, nextSteps);
 
   await runDoctorDeviceChecks(nextSteps);
@@ -4015,6 +4097,15 @@ if (isMain) {
       break;
     }
 
+    case "init":
+      try {
+        initLocalIndex();
+      } catch (error) {
+        console.error(error instanceof Error ? error.message : String(error));
+        process.exit(1);
+      }
+      break;
+
     case "status":
       await showStatus();
       break;
diff --git a/src/collections.ts b/src/collections.ts
index b3da4a4..6950493 100644
--- a/src/collections.ts
+++ b/src/collections.ts
@@ -187,7 +187,8 @@ export function loadConfig(): CollectionConfig {
 
   try {
     const content = readFileSync(configPath, "utf-8");
-    const config = YAML.parse(content) as CollectionConfig;
+    const parsed = YAML.parse(content) as CollectionConfig | null | undefined;
+    const config = parsed ?? { collections: {} };
 
     // Ensure collections object exists
     if (!config.collections) {
diff --git a/src/llm.ts b/src/llm.ts
index 3047b20..656895a 100644
--- a/src/llm.ts
+++ b/src/llm.ts
@@ -32,6 +32,7 @@ export function setNodeLlamaCppModuleForTest(module: NodeLlamaCppModule | null):
   nodeLlamaCppImport = module ? Promise.resolve(module) : null;
   failedGpuInitModes.clear();
   noGpuAccelerationWarningShown = false;
+  cpuForcedPrebuiltFallbackWarningShown = false;
 }
 
 type StdoutWrite = typeof process.stdout.write;
@@ -324,37 +325,106 @@ async function getRemoteEtag(ref: HfRef): Promise<string | null> {
 
 const GGUF_MAGIC = Buffer.from("GGUF");
 
+export type GgufFileInspection = { // read-only inspection result for a candidate model file
+  exists: boolean; // false only when the path did not exist at inspection time
+  valid: boolean; // true only when the first four bytes equal the GGUF magic
+  kind: "missing" | "gguf" | "html" | "invalid"; // "html": sniffed bytes contain <!doctype or <html; "invalid" also covers read errors
+  sizeBytes?: number; // size from statSync; not set for missing files
+  magic?: string; // first four bytes (printable text or 0x-hex); not set for missing files or read errors
+  details: string; // human-readable summary used in diagnostics
+};
+
+function formatModelFileSize(sizeBytes: number): string {
+  return (sizeBytes / 1024).toFixed(0) + " KB"; // bytes / 1024, rounded to a whole number, always labelled "KB"
+}
+
+function printableMagic(header: Buffer): string {
+  const decoded = header.toString("utf-8");
+  return /^[\x20-\x7e]{1,4}$/.test(decoded) ? decoded : "0x" + header.toString("hex"); // hex unless 1-4 printable ASCII chars
+}
+
+/**
+ * Inspect a potential GGUF model file without mutating it: sniffs up to 512 bytes from offset 0 and
+ * returns read failures as kind "invalid" instead of throwing. Used by doctor and validateGgufFile.
+ */
+export function inspectGgufFile(filePath: string): GgufFileInspection {
+  if (!existsSync(filePath)) {
+    return { exists: false, valid: false, kind: "missing", details: "file does not exist" };
+  }
+  // sizeBytes stays 0 if statSync itself throws; the catch below still reports it.
+  let sizeBytes = 0;
+  try {
+    sizeBytes = statSync(filePath).size;
+    const fd = openSync(filePath, "r");
+    const sniff = Buffer.alloc(512);
+    try {
+      readSync(fd, sniff, 0, 512, 0);
+    } finally {
+      closeSync(fd);
+    }
+    // The descriptor is closed by now; only the first four sniffed bytes decide validity.
+    const header = sniff.subarray(0, 4);
+    if (header.equals(GGUF_MAGIC)) {
+      return {
+        exists: true,
+        valid: true,
+        kind: "gguf",
+        sizeBytes,
+        magic: "GGUF",
+        details: `valid GGUF (${formatModelFileSize(sizeBytes)})`,
+      };
+    }
+    // Not GGUF: classify as HTML when the lower-cased sniff buffer contains a doctype or <html tag.
+    const magic = printableMagic(header);
+    const text = sniff.toString("utf-8").toLowerCase();
+    const isHtml = text.includes("<!doctype") || text.includes("<html");
+    if (isHtml) {
+      return {
+        exists: true,
+        valid: false,
+        kind: "html",
+        sizeBytes,
+        magic,
+        details: `HTML page, not a GGUF model (${formatModelFileSize(sizeBytes)}); likely proxy/firewall/captive portal response`,
+      };
+    }
+    // Anything else is reported together with the magic bytes that were actually found.
+    return {
+      exists: true,
+      valid: false,
+      kind: "invalid",
+      sizeBytes,
+      magic,
+      details: `not valid GGUF (expected magic "GGUF", got "${magic}", ${formatModelFileSize(sizeBytes)})`,
+    };
+  } catch (error) {
+    return {
+      exists: true,
+      valid: false,
+      kind: "invalid",
+      sizeBytes,
+      details: `cannot read model file: ${error instanceof Error ? error.message : String(error)}`,
+    };
+  }
+}
+
 /**
  * Validate that a file is actually a GGUF model, not an HTML error page
  * from a proxy, firewall, or failed download.
  * Throws a descriptive error if the file is not valid GGUF.
  */
 function validateGgufFile(filePath: string, modelUri: string): void {
-  if (!existsSync(filePath)) return; // let downstream handle missing files
-
-  // Read header + sniff bytes in one go, then close immediately
-  const fd = openSync(filePath, "r");
-  const sniff = Buffer.alloc(512);
-  try {
-    readSync(fd, sniff, 0, 512, 0);
-  } finally {
-    closeSync(fd);
-  }
-
-  const header = sniff.subarray(0, 4);
-  if (header.equals(GGUF_MAGIC)) return; // valid GGUF
-
-  const text = sniff.toString("utf-8").toLowerCase();
-  const isHtml = text.includes("<!doctype") || text.includes("<html");
-  const got = header.toString("utf-8");
-  const sizeKB = (statSync(filePath).size / 1024).toFixed(0);
+  const inspection = inspectGgufFile(filePath);
+  if (!inspection.exists || inspection.valid) return; // let downstream handle missing files
 
   // Remove the bad file so the next attempt re-downloads
-  unlinkSync(filePath);
+  try {
+    unlinkSync(filePath);
+  } catch { /* best effort */ }
 
-  if (isHtml) {
+  if (inspection.kind === "html") {
     throw new Error(
-      `Downloaded model file is an HTML page, not a GGUF model (${sizeKB} KB).\n` +
+      `Downloaded model file is an HTML page, not a GGUF model (${formatModelFileSize(inspection.sizeBytes ?? 0)}).\n` +
       `Something is intercepting the download from huggingface.co (a proxy, firewall, or captive portal).\n\n` +
       `Model: ${modelUri}\n` +
       `Path:  ${filePath}\n\n` +
@@ -367,7 +437,7 @@ function validateGgufFile(filePath: string, modelUri: string): void {
   }
 
   throw new Error(
-    `Model file is not valid GGUF (expected magic "GGUF", got "${got}", file is ${sizeKB} KB).\n` +
+    `Model file is not valid GGUF (expected magic "GGUF", got "${inspection.magic ?? "unknown"}", file is ${formatModelFileSize(inspection.sizeBytes ?? 0)}).\n` +
     `Model: ${modelUri}\n` +
     `Path:  ${filePath}\n\n` +
     `The file has been removed. Run the command again to re-download.`
@@ -607,6 +677,11 @@ function resolveExpandContextSize(configValue?: number): number {
 
 const failedGpuInitModes = new Set<LlamaGpuMode>();
 let noGpuAccelerationWarningShown = false;
+let cpuForcedPrebuiltFallbackWarningShown = false;
+
+function isCpuModeRequested(): boolean { // true when resolveLlamaGpuMode() returns false, the `gpu` value used for CPU loads
+  return resolveLlamaGpuMode() === false;
+}
 
 export class LlamaCpp implements LLM {
   private readonly _ciMode = !!process.env.CI;
@@ -765,22 +840,44 @@ export class LlamaCpp implements LLM {
       const gpuMode = resolveLlamaGpuMode();
 
       const { getLlama, LlamaLogLevel } = await loadNodeLlamaCpp();
-      const loadLlama = async (gpu: LlamaGpuMode) =>
+      const loadLlama = async (gpu: LlamaGpuMode, sourceBuildAllowed = allowBuild) =>
         await withNativeStdoutRedirectedToStderr(() => getLlama({
-          build: allowBuild ? "autoAttempt" : "never",
+          // Prefer packaged prebuilt bindings before compiling llama.cpp locally.
+          // "autoAttempt" can try to compile a missing requested backend before
+          // falling back to another prebuilt backend; "auto" uses prebuilt/local
+          // binaries first and only builds when none are usable.
+          build: sourceBuildAllowed ? "auto" : "never",
           logLevel: LlamaLogLevel.error,
           gpu,
-          skipDownload: !allowBuild,
+          progressLogs: false,
+          skipDownload: !sourceBuildAllowed,
         }));
+      const loadCpuCompatibleLlama = async () => {
+        try {
+          return await loadLlama(false, false);
+        } catch (err) {
+          // Some platforms, notably Apple Silicon, ship a Metal prebuilt but no
+          // CPU-only prebuilt. Do a fast no-build lookup for an actual CPU
+          // binding first; if it does not exist, use the packaged auto/Metal
+          // binding and disable model offloading via gpuLayers: 0.
+          if (!cpuForcedPrebuiltFallbackWarningShown) {
+            cpuForcedPrebuiltFallbackWarningShown = true;
+            process.stderr.write(
+              `QMD Warning: CPU-only llama.cpp prebuilt not available (${err instanceof Error ? err.message : String(err)}); using packaged backend with GPU offloading disabled.\n`
+            );
+          }
+          return await loadLlama("auto", false);
+        }
+      };
 
       let llama: Llama;
-      if (gpuMode === false || failedGpuInitModes.has(gpuMode)) {
-        if (gpuMode !== false && failedGpuInitModes.has(gpuMode)) {
-          process.stderr.write(
-            `QMD Warning: skipping previously failed GPU init${gpuMode === "auto" ? "" : ` for QMD_LLAMA_GPU=${gpuMode}`}, using CPU.\n`
-          );
-        }
-        llama = await loadLlama(false);
+      if (gpuMode === false) {
+        llama = await loadCpuCompatibleLlama();
+      } else if (failedGpuInitModes.has(gpuMode)) {
+        process.stderr.write(
+          `QMD Warning: skipping previously failed GPU init${gpuMode === "auto" ? "" : ` for QMD_LLAMA_GPU=${gpuMode}`}, using CPU.\n`
+        );
+        llama = await loadCpuCompatibleLlama();
       } else {
         try {
           llama = await loadLlama(gpuMode);
@@ -792,7 +889,7 @@ export class LlamaCpp implements LLM {
           process.stderr.write(
             `QMD Warning: GPU init failed${gpuMode === "auto" ? "" : ` for QMD_LLAMA_GPU=${gpuMode}`} (${err instanceof Error ? err.message : String(err)}), falling back to CPU.\n`
           );
-          llama = await loadLlama(false);
+          llama = await loadCpuCompatibleLlama();
         }
       }
 
@@ -807,6 +904,17 @@ export class LlamaCpp implements LLM {
     return this.llama;
   }
 
+  private isCpuOffloadForced(): boolean { // CPU requested, or this GPU mode is in failedGpuInitModes, so the packaged-backend fallback really loads with gpuLayers: 0
+    return isCpuModeRequested() || failedGpuInitModes.has(resolveLlamaGpuMode());
+  }
+
+  private modelLoadOptions(modelPath: string): { modelPath: string; gpuLayers?: number } {
+    // Build the loadModel() options: gpuLayers is set to 0 only when CPU offload is forced, otherwise omitted.
+    const options: { modelPath: string; gpuLayers?: number } = { modelPath };
+    if (this.isCpuOffloadForced()) options.gpuLayers = 0;
+    return options;
+  }
+
   /**
    * Resolve a model URI to a local path, downloading if needed.
    * Validates the downloaded file is actually a GGUF model (not an HTML error page
@@ -835,7 +943,7 @@ export class LlamaCpp implements LLM {
     this.embedModelLoadPromise = (async () => {
       const llama = await this.ensureLlama();
       const modelPath = await this.resolveModel(this.embedModelUri);
-      const model = await llama.loadModel({ modelPath });
+      const model = await llama.loadModel(this.modelLoadOptions(modelPath));
       this.embedModel = model;
       // Model loading counts as activity - ping to keep alive
       this.touchActivity();
@@ -861,7 +969,7 @@ export class LlamaCpp implements LLM {
   private async computeParallelism(perContextMB: number): Promise<number> {
     const llama = await this.ensureLlama();
 
-    if (llama.gpu) {
+    if (!this.isCpuOffloadForced() && llama.gpu) {
       try {
         const vram = await llama.getVramState();
         const freeMB = vram.free / (1024 * 1024);
@@ -886,7 +994,7 @@ export class LlamaCpp implements LLM {
    */
   private async threadsPerContext(parallelism: number): Promise<number> {
     const llama = await this.ensureLlama();
-    if (llama.gpu) return 0; // GPU: let the library decide
+    if (!this.isCpuOffloadForced() && llama.gpu) return 0; // GPU: let the library decide
     const cores = llama.cpuMathCores || 4;
     return Math.max(1, Math.floor(cores / parallelism));
   }
@@ -954,7 +1062,7 @@ export class LlamaCpp implements LLM {
       this.generateModelLoadPromise = (async () => {
         const llama = await this.ensureLlama();
         const modelPath = await this.resolveModel(this.generateModelUri);
-        const model = await llama.loadModel({ modelPath });
+        const model = await llama.loadModel(this.modelLoadOptions(modelPath));
         this.generateModel = model;
         return model;
       })();
@@ -986,7 +1094,7 @@ export class LlamaCpp implements LLM {
     this.rerankModelLoadPromise = (async () => {
       const llama = await this.ensureLlama();
       const modelPath = await this.resolveModel(this.rerankModelUri);
-      const model = await llama.loadModel({ modelPath });
+      const model = await llama.loadModel(this.modelLoadOptions(modelPath));
       this.rerankModel = model;
       // Model loading counts as activity - ping to keep alive
       this.touchActivity();
@@ -1489,17 +1597,18 @@ export class LlamaCpp implements LLM {
     cpuCores: number;
   }> {
     const llama = await this.ensureLlama(options.allowBuild ?? true);
-    const gpuDevices = await llama.getGpuDeviceNames();
+    const cpuForced = this.isCpuOffloadForced();
+    const gpuDevices = cpuForced ? [] : await llama.getGpuDeviceNames();
     let vram: { total: number; used: number; free: number } | undefined;
-    if (llama.gpu) {
+    if (!cpuForced && llama.gpu) {
       try {
         const state = await llama.getVramState();
         vram = { total: state.total, used: state.used, free: state.free };
       } catch { /* no vram info */ }
     }
     return {
-      gpu: llama.gpu,
-      gpuOffloading: llama.supportsGpuOffloading,
+      gpu: cpuForced ? false : llama.gpu,
+      gpuOffloading: !cpuForced && llama.supportsGpuOffloading,
       gpuDevices,
       vram,
       cpuCores: llama.cpuMathCores,
diff --git a/test/cli.test.ts b/test/cli.test.ts
index 740f447..0d723af 100644
--- a/test/cli.test.ts
+++ b/test/cli.test.ts
@@ -16,6 +16,7 @@ import { setTimeout as sleep } from "timers/promises";
 import { buildEditorUri, termLink, resolveEmbedModelForCli } from "../src/cli/qmd.ts";
 import { openDatabase } from "../src/db.ts";
 import { DEFAULT_EMBED_MODEL_URI, DEFAULT_GENERATE_MODEL_URI, DEFAULT_RERANK_MODEL_URI } from "../src/llm.ts";
+import { setConfigSource } from "../src/collections.ts";
 
 // Test fixtures directory and database path
 let testDir: string;
@@ -311,13 +312,15 @@ describe("CLI Skills", () => {
 });
 
 describe("CLI Embed", () => {
-  test("prefers QMD_EMBED_MODEL for qmd embed", () => {
+  test("prefers QMD_EMBED_MODEL for qmd embed when the index has no model pin", () => {
     const prev = process.env.QMD_EMBED_MODEL;
     process.env.QMD_EMBED_MODEL = "hf:env/embed-model.gguf";
+    setConfigSource({ config: { collections: {} } });
 
     try {
       expect(resolveEmbedModelForCli()).toBe("hf:env/embed-model.gguf");
     } finally {
+      setConfigSource();
       if (prev === undefined) delete process.env.QMD_EMBED_MODEL;
       else process.env.QMD_EMBED_MODEL = prev;
     }
@@ -326,10 +329,12 @@ describe("CLI Embed", () => {
   test("falls back to the default embed model when QMD_EMBED_MODEL is unset", () => {
     const prev = process.env.QMD_EMBED_MODEL;
     delete process.env.QMD_EMBED_MODEL;
+    setConfigSource({ config: { collections: {} } });
 
     try {
       expect(resolveEmbedModelForCli()).toBe(DEFAULT_EMBED_MODEL_URI);
     } finally {
+      setConfigSource();
       if (prev === undefined) delete process.env.QMD_EMBED_MODEL;
       else process.env.QMD_EMBED_MODEL = prev;
     }
@@ -429,6 +434,36 @@ describe("CLI Skill Commands", () => {
   });
 });
 
+describe("CLI Init Command", () => {
+  test("creates a project-local .qmd index", async () => {
+    const projectDir = join(testDir, "init-project");
+    await mkdir(projectDir, { recursive: true });
+    // Run `qmd init` with the new directory as cwd, then check the message and the files it created.
+    const { stdout, exitCode } = await runQmd(["init"], { cwd: projectDir });
+    expect(exitCode).toBe(0);
+    expect(stdout.trim()).toBe("ready to go with new local index");
+    expect(existsSync(join(projectDir, ".qmd", "index.yml"))).toBe(true);
+    expect(existsSync(join(projectDir, ".qmd", "index.sqlite"))).toBe(true);
+    const configText = readFileSync(join(projectDir, ".qmd", "index.yml"), "utf-8");
+    expect(configText).toContain("collections: {}");
+    expect(configText).toContain("models:");
+  });
+
+  test("refuses to initialize in HOME", async () => {
+    const fakeHome = join(testDir, "init-home");
+    await mkdir(fakeHome, { recursive: true });
+    // HOME and cwd are the same directory here, so init must exit 1 and must not write a config.
+    const { stderr, exitCode } = await runQmd(["init"], {
+      cwd: fakeHome,
+      env: { HOME: fakeHome },
+    });
+    expect(exitCode).toBe(1);
+    expect(stderr).toContain("Refusing to initialize a local index in $HOME");
+    expect(stderr).toContain("global index is automatically created");
+    expect(existsSync(join(fakeHome, ".qmd", "index.yml"))).toBe(false);
+  });
+});
+
 describe("CLI Add Command", () => {
   test("adds files from current directory", async () => {
     const { stdout, exitCode } = await runQmd(["collection", "add", "."]);
@@ -491,7 +526,28 @@ describe("CLI Status Command", () => {
     expect(configText).toContain(DEFAULT_EMBED_MODEL_URI);
     expect(configText).toContain(DEFAULT_GENERATE_MODEL_URI);
     expect(configText).toContain(DEFAULT_RERANK_MODEL_URI);
-  });
+  }, 20000);
+
+  test("qmd doctor warns when no collections are configured", async () => {
+    const env = await createIsolatedTestEnv("doctor-no-collections");
+    const { stdout, exitCode } = await runQmd(["doctor"], { dbPath: env.dbPath, configDir: env.configDir });
+    expect(exitCode).toBe(0); // the empty index is reported on stdout, not through the exit code
+    expect(stdout).toContain("index config");
+    expect(stdout).toContain("no collections configured");
+    expect(stdout).toContain("qmd collection add .");
+  }, 20000);
+
+  test("qmd doctor reports invalid index.yml without crashing", async () => {
+    const env = await createIsolatedTestEnv("doctor-invalid-config");
+    await writeFile(join(env.configDir, "index.yml"), "collections:\n  bad: [unterminated\n");
+    // The config above has an unterminated flow sequence; doctor must still exit 0 and name the file.
+    const { stdout, exitCode } = await runQmd(["doctor"], { dbPath: env.dbPath, configDir: env.configDir });
+    expect(exitCode).toBe(0);
+    expect(stdout).toContain("index config");
+    expect(stdout).toContain("invalid index.yml at");
+    expect(stdout).toContain(join(env.configDir, "index.yml"));
+    expect(stdout).toContain("fix the YAML");
+  }, 20000);
 
   test("qmd doctor warns when configured models differ from code defaults", async () => {
     const env = await createIsolatedTestEnv("doctor-custom-models");
@@ -504,7 +560,32 @@ describe("CLI Status Command", () => {
     expect(stdout).toContain("index hf:example/custom-embed/custom.gguf");
     expect(stdout).toContain("might be ok");
     expect(stdout).toContain("qmd pull");
-  });
+  }, 20000);
+
+  test("qmd doctor identifies cached non-GGUF model files", async () => {
+    const env = await createIsolatedTestEnv("doctor-invalid-model-cache");
+    const model = "hf:example/custom-model/custom.gguf";
+    await writeFile(join(env.configDir, "index.yml"), `collections: {}\nmodels:\n  embed: ${model}\n  generate: ${model}\n  rerank: ${model}\n`);
+    const cacheRoot = join(env.configDir, "cache");
+    const modelCacheDir = join(cacheRoot, "qmd", "models");
+    await mkdir(modelCacheDir, { recursive: true });
+    const badModelPath = join(modelCacheDir, "custom.gguf");
+    await writeFile(badModelPath, "<!doctype html><html>blocked</html>");
+    // All three roles share one model, so one bad file is expected. NOTE(review): presumably XDG_CACHE_HOME relocates the model cache to cacheRoot/qmd/models — confirm.
+    const { stdout, exitCode } = await runQmd(["doctor"], {
+      dbPath: env.dbPath,
+      configDir: env.configDir,
+      env: {
+        XDG_CACHE_HOME: cacheRoot,
+        QMD_DOCTOR_DEVICE_PROBE: "0",
+      },
+    });
+    expect(exitCode).toBe(0);
+    expect(stdout).toContain("model cache");
+    expect(stdout).toContain("invalid 1");
+    expect(stdout).toContain("HTML page, not a GGUF model");
+    expect(stdout).toContain("qmd pull --refresh");
+  }, 20000);
 
   test("qmd doctor says when models are overridden by env", async () => {
     const env = await createIsolatedTestEnv("doctor-env-models");
@@ -523,7 +604,60 @@ describe("CLI Status Command", () => {
     expect(stdout).toContain("environment overrides");
     expect(stdout).toContain(`QMD_EMBED_MODEL=${customEmbed}`);
     expect(stdout).toContain("sets the active embed model");
-  });
+  }, 20000);
+
+  // With QMD_FORCE_CPU=1 set, doctor output must name the override and the CPU-forced device mode.
+  test("qmd doctor shows CPU-forced device mode with QMD_FORCE_CPU=1", async () => {
+    const { dbPath, configDir } = await createIsolatedTestEnv("doctor-force-cpu");
+    const cpuOnlyEnv = { QMD_FORCE_CPU: "1", QMD_DOCTOR_DEVICE_PROBE: "0" };
+    const result = await runQmd(["doctor"], { dbPath, configDir, env: cpuOnlyEnv });
+    expect(result.exitCode).toBe(0);
+    const expectedFragments = [
+      "QMD_FORCE_CPU=1",
+      "forces llama.cpp to bypass GPU backends",
+      "device mode: CPU forced (QMD_FORCE_CPU)",
+    ];
+    for (const fragment of expectedFragments) {
+      expect(result.stdout).toContain(fragment);
+    }
+  }, 20000);
+
+  // Every override variable handed to doctor must be named in its output, plus four consequence texts.
+  test("qmd doctor lists known environment overrides and consequences", async () => {
+    const { dbPath, configDir } = await createIsolatedTestEnv("doctor-env-overrides");
+    const overrides = {
+      XDG_CACHE_HOME: join(configDir, "cache"),
+      QMD_DOCTOR_DEVICE_PROBE: "0",
+      QMD_STATUS_DEVICE_PROBE: "1",
+      QMD_FORCE_CPU: "1",
+      QMD_LLAMA_GPU: "metal",
+      QMD_EMBED_PARALLELISM: "2",
+      QMD_EXPAND_CONTEXT_SIZE: "4096",
+      QMD_RERANK_CONTEXT_SIZE: "8192",
+      QMD_EMBED_CONTEXT_SIZE: "1024",
+      QMD_EDITOR_URI: "vscode://file/{file}:{line}:{col}",
+      QMD_SKILLS_DIR: "/tmp/qmd-skills",
+      QMD_DISABLE_DARWIN_QUERY_JSON_SAFE_EXIT: "1",
+      NO_COLOR: "1",
+      CI: "1",
+      HF_ENDPOINT: "https://hf-mirror.com",
+      WSL_DISTRO_NAME: "Ubuntu",
+      WSL_INTEROP: "1",
+    };
+    const result = await runQmd(["doctor"], { dbPath, configDir, env: overrides });
+
+    expect(result.exitCode).toBe(0);
+    Object.keys(overrides).forEach((variable) => {
+      expect(result.stdout).toContain(variable);
+    });
+    const consequences = [
+      "forces llama.cpp to bypass GPU backends",
+      "moves the default index cache",
+      "disables real LLM operations",
+      "changes Hugging Face download endpoint",
+    ];
+    for (const text of consequences) expect(result.stdout).toContain(text);
+  }, 20000);
 
   test("qmd doctor flags mixed embedding fingerprints", async () => {
     const db = openDatabase(testDbPath);
@@ -538,7 +672,7 @@ describe("CLI Status Command", () => {
     expect(exitCode).toBe(0);
     expect(stdout).toContain("embedding fingerprints");
     expect(stdout).toContain("stale1");
-  });
+  }, 20000);
 
   test("shows index status", async () => {
     const { stdout, exitCode } = await runQmd(["status"]);
@@ -1620,7 +1754,7 @@ describe("status and collection list hide filesystem paths", () => {
     const lines = stdout.split('\n').filter(l => !l.includes('Index:'));
     const pathLines = lines.filter(l => l.includes('/Users/') || l.includes('/home/') || l.includes('/tmp/'));
     expect(pathLines.length).toBe(0);
-  });
+  }, 20000);
 
   test("collection list does not show full filesystem paths", async () => {
     const { stdout, exitCode } = await runQmd(["collection", "list"], { dbPath: localDbPath, configDir: localConfigDir });
diff --git a/test/collections-config.test.ts b/test/collections-config.test.ts
index 3dd926b..ead770e 100644
--- a/test/collections-config.test.ts
+++ b/test/collections-config.test.ts
@@ -6,9 +6,11 @@
  */
 
 import { describe, test, expect, beforeEach, afterEach } from "vitest";
+import { mkdtemp, rm, writeFile } from "fs/promises";
+import { tmpdir } from "os";
 import { join } from "path";
 import { qmdHomedir } from "../src/paths.js";
-import { getConfigPath, setConfigIndexName } from "../src/collections.js";
+import { getConfigPath, loadConfig, setConfigIndexName } from "../src/collections.js";
 
 // Save/restore env vars around each test
 let savedEnv: Record<string, string | undefined>;
@@ -82,4 +84,15 @@ describe("getConfigDir via getConfigPath", () => {
     setConfigIndexName("myindex");
     expect(getConfigPath()).toBe(join("/xdg/config", "qmd", "myindex.yml"));
   });
+
+  test("loadConfig treats an empty YAML file as an empty config", async () => {
+    const dir = await mkdtemp(join(tmpdir(), "qmd-empty-config-")); // Fresh scratch directory under the OS temp dir.
+    try {
+      process.env.QMD_CONFIG_DIR = dir; // NOTE(review): presumably undone by this file's env save/restore hooks — confirm.
+      await writeFile(join(dir, "index.yml"), ""); // Zero-byte config. NOTE(review): assumes the active index name is "index" — confirm it is reset after setConfigIndexName("myindex").
+      expect(loadConfig()).toEqual({ collections: {} }); // An empty file must yield an empty collections map.
+    } finally {
+      await rm(dir, { recursive: true, force: true }); // Remove the scratch directory even when an assertion fails.
+    }
+  });
 });
diff --git a/test/llm.test.ts b/test/llm.test.ts
index 0ab1281..b6ee3ab 100644
--- a/test/llm.test.ts
+++ b/test/llm.test.ts
@@ -273,6 +273,63 @@ describe("native llama stdout containment", () => {
       else process.env.QMD_FORCE_CPU = prevForceCpu;
     }
   });
+
+  test("embeds hello world with QMD_FORCE_CPU=1 without throwing", async () => {
+    const prevGpu = process.env.QMD_LLAMA_GPU; // Saved so `finally` can restore or delete it.
+    const prevForceCpu = process.env.QMD_FORCE_CPU; // Saved so `finally` can restore or delete it.
+    process.env.QMD_FORCE_CPU = "1";
+    process.env.QMD_LLAMA_GPU = "metal"; // A GPU backend is requested too; the assertions below still expect gpu: false.
+
+    const getEmbeddingFor = vi.fn(async (text: string) => ({ // Stub: fixed 3-element vector, echoes the input text.
+      vector: new Float32Array([0.1, 0.2, 0.3]),
+      text,
+    }));
+    const createEmbeddingContext = vi.fn(async () => ({ // Stub context exposing only getEmbeddingFor and dispose.
+      getEmbeddingFor,
+      dispose: vi.fn(async () => {}),
+    }));
+    const loadModel = vi.fn(async () => ({ // Stub model; tokenize/detokenize work per character.
+      trainContextSize: 2048,
+      tokenize: (text: string) => Array.from(text),
+      detokenize: (tokens: string[]) => tokens.join(""),
+      createEmbeddingContext,
+      dispose: vi.fn(async () => {}),
+    }));
+    const getLlama = vi.fn(async (options: Record<string, unknown>) => ({ // Stub runtime; `options` is unused here and only checked via toHaveBeenCalledWith.
+      gpu: false,
+      cpuMathCores: 4,
+      loadModel,
+      dispose: vi.fn(async () => {}),
+    }) as any);
+
+    setNodeLlamaCppModuleForTest({ // Install the stub module through the test hook; reset with null in `finally`.
+      LlamaLogLevel: { error: "error" },
+      resolveModelFile: vi.fn(async () => "/tmp/nonexistent-model.gguf"), // NOTE(review): presumably never opened because loadModel is stubbed — confirm.
+      LlamaChatSession: vi.fn() as any,
+      getLlama,
+    });
+
+    const stderrSpy = vi.spyOn(process.stderr, "write").mockReturnValue(true); // Replace process.stderr.write with a stub for the duration of the test.
+    const llm = new LlamaCpp();
+    try {
+      const result = await llm.embed("hello world");
+      expect(result).toEqual({
+        embedding: [0.10000000149011612, 0.20000000298023224, 0.30000001192092896], // Float32 0.1/0.2/0.3 widened to double precision.
+        model: llm.embedModelName,
+      });
+      expect(getLlama).toHaveBeenCalledWith(expect.objectContaining({ gpu: false, build: "never" })); // The runtime must be requested without GPU and with build: "never".
+      expect(loadModel).toHaveBeenCalledWith(expect.objectContaining({ gpuLayers: 0 })); // The model must be loaded with gpuLayers: 0.
+      expect(getEmbeddingFor).toHaveBeenCalledWith("hello world"); // The input text must reach the embedding context unchanged.
+    } finally {
+      await llm.dispose();
+      stderrSpy.mockRestore();
+      setNodeLlamaCppModuleForTest(null); // Uninstall the stub module.
+      if (prevGpu === undefined) delete process.env.QMD_LLAMA_GPU; // Restore both env vars to their pre-test state.
+      else process.env.QMD_LLAMA_GPU = prevGpu;
+      if (prevForceCpu === undefined) delete process.env.QMD_FORCE_CPU;
+      else process.env.QMD_FORCE_CPU = prevForceCpu;
+    }
+  });
 });
 
 describe("LLM context parallelism safety", () => {