feat: add local init and doctor diagnostics

This commit is contained in:
Tobi Lutke 2026-05-19 14:27:33 -04:00
parent 5cda3cf54c
commit d9348f43a0
No known key found for this signature in database
6 changed files with 488 additions and 83 deletions

View File

@ -5,7 +5,7 @@ import { execSync, spawn as nodeSpawn } from "child_process";
import { fileURLToPath } from "url";
import { basename, dirname, join as pathJoin, relative as relativePath, resolve as pathResolve } from "path";
import { parseArgs } from "util";
import { readFileSync, readdirSync, realpathSync, statSync, existsSync, unlinkSync, writeFileSync, openSync, readSync, closeSync, mkdirSync, lstatSync, rmSync, symlinkSync, readlinkSync, copyFileSync } from "fs";
import { readFileSync, readdirSync, realpathSync, statSync, existsSync, unlinkSync, writeFileSync, openSync, closeSync, mkdirSync, lstatSync, rmSync, symlinkSync, readlinkSync, copyFileSync } from "fs";
import { createInterface } from "readline/promises";
import {
getPwd,
@ -81,7 +81,7 @@ import {
type ReindexResult,
type ChunkStrategy,
} from "../store.js";
import { disposeDefaultLlamaCpp, getDefaultLlamaCpp, setDefaultLlamaCpp, LlamaCpp, withLLMSession, pullModels, DEFAULT_MODEL_CACHE_DIR, resolveEmbedModel, resolveGenerateModel, resolveRerankModel, resolveModels } from "../llm.js";
import { disposeDefaultLlamaCpp, getDefaultLlamaCpp, setDefaultLlamaCpp, LlamaCpp, withLLMSession, pullModels, DEFAULT_MODEL_CACHE_DIR, resolveEmbedModel, resolveGenerateModel, resolveRerankModel, resolveModels, inspectGgufFile } from "../llm.js";
import {
formatSearchResults,
formatDocuments,
@ -107,6 +107,8 @@ import {
getLocalDbPath,
getConfigPath,
configExists,
type CollectionConfig,
type ModelsConfig,
} from "../collections.js";
// NOTE: enableProductionMode() is intentionally NOT called at module scope here.
@ -393,6 +395,47 @@ function formatBytes(bytes: number): string {
return `${(bytes / (1024 * 1024 * 1024)).toFixed(1)} GB`;
}
/**
 * Report whether two paths refer to the same directory.
 *
 * Both paths are canonicalised with `realpathSync`. If canonicalising either
 * one throws, the comparison falls back to the `pathResolve`d form of both
 * paths.
 */
function sameDirectory(a: string, b: string): boolean {
  let left: string;
  let right: string;
  try {
    left = realpathSync(a);
    right = realpathSync(b);
  } catch {
    // At least one path could not be canonicalised: compare both lexically.
    left = pathResolve(a);
    right = pathResolve(b);
  }
  return left === right;
}
/**
 * Set up a project-local index under `<cwd>/.qmd`.
 *
 * Ensures the `.qmd` directory exists, points the config source and the store
 * DB path override at files inside it, saves an initial config when no config
 * file is present, then opens the local store and passes the loaded config to
 * `syncConfigToDb`. Prints a one-line confirmation on success.
 *
 * @throws Error when the current directory is the user's home directory.
 */
function initLocalIndex(): void {
const cwd = getPwd();
// A local index in $HOME is refused; the error text points at the global index instead.
if (sameDirectory(cwd, homedir())) {
throw new Error("Refusing to initialize a local index in $HOME. The global index is automatically created; run `qmd collection add <path>` for the global index, or run `qmd init` inside a project folder.");
}
const qmdDir = pathJoin(cwd, ".qmd");
const ymlPath = pathJoin(qmdDir, "index.yml");
const yamlPath = pathJoin(qmdDir, "index.yaml");
// index.yaml is used only when it already exists; otherwise index.yml is the target.
const configPath = existsSync(yamlPath) ? yamlPath : ymlPath;
const dbPath = pathJoin(qmdDir, "index.sqlite");
mkdirSync(qmdDir, { recursive: true });
// Redirect config and DB to the local paths before anything loads or saves config.
// NOTE(review): closeDb() presumably drops a handle opened before the override — confirm.
setConfigSource({ configPath });
storeDbPathOverride = dbPath;
closeDb();
if (!existsSync(configPath)) {
// No config file yet: save one with no collections and the models returned by resolveModels().
saveConfig({
collections: {},
models: resolveModels(),
});
} else {
// Config already present: leave it in place.
// NOTE(review): ensureModelsConfiguredForCli() presumably fills in missing model settings — confirm.
ensureModelsConfiguredForCli();
}
// Open the store at the local DB path, sync the loaded config into it, and close it again.
const localStore = createStore(dbPath);
syncConfigToDb(localStore.db, loadConfig());
localStore.close();
console.log("ready to go with new local index");
}
function isForceCpuEnabled(): boolean {
const value = process.env.QMD_FORCE_CPU;
return !!value && !["false", "off", "none", "disable", "disabled", "0"].includes(value.trim().toLowerCase());
@ -3183,6 +3226,7 @@ function showHelp(): void {
console.log(" qmd ls [collection[/path]] - Inspect indexed files");
console.log("");
console.log("Maintenance:");
console.log(" qmd init - Create a project-local .qmd index");
console.log(" qmd status - View index + collection health");
console.log(" qmd update [--pull] - Re-index collections (optionally git pull first)");
console.log(" qmd embed [-f] [-c <name>] - Generate/refresh vector embeddings");
@ -3314,35 +3358,35 @@ function cosineDistance(a: ArrayLike<number>, b: ArrayLike<number>): number {
return 1 - (dot / (Math.sqrt(normA) * Math.sqrt(normB)));
}
function isGgufFile(path: string): boolean {
if (!existsSync(path)) return false;
let fd: number | null = null;
try {
fd = openSync(path, "r");
const header = Buffer.alloc(4);
readSync(fd, header, 0, 4, 0);
return header.toString("utf-8") === "GGUF";
} catch {
return false;
} finally {
if (fd !== null) closeSync(fd);
}
/** Result of looking for a usable cached copy of one model. */
type CachedModelInspection = {
// Path of a file that passed GGUF inspection, or null when none did.
path: string | null;
// One "<path>: <details>" entry per existing candidate that failed inspection.
invalid: string[];
};
/**
 * Render a model file path for diagnostic output by passing it through
 * `sanitizeDiagnosticMessage`.
 * NOTE(review): what exactly gets sanitized is defined by that helper — confirm.
 */
function formatModelDiagnosticPath(path: string): string {
return sanitizeDiagnosticMessage(path);
}
function findCachedModelPath(model: string): string | null {
function findCachedModelInspection(model: string): CachedModelInspection {
const invalid: string[] = [];
if (model.startsWith("hf:")) {
const filename = model.split("/").pop();
if (!filename || !existsSync(DEFAULT_MODEL_CACHE_DIR)) return null;
if (!filename || !existsSync(DEFAULT_MODEL_CACHE_DIR)) return { path: null, invalid };
const entries = readdirSync(DEFAULT_MODEL_CACHE_DIR, { withFileTypes: true });
for (const entry of entries) {
if (!entry.isFile() || !entry.name.includes(filename)) continue;
const candidate = pathJoin(DEFAULT_MODEL_CACHE_DIR, entry.name);
if (isGgufFile(candidate)) return candidate;
const inspection = inspectGgufFile(candidate);
if (inspection.valid) return { path: candidate, invalid };
invalid.push(`${formatModelDiagnosticPath(candidate)}: ${inspection.details}`);
}
return null;
return { path: null, invalid };
}
return existsSync(model) && isGgufFile(model) ? model : null;
const inspection = inspectGgufFile(model);
if (inspection.valid) return { path: model, invalid };
if (inspection.exists) invalid.push(`${formatModelDiagnosticPath(model)}: ${inspection.details}`);
return { path: null, invalid };
}
type EnvOverride = {
@ -3356,8 +3400,7 @@ function envValueForDisplay(value: string): string {
return sanitized.length > 96 ? `${sanitized.slice(0, 93)}...` : sanitized;
}
function collectEnvironmentOverrides(activeModels: { embed: string; generate: string; rerank: string }): EnvOverride[] {
const configModels = loadConfig().models ?? {};
function collectEnvironmentOverrides(activeModels: { embed: string; generate: string; rerank: string }, configModels: ModelsConfig = {}): EnvOverride[] {
const overrides: EnvOverride[] = [];
const add = (name: string, consequence: string) => {
const raw = process.env[name]?.trim();
@ -3401,8 +3444,33 @@ function collectEnvironmentOverrides(activeModels: { embed: string; generate: st
return overrides;
}
function checkEnvironmentOverrides(activeModels: { embed: string; generate: string; rerank: string }): void {
const overrides = collectEnvironmentOverrides(activeModels);
/** Outcome of the doctor "index config" check. */
type DoctorConfigCheck = {
// The loaded config, or null when loading it threw.
config: CollectionConfig | null;
// False only when loading failed; a config with zero collections is still valid.
valid: boolean;
};
/**
 * Doctor check for the index config file.
 *
 * Loads the config and reports an "index config" check: passing when at least
 * one collection is configured, failing (with a suggested next step) when there
 * are none. Any error thrown while loading or reporting is caught and reported
 * as an invalid index.yml instead of propagating.
 *
 * @param nextSteps Accumulator that receives a follow-up instruction when the
 *   check does not pass.
 * @returns The loaded config with `valid: true`, or `{ config: null, valid: false }`
 *   when an error was caught.
 */
function checkDoctorIndexConfig(nextSteps: string[]): DoctorConfigCheck {
  try {
    const loaded = loadConfig();
    const total = Object.keys(loaded.collections ?? {}).length;
    if (total > 0) {
      const noun = total === 1 ? "collection" : "collections";
      doctorCheck("index config", true, `${formatCount(total)} ${noun} configured`);
    } else {
      doctorCheck("index config", false, "no collections configured. Next: `qmd collection add .`");
      nextSteps.push("Run `qmd collection add . --name <name>` from the folder you want to index, or edit .qmd/index.yml manually.");
    }
    return { config: loaded, valid: true };
  } catch (error) {
    const rawMessage = error instanceof Error ? error.message : String(error);
    const message = sanitizeDiagnosticMessage(rawMessage);
    const configPath = getConfigPath();
    doctorCheck("index config", false, `invalid index.yml at ${configPath}: ${message}. Next: fix the YAML and rerun \`qmd doctor\``);
    nextSteps.push(`Fix invalid YAML in ${configPath}, then rerun \`qmd doctor\`.`);
    return { config: null, valid: false };
  }
}
function checkEnvironmentOverrides(activeModels: { embed: string; generate: string; rerank: string }, configModels: ModelsConfig = {}): void {
const overrides = collectEnvironmentOverrides(activeModels, configModels);
if (overrides.length === 0) {
doctorCheck("environment overrides", true, "none");
return;
@ -3414,8 +3482,7 @@ function checkEnvironmentOverrides(activeModels: { embed: string; generate: stri
}
}
function checkModelDefaults(activeModels: { embed: string; generate: string; rerank: string }, _nextSteps: string[]): void {
const configModels = loadConfig().models ?? {};
function checkModelDefaults(activeModels: { embed: string; generate: string; rerank: string }, configModels: ModelsConfig = {}): void {
const checks = [
{ role: "embedding", key: "embed", active: activeModels.embed, configured: configModels.embed, defaultModel: DEFAULT_EMBED_MODEL, envName: "QMD_EMBED_MODEL", envValue: process.env.QMD_EMBED_MODEL },
{ role: "generation", key: "generate", active: activeModels.generate, configured: configModels.generate, defaultModel: DEFAULT_QUERY_MODEL, envName: "QMD_GENERATE_MODEL", envValue: process.env.QMD_GENERATE_MODEL },
@ -3455,20 +3522,33 @@ function checkModelCache(activeModels: { embed: string; generate: string; rerank
const missing: string[] = [];
const cached: string[] = [];
const invalid: string[] = [];
for (const [model, roles] of unique) {
const label = `${roles.join("+")}: ${model}`;
const path = findCachedModelPath(model);
if (path) {
const inspection = findCachedModelInspection(model);
invalid.push(...inspection.invalid.map(detail => `${label} (${detail})`));
if (inspection.path) {
cached.push(label);
} else {
missing.push(label);
}
}
if (missing.length === 0) {
doctorCheck("model cache", true, `${cached.length} active ${cached.length === 1 ? "model is" : "models are"} downloaded`);
if (missing.length === 0 && invalid.length === 0) {
doctorCheck("model cache", true, `${cached.length} active ${cached.length === 1 ? "model is" : "models are"} downloaded and valid GGUF`);
return;
}
const parts: string[] = [];
if (invalid.length > 0) parts.push(`invalid ${invalid.length}: ${invalid.join("; ")}`);
if (missing.length > 0) parts.push(`missing ${missing.length}/${unique.size}: ${missing.join("; ")}`);
const next = invalid.length > 0
? "Next: run `qmd pull --refresh` (or remove the bad cached file)"
: "Next: run `qmd pull`";
doctorCheck("model cache", false, `${parts.join("; ")}. ${next}`);
if (invalid.length > 0) {
nextSteps.push("Run `qmd pull --refresh` to replace invalid cached model files, or delete the listed file and rerun `qmd pull`.");
} else {
doctorCheck("model cache", false, `missing ${missing.length}/${unique.size}: ${missing.join("; ")}. Next: run \`qmd pull\``);
nextSteps.push("Run `qmd pull` to download missing embedding/generation/reranking models before `qmd embed` or `qmd query`.");
}
}
@ -3624,8 +3704,10 @@ async function showDoctor(): Promise<void> {
doctorCheck("sqlite-vec", false, error instanceof Error ? error.message : String(error));
}
checkEnvironmentOverrides(activeModels);
checkModelDefaults(activeModels, nextSteps);
const configCheck = checkDoctorIndexConfig(nextSteps);
const configModels = configCheck.config?.models ?? {};
checkEnvironmentOverrides(activeModels, configModels);
checkModelDefaults(activeModels, configModels);
checkModelCache(activeModels, nextSteps);
await runDoctorDeviceChecks(nextSteps);
@ -4015,6 +4097,15 @@ if (isMain) {
break;
}
case "init":
try {
initLocalIndex();
} catch (error) {
console.error(error instanceof Error ? error.message : String(error));
process.exit(1);
}
break;
case "status":
await showStatus();
break;

View File

@ -187,7 +187,8 @@ export function loadConfig(): CollectionConfig {
try {
const content = readFileSync(configPath, "utf-8");
const config = YAML.parse(content) as CollectionConfig;
const parsed = YAML.parse(content) as CollectionConfig | null | undefined;
const config = parsed ?? { collections: {} };
// Ensure collections object exists
if (!config.collections) {

View File

@ -32,6 +32,7 @@ export function setNodeLlamaCppModuleForTest(module: NodeLlamaCppModule | null):
nodeLlamaCppImport = module ? Promise.resolve(module) : null;
failedGpuInitModes.clear();
noGpuAccelerationWarningShown = false;
cpuForcedPrebuiltFallbackWarningShown = false;
}
type StdoutWrite = typeof process.stdout.write;
@ -324,37 +325,106 @@ async function getRemoteEtag(ref: HfRef): Promise<string | null> {
const GGUF_MAGIC = Buffer.from("GGUF");
/** Description of a file that was checked for being a GGUF model. */
export type GgufFileInspection = {
// False only when the path did not exist at inspection time.
exists: boolean;
// True only when the file starts with the GGUF magic bytes.
valid: boolean;
// Classification: absent file, real GGUF, HTML page, or anything else (including unreadable files).
kind: "missing" | "gguf" | "html" | "invalid";
// File size in bytes from stat; not set for missing files.
sizeBytes?: number;
// Display form of the leading magic bytes; not set when the file is missing or could not be read.
magic?: string;
// Human-readable summary suitable for diagnostics.
details: string;
};
/**
 * Format a byte count as whole kilobytes (1 KB = 1024 bytes), e.g. `"12 KB"`.
 * Values are rounded to zero decimal places, so small files show as `"0 KB"`.
 */
function formatModelFileSize(sizeBytes: number): string {
  const kilobytes = sizeBytes / 1024;
  const rounded = kilobytes.toFixed(0);
  return rounded + " KB";
}
/**
 * Produce a display form of a file's magic bytes.
 *
 * Returns the bytes as text when they decode to 1-4 printable ASCII characters
 * (0x20-0x7e); otherwise returns them as a `0x`-prefixed hex string.
 */
function printableMagic(header: Buffer): string {
  const asText = header.toString("utf-8");
  const isPrintable = /^[\x20-\x7e]{1,4}$/.test(asText);
  if (isPrintable) {
    return asText;
  }
  return `0x${header.toString("hex")}`;
}
/**
 * Inspect a potential GGUF model file without mutating it.
 * Used by doctor for early diagnostics and by runtime validation before load.
 *
 * At most the first 512 bytes are read. The leading bytes are compared with the
 * GGUF magic; if they do not match, the sniffed prefix is scanned for HTML
 * markers so intercepted downloads can be called out specifically.
 *
 * Only the bytes actually returned by the read are considered file content, so
 * empty or very short files are reported as such rather than as zero padding.
 *
 * @param filePath Path of the file to inspect.
 * @returns A description of the file. `valid` is true only when the file starts
 *   with the GGUF magic. Errors raised while stat-ing or reading the file are
 *   caught and reported as `kind: "invalid"` with the error text in `details`.
 */
export function inspectGgufFile(filePath: string): GgufFileInspection {
  if (!existsSync(filePath)) {
    return { exists: false, valid: false, kind: "missing", details: "file does not exist" };
  }

  // Kept outside the try so the catch branch can still report a size if stat succeeded.
  let sizeBytes = 0;
  try {
    sizeBytes = statSync(filePath).size;

    const sniff = Buffer.alloc(512);
    let bytesRead = 0;
    const fd = openSync(filePath, "r");
    try {
      bytesRead = readSync(fd, sniff, 0, sniff.length, 0);
    } finally {
      // Always release the descriptor, even when the read throws.
      closeSync(fd);
    }

    // Drop the unread tail of the buffer: it is allocation padding, not file data.
    const content = sniff.subarray(0, bytesRead);
    const header = content.subarray(0, 4);
    if (header.equals(GGUF_MAGIC)) {
      return {
        exists: true,
        valid: true,
        kind: "gguf",
        sizeBytes,
        magic: "GGUF",
        details: `valid GGUF (${formatModelFileSize(sizeBytes)})`,
      };
    }

    if (bytesRead === 0) {
      // Nothing to sniff (e.g. a truncated download); there is no magic to show.
      return {
        exists: true,
        valid: false,
        kind: "invalid",
        sizeBytes,
        magic: "",
        details: `empty file, not a GGUF model (${formatModelFileSize(sizeBytes)})`,
      };
    }

    const magic = printableMagic(header);
    const text = content.toString("utf-8").toLowerCase();
    const isHtml = text.includes("<!doctype") || text.includes("<html");
    if (isHtml) {
      return {
        exists: true,
        valid: false,
        kind: "html",
        sizeBytes,
        magic,
        details: `HTML page, not a GGUF model (${formatModelFileSize(sizeBytes)}); likely proxy/firewall/captive portal response`,
      };
    }

    return {
      exists: true,
      valid: false,
      kind: "invalid",
      sizeBytes,
      magic,
      details: `not valid GGUF (expected magic "GGUF", got "${magic}", ${formatModelFileSize(sizeBytes)})`,
    };
  } catch (error) {
    return {
      exists: true,
      valid: false,
      kind: "invalid",
      sizeBytes,
      details: `cannot read model file: ${error instanceof Error ? error.message : String(error)}`,
    };
  }
}
/**
* Validate that a file is actually a GGUF model, not an HTML error page
* from a proxy, firewall, or failed download.
* Throws a descriptive error if the file is not valid GGUF.
*/
function validateGgufFile(filePath: string, modelUri: string): void {
if (!existsSync(filePath)) return; // let downstream handle missing files
// Read header + sniff bytes in one go, then close immediately
const fd = openSync(filePath, "r");
const sniff = Buffer.alloc(512);
try {
readSync(fd, sniff, 0, 512, 0);
} finally {
closeSync(fd);
}
const header = sniff.subarray(0, 4);
if (header.equals(GGUF_MAGIC)) return; // valid GGUF
const text = sniff.toString("utf-8").toLowerCase();
const isHtml = text.includes("<!doctype") || text.includes("<html");
const got = header.toString("utf-8");
const sizeKB = (statSync(filePath).size / 1024).toFixed(0);
const inspection = inspectGgufFile(filePath);
if (!inspection.exists || inspection.valid) return; // let downstream handle missing files
// Remove the bad file so the next attempt re-downloads
unlinkSync(filePath);
try {
unlinkSync(filePath);
} catch { /* best effort */ }
if (isHtml) {
if (inspection.kind === "html") {
throw new Error(
`Downloaded model file is an HTML page, not a GGUF model (${sizeKB} KB).\n` +
`Downloaded model file is an HTML page, not a GGUF model (${formatModelFileSize(inspection.sizeBytes ?? 0)}).\n` +
`Something is intercepting the download from huggingface.co (a proxy, firewall, or captive portal).\n\n` +
`Model: ${modelUri}\n` +
`Path: ${filePath}\n\n` +
@ -367,7 +437,7 @@ function validateGgufFile(filePath: string, modelUri: string): void {
}
throw new Error(
`Model file is not valid GGUF (expected magic "GGUF", got "${got}", file is ${sizeKB} KB).\n` +
`Model file is not valid GGUF (expected magic "GGUF", got "${inspection.magic ?? "unknown"}", file is ${formatModelFileSize(inspection.sizeBytes ?? 0)}).\n` +
`Model: ${modelUri}\n` +
`Path: ${filePath}\n\n` +
`The file has been removed. Run the command again to re-download.`
@ -607,6 +677,11 @@ function resolveExpandContextSize(configValue?: number): number {
const failedGpuInitModes = new Set<LlamaGpuMode>();
let noGpuAccelerationWarningShown = false;
let cpuForcedPrebuiltFallbackWarningShown = false;
/**
 * True when `resolveLlamaGpuMode()` resolves to `false`, i.e. the GPU mode value
 * that makes llama initialisation take the CPU path.
 */
function isCpuModeRequested(): boolean {
return resolveLlamaGpuMode() === false;
}
export class LlamaCpp implements LLM {
private readonly _ciMode = !!process.env.CI;
@ -765,22 +840,44 @@ export class LlamaCpp implements LLM {
const gpuMode = resolveLlamaGpuMode();
const { getLlama, LlamaLogLevel } = await loadNodeLlamaCpp();
const loadLlama = async (gpu: LlamaGpuMode) =>
const loadLlama = async (gpu: LlamaGpuMode, sourceBuildAllowed = allowBuild) =>
await withNativeStdoutRedirectedToStderr(() => getLlama({
build: allowBuild ? "autoAttempt" : "never",
// Prefer packaged prebuilt bindings before compiling llama.cpp locally.
// "autoAttempt" can try to compile a missing requested backend before
// falling back to another prebuilt backend; "auto" uses prebuilt/local
// binaries first and only builds when none are usable.
build: sourceBuildAllowed ? "auto" : "never",
logLevel: LlamaLogLevel.error,
gpu,
skipDownload: !allowBuild,
progressLogs: false,
skipDownload: !sourceBuildAllowed,
}));
const loadCpuCompatibleLlama = async () => {
try {
return await loadLlama(false, false);
} catch (err) {
// Some platforms, notably Apple Silicon, ship a Metal prebuilt but no
// CPU-only prebuilt. Do a fast no-build lookup for an actual CPU
// binding first; if it does not exist, use the packaged auto/Metal
// binding and disable model offloading via gpuLayers: 0.
if (!cpuForcedPrebuiltFallbackWarningShown) {
cpuForcedPrebuiltFallbackWarningShown = true;
process.stderr.write(
`QMD Warning: CPU-only llama.cpp prebuilt not available (${err instanceof Error ? err.message : String(err)}); using packaged backend with GPU offloading disabled.\n`
);
}
return await loadLlama("auto", false);
}
};
let llama: Llama;
if (gpuMode === false || failedGpuInitModes.has(gpuMode)) {
if (gpuMode !== false && failedGpuInitModes.has(gpuMode)) {
process.stderr.write(
`QMD Warning: skipping previously failed GPU init${gpuMode === "auto" ? "" : ` for QMD_LLAMA_GPU=${gpuMode}`}, using CPU.\n`
);
}
llama = await loadLlama(false);
if (gpuMode === false) {
llama = await loadCpuCompatibleLlama();
} else if (failedGpuInitModes.has(gpuMode)) {
process.stderr.write(
`QMD Warning: skipping previously failed GPU init${gpuMode === "auto" ? "" : ` for QMD_LLAMA_GPU=${gpuMode}`}, using CPU.\n`
);
llama = await loadCpuCompatibleLlama();
} else {
try {
llama = await loadLlama(gpuMode);
@ -792,7 +889,7 @@ export class LlamaCpp implements LLM {
process.stderr.write(
`QMD Warning: GPU init failed${gpuMode === "auto" ? "" : ` for QMD_LLAMA_GPU=${gpuMode}`} (${err instanceof Error ? err.message : String(err)}), falling back to CPU.\n`
);
llama = await loadLlama(false);
llama = await loadCpuCompatibleLlama();
}
}
@ -807,6 +904,17 @@ export class LlamaCpp implements LLM {
return this.llama;
}
/**
 * Whether CPU mode is currently requested (delegates to `isCpuModeRequested()`).
 * Callers in this class use it to disable GPU offloading and GPU-specific reporting.
 */
private isCpuOffloadForced(): boolean {
return isCpuModeRequested();
}
/**
 * Build the options object passed to `llama.loadModel` for a model file.
 *
 * Always carries `modelPath`; when CPU mode is forced it also sets
 * `gpuLayers: 0` so the model is loaded without GPU offloading.
 */
private modelLoadOptions(modelPath: string): { modelPath: string; gpuLayers?: number } {
  const options: { modelPath: string; gpuLayers?: number } = { modelPath };
  if (this.isCpuOffloadForced()) {
    options.gpuLayers = 0;
  }
  return options;
}
/**
* Resolve a model URI to a local path, downloading if needed.
* Validates the downloaded file is actually a GGUF model (not an HTML error page
@ -835,7 +943,7 @@ export class LlamaCpp implements LLM {
this.embedModelLoadPromise = (async () => {
const llama = await this.ensureLlama();
const modelPath = await this.resolveModel(this.embedModelUri);
const model = await llama.loadModel({ modelPath });
const model = await llama.loadModel(this.modelLoadOptions(modelPath));
this.embedModel = model;
// Model loading counts as activity - ping to keep alive
this.touchActivity();
@ -861,7 +969,7 @@ export class LlamaCpp implements LLM {
private async computeParallelism(perContextMB: number): Promise<number> {
const llama = await this.ensureLlama();
if (llama.gpu) {
if (!this.isCpuOffloadForced() && llama.gpu) {
try {
const vram = await llama.getVramState();
const freeMB = vram.free / (1024 * 1024);
@ -886,7 +994,7 @@ export class LlamaCpp implements LLM {
*/
private async threadsPerContext(parallelism: number): Promise<number> {
const llama = await this.ensureLlama();
if (llama.gpu) return 0; // GPU: let the library decide
if (!this.isCpuOffloadForced() && llama.gpu) return 0; // GPU: let the library decide
const cores = llama.cpuMathCores || 4;
return Math.max(1, Math.floor(cores / parallelism));
}
@ -954,7 +1062,7 @@ export class LlamaCpp implements LLM {
this.generateModelLoadPromise = (async () => {
const llama = await this.ensureLlama();
const modelPath = await this.resolveModel(this.generateModelUri);
const model = await llama.loadModel({ modelPath });
const model = await llama.loadModel(this.modelLoadOptions(modelPath));
this.generateModel = model;
return model;
})();
@ -986,7 +1094,7 @@ export class LlamaCpp implements LLM {
this.rerankModelLoadPromise = (async () => {
const llama = await this.ensureLlama();
const modelPath = await this.resolveModel(this.rerankModelUri);
const model = await llama.loadModel({ modelPath });
const model = await llama.loadModel(this.modelLoadOptions(modelPath));
this.rerankModel = model;
// Model loading counts as activity - ping to keep alive
this.touchActivity();
@ -1489,17 +1597,18 @@ export class LlamaCpp implements LLM {
cpuCores: number;
}> {
const llama = await this.ensureLlama(options.allowBuild ?? true);
const gpuDevices = await llama.getGpuDeviceNames();
const cpuForced = this.isCpuOffloadForced();
const gpuDevices = cpuForced ? [] : await llama.getGpuDeviceNames();
let vram: { total: number; used: number; free: number } | undefined;
if (llama.gpu) {
if (!cpuForced && llama.gpu) {
try {
const state = await llama.getVramState();
vram = { total: state.total, used: state.used, free: state.free };
} catch { /* no vram info */ }
}
return {
gpu: llama.gpu,
gpuOffloading: llama.supportsGpuOffloading,
gpu: cpuForced ? false : llama.gpu,
gpuOffloading: !cpuForced && llama.supportsGpuOffloading,
gpuDevices,
vram,
cpuCores: llama.cpuMathCores,

View File

@ -16,6 +16,7 @@ import { setTimeout as sleep } from "timers/promises";
import { buildEditorUri, termLink, resolveEmbedModelForCli } from "../src/cli/qmd.ts";
import { openDatabase } from "../src/db.ts";
import { DEFAULT_EMBED_MODEL_URI, DEFAULT_GENERATE_MODEL_URI, DEFAULT_RERANK_MODEL_URI } from "../src/llm.ts";
import { setConfigSource } from "../src/collections.ts";
// Test fixtures directory and database path
let testDir: string;
@ -311,13 +312,15 @@ describe("CLI Skills", () => {
});
describe("CLI Embed", () => {
test("prefers QMD_EMBED_MODEL for qmd embed", () => {
test("prefers QMD_EMBED_MODEL for qmd embed when the index has no model pin", () => {
const prev = process.env.QMD_EMBED_MODEL;
process.env.QMD_EMBED_MODEL = "hf:env/embed-model.gguf";
setConfigSource({ config: { collections: {} } });
try {
expect(resolveEmbedModelForCli()).toBe("hf:env/embed-model.gguf");
} finally {
setConfigSource();
if (prev === undefined) delete process.env.QMD_EMBED_MODEL;
else process.env.QMD_EMBED_MODEL = prev;
}
@ -326,10 +329,12 @@ describe("CLI Embed", () => {
test("falls back to the default embed model when QMD_EMBED_MODEL is unset", () => {
const prev = process.env.QMD_EMBED_MODEL;
delete process.env.QMD_EMBED_MODEL;
setConfigSource({ config: { collections: {} } });
try {
expect(resolveEmbedModelForCli()).toBe(DEFAULT_EMBED_MODEL_URI);
} finally {
setConfigSource();
if (prev === undefined) delete process.env.QMD_EMBED_MODEL;
else process.env.QMD_EMBED_MODEL = prev;
}
@ -429,6 +434,36 @@ describe("CLI Skill Commands", () => {
});
});
// End-to-end checks for `qmd init`, run through the CLI in a chosen working directory.
describe("CLI Init Command", () => {
// Happy path: init in an ordinary project folder creates .qmd/index.yml and .qmd/index.sqlite.
test("creates a project-local .qmd index", async () => {
const projectDir = join(testDir, "init-project");
await mkdir(projectDir, { recursive: true });
const { stdout, exitCode } = await runQmd(["init"], { cwd: projectDir });
expect(exitCode).toBe(0);
// The confirmation line is the only stdout output expected.
expect(stdout.trim()).toBe("ready to go with new local index");
expect(existsSync(join(projectDir, ".qmd", "index.yml"))).toBe(true);
expect(existsSync(join(projectDir, ".qmd", "index.sqlite"))).toBe(true);
// The freshly written config has no collections and includes a models section.
const configText = readFileSync(join(projectDir, ".qmd", "index.yml"), "utf-8");
expect(configText).toContain("collections: {}");
expect(configText).toContain("models:");
});
// Guard: when the working directory is the home directory, init must fail without writing a config.
test("refuses to initialize in HOME", async () => {
const fakeHome = join(testDir, "init-home");
await mkdir(fakeHome, { recursive: true });
// HOME is pointed at the working directory so the CLI sees cwd === home.
const { stderr, exitCode } = await runQmd(["init"], {
cwd: fakeHome,
env: { HOME: fakeHome },
});
expect(exitCode).toBe(1);
expect(stderr).toContain("Refusing to initialize a local index in $HOME");
expect(stderr).toContain("global index is automatically created");
expect(existsSync(join(fakeHome, ".qmd", "index.yml"))).toBe(false);
});
});
describe("CLI Add Command", () => {
test("adds files from current directory", async () => {
const { stdout, exitCode } = await runQmd(["collection", "add", "."]);
@ -491,7 +526,28 @@ describe("CLI Status Command", () => {
expect(configText).toContain(DEFAULT_EMBED_MODEL_URI);
expect(configText).toContain(DEFAULT_GENERATE_MODEL_URI);
expect(configText).toContain(DEFAULT_RERANK_MODEL_URI);
});
}, 20000);
// With no collections configured, doctor still exits 0 but its output names the
// "index config" check, states the problem, and suggests `qmd collection add .`.
// NOTE(review): createIsolatedTestEnv presumably yields a fresh db/config dir — confirm.
// The trailing 20000 is presumably the per-test timeout in milliseconds.
test("qmd doctor warns when no collections are configured", async () => {
const env = await createIsolatedTestEnv("doctor-no-collections");
const { stdout, exitCode } = await runQmd(["doctor"], { dbPath: env.dbPath, configDir: env.configDir });
expect(exitCode).toBe(0);
expect(stdout).toContain("index config");
expect(stdout).toContain("no collections configured");
expect(stdout).toContain("qmd collection add .");
}, 20000);
// A syntactically broken index.yml must not crash doctor: it exits 0 and reports
// the config path together with a hint to fix the YAML.
test("qmd doctor reports invalid index.yml without crashing", async () => {
const env = await createIsolatedTestEnv("doctor-invalid-config");
// Unterminated flow sequence makes the YAML unparseable.
await writeFile(join(env.configDir, "index.yml"), "collections:\n bad: [unterminated\n");
const { stdout, exitCode } = await runQmd(["doctor"], { dbPath: env.dbPath, configDir: env.configDir });
expect(exitCode).toBe(0);
expect(stdout).toContain("index config");
expect(stdout).toContain("invalid index.yml at");
expect(stdout).toContain(join(env.configDir, "index.yml"));
expect(stdout).toContain("fix the YAML");
}, 20000);
test("qmd doctor warns when configured models differ from code defaults", async () => {
const env = await createIsolatedTestEnv("doctor-custom-models");
@ -504,7 +560,32 @@ describe("CLI Status Command", () => {
expect(stdout).toContain("index hf:example/custom-embed/custom.gguf");
expect(stdout).toContain("might be ok");
expect(stdout).toContain("qmd pull");
});
}, 20000);
// A cached "model" that is really an HTML page must be reported as invalid by the
// "model cache" check, with a pointer to `qmd pull --refresh`.
test("qmd doctor identifies cached non-GGUF model files", async () => {
const env = await createIsolatedTestEnv("doctor-invalid-model-cache");
const model = "hf:example/custom-model/custom.gguf";
// Pin all three roles to the same model so only one cache entry is involved.
await writeFile(join(env.configDir, "index.yml"), `collections: {}\nmodels:\n embed: ${model}\n generate: ${model}\n rerank: ${model}\n`);
// NOTE(review): assumes the model cache dir resolves to $XDG_CACHE_HOME/qmd/models — confirm.
const cacheRoot = join(env.configDir, "cache");
const modelCacheDir = join(cacheRoot, "qmd", "models");
await mkdir(modelCacheDir, { recursive: true });
// File name matches the model's filename but the content is HTML, not GGUF.
const badModelPath = join(modelCacheDir, "custom.gguf");
await writeFile(badModelPath, "<!doctype html><html>blocked</html>");
// NOTE(review): QMD_DOCTOR_DEVICE_PROBE=0 presumably skips the device probe — confirm.
const { stdout, exitCode } = await runQmd(["doctor"], {
dbPath: env.dbPath,
configDir: env.configDir,
env: {
XDG_CACHE_HOME: cacheRoot,
QMD_DOCTOR_DEVICE_PROBE: "0",
},
});
expect(exitCode).toBe(0);
expect(stdout).toContain("model cache");
expect(stdout).toContain("invalid 1");
expect(stdout).toContain("HTML page, not a GGUF model");
expect(stdout).toContain("qmd pull --refresh");
}, 20000);
test("qmd doctor says when models are overridden by env", async () => {
const env = await createIsolatedTestEnv("doctor-env-models");
@ -523,7 +604,60 @@ describe("CLI Status Command", () => {
expect(stdout).toContain("environment overrides");
expect(stdout).toContain(`QMD_EMBED_MODEL=${customEmbed}`);
expect(stdout).toContain("sets the active embed model");
});
}, 20000);
// With QMD_FORCE_CPU=1, doctor must list the override with its consequence and
// report the device mode as CPU forced.
test("qmd doctor shows CPU-forced device mode with QMD_FORCE_CPU=1", async () => {
const env = await createIsolatedTestEnv("doctor-force-cpu");
// NOTE(review): QMD_DOCTOR_DEVICE_PROBE=0 presumably skips the device probe — confirm.
const { stdout, exitCode } = await runQmd(["doctor"], {
dbPath: env.dbPath,
configDir: env.configDir,
env: {
QMD_FORCE_CPU: "1",
QMD_DOCTOR_DEVICE_PROBE: "0",
},
});
expect(exitCode).toBe(0);
expect(stdout).toContain("QMD_FORCE_CPU=1");
expect(stdout).toContain("forces llama.cpp to bypass GPU backends");
expect(stdout).toContain("device mode: CPU forced (QMD_FORCE_CPU)");
}, 20000);
// Sets every environment variable doctor is expected to recognise and checks that
// each name appears in the output, plus a sample of the consequence descriptions.
test("qmd doctor lists known environment overrides and consequences", async () => {
const env = await createIsolatedTestEnv("doctor-env-overrides");
// Keys of this object double as the list of names asserted below.
const overrides = {
XDG_CACHE_HOME: join(env.configDir, "cache"),
QMD_DOCTOR_DEVICE_PROBE: "0",
QMD_STATUS_DEVICE_PROBE: "1",
QMD_FORCE_CPU: "1",
QMD_LLAMA_GPU: "metal",
QMD_EMBED_PARALLELISM: "2",
QMD_EXPAND_CONTEXT_SIZE: "4096",
QMD_RERANK_CONTEXT_SIZE: "8192",
QMD_EMBED_CONTEXT_SIZE: "1024",
QMD_EDITOR_URI: "vscode://file/{file}:{line}:{col}",
QMD_SKILLS_DIR: "/tmp/qmd-skills",
QMD_DISABLE_DARWIN_QUERY_JSON_SAFE_EXIT: "1",
NO_COLOR: "1",
CI: "1",
HF_ENDPOINT: "https://hf-mirror.com",
WSL_DISTRO_NAME: "Ubuntu",
WSL_INTEROP: "1",
};
const { stdout, exitCode } = await runQmd(["doctor"], {
dbPath: env.dbPath,
configDir: env.configDir,
env: overrides,
});
expect(exitCode).toBe(0);
// Every override name must be mentioned somewhere in the report.
for (const name of Object.keys(overrides)) {
expect(stdout).toContain(name);
}
expect(stdout).toContain("forces llama.cpp to bypass GPU backends");
expect(stdout).toContain("moves the default index cache");
expect(stdout).toContain("disables real LLM operations");
expect(stdout).toContain("changes Hugging Face download endpoint");
}, 20000);
test("qmd doctor flags mixed embedding fingerprints", async () => {
const db = openDatabase(testDbPath);
@ -538,7 +672,7 @@ describe("CLI Status Command", () => {
expect(exitCode).toBe(0);
expect(stdout).toContain("embedding fingerprints");
expect(stdout).toContain("stale1");
});
}, 20000);
test("shows index status", async () => {
const { stdout, exitCode } = await runQmd(["status"]);
@ -1620,7 +1754,7 @@ describe("status and collection list hide filesystem paths", () => {
const lines = stdout.split('\n').filter(l => !l.includes('Index:'));
const pathLines = lines.filter(l => l.includes('/Users/') || l.includes('/home/') || l.includes('/tmp/'));
expect(pathLines.length).toBe(0);
});
}, 20000);
test("collection list does not show full filesystem paths", async () => {
const { stdout, exitCode } = await runQmd(["collection", "list"], { dbPath: localDbPath, configDir: localConfigDir });

View File

@ -6,9 +6,11 @@
*/
import { describe, test, expect, beforeEach, afterEach } from "vitest";
import { mkdtemp, rm, writeFile } from "fs/promises";
import { tmpdir } from "os";
import { join } from "path";
import { qmdHomedir } from "../src/paths.js";
import { getConfigPath, setConfigIndexName } from "../src/collections.js";
import { getConfigPath, loadConfig, setConfigIndexName } from "../src/collections.js";
// Save/restore env vars around each test
let savedEnv: Record<string, string | undefined>;
@ -82,4 +84,15 @@ describe("getConfigDir via getConfigPath", () => {
setConfigIndexName("myindex");
expect(getConfigPath()).toBe(join("/xdg/config", "qmd", "myindex.yml"));
});
// An empty index.yml must load as `{ collections: {} }` rather than failing.
test("loadConfig treats an empty YAML file as an empty config", async () => {
const dir = await mkdtemp(join(tmpdir(), "qmd-empty-config-"));
try {
// Point config lookup at the temp dir, then create a zero-byte index.yml there.
process.env.QMD_CONFIG_DIR = dir;
await writeFile(join(dir, "index.yml"), "");
expect(loadConfig()).toEqual({ collections: {} });
} finally {
// Remove the temp dir regardless of the assertion outcome.
await rm(dir, { recursive: true, force: true });
}
});
});

View File

@ -273,6 +273,63 @@ describe("native llama stdout containment", () => {
else process.env.QMD_FORCE_CPU = prevForceCpu;
}
});
// With QMD_FORCE_CPU=1 set (and QMD_LLAMA_GPU=metal also present), embedding must
// succeed against a mocked node-llama-cpp module, request a CPU binding without
// building, and load the model with gpuLayers: 0.
test("embeds hello world with QMD_FORCE_CPU=1 without throwing", async () => {
// Remember the current env values so the finally block can restore them.
const prevGpu = process.env.QMD_LLAMA_GPU;
const prevForceCpu = process.env.QMD_FORCE_CPU;
process.env.QMD_FORCE_CPU = "1";
process.env.QMD_LLAMA_GPU = "metal";
// Mock chain: getLlama -> loadModel -> createEmbeddingContext -> getEmbeddingFor.
const getEmbeddingFor = vi.fn(async (text: string) => ({
vector: new Float32Array([0.1, 0.2, 0.3]),
text,
}));
const createEmbeddingContext = vi.fn(async () => ({
getEmbeddingFor,
dispose: vi.fn(async () => {}),
}));
const loadModel = vi.fn(async () => ({
trainContextSize: 2048,
tokenize: (text: string) => Array.from(text),
detokenize: (tokens: string[]) => tokens.join(""),
createEmbeddingContext,
dispose: vi.fn(async () => {}),
}));
const getLlama = vi.fn(async (options: Record<string, unknown>) => ({
gpu: false,
cpuMathCores: 4,
loadModel,
dispose: vi.fn(async () => {}),
}) as any);
setNodeLlamaCppModuleForTest({
LlamaLogLevel: { error: "error" },
resolveModelFile: vi.fn(async () => "/tmp/nonexistent-model.gguf"),
LlamaChatSession: vi.fn() as any,
getLlama,
});
// Silence stderr writes for the duration of the test.
const stderrSpy = vi.spyOn(process.stderr, "write").mockReturnValue(true);
const llm = new LlamaCpp();
try {
const result = await llm.embed("hello world");
// Expected values are the Float32 vector entries widened to doubles.
expect(result).toEqual({
embedding: [0.10000000149011612, 0.20000000298023224, 0.30000001192092896],
model: llm.embedModelName,
});
expect(getLlama).toHaveBeenCalledWith(expect.objectContaining({ gpu: false, build: "never" }));
expect(loadModel).toHaveBeenCalledWith(expect.objectContaining({ gpuLayers: 0 }));
expect(getEmbeddingFor).toHaveBeenCalledWith("hello world");
} finally {
// Tear down in reverse: dispose the instance, restore stderr, drop the mock module, restore env.
await llm.dispose();
stderrSpy.mockRestore();
setNodeLlamaCppModuleForTest(null);
if (prevGpu === undefined) delete process.env.QMD_LLAMA_GPU;
else process.env.QMD_LLAMA_GPU = prevGpu;
if (prevForceCpu === undefined) delete process.env.QMD_FORCE_CPU;
else process.env.QMD_FORCE_CPU = prevForceCpu;
}
});
});
describe("LLM context parallelism safety", () => {