feat: add local init and doctor diagnostics
This commit is contained in:
parent
5cda3cf54c
commit
d9348f43a0
157
src/cli/qmd.ts
157
src/cli/qmd.ts
@ -5,7 +5,7 @@ import { execSync, spawn as nodeSpawn } from "child_process";
|
||||
import { fileURLToPath } from "url";
|
||||
import { basename, dirname, join as pathJoin, relative as relativePath, resolve as pathResolve } from "path";
|
||||
import { parseArgs } from "util";
|
||||
import { readFileSync, readdirSync, realpathSync, statSync, existsSync, unlinkSync, writeFileSync, openSync, readSync, closeSync, mkdirSync, lstatSync, rmSync, symlinkSync, readlinkSync, copyFileSync } from "fs";
|
||||
import { readFileSync, readdirSync, realpathSync, statSync, existsSync, unlinkSync, writeFileSync, openSync, closeSync, mkdirSync, lstatSync, rmSync, symlinkSync, readlinkSync, copyFileSync } from "fs";
|
||||
import { createInterface } from "readline/promises";
|
||||
import {
|
||||
getPwd,
|
||||
@ -81,7 +81,7 @@ import {
|
||||
type ReindexResult,
|
||||
type ChunkStrategy,
|
||||
} from "../store.js";
|
||||
import { disposeDefaultLlamaCpp, getDefaultLlamaCpp, setDefaultLlamaCpp, LlamaCpp, withLLMSession, pullModels, DEFAULT_MODEL_CACHE_DIR, resolveEmbedModel, resolveGenerateModel, resolveRerankModel, resolveModels } from "../llm.js";
|
||||
import { disposeDefaultLlamaCpp, getDefaultLlamaCpp, setDefaultLlamaCpp, LlamaCpp, withLLMSession, pullModels, DEFAULT_MODEL_CACHE_DIR, resolveEmbedModel, resolveGenerateModel, resolveRerankModel, resolveModels, inspectGgufFile } from "../llm.js";
|
||||
import {
|
||||
formatSearchResults,
|
||||
formatDocuments,
|
||||
@ -107,6 +107,8 @@ import {
|
||||
getLocalDbPath,
|
||||
getConfigPath,
|
||||
configExists,
|
||||
type CollectionConfig,
|
||||
type ModelsConfig,
|
||||
} from "../collections.js";
|
||||
|
||||
// NOTE: enableProductionMode() is intentionally NOT called at module scope here.
|
||||
@ -393,6 +395,47 @@ function formatBytes(bytes: number): string {
|
||||
return `${(bytes / (1024 * 1024 * 1024)).toFixed(1)} GB`;
|
||||
}
|
||||
|
||||
/**
 * Report whether two paths point at the same directory.
 *
 * Both paths are canonicalized with `realpathSync` first, so locations reached
 * through symlinks compare equal. If either path cannot be canonicalized, the
 * comparison falls back to plain absolute-path resolution.
 *
 * @param a - First path to compare.
 * @param b - Second path to compare.
 * @returns `true` when both paths resolve to the same location.
 */
function sameDirectory(a: string, b: string): boolean {
  let canonicalA: string;
  let canonicalB: string;
  try {
    canonicalA = realpathSync(a);
    canonicalB = realpathSync(b);
  } catch {
    // Canonicalization failed for at least one side; compare lexically instead.
    return pathResolve(a) === pathResolve(b);
  }
  return canonicalA === canonicalB;
}
|
||||
|
||||
/**
 * Create (or refresh) a project-local `.qmd` index in the current working directory.
 *
 * Steps, in order:
 *  1. Refuse to run when the working directory is the user's home directory.
 *  2. Ensure `<cwd>/.qmd` exists and point the config source and the DB path
 *     override at `<cwd>/.qmd/index.{yaml,yml}` and `<cwd>/.qmd/index.sqlite`.
 *  3. Write a fresh config (no collections, resolved models) when none exists,
 *     otherwise make sure the existing config has its models configured.
 *  4. Open the local store, sync the config into its database, and close it.
 *
 * @throws Error when the working directory is the user's home directory.
 */
function initLocalIndex(): void {
  const cwd = getPwd();
  if (sameDirectory(cwd, homedir())) {
    throw new Error("Refusing to initialize a local index in $HOME. The global index is automatically created; run `qmd collection add <path>` for the global index, or run `qmd init` inside a project folder.");
  }

  const qmdDir = pathJoin(cwd, ".qmd");
  const ymlPath = pathJoin(qmdDir, "index.yml");
  const yamlPath = pathJoin(qmdDir, "index.yaml");
  // Reuse an existing index.yaml if present; otherwise default to index.yml.
  const configPath = existsSync(yamlPath) ? yamlPath : ymlPath;
  const dbPath = pathJoin(qmdDir, "index.sqlite");

  mkdirSync(qmdDir, { recursive: true });
  setConfigSource({ configPath });
  storeDbPathOverride = dbPath;
  // NOTE(review): presumably drops any already-open DB handle so later access
  // honors the override set above — confirm against closeDb().
  closeDb();

  if (!existsSync(configPath)) {
    saveConfig({
      collections: {},
      models: resolveModels(),
    });
  } else {
    ensureModelsConfiguredForCli();
  }

  const localStore = createStore(dbPath);
  try {
    syncConfigToDb(localStore.db, loadConfig());
  } finally {
    // Always release the store, even when loading or syncing the config throws.
    localStore.close();
  }

  console.log("ready to go with new local index");
}
|
||||
|
||||
function isForceCpuEnabled(): boolean {
|
||||
const value = process.env.QMD_FORCE_CPU;
|
||||
return !!value && !["false", "off", "none", "disable", "disabled", "0"].includes(value.trim().toLowerCase());
|
||||
@ -3183,6 +3226,7 @@ function showHelp(): void {
|
||||
console.log(" qmd ls [collection[/path]] - Inspect indexed files");
|
||||
console.log("");
|
||||
console.log("Maintenance:");
|
||||
console.log(" qmd init - Create a project-local .qmd index");
|
||||
console.log(" qmd status - View index + collection health");
|
||||
console.log(" qmd update [--pull] - Re-index collections (optionally git pull first)");
|
||||
console.log(" qmd embed [-f] [-c <name>] - Generate/refresh vector embeddings");
|
||||
@ -3314,35 +3358,35 @@ function cosineDistance(a: ArrayLike<number>, b: ArrayLike<number>): number {
|
||||
return 1 - (dot / (Math.sqrt(normA) * Math.sqrt(normB)));
|
||||
}
|
||||
|
||||
function isGgufFile(path: string): boolean {
|
||||
if (!existsSync(path)) return false;
|
||||
let fd: number | null = null;
|
||||
try {
|
||||
fd = openSync(path, "r");
|
||||
const header = Buffer.alloc(4);
|
||||
readSync(fd, header, 0, 4, 0);
|
||||
return header.toString("utf-8") === "GGUF";
|
||||
} catch {
|
||||
return false;
|
||||
} finally {
|
||||
if (fd !== null) closeSync(fd);
|
||||
}
|
||||
/** Outcome of looking up a model in the local cache. */
type CachedModelInspection = {
  /** Path of the first candidate that passed GGUF inspection, or `null` when none did. */
  path: string | null;
  /** One diagnostic string per candidate file that exists but failed GGUF inspection. */
  invalid: string[];
};
|
||||
|
||||
function formatModelDiagnosticPath(path: string): string {
|
||||
return sanitizeDiagnosticMessage(path);
|
||||
}
|
||||
|
||||
function findCachedModelPath(model: string): string | null {
|
||||
function findCachedModelInspection(model: string): CachedModelInspection {
|
||||
const invalid: string[] = [];
|
||||
if (model.startsWith("hf:")) {
|
||||
const filename = model.split("/").pop();
|
||||
if (!filename || !existsSync(DEFAULT_MODEL_CACHE_DIR)) return null;
|
||||
if (!filename || !existsSync(DEFAULT_MODEL_CACHE_DIR)) return { path: null, invalid };
|
||||
const entries = readdirSync(DEFAULT_MODEL_CACHE_DIR, { withFileTypes: true });
|
||||
for (const entry of entries) {
|
||||
if (!entry.isFile() || !entry.name.includes(filename)) continue;
|
||||
const candidate = pathJoin(DEFAULT_MODEL_CACHE_DIR, entry.name);
|
||||
if (isGgufFile(candidate)) return candidate;
|
||||
const inspection = inspectGgufFile(candidate);
|
||||
if (inspection.valid) return { path: candidate, invalid };
|
||||
invalid.push(`${formatModelDiagnosticPath(candidate)}: ${inspection.details}`);
|
||||
}
|
||||
return null;
|
||||
return { path: null, invalid };
|
||||
}
|
||||
|
||||
return existsSync(model) && isGgufFile(model) ? model : null;
|
||||
const inspection = inspectGgufFile(model);
|
||||
if (inspection.valid) return { path: model, invalid };
|
||||
if (inspection.exists) invalid.push(`${formatModelDiagnosticPath(model)}: ${inspection.details}`);
|
||||
return { path: null, invalid };
|
||||
}
|
||||
|
||||
type EnvOverride = {
|
||||
@ -3356,8 +3400,7 @@ function envValueForDisplay(value: string): string {
|
||||
return sanitized.length > 96 ? `${sanitized.slice(0, 93)}...` : sanitized;
|
||||
}
|
||||
|
||||
function collectEnvironmentOverrides(activeModels: { embed: string; generate: string; rerank: string }): EnvOverride[] {
|
||||
const configModels = loadConfig().models ?? {};
|
||||
function collectEnvironmentOverrides(activeModels: { embed: string; generate: string; rerank: string }, configModels: ModelsConfig = {}): EnvOverride[] {
|
||||
const overrides: EnvOverride[] = [];
|
||||
const add = (name: string, consequence: string) => {
|
||||
const raw = process.env[name]?.trim();
|
||||
@ -3401,8 +3444,33 @@ function collectEnvironmentOverrides(activeModels: { embed: string; generate: st
|
||||
return overrides;
|
||||
}
|
||||
|
||||
function checkEnvironmentOverrides(activeModels: { embed: string; generate: string; rerank: string }): void {
|
||||
const overrides = collectEnvironmentOverrides(activeModels);
|
||||
/** Result of the doctor's index-config check. */
type DoctorConfigCheck = {
  /** The loaded configuration, or `null` when loading it failed. */
  config: CollectionConfig | null;
  /** `true` when the configuration loaded without throwing. */
  valid: boolean;
};
|
||||
|
||||
/**
 * Doctor check for the index configuration.
 *
 * Loads the config and reports, via `doctorCheck`, either how many collections
 * are configured or that none are. Any error thrown while loading or reporting
 * is caught and reported as an invalid index.yml instead of propagating.
 *
 * @param nextSteps - Mutable list of follow-up hints; a hint is appended when
 *   no collections are configured or when the config cannot be loaded.
 * @returns The loaded config with `valid: true`, or `{ config: null, valid: false }`
 *   when loading failed.
 */
function checkDoctorIndexConfig(nextSteps: string[]): DoctorConfigCheck {
  try {
    const loaded = loadConfig();
    const total = Object.keys(loaded.collections ?? {}).length;
    if (total > 0) {
      const noun = total === 1 ? "collection" : "collections";
      doctorCheck("index config", true, `${formatCount(total)} ${noun} configured`);
    } else {
      doctorCheck("index config", false, "no collections configured. Next: `qmd collection add .`");
      nextSteps.push("Run `qmd collection add . --name <name>` from the folder you want to index, or edit .qmd/index.yml manually.");
    }
    return { config: loaded, valid: true };
  } catch (error) {
    // Normalize whatever was thrown to text before sanitizing it for display.
    const rawMessage = error instanceof Error ? error.message : String(error);
    const message = sanitizeDiagnosticMessage(rawMessage);
    const configPath = getConfigPath();
    doctorCheck("index config", false, `invalid index.yml at ${configPath}: ${message}. Next: fix the YAML and rerun \`qmd doctor\``);
    nextSteps.push(`Fix invalid YAML in ${configPath}, then rerun \`qmd doctor\`.`);
    return { config: null, valid: false };
  }
}
|
||||
|
||||
function checkEnvironmentOverrides(activeModels: { embed: string; generate: string; rerank: string }, configModels: ModelsConfig = {}): void {
|
||||
const overrides = collectEnvironmentOverrides(activeModels, configModels);
|
||||
if (overrides.length === 0) {
|
||||
doctorCheck("environment overrides", true, "none");
|
||||
return;
|
||||
@ -3414,8 +3482,7 @@ function checkEnvironmentOverrides(activeModels: { embed: string; generate: stri
|
||||
}
|
||||
}
|
||||
|
||||
function checkModelDefaults(activeModels: { embed: string; generate: string; rerank: string }, _nextSteps: string[]): void {
|
||||
const configModels = loadConfig().models ?? {};
|
||||
function checkModelDefaults(activeModels: { embed: string; generate: string; rerank: string }, configModels: ModelsConfig = {}): void {
|
||||
const checks = [
|
||||
{ role: "embedding", key: "embed", active: activeModels.embed, configured: configModels.embed, defaultModel: DEFAULT_EMBED_MODEL, envName: "QMD_EMBED_MODEL", envValue: process.env.QMD_EMBED_MODEL },
|
||||
{ role: "generation", key: "generate", active: activeModels.generate, configured: configModels.generate, defaultModel: DEFAULT_QUERY_MODEL, envName: "QMD_GENERATE_MODEL", envValue: process.env.QMD_GENERATE_MODEL },
|
||||
@ -3455,20 +3522,33 @@ function checkModelCache(activeModels: { embed: string; generate: string; rerank
|
||||
|
||||
const missing: string[] = [];
|
||||
const cached: string[] = [];
|
||||
const invalid: string[] = [];
|
||||
for (const [model, roles] of unique) {
|
||||
const label = `${roles.join("+")}: ${model}`;
|
||||
const path = findCachedModelPath(model);
|
||||
if (path) {
|
||||
const inspection = findCachedModelInspection(model);
|
||||
invalid.push(...inspection.invalid.map(detail => `${label} (${detail})`));
|
||||
if (inspection.path) {
|
||||
cached.push(label);
|
||||
} else {
|
||||
missing.push(label);
|
||||
}
|
||||
}
|
||||
|
||||
if (missing.length === 0) {
|
||||
doctorCheck("model cache", true, `${cached.length} active ${cached.length === 1 ? "model is" : "models are"} downloaded`);
|
||||
if (missing.length === 0 && invalid.length === 0) {
|
||||
doctorCheck("model cache", true, `${cached.length} active ${cached.length === 1 ? "model is" : "models are"} downloaded and valid GGUF`);
|
||||
return;
|
||||
}
|
||||
|
||||
const parts: string[] = [];
|
||||
if (invalid.length > 0) parts.push(`invalid ${invalid.length}: ${invalid.join("; ")}`);
|
||||
if (missing.length > 0) parts.push(`missing ${missing.length}/${unique.size}: ${missing.join("; ")}`);
|
||||
const next = invalid.length > 0
|
||||
? "Next: run `qmd pull --refresh` (or remove the bad cached file)"
|
||||
: "Next: run `qmd pull`";
|
||||
doctorCheck("model cache", false, `${parts.join("; ")}. ${next}`);
|
||||
if (invalid.length > 0) {
|
||||
nextSteps.push("Run `qmd pull --refresh` to replace invalid cached model files, or delete the listed file and rerun `qmd pull`.");
|
||||
} else {
|
||||
doctorCheck("model cache", false, `missing ${missing.length}/${unique.size}: ${missing.join("; ")}. Next: run \`qmd pull\``);
|
||||
nextSteps.push("Run `qmd pull` to download missing embedding/generation/reranking models before `qmd embed` or `qmd query`.");
|
||||
}
|
||||
}
|
||||
@ -3624,8 +3704,10 @@ async function showDoctor(): Promise<void> {
|
||||
doctorCheck("sqlite-vec", false, error instanceof Error ? error.message : String(error));
|
||||
}
|
||||
|
||||
checkEnvironmentOverrides(activeModels);
|
||||
checkModelDefaults(activeModels, nextSteps);
|
||||
const configCheck = checkDoctorIndexConfig(nextSteps);
|
||||
const configModels = configCheck.config?.models ?? {};
|
||||
checkEnvironmentOverrides(activeModels, configModels);
|
||||
checkModelDefaults(activeModels, configModels);
|
||||
checkModelCache(activeModels, nextSteps);
|
||||
|
||||
await runDoctorDeviceChecks(nextSteps);
|
||||
@ -4015,6 +4097,15 @@ if (isMain) {
|
||||
break;
|
||||
}
|
||||
|
||||
case "init":
|
||||
try {
|
||||
initLocalIndex();
|
||||
} catch (error) {
|
||||
console.error(error instanceof Error ? error.message : String(error));
|
||||
process.exit(1);
|
||||
}
|
||||
break;
|
||||
|
||||
case "status":
|
||||
await showStatus();
|
||||
break;
|
||||
|
||||
@ -187,7 +187,8 @@ export function loadConfig(): CollectionConfig {
|
||||
|
||||
try {
|
||||
const content = readFileSync(configPath, "utf-8");
|
||||
const config = YAML.parse(content) as CollectionConfig;
|
||||
const parsed = YAML.parse(content) as CollectionConfig | null | undefined;
|
||||
const config = parsed ?? { collections: {} };
|
||||
|
||||
// Ensure collections object exists
|
||||
if (!config.collections) {
|
||||
|
||||
193
src/llm.ts
193
src/llm.ts
@ -32,6 +32,7 @@ export function setNodeLlamaCppModuleForTest(module: NodeLlamaCppModule | null):
|
||||
nodeLlamaCppImport = module ? Promise.resolve(module) : null;
|
||||
failedGpuInitModes.clear();
|
||||
noGpuAccelerationWarningShown = false;
|
||||
cpuForcedPrebuiltFallbackWarningShown = false;
|
||||
}
|
||||
|
||||
type StdoutWrite = typeof process.stdout.write;
|
||||
@ -324,37 +325,106 @@ async function getRemoteEtag(ref: HfRef): Promise<string | null> {
|
||||
|
||||
const GGUF_MAGIC = Buffer.from("GGUF");
|
||||
|
||||
/** Result of sniffing a file that is expected to be a GGUF model. */
export type GgufFileInspection = {
  /** `false` only when the path does not exist on disk. */
  exists: boolean;
  /** `true` only when the file starts with the GGUF magic bytes. */
  valid: boolean;
  /** Classification of the file: absent, real GGUF, an HTML page, or anything else. */
  kind: "missing" | "gguf" | "html" | "invalid";
  /** File size in bytes; omitted when the file is missing. */
  sizeBytes?: number;
  /** Leading bytes of the file (text when printable, otherwise hex); omitted when no header was read. */
  magic?: string;
  /** Human-readable summary suitable for diagnostics output. */
  details: string;
};
|
||||
|
||||
/**
 * Render a byte count as whole kilobytes for diagnostic messages.
 *
 * @param sizeBytes - Size in bytes.
 * @returns The size divided by 1024, rounded to zero decimals, followed by " KB".
 */
function formatModelFileSize(sizeBytes: number): string {
  const kilobytes = sizeBytes / 1024;
  return kilobytes.toFixed(0) + " KB";
}
|
||||
|
||||
/**
 * Produce a display form of a file's leading magic bytes.
 *
 * @param header - The leading bytes of the file.
 * @returns The bytes decoded as text when they are 1-4 printable ASCII
 *   characters, otherwise a `0x`-prefixed hex dump of the bytes.
 */
function printableMagic(header: Buffer): string {
  const decoded = header.toString("utf-8");
  const isPrintableAscii = /^[\x20-\x7e]{1,4}$/.test(decoded);
  if (isPrintableAscii) {
    return decoded;
  }
  return "0x" + header.toString("hex");
}
|
||||
|
||||
/**
 * Inspect a potential GGUF model file without mutating it.
 * Used by doctor for early diagnostics and by runtime validation before load.
 *
 * Only the first 512 bytes are read. The file is classified as:
 *  - "missing": the path does not exist;
 *  - "gguf":    the first four bytes are the GGUF magic;
 *  - "html":    the sniffed bytes contain `<!doctype` or `<html` (case-insensitive);
 *  - "invalid": anything else, including empty files and read failures.
 *
 * Only bytes actually returned by the read are examined, so short or truncated
 * files are not padded with NULs when reporting their magic.
 *
 * @param filePath - Path of the file to inspect.
 * @returns A description of the file. This function does not throw for I/O
 *   errors after the existence check; they are reported as kind "invalid".
 */
export function inspectGgufFile(filePath: string): GgufFileInspection {
  if (!existsSync(filePath)) {
    return { exists: false, valid: false, kind: "missing", details: "file does not exist" };
  }

  let sizeBytes = 0;
  try {
    sizeBytes = statSync(filePath).size;
    const fd = openSync(filePath, "r");
    const buffer = Buffer.alloc(512);
    let bytesRead = 0;
    try {
      bytesRead = readSync(fd, buffer, 0, 512, 0);
    } finally {
      // Close right after the read; nothing below needs the descriptor.
      closeSync(fd);
    }

    // Restrict the sniff window to what was really read from disk.
    const sniff = buffer.subarray(0, bytesRead);
    const header = sniff.subarray(0, 4);
    if (header.equals(GGUF_MAGIC)) {
      return {
        exists: true,
        valid: true,
        kind: "gguf",
        sizeBytes,
        magic: "GGUF",
        details: `valid GGUF (${formatModelFileSize(sizeBytes)})`,
      };
    }

    if (bytesRead === 0) {
      // Nothing to sniff: report it plainly instead of inventing a zeroed magic.
      return {
        exists: true,
        valid: false,
        kind: "invalid",
        sizeBytes,
        details: `no data could be read (file is empty or truncated), not a GGUF model (${formatModelFileSize(sizeBytes)})`,
      };
    }

    const magic = printableMagic(header);
    const text = sniff.toString("utf-8").toLowerCase();
    const isHtml = text.includes("<!doctype") || text.includes("<html");
    if (isHtml) {
      return {
        exists: true,
        valid: false,
        kind: "html",
        sizeBytes,
        magic,
        details: `HTML page, not a GGUF model (${formatModelFileSize(sizeBytes)}); likely proxy/firewall/captive portal response`,
      };
    }

    return {
      exists: true,
      valid: false,
      kind: "invalid",
      sizeBytes,
      magic,
      details: `not valid GGUF (expected magic "GGUF", got "${magic}", ${formatModelFileSize(sizeBytes)})`,
    };
  } catch (error) {
    // stat/open/read failed: the path exists but its contents are unusable.
    return {
      exists: true,
      valid: false,
      kind: "invalid",
      sizeBytes,
      details: `cannot read model file: ${error instanceof Error ? error.message : String(error)}`,
    };
  }
}
|
||||
|
||||
/**
|
||||
* Validate that a file is actually a GGUF model, not an HTML error page
|
||||
* from a proxy, firewall, or failed download.
|
||||
* Throws a descriptive error if the file is not valid GGUF.
|
||||
*/
|
||||
function validateGgufFile(filePath: string, modelUri: string): void {
|
||||
if (!existsSync(filePath)) return; // let downstream handle missing files
|
||||
|
||||
// Read header + sniff bytes in one go, then close immediately
|
||||
const fd = openSync(filePath, "r");
|
||||
const sniff = Buffer.alloc(512);
|
||||
try {
|
||||
readSync(fd, sniff, 0, 512, 0);
|
||||
} finally {
|
||||
closeSync(fd);
|
||||
}
|
||||
|
||||
const header = sniff.subarray(0, 4);
|
||||
if (header.equals(GGUF_MAGIC)) return; // valid GGUF
|
||||
|
||||
const text = sniff.toString("utf-8").toLowerCase();
|
||||
const isHtml = text.includes("<!doctype") || text.includes("<html");
|
||||
const got = header.toString("utf-8");
|
||||
const sizeKB = (statSync(filePath).size / 1024).toFixed(0);
|
||||
const inspection = inspectGgufFile(filePath);
|
||||
if (!inspection.exists || inspection.valid) return; // let downstream handle missing files
|
||||
|
||||
// Remove the bad file so the next attempt re-downloads
|
||||
unlinkSync(filePath);
|
||||
try {
|
||||
unlinkSync(filePath);
|
||||
} catch { /* best effort */ }
|
||||
|
||||
if (isHtml) {
|
||||
if (inspection.kind === "html") {
|
||||
throw new Error(
|
||||
`Downloaded model file is an HTML page, not a GGUF model (${sizeKB} KB).\n` +
|
||||
`Downloaded model file is an HTML page, not a GGUF model (${formatModelFileSize(inspection.sizeBytes ?? 0)}).\n` +
|
||||
`Something is intercepting the download from huggingface.co (a proxy, firewall, or captive portal).\n\n` +
|
||||
`Model: ${modelUri}\n` +
|
||||
`Path: ${filePath}\n\n` +
|
||||
@ -367,7 +437,7 @@ function validateGgufFile(filePath: string, modelUri: string): void {
|
||||
}
|
||||
|
||||
throw new Error(
|
||||
`Model file is not valid GGUF (expected magic "GGUF", got "${got}", file is ${sizeKB} KB).\n` +
|
||||
`Model file is not valid GGUF (expected magic "GGUF", got "${inspection.magic ?? "unknown"}", file is ${formatModelFileSize(inspection.sizeBytes ?? 0)}).\n` +
|
||||
`Model: ${modelUri}\n` +
|
||||
`Path: ${filePath}\n\n` +
|
||||
`The file has been removed. Run the command again to re-download.`
|
||||
@ -607,6 +677,11 @@ function resolveExpandContextSize(configValue?: number): number {
|
||||
|
||||
const failedGpuInitModes = new Set<LlamaGpuMode>();
|
||||
let noGpuAccelerationWarningShown = false;
|
||||
let cpuForcedPrebuiltFallbackWarningShown = false;
|
||||
|
||||
/** True when the resolved llama.cpp GPU mode is exactly `false`. */
function isCpuModeRequested(): boolean {
  const gpuMode = resolveLlamaGpuMode();
  return gpuMode === false;
}
|
||||
|
||||
export class LlamaCpp implements LLM {
|
||||
private readonly _ciMode = !!process.env.CI;
|
||||
@ -765,22 +840,44 @@ export class LlamaCpp implements LLM {
|
||||
const gpuMode = resolveLlamaGpuMode();
|
||||
|
||||
const { getLlama, LlamaLogLevel } = await loadNodeLlamaCpp();
|
||||
const loadLlama = async (gpu: LlamaGpuMode) =>
|
||||
const loadLlama = async (gpu: LlamaGpuMode, sourceBuildAllowed = allowBuild) =>
|
||||
await withNativeStdoutRedirectedToStderr(() => getLlama({
|
||||
build: allowBuild ? "autoAttempt" : "never",
|
||||
// Prefer packaged prebuilt bindings before compiling llama.cpp locally.
|
||||
// "autoAttempt" can try to compile a missing requested backend before
|
||||
// falling back to another prebuilt backend; "auto" uses prebuilt/local
|
||||
// binaries first and only builds when none are usable.
|
||||
build: sourceBuildAllowed ? "auto" : "never",
|
||||
logLevel: LlamaLogLevel.error,
|
||||
gpu,
|
||||
skipDownload: !allowBuild,
|
||||
progressLogs: false,
|
||||
skipDownload: !sourceBuildAllowed,
|
||||
}));
|
||||
const loadCpuCompatibleLlama = async () => {
|
||||
try {
|
||||
return await loadLlama(false, false);
|
||||
} catch (err) {
|
||||
// Some platforms, notably Apple Silicon, ship a Metal prebuilt but no
|
||||
// CPU-only prebuilt. Do a fast no-build lookup for an actual CPU
|
||||
// binding first; if it does not exist, use the packaged auto/Metal
|
||||
// binding and disable model offloading via gpuLayers: 0.
|
||||
if (!cpuForcedPrebuiltFallbackWarningShown) {
|
||||
cpuForcedPrebuiltFallbackWarningShown = true;
|
||||
process.stderr.write(
|
||||
`QMD Warning: CPU-only llama.cpp prebuilt not available (${err instanceof Error ? err.message : String(err)}); using packaged backend with GPU offloading disabled.\n`
|
||||
);
|
||||
}
|
||||
return await loadLlama("auto", false);
|
||||
}
|
||||
};
|
||||
|
||||
let llama: Llama;
|
||||
if (gpuMode === false || failedGpuInitModes.has(gpuMode)) {
|
||||
if (gpuMode !== false && failedGpuInitModes.has(gpuMode)) {
|
||||
process.stderr.write(
|
||||
`QMD Warning: skipping previously failed GPU init${gpuMode === "auto" ? "" : ` for QMD_LLAMA_GPU=${gpuMode}`}, using CPU.\n`
|
||||
);
|
||||
}
|
||||
llama = await loadLlama(false);
|
||||
if (gpuMode === false) {
|
||||
llama = await loadCpuCompatibleLlama();
|
||||
} else if (failedGpuInitModes.has(gpuMode)) {
|
||||
process.stderr.write(
|
||||
`QMD Warning: skipping previously failed GPU init${gpuMode === "auto" ? "" : ` for QMD_LLAMA_GPU=${gpuMode}`}, using CPU.\n`
|
||||
);
|
||||
llama = await loadCpuCompatibleLlama();
|
||||
} else {
|
||||
try {
|
||||
llama = await loadLlama(gpuMode);
|
||||
@ -792,7 +889,7 @@ export class LlamaCpp implements LLM {
|
||||
process.stderr.write(
|
||||
`QMD Warning: GPU init failed${gpuMode === "auto" ? "" : ` for QMD_LLAMA_GPU=${gpuMode}`} (${err instanceof Error ? err.message : String(err)}), falling back to CPU.\n`
|
||||
);
|
||||
llama = await loadLlama(false);
|
||||
llama = await loadCpuCompatibleLlama();
|
||||
}
|
||||
}
|
||||
|
||||
@ -807,6 +904,17 @@ export class LlamaCpp implements LLM {
|
||||
return this.llama;
|
||||
}
|
||||
|
||||
/** Whether this instance must treat the backend as CPU-only; delegates to `isCpuModeRequested()`. */
private isCpuOffloadForced(): boolean {
  const cpuOnly = isCpuModeRequested();
  return cpuOnly;
}
|
||||
|
||||
/**
 * Build the options object passed to `llama.loadModel`.
 *
 * @param modelPath - Local path of the model file to load.
 * @returns Options containing `modelPath`, plus `gpuLayers: 0` when CPU
 *   offloading is forced; `gpuLayers` is left unset otherwise.
 */
private modelLoadOptions(modelPath: string): { modelPath: string; gpuLayers?: number } {
  const options: { modelPath: string; gpuLayers?: number } = { modelPath };
  if (this.isCpuOffloadForced()) {
    options.gpuLayers = 0;
  }
  return options;
}
|
||||
|
||||
/**
|
||||
* Resolve a model URI to a local path, downloading if needed.
|
||||
* Validates the downloaded file is actually a GGUF model (not an HTML error page
|
||||
@ -835,7 +943,7 @@ export class LlamaCpp implements LLM {
|
||||
this.embedModelLoadPromise = (async () => {
|
||||
const llama = await this.ensureLlama();
|
||||
const modelPath = await this.resolveModel(this.embedModelUri);
|
||||
const model = await llama.loadModel({ modelPath });
|
||||
const model = await llama.loadModel(this.modelLoadOptions(modelPath));
|
||||
this.embedModel = model;
|
||||
// Model loading counts as activity - ping to keep alive
|
||||
this.touchActivity();
|
||||
@ -861,7 +969,7 @@ export class LlamaCpp implements LLM {
|
||||
private async computeParallelism(perContextMB: number): Promise<number> {
|
||||
const llama = await this.ensureLlama();
|
||||
|
||||
if (llama.gpu) {
|
||||
if (!this.isCpuOffloadForced() && llama.gpu) {
|
||||
try {
|
||||
const vram = await llama.getVramState();
|
||||
const freeMB = vram.free / (1024 * 1024);
|
||||
@ -886,7 +994,7 @@ export class LlamaCpp implements LLM {
|
||||
*/
|
||||
private async threadsPerContext(parallelism: number): Promise<number> {
|
||||
const llama = await this.ensureLlama();
|
||||
if (llama.gpu) return 0; // GPU: let the library decide
|
||||
if (!this.isCpuOffloadForced() && llama.gpu) return 0; // GPU: let the library decide
|
||||
const cores = llama.cpuMathCores || 4;
|
||||
return Math.max(1, Math.floor(cores / parallelism));
|
||||
}
|
||||
@ -954,7 +1062,7 @@ export class LlamaCpp implements LLM {
|
||||
this.generateModelLoadPromise = (async () => {
|
||||
const llama = await this.ensureLlama();
|
||||
const modelPath = await this.resolveModel(this.generateModelUri);
|
||||
const model = await llama.loadModel({ modelPath });
|
||||
const model = await llama.loadModel(this.modelLoadOptions(modelPath));
|
||||
this.generateModel = model;
|
||||
return model;
|
||||
})();
|
||||
@ -986,7 +1094,7 @@ export class LlamaCpp implements LLM {
|
||||
this.rerankModelLoadPromise = (async () => {
|
||||
const llama = await this.ensureLlama();
|
||||
const modelPath = await this.resolveModel(this.rerankModelUri);
|
||||
const model = await llama.loadModel({ modelPath });
|
||||
const model = await llama.loadModel(this.modelLoadOptions(modelPath));
|
||||
this.rerankModel = model;
|
||||
// Model loading counts as activity - ping to keep alive
|
||||
this.touchActivity();
|
||||
@ -1489,17 +1597,18 @@ export class LlamaCpp implements LLM {
|
||||
cpuCores: number;
|
||||
}> {
|
||||
const llama = await this.ensureLlama(options.allowBuild ?? true);
|
||||
const gpuDevices = await llama.getGpuDeviceNames();
|
||||
const cpuForced = this.isCpuOffloadForced();
|
||||
const gpuDevices = cpuForced ? [] : await llama.getGpuDeviceNames();
|
||||
let vram: { total: number; used: number; free: number } | undefined;
|
||||
if (llama.gpu) {
|
||||
if (!cpuForced && llama.gpu) {
|
||||
try {
|
||||
const state = await llama.getVramState();
|
||||
vram = { total: state.total, used: state.used, free: state.free };
|
||||
} catch { /* no vram info */ }
|
||||
}
|
||||
return {
|
||||
gpu: llama.gpu,
|
||||
gpuOffloading: llama.supportsGpuOffloading,
|
||||
gpu: cpuForced ? false : llama.gpu,
|
||||
gpuOffloading: !cpuForced && llama.supportsGpuOffloading,
|
||||
gpuDevices,
|
||||
vram,
|
||||
cpuCores: llama.cpuMathCores,
|
||||
|
||||
146
test/cli.test.ts
146
test/cli.test.ts
@ -16,6 +16,7 @@ import { setTimeout as sleep } from "timers/promises";
|
||||
import { buildEditorUri, termLink, resolveEmbedModelForCli } from "../src/cli/qmd.ts";
|
||||
import { openDatabase } from "../src/db.ts";
|
||||
import { DEFAULT_EMBED_MODEL_URI, DEFAULT_GENERATE_MODEL_URI, DEFAULT_RERANK_MODEL_URI } from "../src/llm.ts";
|
||||
import { setConfigSource } from "../src/collections.ts";
|
||||
|
||||
// Test fixtures directory and database path
|
||||
let testDir: string;
|
||||
@ -311,13 +312,15 @@ describe("CLI Skills", () => {
|
||||
});
|
||||
|
||||
describe("CLI Embed", () => {
|
||||
test("prefers QMD_EMBED_MODEL for qmd embed", () => {
|
||||
test("prefers QMD_EMBED_MODEL for qmd embed when the index has no model pin", () => {
|
||||
const prev = process.env.QMD_EMBED_MODEL;
|
||||
process.env.QMD_EMBED_MODEL = "hf:env/embed-model.gguf";
|
||||
setConfigSource({ config: { collections: {} } });
|
||||
|
||||
try {
|
||||
expect(resolveEmbedModelForCli()).toBe("hf:env/embed-model.gguf");
|
||||
} finally {
|
||||
setConfigSource();
|
||||
if (prev === undefined) delete process.env.QMD_EMBED_MODEL;
|
||||
else process.env.QMD_EMBED_MODEL = prev;
|
||||
}
|
||||
@ -326,10 +329,12 @@ describe("CLI Embed", () => {
|
||||
test("falls back to the default embed model when QMD_EMBED_MODEL is unset", () => {
|
||||
const prev = process.env.QMD_EMBED_MODEL;
|
||||
delete process.env.QMD_EMBED_MODEL;
|
||||
setConfigSource({ config: { collections: {} } });
|
||||
|
||||
try {
|
||||
expect(resolveEmbedModelForCli()).toBe(DEFAULT_EMBED_MODEL_URI);
|
||||
} finally {
|
||||
setConfigSource();
|
||||
if (prev === undefined) delete process.env.QMD_EMBED_MODEL;
|
||||
else process.env.QMD_EMBED_MODEL = prev;
|
||||
}
|
||||
@ -429,6 +434,36 @@ describe("CLI Skill Commands", () => {
|
||||
});
|
||||
});
|
||||
|
||||
describe("CLI Init Command", () => {
|
||||
test("creates a project-local .qmd index", async () => {
|
||||
const projectDir = join(testDir, "init-project");
|
||||
await mkdir(projectDir, { recursive: true });
|
||||
|
||||
const { stdout, exitCode } = await runQmd(["init"], { cwd: projectDir });
|
||||
expect(exitCode).toBe(0);
|
||||
expect(stdout.trim()).toBe("ready to go with new local index");
|
||||
expect(existsSync(join(projectDir, ".qmd", "index.yml"))).toBe(true);
|
||||
expect(existsSync(join(projectDir, ".qmd", "index.sqlite"))).toBe(true);
|
||||
const configText = readFileSync(join(projectDir, ".qmd", "index.yml"), "utf-8");
|
||||
expect(configText).toContain("collections: {}");
|
||||
expect(configText).toContain("models:");
|
||||
});
|
||||
|
||||
test("refuses to initialize in HOME", async () => {
|
||||
const fakeHome = join(testDir, "init-home");
|
||||
await mkdir(fakeHome, { recursive: true });
|
||||
|
||||
const { stderr, exitCode } = await runQmd(["init"], {
|
||||
cwd: fakeHome,
|
||||
env: { HOME: fakeHome },
|
||||
});
|
||||
expect(exitCode).toBe(1);
|
||||
expect(stderr).toContain("Refusing to initialize a local index in $HOME");
|
||||
expect(stderr).toContain("global index is automatically created");
|
||||
expect(existsSync(join(fakeHome, ".qmd", "index.yml"))).toBe(false);
|
||||
});
|
||||
});
|
||||
|
||||
describe("CLI Add Command", () => {
|
||||
test("adds files from current directory", async () => {
|
||||
const { stdout, exitCode } = await runQmd(["collection", "add", "."]);
|
||||
@ -491,7 +526,28 @@ describe("CLI Status Command", () => {
|
||||
expect(configText).toContain(DEFAULT_EMBED_MODEL_URI);
|
||||
expect(configText).toContain(DEFAULT_GENERATE_MODEL_URI);
|
||||
expect(configText).toContain(DEFAULT_RERANK_MODEL_URI);
|
||||
});
|
||||
}, 20000);
|
||||
|
||||
test("qmd doctor warns when no collections are configured", async () => {
|
||||
const env = await createIsolatedTestEnv("doctor-no-collections");
|
||||
const { stdout, exitCode } = await runQmd(["doctor"], { dbPath: env.dbPath, configDir: env.configDir });
|
||||
expect(exitCode).toBe(0);
|
||||
expect(stdout).toContain("index config");
|
||||
expect(stdout).toContain("no collections configured");
|
||||
expect(stdout).toContain("qmd collection add .");
|
||||
}, 20000);
|
||||
|
||||
test("qmd doctor reports invalid index.yml without crashing", async () => {
|
||||
const env = await createIsolatedTestEnv("doctor-invalid-config");
|
||||
await writeFile(join(env.configDir, "index.yml"), "collections:\n bad: [unterminated\n");
|
||||
|
||||
const { stdout, exitCode } = await runQmd(["doctor"], { dbPath: env.dbPath, configDir: env.configDir });
|
||||
expect(exitCode).toBe(0);
|
||||
expect(stdout).toContain("index config");
|
||||
expect(stdout).toContain("invalid index.yml at");
|
||||
expect(stdout).toContain(join(env.configDir, "index.yml"));
|
||||
expect(stdout).toContain("fix the YAML");
|
||||
}, 20000);
|
||||
|
||||
test("qmd doctor warns when configured models differ from code defaults", async () => {
|
||||
const env = await createIsolatedTestEnv("doctor-custom-models");
|
||||
@ -504,7 +560,32 @@ describe("CLI Status Command", () => {
|
||||
expect(stdout).toContain("index hf:example/custom-embed/custom.gguf");
|
||||
expect(stdout).toContain("might be ok");
|
||||
expect(stdout).toContain("qmd pull");
|
||||
});
|
||||
}, 20000);
|
||||
|
||||
// Plants an HTML document under the model cache at the file name the configured
// model resolves to, then checks that `qmd doctor` reports it as an invalid
// (non-GGUF) cache entry and suggests refreshing the download.
test("qmd doctor identifies cached non-GGUF model files", async () => {
  const sandbox = await createIsolatedTestEnv("doctor-invalid-model-cache");
  const model = "hf:example/custom-model/custom.gguf";

  // All three model roles point at the same custom model.
  await writeFile(
    join(sandbox.configDir, "index.yml"),
    `collections: {}\nmodels:\n embed: ${model}\n generate: ${model}\n rerank: ${model}\n`,
  );

  // The cache root is handed to the CLI through XDG_CACHE_HOME below.
  const cacheRoot = join(sandbox.configDir, "cache");
  const modelsDir = join(cacheRoot, "qmd", "models");
  await mkdir(modelsDir, { recursive: true });

  // Write an HTML page where a GGUF binary would normally be cached.
  const bogusModelFile = join(modelsDir, "custom.gguf");
  await writeFile(bogusModelFile, "<!doctype html><html>blocked</html>");

  const result = await runQmd(["doctor"], {
    dbPath: sandbox.dbPath,
    configDir: sandbox.configDir,
    env: {
      XDG_CACHE_HOME: cacheRoot,
      QMD_DOCTOR_DEVICE_PROBE: "0",
    },
  });

  expect(result.exitCode).toBe(0);

  const expectedFragments = [
    "model cache",
    "invalid 1",
    "HTML page, not a GGUF model",
    "qmd pull --refresh",
  ];
  for (const fragment of expectedFragments) {
    expect(result.stdout).toContain(fragment);
  }
}, 20000);
|
||||
|
||||
test("qmd doctor says when models are overridden by env", async () => {
|
||||
const env = await createIsolatedTestEnv("doctor-env-models");
|
||||
@ -523,7 +604,60 @@ describe("CLI Status Command", () => {
|
||||
expect(stdout).toContain("environment overrides");
|
||||
expect(stdout).toContain(`QMD_EMBED_MODEL=${customEmbed}`);
|
||||
expect(stdout).toContain("sets the active embed model");
|
||||
});
|
||||
}, 20000);
|
||||
|
||||
// With QMD_FORCE_CPU=1 in the environment, `qmd doctor` is expected to list the
// override, explain its consequence, and report the CPU-forced device mode.
test("qmd doctor shows CPU-forced device mode with QMD_FORCE_CPU=1", async () => {
  const sandbox = await createIsolatedTestEnv("doctor-force-cpu");

  const cpuOnlyEnv = {
    QMD_FORCE_CPU: "1",
    QMD_DOCTOR_DEVICE_PROBE: "0",
  };
  const result = await runQmd(["doctor"], {
    dbPath: sandbox.dbPath,
    configDir: sandbox.configDir,
    env: cpuOnlyEnv,
  });

  expect(result.exitCode).toBe(0);

  const expectedFragments = [
    "QMD_FORCE_CPU=1",
    "forces llama.cpp to bypass GPU backends",
    "device mode: CPU forced (QMD_FORCE_CPU)",
  ];
  for (const fragment of expectedFragments) {
    expect(result.stdout).toContain(fragment);
  }
}, 20000);
|
||||
|
||||
// Runs `qmd doctor` with a broad set of environment variables set and checks
// that every variable name appears in the output, together with a sample of
// the human-readable consequences the report prints.
test("qmd doctor lists known environment overrides and consequences", async () => {
  const sandbox = await createIsolatedTestEnv("doctor-env-overrides");

  // Every key in this map must be mentioned by name in the doctor output.
  const overrides = {
    XDG_CACHE_HOME: join(sandbox.configDir, "cache"),
    QMD_DOCTOR_DEVICE_PROBE: "0",
    QMD_STATUS_DEVICE_PROBE: "1",
    QMD_FORCE_CPU: "1",
    QMD_LLAMA_GPU: "metal",
    QMD_EMBED_PARALLELISM: "2",
    QMD_EXPAND_CONTEXT_SIZE: "4096",
    QMD_RERANK_CONTEXT_SIZE: "8192",
    QMD_EMBED_CONTEXT_SIZE: "1024",
    QMD_EDITOR_URI: "vscode://file/{file}:{line}:{col}",
    QMD_SKILLS_DIR: "/tmp/qmd-skills",
    QMD_DISABLE_DARWIN_QUERY_JSON_SAFE_EXIT: "1",
    NO_COLOR: "1",
    CI: "1",
    HF_ENDPOINT: "https://hf-mirror.com",
    WSL_DISTRO_NAME: "Ubuntu",
    WSL_INTEROP: "1",
  };

  const result = await runQmd(["doctor"], {
    dbPath: sandbox.dbPath,
    configDir: sandbox.configDir,
    env: overrides,
  });

  expect(result.exitCode).toBe(0);

  Object.keys(overrides).forEach((variableName) => {
    expect(result.stdout).toContain(variableName);
  });

  // Spot-check the explanatory text attached to some of the overrides.
  const expectedConsequences = [
    "forces llama.cpp to bypass GPU backends",
    "moves the default index cache",
    "disables real LLM operations",
    "changes Hugging Face download endpoint",
  ];
  for (const consequence of expectedConsequences) {
    expect(result.stdout).toContain(consequence);
  }
}, 20000);
|
||||
|
||||
test("qmd doctor flags mixed embedding fingerprints", async () => {
|
||||
const db = openDatabase(testDbPath);
|
||||
@ -538,7 +672,7 @@ describe("CLI Status Command", () => {
|
||||
expect(exitCode).toBe(0);
|
||||
expect(stdout).toContain("embedding fingerprints");
|
||||
expect(stdout).toContain("stale1");
|
||||
});
|
||||
}, 20000);
|
||||
|
||||
test("shows index status", async () => {
|
||||
const { stdout, exitCode } = await runQmd(["status"]);
|
||||
@ -1620,7 +1754,7 @@ describe("status and collection list hide filesystem paths", () => {
|
||||
const lines = stdout.split('\n').filter(l => !l.includes('Index:'));
|
||||
const pathLines = lines.filter(l => l.includes('/Users/') || l.includes('/home/') || l.includes('/tmp/'));
|
||||
expect(pathLines.length).toBe(0);
|
||||
});
|
||||
}, 20000);
|
||||
|
||||
test("collection list does not show full filesystem paths", async () => {
|
||||
const { stdout, exitCode } = await runQmd(["collection", "list"], { dbPath: localDbPath, configDir: localConfigDir });
|
||||
|
||||
@ -6,9 +6,11 @@
|
||||
*/
|
||||
|
||||
import { describe, test, expect, beforeEach, afterEach } from "vitest";
|
||||
import { mkdtemp, rm, writeFile } from "fs/promises";
|
||||
import { tmpdir } from "os";
|
||||
import { join } from "path";
|
||||
import { qmdHomedir } from "../src/paths.js";
|
||||
import { getConfigPath, setConfigIndexName } from "../src/collections.js";
|
||||
import { getConfigPath, loadConfig, setConfigIndexName } from "../src/collections.js";
|
||||
|
||||
// Save/restore env vars around each test
|
||||
let savedEnv: Record<string, string | undefined>;
|
||||
@ -82,4 +84,15 @@ describe("getConfigDir via getConfigPath", () => {
|
||||
setConfigIndexName("myindex");
|
||||
expect(getConfigPath()).toBe(join("/xdg/config", "qmd", "myindex.yml"));
|
||||
});
|
||||
|
||||
// A zero-byte index.yml must load as a config with no collections rather than
// failing. The temporary config directory is removed even when the assertion
// fails.
// NOTE(review): QMD_CONFIG_DIR is not reset here; presumably the file-level
// env save/restore hooks handle it. Confirm.
test("loadConfig treats an empty YAML file as an empty config", async () => {
  const tempConfigDir = await mkdtemp(join(tmpdir(), "qmd-empty-config-"));
  try {
    process.env.QMD_CONFIG_DIR = tempConfigDir;
    const emptyConfigFile = join(tempConfigDir, "index.yml");
    await writeFile(emptyConfigFile, "");

    const loaded = loadConfig();
    expect(loaded).toEqual({ collections: {} });
  } finally {
    await rm(tempConfigDir, { recursive: true, force: true });
  }
});
|
||||
});
|
||||
|
||||
@ -273,6 +273,63 @@ describe("native llama stdout containment", () => {
|
||||
else process.env.QMD_FORCE_CPU = prevForceCpu;
|
||||
}
|
||||
});
|
||||
|
||||
// Embeds a string through LlamaCpp backed by a fully mocked node-llama-cpp
// module while QMD_FORCE_CPU=1 and QMD_LLAMA_GPU=metal are both set. The
// assertions pin that the runtime is requested with `gpu: false` and
// `build: "never"`, that the model is loaded with `gpuLayers: 0`, and that the
// embedding vector is returned unchanged.
test("embeds hello world with QMD_FORCE_CPU=1 without throwing", async () => {
  // Snapshot the env vars this test overrides so `finally` can put them back.
  const savedGpu = process.env.QMD_LLAMA_GPU;
  const savedForceCpu = process.env.QMD_FORCE_CPU;
  process.env.QMD_FORCE_CPU = "1";
  process.env.QMD_LLAMA_GPU = "metal";

  // Restores one env var to its pre-test state (unset when it was unset).
  const restoreEnv = (name: string, previous: string | undefined): void => {
    if (previous === undefined) delete process.env[name];
    else process.env[name] = previous;
  };
  // Each mocked resource gets its own no-op async dispose spy.
  const makeDispose = () => vi.fn(async () => {});

  const embedSpy = vi.fn(async (text: string) => {
    return { vector: new Float32Array([0.1, 0.2, 0.3]), text };
  });
  const createContextSpy = vi.fn(async () => {
    return { getEmbeddingFor: embedSpy, dispose: makeDispose() };
  });
  const loadModelSpy = vi.fn(async () => {
    return {
      trainContextSize: 2048,
      tokenize: (text: string) => Array.from(text),
      detokenize: (tokens: string[]) => tokens.join(""),
      createEmbeddingContext: createContextSpy,
      dispose: makeDispose(),
    };
  });
  const getLlamaSpy = vi.fn(async (_options: Record<string, unknown>) => ({
    gpu: false,
    cpuMathCores: 4,
    loadModel: loadModelSpy,
    dispose: makeDispose(),
  }) as any);

  // Swap the real node-llama-cpp module for the mocks above.
  setNodeLlamaCppModuleForTest({
    LlamaLogLevel: { error: "error" },
    resolveModelFile: vi.fn(async () => "/tmp/nonexistent-model.gguf"),
    LlamaChatSession: vi.fn() as any,
    getLlama: getLlamaSpy,
  });

  // Silence stderr output for the duration of the test.
  const stderrSpy = vi.spyOn(process.stderr, "write").mockReturnValue(true);
  const llm = new LlamaCpp();
  try {
    const embedded = await llm.embed("hello world");

    // The expected numbers are the float32 values of 0.1/0.2/0.3 widened to doubles.
    expect(embedded).toEqual({
      embedding: [0.10000000149011612, 0.20000000298023224, 0.30000001192092896],
      model: llm.embedModelName,
    });
    expect(getLlamaSpy).toHaveBeenCalledWith(expect.objectContaining({ gpu: false, build: "never" }));
    expect(loadModelSpy).toHaveBeenCalledWith(expect.objectContaining({ gpuLayers: 0 }));
    expect(embedSpy).toHaveBeenCalledWith("hello world");
  } finally {
    await llm.dispose();
    stderrSpy.mockRestore();
    setNodeLlamaCppModuleForTest(null);
    restoreEnv("QMD_LLAMA_GPU", savedGpu);
    restoreEnv("QMD_FORCE_CPU", savedForceCpu);
  }
});
|
||||
});
|
||||
|
||||
describe("LLM context parallelism safety", () => {
|
||||
|
||||
Loading…
Reference in New Issue
Block a user