fix embed model and qmd home resolution

This commit is contained in:
Tobi Lütke 2026-05-09 18:08:56 +00:00
parent b775592230
commit ddc969a5f4
No known key found for this signature in database
8 changed files with 184 additions and 18 deletions

View File

@ -14,6 +14,8 @@
- CLI: remove CommonJS `require()` calls from ESM index path normalization so `qmd --index <path>` no longer crashes with `ERR_AMBIGUOUS_MODULE_SYNTAX` on Node 22+. #634
- Windows CUDA: serialize llama.cpp embedding/reranking contexts by default to avoid intermittent `ggml-cuda.cu:98` crashes in `qmd query`; set `QMD_EMBED_PARALLELISM` to opt back into parallel contexts if your driver is stable. #519
- MCP: make `qmd mcp --index <name>` use the selected index for both foreground and daemon HTTP servers instead of falling back to the default store. #343
- Embedding: respect `QMD_EMBED_MODEL` consistently for vector indexing and vector-backed search, with default-model fallback when unset.
- Config: use one home-directory resolver for YAML config and the default SQLite cache path, avoiding Windows CLI/MCP split-brain when `HOME` is unset.
- GPU: respect explicit `QMD_LLAMA_GPU=metal|vulkan|cuda` backend overrides instead of always using auto GPU selection. #529
- Fix: preserve original filename case in `handelize()`. The previous
`.toLowerCase()` call made indexed paths unreachable on case-sensitive

View File

@ -1679,8 +1679,12 @@ function parseChunkStrategy(value: unknown): ChunkStrategy | undefined {
throw new Error(`--chunk-strategy must be "auto" or "regex" (got "${s}")`);
}
/**
 * Resolve the embedding model identifier used by the CLI embed path.
 *
 * Reads `QMD_EMBED_MODEL` from the environment and falls back to
 * `DEFAULT_EMBED_MODEL_URI` when the variable is unset, empty, or contains
 * only whitespace.
 *
 * Environment variables are always strings, so `QMD_EMBED_MODEL=` (a common
 * way to "clear" a variable in shells and .env files) yields `""` rather than
 * `undefined`. A nullish check alone would forward that empty string as the
 * model identifier instead of using the default.
 *
 * @returns The trimmed override from `QMD_EMBED_MODEL`, or the default model URI.
 */
export function resolveEmbedModelForCli(): string {
  const override = process.env.QMD_EMBED_MODEL?.trim();
  // Empty string and undefined are both treated as "no override".
  return override ? override : DEFAULT_EMBED_MODEL_URI;
}
async function vectorIndex(
model: string = DEFAULT_EMBED_MODEL_URI,
model: string = resolveEmbedModelForCli(),
force: boolean = false,
batchOptions?: { maxDocsPerBatch?: number; maxBatchBytes?: number; chunkStrategy?: ChunkStrategy; collection?: string },
): Promise<void> {
@ -3125,7 +3129,7 @@ if (isMain) {
// embed operates on a single collection; only the first value is used.
const embedValidatedCollections = resolveCollectionFilter(cli.opts.collection, false);
const embedCollection = embedValidatedCollections[0];
await vectorIndex(DEFAULT_EMBED_MODEL_URI, !!cli.values.force, {
await vectorIndex(resolveEmbedModelForCli(), !!cli.values.force, {
maxDocsPerBatch,
maxBatchBytes: maxBatchMb === undefined ? undefined : maxBatchMb * 1024 * 1024,
chunkStrategy: embedChunkStrategy,

View File

@ -7,7 +7,7 @@
import { existsSync, mkdirSync, readFileSync, writeFileSync } from "fs";
import { join, dirname, resolve } from "path";
import { homedir } from "os";
import { qmdHomedir } from "./paths.js";
import YAML from "yaml";
// ============================================================================
@ -118,7 +118,7 @@ function getConfigDir(): string {
if (process.env.XDG_CONFIG_HOME) {
return join(process.env.XDG_CONFIG_HOME, "qmd");
}
return join(homedir(), ".config", "qmd");
return join(qmdHomedir(), ".config", "qmd");
}
function getConfigFilePath(): string {

5
src/paths.ts Normal file
View File

@ -0,0 +1,5 @@
import { homedir as osHomedir } from "node:os";
/**
 * Resolve the home directory used by qmd.
 *
 * Resolution order:
 *   1. `HOME`, when set and non-empty
 *   2. `USERPROFILE`, when set and non-empty
 *   3. `os.homedir()`
 *   4. `"/tmp"` as a last resort
 *
 * @returns A non-empty directory path; this function never throws.
 */
export function qmdHomedir(): string {
  const fromEnv = process.env.HOME || process.env.USERPROFILE;
  if (fromEnv) {
    return fromEnv;
  }
  try {
    // os.homedir() reports a failed OS lookup (for example a UID with no
    // passwd entry inside a container) by throwing, not by returning "".
    // Without this guard the "/tmp" fallback could never be reached.
    return osHomedir() || "/tmp";
  } catch {
    return "/tmp";
  }
}

View File

@ -18,6 +18,7 @@ import { createHash } from "crypto";
import { readFileSync, realpathSync, statSync, mkdirSync } from "node:fs";
// Note: node:path resolve is not imported — we export our own cross-platform resolve()
import fastGlob from "fast-glob";
import { qmdHomedir } from "./paths.js";
import {
LlamaCpp,
getDefaultLlamaCpp,
@ -38,7 +39,6 @@ import type {
// Configuration
// =============================================================================
const HOME = process.env.HOME || process.env.USERPROFILE || "/tmp";
export const DEFAULT_EMBED_MODEL = "embeddinggemma";
export const DEFAULT_RERANK_MODEL = "ExpedientFalcon/qwen3-reranker:0.6b-q8_0";
export const DEFAULT_QUERY_MODEL = "Qwen/Qwen3-1.7B";
@ -334,7 +334,7 @@ export type ExpandedQuery = {
// =============================================================================
/**
 * Return the home directory used by this module.
 *
 * Delegates to the shared `qmdHomedir()` resolver from `./paths.js` so every
 * caller of this function sees the same answer as other users of that
 * resolver. A stale `return HOME;` used to precede this call and made it
 * unreachable, which pinned the result to a constant captured at import time.
 *
 * @returns The resolved home directory path.
 */
export function homedir(): string {
  return qmdHomedir();
}
/**
@ -1492,7 +1492,8 @@ export async function generateEmbeddings(
options?: EmbedOptions
): Promise<EmbedResult> {
const db = store.db;
const model = options?.model ?? DEFAULT_EMBED_MODEL;
const llm = getLlm(store);
const model = options?.model ?? llm.embedModelName ?? DEFAULT_EMBED_MODEL;
const now = new Date().toISOString();
const { maxDocsPerBatch, maxBatchBytes } = resolveEmbedOptions(options);
const encoder = new TextEncoder();
@ -1511,8 +1512,7 @@ export async function generateEmbeddings(
const startTime = Date.now();
// Use store's LlamaCpp or global singleton, wrapped in a session
const llm = getLlm(store);
const embedModelUri = llm.embedModelName;
const embedModelUri = model;
// Create a session manager for this llm instance
const result = await withLLMSessionForLlm(llm, async (session) => {
@ -4276,7 +4276,8 @@ export async function hybridQuery(
// Batch embed all vector queries in a single call
const llm = getLlm(store);
const textsToEmbed = vecQueries.map(q => formatQueryForEmbedding(q.text, llm.embedModelName));
const embedModel = llm.embedModelName;
const textsToEmbed = vecQueries.map(q => formatQueryForEmbedding(q.text, embedModel));
hooks?.onEmbedStart?.(textsToEmbed.length);
const embedStart = Date.now();
const embeddings = await llm.embedBatch(textsToEmbed);
@ -4288,7 +4289,7 @@ export async function hybridQuery(
if (!embedding) continue;
const vecResults = await store.searchVec(
vecQueries[i]!.text, DEFAULT_EMBED_MODEL, 20, collection,
vecQueries[i]!.text, embedModel, 20, collection,
undefined, embedding
);
if (vecResults.length > 0) {
@ -4519,10 +4520,11 @@ export async function vectorSearchQuery(
options?.hooks?.onExpand?.(query, vecExpanded, Date.now() - expandStart);
// Run original + vec/hyde expanded through vector, sequentially — concurrent embed() hangs
const embedModel = getLlm(store).embedModelName;
const queryTexts = [query, ...vecExpanded.map(q => q.query)];
const allResults = new Map<string, VectorSearchResult>();
for (const q of queryTexts) {
const vecResults = await store.searchVec(q, DEFAULT_EMBED_MODEL, limit, collection);
const vecResults = await store.searchVec(q, embedModel, limit, collection);
for (const r of vecResults) {
const existing = allResults.get(r.filepath);
if (!existing || r.score > existing.score) {
@ -4660,7 +4662,8 @@ export async function structuredSearch(
);
if (vecSearches.length > 0) {
const llm = getLlm(store);
const textsToEmbed = vecSearches.map(s => formatQueryForEmbedding(s.query, llm.embedModelName));
const embedModel = llm.embedModelName;
const textsToEmbed = vecSearches.map(s => formatQueryForEmbedding(s.query, embedModel));
hooks?.onEmbedStart?.(textsToEmbed.length);
const embedStart = Date.now();
const embeddings = await llm.embedBatch(textsToEmbed);
@ -4672,7 +4675,7 @@ export async function structuredSearch(
for (const coll of collectionList) {
const vecResults = await store.searchVec(
vecSearches[i]!.query, DEFAULT_EMBED_MODEL, 20, coll,
vecSearches[i]!.query, embedModel, 20, coll,
undefined, embedding
);
if (vecResults.length > 0) {

View File

@ -13,7 +13,8 @@ import { join, dirname } from "path";
import { fileURLToPath } from "url";
import { spawn } from "child_process";
import { setTimeout as sleep } from "timers/promises";
import { buildEditorUri, termLink } from "../src/cli/qmd.ts";
import { buildEditorUri, termLink, resolveEmbedModelForCli } from "../src/cli/qmd.ts";
import { DEFAULT_EMBED_MODEL_URI } from "../src/llm.ts";
// Test fixtures directory and database path
let testDir: string;
@ -243,6 +244,30 @@ describe("CLI Help", () => {
});
describe("CLI Embed", () => {
test("prefers QMD_EMBED_MODEL for qmd embed", () => {
  // Snapshot the ambient value so the override cannot leak into other tests.
  const originalModel = process.env.QMD_EMBED_MODEL;
  const overrideModel = "hf:env/embed-model.gguf";
  process.env.QMD_EMBED_MODEL = overrideModel;
  try {
    const resolved = resolveEmbedModelForCli();
    expect(resolved).toBe(overrideModel);
  } finally {
    // Restore exactly: an originally-unset variable must end up unset again.
    if (originalModel !== undefined) {
      process.env.QMD_EMBED_MODEL = originalModel;
    } else {
      delete process.env.QMD_EMBED_MODEL;
    }
  }
});
test("falls back to the default embed model when QMD_EMBED_MODEL is unset", () => {
  // Remember whatever the surrounding environment had, then clear it for this case.
  const originalModel = process.env.QMD_EMBED_MODEL;
  delete process.env.QMD_EMBED_MODEL;
  try {
    const resolved = resolveEmbedModelForCli();
    expect(resolved).toBe(DEFAULT_EMBED_MODEL_URI);
  } finally {
    // Put the variable back only if it existed before the test ran.
    if (originalModel !== undefined) {
      process.env.QMD_EMBED_MODEL = originalModel;
    } else {
      delete process.env.QMD_EMBED_MODEL;
    }
  }
});
test("rejects invalid --max-docs-per-batch", async () => {
const { stderr, exitCode } = await runQmd(["embed", "--max-docs-per-batch", "0"]);
expect(exitCode).toBe(1);

View File

@ -7,7 +7,7 @@
import { describe, test, expect, beforeEach, afterEach } from "vitest";
import { join } from "path";
import { homedir } from "os";
import { qmdHomedir } from "../src/paths.js";
import { getConfigPath, setConfigIndexName } from "../src/collections.js";
// Save/restore env vars around each test
@ -15,6 +15,8 @@ let savedEnv: Record<string, string | undefined>;
beforeEach(() => {
savedEnv = {
HOME: process.env.HOME,
USERPROFILE: process.env.USERPROFILE,
QMD_CONFIG_DIR: process.env.QMD_CONFIG_DIR,
XDG_CONFIG_HOME: process.env.XDG_CONFIG_HOME,
};
@ -38,7 +40,16 @@ describe("getConfigDir via getConfigPath", () => {
test("defaults to ~/.config/qmd when no env vars are set", () => {
  // Clear both overrides so getConfigPath() has to use the home-based default.
  delete process.env.QMD_CONFIG_DIR;
  delete process.env.XDG_CONFIG_HOME;
  // Compare against the qmdHomedir() resolver rather than os.homedir(). The two
  // can disagree (for example when HOME and USERPROFILE point at different
  // directories), which made the former os.homedir()-based assertion
  // platform dependent.
  expect(getConfigPath()).toBe(join(qmdHomedir(), ".config", "qmd", "index.yml"));
});
test("uses the same USERPROFILE fallback as default DB path when HOME is unset", () => {
  const profileDir = "/Users/windows-user";
  // Remove every variable that would take precedence over USERPROFILE.
  for (const key of ["HOME", "QMD_CONFIG_DIR", "XDG_CONFIG_HOME"]) {
    delete process.env[key];
  }
  process.env.USERPROFILE = profileDir;

  const expectedPath = join(profileDir, ".config", "qmd", "index.yml");
  expect(getConfigPath()).toBe(expectedPath);
});
test("QMD_CONFIG_DIR takes highest priority", () => {

View File

@ -53,6 +53,10 @@ import {
insertDocument,
generateEmbeddings,
getHybridRrfWeights,
_resetProductionModeForTesting,
hybridQuery,
structuredSearch,
vectorSearchQuery,
type Store,
type DocumentResult,
type SearchResult,
@ -282,7 +286,9 @@ afterAll(async () => {
describe("Store Creation", () => {
test("createStore throws without explicit path in test mode", () => {
// In test mode, createStore without path should throw to prevent accidental writes
// In test mode, createStore without path should throw to prevent accidental writes.
// Other tests may enable production mode in the same Bun process, so reset first.
_resetProductionModeForTesting();
const originalIndexPath = process.env.INDEX_PATH;
delete process.env.INDEX_PATH;
@ -3021,6 +3027,116 @@ describe("Embedding batching", () => {
}
});
test("generateEmbeddings uses the active llm embed model when no explicit model is passed", async () => {
  const store = await createTestStore();
  const database = store.db;
  const embedLlm = createFakeEmbedLlm();
  const activeModel = "hf:env/embed-model.gguf";

  setDefaultLlamaCpp(createFakeTokenizer() as any);
  // The store's llm is a copy of the fake with a custom embed model name; the
  // assertions below still read call records from the original fake object.
  store.llm = Object.assign({}, embedLlm, { embedModelName: activeModel }) as any;

  try {
    await insertTestDocument(database, "docs", { name: "one", body: "# One\n\nAlpha" });

    // No options are passed, so no explicit model is supplied by the caller.
    const outcome = await generateEmbeddings(store);
    expect(outcome.chunksEmbedded).toBe(1);

    const firstEmbedCall = embedLlm.embedCalls[0];
    expect(firstEmbedCall?.options?.model).toBe(activeModel);
    expect(embedLlm.embedBatchModelCalls).toEqual([{ model: activeModel }]);

    const storedModels = database.prepare(`SELECT DISTINCT model FROM content_vectors`).all();
    expect(storedModels).toEqual([{ model: activeModel }]);
  } finally {
    setDefaultLlamaCpp(null);
    await cleanupTestDb(store);
  }
});
test("vectorSearchQuery uses the active llm embed model for vector lookups", async () => {
  const store = await createTestStore();
  const activeModel = "hf:Qwen/Qwen3-Embedding-0.6B-GGUF/Qwen3-Embedding-0.6B-Q8_0.gguf";
  const vecLookup = vi.fn(async () => [] as SearchResult[]) as any;

  // NOTE(review): presumably the vectors_vec table must exist for the vector path to run — confirm.
  store.db.exec(`CREATE TABLE vectors_vec (hash_seq TEXT PRIMARY KEY, embedding BLOB)`);
  store.llm = { embedModelName: activeModel } as any;
  store.searchVec = vecLookup as any;
  // The expansion stub returns nothing, so only the original query is expected to be looked up.
  store.expandQuery = vi.fn(async () => []) as any;

  try {
    await vectorSearchQuery(store, "custom query", { limit: 7, minScore: 0 });

    expect(vecLookup).toHaveBeenCalledTimes(1);
    const firstCall = vecLookup.mock.calls[0];
    expect(firstCall?.[0]).toBe("custom query");
    expect(firstCall?.[1]).toBe(activeModel);
    expect(firstCall?.[2]).toBe(7);
  } finally {
    await cleanupTestDb(store);
  }
});
test("hybridQuery uses the active llm embed model for precomputed vector lookups", async () => {
  const store = await createTestStore();
  const activeModel = "hf:Qwen/Qwen3-Embedding-0.6B-GGUF/Qwen3-Embedding-0.6B-Q8_0.gguf";

  // Every input text gets the same fixed embedding, tagged with the active model.
  const batchEmbed = vi.fn(async (texts: string[]) =>
    texts.map(() => {
      return { embedding: [1, 2, 3], model: activeModel };
    }),
  );
  const vecLookup = vi.fn(async () => [] as SearchResult[]) as any;

  // NOTE(review): presumably the vectors_vec table must exist for the vector path to run — confirm.
  store.db.exec(`CREATE TABLE vectors_vec (hash_seq TEXT PRIMARY KEY, embedding BLOB)`);
  store.llm = { embedModelName: activeModel, embedBatch: batchEmbed } as any;
  store.searchVec = vecLookup as any;
  store.searchFTS = vi.fn(() => []) as any;
  store.expandQuery = vi.fn(async () => []) as any;

  try {
    await hybridQuery(store, "hybrid query", { limit: 5, minScore: 0, skipRerank: true });

    expect(batchEmbed).toHaveBeenCalledTimes(1);
    expect(vecLookup).toHaveBeenCalledTimes(1);

    // Argument 1 is the model passed to searchVec; argument 5 is the precomputed embedding.
    const firstCall = vecLookup.mock.calls[0];
    expect(firstCall?.[0]).toBe("hybrid query");
    expect(firstCall?.[1]).toBe(activeModel);
    expect(firstCall?.[5]).toEqual([1, 2, 3]);
  } finally {
    await cleanupTestDb(store);
  }
});
test("structuredSearch uses the active llm embed model for precomputed vector lookups", async () => {
  const store = await createTestStore();
  const activeModel = "hf:Qwen/Qwen3-Embedding-0.6B-GGUF/Qwen3-Embedding-0.6B-Q8_0.gguf";

  // Every input text gets the same fixed embedding, tagged with the active model.
  const batchEmbed = vi.fn(async (texts: string[]) =>
    texts.map(() => {
      return { embedding: [1, 2, 3], model: activeModel };
    }),
  );
  const vecLookup = vi.fn(async () => [] as SearchResult[]) as any;

  // NOTE(review): presumably the vectors_vec table must exist for the vector path to run — confirm.
  store.db.exec(`CREATE TABLE vectors_vec (hash_seq TEXT PRIMARY KEY, embedding BLOB)`);
  store.llm = { embedModelName: activeModel, embedBatch: batchEmbed } as any;
  store.searchVec = vecLookup as any;

  try {
    const searches = [{ type: "vec", query: "structured query" }];
    const searchOptions = { limit: 5, minScore: 0, skipRerank: true };
    await structuredSearch(store, searches as any, searchOptions);

    expect(batchEmbed).toHaveBeenCalledTimes(1);
    expect(vecLookup).toHaveBeenCalledTimes(1);

    // Argument 1 is the model passed to searchVec; argument 5 is the precomputed embedding.
    const firstCall = vecLookup.mock.calls[0];
    expect(firstCall?.[0]).toBe("structured query");
    expect(firstCall?.[1]).toBe(activeModel);
    expect(firstCall?.[5]).toEqual([1, 2, 3]);
  } finally {
    await cleanupTestDb(store);
  }
});
test("generateEmbeddings rejects invalid batch limits", async () => {
const store = await createTestStore();