fix embed model and qmd home resolution
This commit is contained in:
parent
b775592230
commit
ddc969a5f4
@ -14,6 +14,8 @@
|
||||
- CLI: remove CommonJS `require()` calls from ESM index path normalization so `qmd --index <path>` no longer crashes with `ERR_AMBIGUOUS_MODULE_SYNTAX` on Node 22+. #634
|
||||
- Windows CUDA: serialize llama.cpp embedding/reranking contexts by default to avoid intermittent `ggml-cuda.cu:98` crashes in `qmd query`; set `QMD_EMBED_PARALLELISM` to opt back into parallel contexts if your driver is stable. #519
|
||||
- MCP: make `qmd mcp --index <name>` use the selected index for both foreground and daemon HTTP servers instead of falling back to the default store. #343
|
||||
- Embedding: respect `QMD_EMBED_MODEL` consistently for vector indexing and vector-backed search, with default-model fallback when unset.
|
||||
- Config: use one home-directory resolver for YAML config and the default SQLite cache path, avoiding Windows CLI/MCP split-brain when `HOME` is unset.
|
||||
- GPU: respect explicit `QMD_LLAMA_GPU=metal|vulkan|cuda` backend overrides instead of always using auto GPU selection. #529
|
||||
- Fix: preserve original filename case in `handelize()`. The previous
|
||||
`.toLowerCase()` call made indexed paths unreachable on case-sensitive
|
||||
|
||||
@ -1679,8 +1679,12 @@ function parseChunkStrategy(value: unknown): ChunkStrategy | undefined {
|
||||
throw new Error(`--chunk-strategy must be "auto" or "regex" (got "${s}")`);
|
||||
}
|
||||
|
||||
/**
 * Resolve the embedding model URI for the CLI embed path.
 *
 * `QMD_EMBED_MODEL` wins when it holds a non-empty value; otherwise the
 * built-in `DEFAULT_EMBED_MODEL_URI` is returned. An empty value (for example
 * `QMD_EMBED_MODEL=` in a shell or compose file) is treated the same as an
 * unset variable, because an empty string is never a usable model URI.
 *
 * @returns The model URI to embed with.
 */
export function resolveEmbedModelForCli(): string {
  // `||` rather than `??`: an empty string must also fall back to the default.
  return process.env.QMD_EMBED_MODEL || DEFAULT_EMBED_MODEL_URI;
}
|
||||
|
||||
async function vectorIndex(
|
||||
model: string = DEFAULT_EMBED_MODEL_URI,
|
||||
model: string = resolveEmbedModelForCli(),
|
||||
force: boolean = false,
|
||||
batchOptions?: { maxDocsPerBatch?: number; maxBatchBytes?: number; chunkStrategy?: ChunkStrategy; collection?: string },
|
||||
): Promise<void> {
|
||||
@ -3125,7 +3129,7 @@ if (isMain) {
|
||||
// embed operates on a single collection; only the first value is used.
|
||||
const embedValidatedCollections = resolveCollectionFilter(cli.opts.collection, false);
|
||||
const embedCollection = embedValidatedCollections[0];
|
||||
await vectorIndex(DEFAULT_EMBED_MODEL_URI, !!cli.values.force, {
|
||||
await vectorIndex(resolveEmbedModelForCli(), !!cli.values.force, {
|
||||
maxDocsPerBatch,
|
||||
maxBatchBytes: maxBatchMb === undefined ? undefined : maxBatchMb * 1024 * 1024,
|
||||
chunkStrategy: embedChunkStrategy,
|
||||
|
||||
@ -7,7 +7,7 @@
|
||||
|
||||
import { existsSync, mkdirSync, readFileSync, writeFileSync } from "fs";
|
||||
import { join, dirname, resolve } from "path";
|
||||
import { homedir } from "os";
|
||||
import { qmdHomedir } from "./paths.js";
|
||||
import YAML from "yaml";
|
||||
|
||||
// ============================================================================
|
||||
@ -118,7 +118,7 @@ function getConfigDir(): string {
|
||||
if (process.env.XDG_CONFIG_HOME) {
|
||||
return join(process.env.XDG_CONFIG_HOME, "qmd");
|
||||
}
|
||||
return join(homedir(), ".config", "qmd");
|
||||
return join(qmdHomedir(), ".config", "qmd");
|
||||
}
|
||||
|
||||
function getConfigFilePath(): string {
|
||||
|
||||
5
src/paths.ts
Normal file
5
src/paths.ts
Normal file
@ -0,0 +1,5 @@
|
||||
import { homedir as osHomedir } from "node:os";
|
||||
|
||||
/**
 * Resolve the home directory that qmd derives its per-user paths from.
 *
 * Resolution order: `HOME`, then `USERPROFILE`, then Node's `os.homedir()`,
 * and finally `/tmp`. Empty environment values are skipped.
 *
 * @returns A non-empty directory path. Never throws.
 */
export function qmdHomedir(): string {
  const fromEnv = process.env.HOME || process.env.USERPROFILE;
  if (fromEnv) return fromEnv;

  try {
    // os.homedir() throws a SystemError when the OS lookup fails (for example
    // no passwd entry for the current UID in a container). Without this guard
    // the "/tmp" fallback would never be reached in that situation.
    return osHomedir() || "/tmp";
  } catch {
    return "/tmp";
  }
}
|
||||
23
src/store.ts
23
src/store.ts
@ -18,6 +18,7 @@ import { createHash } from "crypto";
|
||||
import { readFileSync, realpathSync, statSync, mkdirSync } from "node:fs";
|
||||
// Note: node:path resolve is not imported — we export our own cross-platform resolve()
|
||||
import fastGlob from "fast-glob";
|
||||
import { qmdHomedir } from "./paths.js";
|
||||
import {
|
||||
LlamaCpp,
|
||||
getDefaultLlamaCpp,
|
||||
@ -38,7 +39,6 @@ import type {
|
||||
// Configuration
|
||||
// =============================================================================
|
||||
|
||||
const HOME = process.env.HOME || process.env.USERPROFILE || "/tmp";
|
||||
export const DEFAULT_EMBED_MODEL = "embeddinggemma";
|
||||
export const DEFAULT_RERANK_MODEL = "ExpedientFalcon/qwen3-reranker:0.6b-q8_0";
|
||||
export const DEFAULT_QUERY_MODEL = "Qwen/Qwen3-1.7B";
|
||||
@ -334,7 +334,7 @@ export type ExpandedQuery = {
|
||||
// =============================================================================
|
||||
|
||||
export function homedir(): string {
|
||||
return HOME;
|
||||
return qmdHomedir();
|
||||
}
|
||||
|
||||
/**
|
||||
@ -1492,7 +1492,8 @@ export async function generateEmbeddings(
|
||||
options?: EmbedOptions
|
||||
): Promise<EmbedResult> {
|
||||
const db = store.db;
|
||||
const model = options?.model ?? DEFAULT_EMBED_MODEL;
|
||||
const llm = getLlm(store);
|
||||
const model = options?.model ?? llm.embedModelName ?? DEFAULT_EMBED_MODEL;
|
||||
const now = new Date().toISOString();
|
||||
const { maxDocsPerBatch, maxBatchBytes } = resolveEmbedOptions(options);
|
||||
const encoder = new TextEncoder();
|
||||
@ -1511,8 +1512,7 @@ export async function generateEmbeddings(
|
||||
const startTime = Date.now();
|
||||
|
||||
// Use store's LlamaCpp or global singleton, wrapped in a session
|
||||
const llm = getLlm(store);
|
||||
const embedModelUri = llm.embedModelName;
|
||||
const embedModelUri = model;
|
||||
|
||||
// Create a session manager for this llm instance
|
||||
const result = await withLLMSessionForLlm(llm, async (session) => {
|
||||
@ -4276,7 +4276,8 @@ export async function hybridQuery(
|
||||
|
||||
// Batch embed all vector queries in a single call
|
||||
const llm = getLlm(store);
|
||||
const textsToEmbed = vecQueries.map(q => formatQueryForEmbedding(q.text, llm.embedModelName));
|
||||
const embedModel = llm.embedModelName;
|
||||
const textsToEmbed = vecQueries.map(q => formatQueryForEmbedding(q.text, embedModel));
|
||||
hooks?.onEmbedStart?.(textsToEmbed.length);
|
||||
const embedStart = Date.now();
|
||||
const embeddings = await llm.embedBatch(textsToEmbed);
|
||||
@ -4288,7 +4289,7 @@ export async function hybridQuery(
|
||||
if (!embedding) continue;
|
||||
|
||||
const vecResults = await store.searchVec(
|
||||
vecQueries[i]!.text, DEFAULT_EMBED_MODEL, 20, collection,
|
||||
vecQueries[i]!.text, embedModel, 20, collection,
|
||||
undefined, embedding
|
||||
);
|
||||
if (vecResults.length > 0) {
|
||||
@ -4519,10 +4520,11 @@ export async function vectorSearchQuery(
|
||||
options?.hooks?.onExpand?.(query, vecExpanded, Date.now() - expandStart);
|
||||
|
||||
// Run original + vec/hyde expanded through vector, sequentially — concurrent embed() hangs
|
||||
const embedModel = getLlm(store).embedModelName;
|
||||
const queryTexts = [query, ...vecExpanded.map(q => q.query)];
|
||||
const allResults = new Map<string, VectorSearchResult>();
|
||||
for (const q of queryTexts) {
|
||||
const vecResults = await store.searchVec(q, DEFAULT_EMBED_MODEL, limit, collection);
|
||||
const vecResults = await store.searchVec(q, embedModel, limit, collection);
|
||||
for (const r of vecResults) {
|
||||
const existing = allResults.get(r.filepath);
|
||||
if (!existing || r.score > existing.score) {
|
||||
@ -4660,7 +4662,8 @@ export async function structuredSearch(
|
||||
);
|
||||
if (vecSearches.length > 0) {
|
||||
const llm = getLlm(store);
|
||||
const textsToEmbed = vecSearches.map(s => formatQueryForEmbedding(s.query, llm.embedModelName));
|
||||
const embedModel = llm.embedModelName;
|
||||
const textsToEmbed = vecSearches.map(s => formatQueryForEmbedding(s.query, embedModel));
|
||||
hooks?.onEmbedStart?.(textsToEmbed.length);
|
||||
const embedStart = Date.now();
|
||||
const embeddings = await llm.embedBatch(textsToEmbed);
|
||||
@ -4672,7 +4675,7 @@ export async function structuredSearch(
|
||||
|
||||
for (const coll of collectionList) {
|
||||
const vecResults = await store.searchVec(
|
||||
vecSearches[i]!.query, DEFAULT_EMBED_MODEL, 20, coll,
|
||||
vecSearches[i]!.query, embedModel, 20, coll,
|
||||
undefined, embedding
|
||||
);
|
||||
if (vecResults.length > 0) {
|
||||
|
||||
@ -13,7 +13,8 @@ import { join, dirname } from "path";
|
||||
import { fileURLToPath } from "url";
|
||||
import { spawn } from "child_process";
|
||||
import { setTimeout as sleep } from "timers/promises";
|
||||
import { buildEditorUri, termLink } from "../src/cli/qmd.ts";
|
||||
import { buildEditorUri, termLink, resolveEmbedModelForCli } from "../src/cli/qmd.ts";
|
||||
import { DEFAULT_EMBED_MODEL_URI } from "../src/llm.ts";
|
||||
|
||||
// Test fixtures directory and database path
|
||||
let testDir: string;
|
||||
@ -243,6 +244,30 @@ describe("CLI Help", () => {
|
||||
});
|
||||
|
||||
describe("CLI Embed", () => {
|
||||
test("prefers QMD_EMBED_MODEL for qmd embed", () => {
  // Snapshot the ambient value so the override cannot leak into other tests.
  const savedModel = process.env.QMD_EMBED_MODEL;
  process.env.QMD_EMBED_MODEL = "hf:env/embed-model.gguf";

  try {
    const resolved = resolveEmbedModelForCli();
    expect(resolved).toBe("hf:env/embed-model.gguf");
  } finally {
    // Put the variable back exactly as it was found (absent stays absent).
    if (savedModel !== undefined) {
      process.env.QMD_EMBED_MODEL = savedModel;
    } else {
      delete process.env.QMD_EMBED_MODEL;
    }
  }
});
|
||||
|
||||
test("falls back to the default embed model when QMD_EMBED_MODEL is unset", () => {
  // Remember the ambient value, then make sure the variable is absent.
  const savedModel = process.env.QMD_EMBED_MODEL;
  delete process.env.QMD_EMBED_MODEL;

  try {
    const resolved = resolveEmbedModelForCli();
    expect(resolved).toBe(DEFAULT_EMBED_MODEL_URI);
  } finally {
    // Restore the variable exactly as it was found (absent stays absent).
    if (savedModel !== undefined) {
      process.env.QMD_EMBED_MODEL = savedModel;
    } else {
      delete process.env.QMD_EMBED_MODEL;
    }
  }
});
|
||||
|
||||
test("rejects invalid --max-docs-per-batch", async () => {
|
||||
const { stderr, exitCode } = await runQmd(["embed", "--max-docs-per-batch", "0"]);
|
||||
expect(exitCode).toBe(1);
|
||||
|
||||
@ -7,7 +7,7 @@
|
||||
|
||||
import { describe, test, expect, beforeEach, afterEach } from "vitest";
|
||||
import { join } from "path";
|
||||
import { homedir } from "os";
|
||||
import { qmdHomedir } from "../src/paths.js";
|
||||
import { getConfigPath, setConfigIndexName } from "../src/collections.js";
|
||||
|
||||
// Save/restore env vars around each test
|
||||
@ -15,6 +15,8 @@ let savedEnv: Record<string, string | undefined>;
|
||||
|
||||
beforeEach(() => {
|
||||
savedEnv = {
|
||||
HOME: process.env.HOME,
|
||||
USERPROFILE: process.env.USERPROFILE,
|
||||
QMD_CONFIG_DIR: process.env.QMD_CONFIG_DIR,
|
||||
XDG_CONFIG_HOME: process.env.XDG_CONFIG_HOME,
|
||||
};
|
||||
@ -38,7 +40,16 @@ describe("getConfigDir via getConfigPath", () => {
|
||||
test("defaults to ~/.config/qmd when no env vars are set", () => {
|
||||
delete process.env.QMD_CONFIG_DIR;
|
||||
delete process.env.XDG_CONFIG_HOME;
|
||||
expect(getConfigPath()).toBe(join(homedir(), ".config", "qmd", "index.yml"));
|
||||
expect(getConfigPath()).toBe(join(qmdHomedir(), ".config", "qmd", "index.yml"));
|
||||
});
|
||||
|
||||
test("uses the same USERPROFILE fallback as default DB path when HOME is unset", () => {
  // Clear every variable that would take precedence over USERPROFILE.
  for (const name of ["HOME", "QMD_CONFIG_DIR", "XDG_CONFIG_HOME"]) {
    delete process.env[name];
  }
  process.env.USERPROFILE = "/Users/windows-user";

  const expectedPath = join("/Users/windows-user", ".config", "qmd", "index.yml");
  expect(getConfigPath()).toBe(expectedPath);
});
|
||||
|
||||
test("QMD_CONFIG_DIR takes highest priority", () => {
|
||||
|
||||
@ -53,6 +53,10 @@ import {
|
||||
insertDocument,
|
||||
generateEmbeddings,
|
||||
getHybridRrfWeights,
|
||||
_resetProductionModeForTesting,
|
||||
hybridQuery,
|
||||
structuredSearch,
|
||||
vectorSearchQuery,
|
||||
type Store,
|
||||
type DocumentResult,
|
||||
type SearchResult,
|
||||
@ -282,7 +286,9 @@ afterAll(async () => {
|
||||
|
||||
describe("Store Creation", () => {
|
||||
test("createStore throws without explicit path in test mode", () => {
|
||||
// In test mode, createStore without path should throw to prevent accidental writes
|
||||
// In test mode, createStore without path should throw to prevent accidental writes.
|
||||
// Other tests may enable production mode in the same Bun process, so reset first.
|
||||
_resetProductionModeForTesting();
|
||||
const originalIndexPath = process.env.INDEX_PATH;
|
||||
delete process.env.INDEX_PATH;
|
||||
|
||||
@ -3021,6 +3027,116 @@ describe("Embedding batching", () => {
|
||||
}
|
||||
});
|
||||
|
||||
test("generateEmbeddings uses the active llm embed model when no explicit model is passed", async () => {
  const store = await createTestStore();
  const database = store.db;
  const embedLlm = createFakeEmbedLlm();
  const activeModel = "hf:env/embed-model.gguf";

  // Install a fake tokenizer as the default LlamaCpp and give the store a
  // fake llm that reports `activeModel` as its embed model.
  setDefaultLlamaCpp(createFakeTokenizer() as any);
  store.llm = { ...embedLlm, embedModelName: activeModel } as any;

  try {
    await insertTestDocument(database, "docs", { name: "one", body: "# One\n\nAlpha" });

    // No options argument: the model has to come from the store's llm.
    const outcome = await generateEmbeddings(store);
    expect(outcome.chunksEmbedded).toBe(1);

    const firstEmbedCall = embedLlm.embedCalls[0];
    expect(firstEmbedCall?.options?.model).toBe(activeModel);
    expect(embedLlm.embedBatchModelCalls).toEqual([{ model: activeModel }]);

    const storedModels = database.prepare(`SELECT DISTINCT model FROM content_vectors`).all();
    expect(storedModels).toEqual([{ model: activeModel }]);
  } finally {
    setDefaultLlamaCpp(null);
    await cleanupTestDb(store);
  }
});
|
||||
|
||||
test("vectorSearchQuery uses the active llm embed model for vector lookups", async () => {
  const store = await createTestStore();
  const activeModel = "hf:Qwen/Qwen3-Embedding-0.6B-GGUF/Qwen3-Embedding-0.6B-Q8_0.gguf";
  const searchVecSpy = vi.fn(async () => [] as SearchResult[]) as any;

  // Create a stand-in vectors table, then stub the llm, vector search and
  // query expansion on the store.
  store.db.exec(`CREATE TABLE vectors_vec (hash_seq TEXT PRIMARY KEY, embedding BLOB)`);
  store.llm = { embedModelName: activeModel } as any;
  store.searchVec = searchVecSpy as any;
  store.expandQuery = vi.fn(async () => []) as any;

  try {
    await vectorSearchQuery(store, "custom query", { limit: 7, minScore: 0 });

    // Expansion yields nothing, so only the original query is searched.
    expect(searchVecSpy).toHaveBeenCalledTimes(1);

    const firstCall = searchVecSpy.mock.calls[0];
    expect(firstCall?.[0]).toBe("custom query");
    expect(firstCall?.[1]).toBe(activeModel);
    expect(firstCall?.[2]).toBe(7);
  } finally {
    await cleanupTestDb(store);
  }
});
|
||||
|
||||
test("hybridQuery uses the active llm embed model for precomputed vector lookups", async () => {
  const store = await createTestStore();
  const activeModel = "hf:Qwen/Qwen3-Embedding-0.6B-GGUF/Qwen3-Embedding-0.6B-Q8_0.gguf";

  // Each input text gets its own fixed embedding tagged with the active model.
  const embedBatchSpy = vi.fn(async (texts: string[]) =>
    texts.map(() => ({ embedding: [1, 2, 3], model: activeModel })),
  );
  const searchVecSpy = vi.fn(async () => [] as SearchResult[]) as any;

  // Create a stand-in vectors table, then stub the llm and every search entry point.
  store.db.exec(`CREATE TABLE vectors_vec (hash_seq TEXT PRIMARY KEY, embedding BLOB)`);
  store.llm = { embedModelName: activeModel, embedBatch: embedBatchSpy } as any;
  store.searchVec = searchVecSpy as any;
  store.searchFTS = vi.fn(() => []) as any;
  store.expandQuery = vi.fn(async () => []) as any;

  try {
    await hybridQuery(store, "hybrid query", { limit: 5, minScore: 0, skipRerank: true });

    expect(embedBatchSpy).toHaveBeenCalledTimes(1);
    expect(searchVecSpy).toHaveBeenCalledTimes(1);

    // Argument 1 is the model name; argument 5 is the precomputed embedding.
    const firstCall = searchVecSpy.mock.calls[0];
    expect(firstCall?.[0]).toBe("hybrid query");
    expect(firstCall?.[1]).toBe(activeModel);
    expect(firstCall?.[5]).toEqual([1, 2, 3]);
  } finally {
    await cleanupTestDb(store);
  }
});
|
||||
|
||||
test("structuredSearch uses the active llm embed model for precomputed vector lookups", async () => {
  const store = await createTestStore();
  const activeModel = "hf:Qwen/Qwen3-Embedding-0.6B-GGUF/Qwen3-Embedding-0.6B-Q8_0.gguf";

  // Each input text gets its own fixed embedding tagged with the active model.
  const embedBatchSpy = vi.fn(async (texts: string[]) =>
    texts.map(() => ({ embedding: [1, 2, 3], model: activeModel })),
  );
  const searchVecSpy = vi.fn(async () => [] as SearchResult[]) as any;

  // Create a stand-in vectors table, then stub the llm and vector search.
  store.db.exec(`CREATE TABLE vectors_vec (hash_seq TEXT PRIMARY KEY, embedding BLOB)`);
  store.llm = { embedModelName: activeModel, embedBatch: embedBatchSpy } as any;
  store.searchVec = searchVecSpy as any;

  try {
    const searches = [{ type: "vec", query: "structured query" }];
    await structuredSearch(store, searches, { limit: 5, minScore: 0, skipRerank: true });

    expect(embedBatchSpy).toHaveBeenCalledTimes(1);
    expect(searchVecSpy).toHaveBeenCalledTimes(1);

    // Argument 1 is the model name; argument 5 is the precomputed embedding.
    const firstCall = searchVecSpy.mock.calls[0];
    expect(firstCall?.[0]).toBe("structured query");
    expect(firstCall?.[1]).toBe(activeModel);
    expect(firstCall?.[5]).toEqual([1, 2, 3]);
  } finally {
    await cleanupTestDb(store);
  }
});
|
||||
|
||||
test("generateEmbeddings rejects invalid batch limits", async () => {
|
||||
const store = await createTestStore();
|
||||
|
||||
|
||||
Loading…
Reference in New Issue
Block a user