Merge pull request #273 from daocoding/feature/configurable-embed-model

feat: add QMD_EMBED_MODEL env var for multilingual embedding support
This commit is contained in:
Tobias Lütke 2026-03-07 14:28:59 -04:00 committed by GitHub
commit 7904ab9a9d
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 52 additions and 9 deletions

View File

@ -252,12 +252,34 @@ QMD uses three local GGUF models (auto-downloaded on first use):
| Model | Purpose | Size |
|-------|---------|------|
| `embeddinggemma-300M-Q8_0` | Vector embeddings | ~300MB |
| `embeddinggemma-300M-Q8_0` | Vector embeddings (default) | ~300MB |
| `qwen3-reranker-0.6b-q8_0` | Re-ranking | ~640MB |
| `qmd-query-expansion-1.7B-q4_k_m` | Query expansion (fine-tuned) | ~1.1GB |
Models are downloaded from HuggingFace and cached in `~/.cache/qmd/models/`.
### Custom Embedding Model
Override the default embedding model via the `QMD_EMBED_MODEL` environment variable.
This is useful for multilingual corpora (e.g. Chinese, Japanese, Korean) where
`embeddinggemma-300M` has limited coverage.
```sh
# Use Qwen3-Embedding-0.6B for better multilingual (CJK) support
export QMD_EMBED_MODEL="hf:Qwen/Qwen3-Embedding-0.6B-GGUF/qwen3-embedding-0.6b-q8_0.gguf"
# After changing the model, re-embed all collections:
qmd embed -f
```
Supported model families:
- **embeddinggemma** (default) — English-optimized, small footprint
- **Qwen3-Embedding** — Multilingual (119 languages including CJK), MTEB top-ranked
> **Note:** After switching embedding models, you must re-embed every collection with
> `qmd embed -f`, because vectors produced by different models are not comparable.
> The prompt format is adjusted automatically for each model family.
## Installation
```sh

View File

@ -23,18 +23,37 @@ import { existsSync, mkdirSync, statSync, unlinkSync, readdirSync, readFileSync,
// =============================================================================
/**
* Format a query for embedding.
* Uses nomic-style task prefix format for embeddinggemma.
* Detect if a model URI uses the Qwen3-Embedding format.
* Qwen3-Embedding uses a different prompting style than nomic/embeddinggemma.
*/
export function formatQueryForEmbedding(query: string): string {
export function isQwen3EmbeddingModel(modelUri: string): boolean {
  // A URI counts as a Qwen embedding model when "qwen" and "embed" both occur
  // on the same line, in either order, compared case-insensitively.
  const qwenThenEmbed = /qwen.*embed/i;
  const embedThenQwen = /embed.*qwen/i;
  if (qwenThenEmbed.test(modelUri)) {
    return true;
  }
  return embedThenQwen.test(modelUri);
}
/**
 * Build the prompt text that gets embedded for a search query.
 *
 * The model is resolved in order: the explicit `modelUri` argument, then the
 * `QMD_EMBED_MODEL` environment variable, then `DEFAULT_EMBED_MODEL`.
 * Qwen embedding models receive the "Instruct: ... / Query: ..." layout;
 * every other model receives the "task: search result | query: ..." prefix.
 *
 * @param query - Raw user query text.
 * @param modelUri - Optional model URI used to pick the prompt layout.
 * @returns The query wrapped in the prompt layout for the resolved model.
 */
export function formatQueryForEmbedding(query: string, modelUri?: string): string {
  const activeModel = modelUri ?? process.env.QMD_EMBED_MODEL ?? DEFAULT_EMBED_MODEL;
  return isQwen3EmbeddingModel(activeModel)
    ? `Instruct: Retrieve relevant documents for the given query\nQuery: ${query}`
    : `task: search result | query: ${query}`;
}
/**
* Format a document for embedding.
* Uses nomic-style format with title and text fields.
* Uses nomic-style format with title and text fields (default).
* Qwen3-Embedding encodes documents as raw text without special prefixes.
*/
export function formatDocForEmbedding(text: string, title?: string): string {
export function formatDocForEmbedding(text: string, title?: string, modelUri?: string): string {
  // Resolve the model: explicit argument, then QMD_EMBED_MODEL, then the default.
  const activeModel = modelUri ?? process.env.QMD_EMBED_MODEL ?? DEFAULT_EMBED_MODEL;

  // Non-Qwen models: "title | text" layout, with "none" standing in for a
  // missing or empty title.
  if (!isQwen3EmbeddingModel(activeModel)) {
    return `title: ${title || "none"} | text: ${text}`;
  }

  // Qwen embedding models: raw text with no task prefix; a non-empty title is
  // placed on its own line ahead of the body.
  if (title) {
    return `${title}\n${text}`;
  }
  return text;
}
@ -173,7 +192,8 @@ export type RerankDocument = {
// HuggingFace model URIs for node-llama-cpp
// Format: hf:<user>/<repo>/<file>
//
// The embedding model can be overridden via the QMD_EMBED_MODEL env var
// (e.g. hf:Qwen/Qwen3-Embedding-0.6B-GGUF/qwen3-embedding-0.6b-q8_0.gguf).
// `||` is used on purpose instead of `??`: an empty value (`QMD_EMBED_MODEL=`)
// is treated as "not set" so the default URI is never replaced by "".
const DEFAULT_EMBED_MODEL =
  process.env.QMD_EMBED_MODEL || "hf:ggml-org/embeddinggemma-300M-GGUF/embeddinggemma-300M-Q8_0.gguf";
const DEFAULT_RERANK_MODEL = "hf:ggml-org/Qwen3-Reranker-0.6B-Q8_0-GGUF/qwen3-reranker-0.6b-q8_0.gguf";
// const DEFAULT_GENERATE_MODEL = "hf:ggml-org/Qwen3-0.6B-GGUF/Qwen3-0.6B-Q8_0.gguf";
const DEFAULT_GENERATE_MODEL = "hf:tobil/qmd-query-expansion-1.7B-gguf/qmd-query-expansion-1.7B-q4_k_m.gguf";
@ -1409,7 +1429,8 @@ let defaultLlamaCpp: LlamaCpp | null = null;
*/
export function getDefaultLlamaCpp(): LlamaCpp {
  if (!defaultLlamaCpp) {
    // Build the shared instance exactly once. A non-empty QMD_EMBED_MODEL is
    // forwarded as the embedding model; otherwise LlamaCpp gets an empty
    // config object.
    const embedModel = process.env.QMD_EMBED_MODEL;
    defaultLlamaCpp = new LlamaCpp(embedModel ? { embedModel } : {});
  }
  return defaultLlamaCpp;
}

View File

@ -2255,7 +2255,7 @@ export async function searchVec(db: Database, query: string, model: string, limi
async function getEmbedding(text: string, model: string, isQuery: boolean, session?: ILLMSession): Promise<number[] | null> {
// Format text using the appropriate prompt template
const formattedText = isQuery ? formatQueryForEmbedding(text) : formatDocForEmbedding(text);
const formattedText = isQuery ? formatQueryForEmbedding(text, model) : formatDocForEmbedding(text, undefined, model);
const result = session
? await session.embed(formattedText, { model, isQuery })
: await getDefaultLlamaCpp().embed(formattedText, { model, isQuery });