Merge pull request #273 from daocoding/feature/configurable-embed-model
feat: add QMD_EMBED_MODEL env var for multilingual embedding support
This commit is contained in:
commit
7904ab9a9d
24
README.md
24
README.md
@ -252,12 +252,34 @@ QMD uses three local GGUF models (auto-downloaded on first use):
|
||||
|
||||
| Model | Purpose | Size |
|
||||
|-------|---------|------|
|
||||
| `embeddinggemma-300M-Q8_0` | Vector embeddings | ~300MB |
|
||||
| `embeddinggemma-300M-Q8_0` | Vector embeddings (default) | ~300MB |
|
||||
| `qwen3-reranker-0.6b-q8_0` | Re-ranking | ~640MB |
|
||||
| `qmd-query-expansion-1.7B-q4_k_m` | Query expansion (fine-tuned) | ~1.1GB |
|
||||
|
||||
Models are downloaded from HuggingFace and cached in `~/.cache/qmd/models/`.
|
||||
|
||||
### Custom Embedding Model
|
||||
|
||||
Override the default embedding model via the `QMD_EMBED_MODEL` environment variable.
|
||||
This is useful for multilingual corpora (e.g. Chinese, Japanese, Korean) where
|
||||
`embeddinggemma-300M` has limited coverage.
|
||||
|
||||
```sh
|
||||
# Use Qwen3-Embedding-0.6B for better multilingual (CJK) support
|
||||
export QMD_EMBED_MODEL="hf:Qwen/Qwen3-Embedding-0.6B-GGUF/qwen3-embedding-0.6b-q8_0.gguf"
|
||||
|
||||
# After changing the model, re-embed all collections:
|
||||
qmd embed -f
|
||||
```
|
||||
|
||||
Supported model families:
|
||||
- **embeddinggemma** (default) — English-optimized, small footprint
|
||||
- **Qwen3-Embedding** — Multilingual (119 languages including CJK), MTEB top-ranked
|
||||
|
||||
> **Note:** When switching embedding models, you must re-index with `qmd embed -f`
|
||||
> since vectors are not cross-compatible between models. The prompt format is
|
||||
> automatically adjusted for each model family.
|
||||
|
||||
## Installation
|
||||
|
||||
```sh
|
||||
|
||||
35
src/llm.ts
35
src/llm.ts
@ -23,18 +23,37 @@ import { existsSync, mkdirSync, statSync, unlinkSync, readdirSync, readFileSync,
|
||||
// =============================================================================
|
||||
|
||||
/**
|
||||
* Format a query for embedding.
|
||||
* Uses nomic-style task prefix format for embeddinggemma.
|
||||
* Detect if a model URI uses the Qwen3-Embedding format.
|
||||
* Qwen3-Embedding uses a different prompting style than nomic/embeddinggemma.
|
||||
*/
|
||||
export function formatQueryForEmbedding(query: string): string {
|
||||
/**
 * Report whether a model URI refers to a Qwen embedding model.
 *
 * Such models need the Qwen3-Embedding prompt style rather than the
 * nomic/embeddinggemma one, so callers use this to pick a prompt format.
 * "qwen" and "embed" must occur (in either order, case-insensitively) inside
 * the same path component; otherwise an unrelated directory or account name,
 * as in `/home/qwen/models/embeddinggemma.gguf`, would be misdetected.
 *
 * @param modelUri - Model location, e.g. `hf:<user>/<repo>/<file>` or a file path.
 * @returns `true` when some path component names a Qwen embedding model.
 */
export function isQwen3EmbeddingModel(modelUri: string): boolean {
  // [^/\\]* keeps the match inside one component (no "/" or "\" in between).
  return /qwen[^/\\]*embed/i.test(modelUri) || /embed[^/\\]*qwen/i.test(modelUri);
}
|
||||
|
||||
/**
 * Wrap a search query in the prompt template expected by the embedding model.
 *
 * The model is taken from `modelUri`, else the `QMD_EMBED_MODEL` environment
 * variable, else the built-in default. Qwen embedding models receive the
 * "Instruct: ... / Query: ..." form; every other model receives the
 * nomic-style "task: search result | query: ..." prefix.
 *
 * @param query - Raw query text.
 * @param modelUri - Optional model URI that selects the prompt template.
 * @returns The query wrapped in the matching template.
 */
export function formatQueryForEmbedding(query: string, modelUri?: string): string {
  const activeModel = modelUri ?? process.env.QMD_EMBED_MODEL ?? DEFAULT_EMBED_MODEL;
  const usesQwenPrompt = isQwen3EmbeddingModel(activeModel);
  return usesQwenPrompt
    ? `Instruct: Retrieve relevant documents for the given query\nQuery: ${query}`
    : `task: search result | query: ${query}`;
}
|
||||
|
||||
/**
|
||||
* Format a document for embedding.
|
||||
* Uses nomic-style format with title and text fields.
|
||||
* Uses nomic-style format with title and text fields (default).
|
||||
* Qwen3-Embedding encodes documents as raw text without special prefixes.
|
||||
*/
|
||||
export function formatDocForEmbedding(text: string, title?: string): string {
|
||||
/**
 * Wrap a document in the prompt template expected by the embedding model.
 *
 * The model is taken from `modelUri`, else the `QMD_EMBED_MODEL` environment
 * variable, else the built-in default. Qwen embedding models get the raw text,
 * preceded by the title on its own line when a title is given. Every other
 * model gets the nomic-style "title: ... | text: ..." form, with "none"
 * standing in for a missing or empty title.
 *
 * @param text - Document body.
 * @param title - Optional document title.
 * @param modelUri - Optional model URI that selects the template.
 * @returns The document wrapped in the matching template.
 */
export function formatDocForEmbedding(text: string, title?: string, modelUri?: string): string {
  const activeModel = modelUri ?? process.env.QMD_EMBED_MODEL ?? DEFAULT_EMBED_MODEL;
  if (!isQwen3EmbeddingModel(activeModel)) {
    const label = title || "none";
    return `title: ${label} | text: ${text}`;
  }
  // Qwen3-Embedding: no task prefix, just the optional title line and the text.
  if (!title) {
    return text;
  }
  return `${title}\n${text}`;
}
|
||||
|
||||
@ -173,7 +192,8 @@ export type RerankDocument = {
|
||||
|
||||
// HuggingFace model URIs for node-llama-cpp
|
||||
// Format: hf:<user>/<repo>/<file>
|
||||
const DEFAULT_EMBED_MODEL = "hf:ggml-org/embeddinggemma-300M-GGUF/embeddinggemma-300M-Q8_0.gguf";
|
||||
// Override via QMD_EMBED_MODEL env var (e.g. hf:Qwen/Qwen3-Embedding-0.6B-GGUF/qwen3-embedding-0.6b-q8_0.gguf).
// `||` rather than `??`: an empty QMD_EMBED_MODEL (e.g. `export QMD_EMBED_MODEL=`) counts as
// unset, so the default can never become an empty model URI.
const DEFAULT_EMBED_MODEL = process.env.QMD_EMBED_MODEL || "hf:ggml-org/embeddinggemma-300M-GGUF/embeddinggemma-300M-Q8_0.gguf";
|
||||
const DEFAULT_RERANK_MODEL = "hf:ggml-org/Qwen3-Reranker-0.6B-Q8_0-GGUF/qwen3-reranker-0.6b-q8_0.gguf";
|
||||
// const DEFAULT_GENERATE_MODEL = "hf:ggml-org/Qwen3-0.6B-GGUF/Qwen3-0.6B-Q8_0.gguf";
|
||||
const DEFAULT_GENERATE_MODEL = "hf:tobil/qmd-query-expansion-1.7B-gguf/qmd-query-expansion-1.7B-q4_k_m.gguf";
|
||||
@ -1409,7 +1429,8 @@ let defaultLlamaCpp: LlamaCpp | null = null;
|
||||
*/
|
||||
export function getDefaultLlamaCpp(): LlamaCpp {
|
||||
// Construct the shared instance only when none is cached yet.
if (!defaultLlamaCpp) {
|
||||
// NOTE(review): this bare constructor call appears to be the pre-change line of the
// diff; the env-aware construction below supersedes it — confirm.
defaultLlamaCpp = new LlamaCpp();
|
||||
// Read the optional embedding-model override from the environment.
const embedModel = process.env.QMD_EMBED_MODEL;
|
||||
// Pass the override as the `embedModel` option; when it is unset or empty,
// construct with an empty config instead.
defaultLlamaCpp = new LlamaCpp(embedModel ? { embedModel } : {});
|
||||
}
|
||||
return defaultLlamaCpp;
|
||||
}
|
||||
|
||||
@ -2255,7 +2255,7 @@ export async function searchVec(db: Database, query: string, model: string, limi
|
||||
|
||||
async function getEmbedding(text: string, model: string, isQuery: boolean, session?: ILLMSession): Promise<number[] | null> {
|
||||
// Format text using the appropriate prompt template
|
||||
const formattedText = isQuery ? formatQueryForEmbedding(text) : formatDocForEmbedding(text);
|
||||
const formattedText = isQuery ? formatQueryForEmbedding(text, model) : formatDocForEmbedding(text, undefined, model);
|
||||
const result = session
|
||||
? await session.embed(formattedText, { model, isQuery })
|
||||
: await getDefaultLlamaCpp().embed(formattedText, { model, isQuery });
|
||||
|
||||
Loading…
Reference in New Issue
Block a user