Merge pull request #656 from tobi/fix/gpu-status-warning
Fix GPU status guidance and benchmark warnings
This commit is contained in:
commit
ddbd6bd8be
@ -8,6 +8,11 @@
|
||||
|
||||
### Fixes
|
||||
|
||||
- Skill: expand the packaged QMD skill with retrieval-first workflows, structured query examples, wiki/source collection guidance, and safe fallbacks when model-backed search is unavailable.
|
||||
- Tests: make `bun run test` execute the local unit suite under both Node/Vitest and Bun (`test:node` + `test:bun`) so runtime-specific regressions are caught before CI.
|
||||
- Model config: centralize embedding/rerank/generation model resolution so `qmd embed`, `status`, `query`, `vsearch`, `pull`, SDK vector search, and `bench` use the same active `.qmd/index.yaml` model hints and environment fallbacks.
|
||||
- GPU/status: `qmd status` now uses the same embedding model identity as `qmd embed` when computing pending embeddings, so URI-backed embeddings are not incorrectly reported as pending under the legacy `embeddinggemma` alias.
|
||||
- GPU status: `qmd status` now always shows GPU mode/configuration without unsafe native probing, and CPU-fallback warnings point to `QMD_STATUS_DEVICE_PROBE=1 qmd status` for an actual backend probe. The no-GPU warning is emitted once per process instead of once per LLM instance during benchmarks.
|
||||
- GPU: add `QMD_FORCE_CPU=1` / `--no-gpu` to bypass CUDA/Vulkan/Metal probing entirely, and route native llama.cpp stdout noise to stderr so JSON output stays parseable during search/query commands.
|
||||
- Snippet line numbers: `qmd_query` (MCP), HTTP `/query`, and `qmd query`
|
||||
(CLI JSON output and snippet headers) now return absolute source-file
|
||||
|
||||
@ -25,7 +25,10 @@
|
||||
"scripts": {
|
||||
"prepare": "[ -d .git ] && ./scripts/install-hooks.sh || true",
|
||||
"build": "tsc -p tsconfig.build.json && printf '#!/usr/bin/env node\n' | cat - dist/cli/qmd.js > dist/cli/qmd.tmp && mv dist/cli/qmd.tmp dist/cli/qmd.js && chmod +x dist/cli/qmd.js",
|
||||
"test": "vitest run --reporter=verbose test/",
|
||||
"test": "bun run test:unit",
|
||||
"test:node": "node ./node_modules/vitest/vitest.mjs run --reporter=verbose",
|
||||
"test:bun": "bun test --preload ./src/test-preload.ts",
|
||||
"test:unit": "bun run test:node -- test/ && bun run test:bun -- test/",
|
||||
"qmd": "tsx src/cli/qmd.ts",
|
||||
"index": "tsx src/cli/qmd.ts index",
|
||||
"vector": "tsx src/cli/qmd.ts vector",
|
||||
|
||||
@ -1,138 +1,161 @@
|
||||
---
|
||||
name: qmd
|
||||
description: Search markdown knowledge bases, notes, and documentation using QMD. Use when users ask to search notes, find documents, or look up information.
|
||||
description: Search local markdown knowledge bases, notes, docs, and wikis with QMD. Use when users ask to find notes, retrieve documents, inspect a wiki, answer from indexed markdown, or set up QMD access.
|
||||
license: MIT
|
||||
compatibility: Requires qmd CLI or MCP server. Install via `npm install -g @tobilu/qmd`.
|
||||
metadata:
|
||||
author: tobi
|
||||
version: "2.0.0"
|
||||
version: "2.1.0"
|
||||
allowed-tools: Bash(qmd:*), mcp__qmd__*
|
||||
---
|
||||
|
||||
# QMD - Quick Markdown Search
|
||||
# QMD - Query Markdown Documents
|
||||
|
||||
Local search engine for markdown content.
|
||||
QMD is a local search and retrieval engine for markdown collections: notes, docs,
|
||||
wikis, transcripts, and project knowledge bases. Use it before generic web search
|
||||
when the user is asking about something that may already live in their indexed
|
||||
local markdown.
|
||||
|
||||
## Status
|
||||
## Status Check
|
||||
|
||||
!`qmd status 2>/dev/null || echo "Not installed: npm install -g @tobilu/qmd"`
|
||||
Start by checking what QMD can see:
|
||||
|
||||
## MCP: `query`
|
||||
```bash
|
||||
qmd collection list
|
||||
qmd ls
|
||||
```
|
||||
|
||||
For health details:
|
||||
|
||||
```bash
|
||||
qmd status
|
||||
```
|
||||
|
||||
If QMD is missing:
|
||||
|
||||
```bash
|
||||
npm install -g @tobilu/qmd
|
||||
```
|
||||
|
||||
## Retrieval Workflow
|
||||
|
||||
1. **Discover collections** with `qmd collection list` or `qmd ls`.
|
||||
2. **Search first**, usually with a small result count.
|
||||
3. **Retrieve source documents** with `qmd get` or `qmd multi-get`.
|
||||
4. **Answer from the retrieved text**, citing file paths or docids.
|
||||
5. **If results are weak**, rewrite the query using a different search mode.
|
||||
|
||||
Do not answer from search-result snippets alone when the user needs substance.
|
||||
Fetch the document.
|
||||
|
||||
## Search Modes
|
||||
|
||||
### Fast lexical search
|
||||
|
||||
Use BM25 when you know names, exact terms, titles, identifiers, or code symbols:
|
||||
|
||||
```bash
|
||||
qmd search "cockpit OKR Goodhart" -n 10
|
||||
qmd search '"AI Before Headcount"' -c concepts -n 5
|
||||
```
|
||||
|
||||
Good `lex` queries are short: 2-6 discriminative terms, quoted phrases when exact,
|
||||
and no filler words.
|
||||
|
||||
### Hybrid query search
|
||||
|
||||
Use `qmd query` when semantic recall, query expansion, vector search, or reranking
|
||||
matters more than speed:
|
||||
|
||||
```bash
|
||||
qmd query "decision quality depends on surfacing assumptions and context" -n 10
|
||||
qmd query --json --explain "metrics as cockpit instruments but not OKRs"
|
||||
```
|
||||
|
||||
`qmd query` may initialize local models. If models/GPU are unavailable, slow, or
|
||||
crashing, fall back to `qmd search` and use better lexical terms.
|
||||
|
||||
### Structured queries
|
||||
|
||||
For subtle wiki/doc searches, structured queries are usually strongest:
|
||||
|
||||
```bash
|
||||
qmd query $'intent: Find the concept note about metrics as instruments without letting OKRs replace judgment.\nlex: cockpit instruments OKR Goodhart metrics judgment\nvec: data informed not metric driven product judgment\nhyde: A concept note says metrics are useful like cockpit instruments, but leaders should remain data-informed rather than metric-driven because OKRs and dashboards can Goodhart product judgment.'
|
||||
```
|
||||
|
||||
Use this pattern when the user's wording is indirect:
|
||||
|
||||
- `intent:` disambiguates the target.
|
||||
- `lex:` anchors exact names, phrases, aliases, and rare terms.
|
||||
- `vec:` adds the semantic paraphrase.
|
||||
- `hyde:` describes the document that would answer the query.
|
||||
|
||||
Put the best query first; early searches receive more weight in fusion.
|
||||
|
||||
## MCP Tool: `query`
|
||||
|
||||
When using the MCP server, prefer structured searches:
|
||||
|
||||
```json
|
||||
{
|
||||
"searches": [
|
||||
{ "type": "lex", "query": "CAP theorem consistency" },
|
||||
{ "type": "vec", "query": "tradeoff between consistency and availability" }
|
||||
{ "type": "lex", "query": "cockpit OKR Goodhart" },
|
||||
{ "type": "vec", "query": "data informed not metric driven product judgment" },
|
||||
{ "type": "hyde", "query": "A concept note explains that metrics are useful as instruments, but leaders should not let OKRs or dashboards replace judgment." }
|
||||
],
|
||||
"collections": ["docs"],
|
||||
"intent": "Find the concept note about using metrics as instruments without becoming metric-driven.",
|
||||
"collections": ["concepts"],
|
||||
"limit": 10
|
||||
}
|
||||
```
|
||||
|
||||
### Query Types
|
||||
|
||||
| Type | Method | Input |
|
||||
|------|--------|-------|
|
||||
| `lex` | BM25 | Keywords — exact terms, names, code |
|
||||
| `vec` | Vector | Question — natural language |
|
||||
| `hyde` | Vector | Answer — hypothetical result (50-100 words) |
|
||||
- `lex` — BM25 keyword search. Best for exact terms, names, titles, and code.
|
||||
- `vec` — vector semantic search. Best for natural-language concepts.
|
||||
- `hyde` — vector search using a hypothetical answer/document passage.
|
||||
|
||||
### Writing Good Queries
|
||||
|
||||
**lex (keyword)**
|
||||
- 2-5 terms, no filler words
|
||||
- Exact phrase: `"connection pool"` (quoted)
|
||||
- Exclude terms: `performance -sports` (minus prefix)
|
||||
- Code identifiers work: `handleError async`
|
||||
|
||||
**vec (semantic)**
|
||||
- Full natural language question
|
||||
- Be specific: `"how does the rate limiter handle burst traffic"`
|
||||
- Include context: `"in the payment service, how are refunds processed"`
|
||||
|
||||
**hyde (hypothetical document)**
|
||||
- Write 50-100 words of what the *answer* looks like
|
||||
- Use the vocabulary you expect in the result
|
||||
|
||||
**expand (auto-expand)**
|
||||
- Use a single-line query (implicit) or `expand: question` on its own line
|
||||
- Lets the local LLM generate lex/vec/hyde variations
|
||||
- Do not mix `expand:` with other typed lines — it's either a standalone expand query or a full query document
|
||||
|
||||
### Intent (Disambiguation)
|
||||
|
||||
When a query term is ambiguous, add `intent` to steer results:
|
||||
|
||||
```json
|
||||
{
|
||||
"searches": [
|
||||
{ "type": "lex", "query": "performance" }
|
||||
],
|
||||
"intent": "web page load times and Core Web Vitals"
|
||||
}
|
||||
```
|
||||
|
||||
Intent affects expansion, reranking, chunk selection, and snippet extraction. It does not search on its own — it's a steering signal that disambiguates queries like "performance" (web-perf vs team health vs fitness).
|
||||
|
||||
### Combining Types
|
||||
|
||||
| Goal | Approach |
|
||||
|------|----------|
|
||||
| Know exact terms | `lex` only |
|
||||
| Don't know vocabulary | Use a single-line query (implicit `expand:`) or `vec` |
|
||||
| Best recall | `lex` + `vec` |
|
||||
| Complex topic | `lex` + `vec` + `hyde` |
|
||||
| Ambiguous query | Add `intent` to any combination above |
|
||||
|
||||
First query gets 2x weight in fusion — put your best guess first.
|
||||
|
||||
### Lex Query Syntax
|
||||
|
||||
| Syntax | Meaning | Example |
|
||||
|--------|---------|---------|
|
||||
| `term` | Prefix match | `perf` matches "performance" |
|
||||
| `"phrase"` | Exact phrase | `"rate limiter"` |
|
||||
| `-term` | Exclude | `performance -sports` |
|
||||
|
||||
Note: `-term` only works in lex queries, not vec/hyde.
|
||||
|
||||
### Collection Filtering
|
||||
|
||||
```json
|
||||
{ "collections": ["docs"] } // Single
|
||||
{ "collections": ["docs", "notes"] } // Multiple (OR)
|
||||
```
|
||||
|
||||
Omit to search all collections.
|
||||
|
||||
## Other MCP Tools
|
||||
|
||||
| Tool | Use |
|
||||
|------|-----|
|
||||
| `get` | Retrieve doc by path or `#docid` |
|
||||
| `multi_get` | Retrieve multiple by glob/list |
|
||||
| `status` | Collections and health |
|
||||
|
||||
## CLI
|
||||
## Retrieval Commands
|
||||
|
||||
```bash
|
||||
qmd query "question" # Auto-expand + rerank
|
||||
qmd query $'lex: X\nvec: Y' # Structured
|
||||
qmd query $'expand: question' # Explicit expand
|
||||
qmd query --json --explain "q" # Show score traces (RRF + rerank blend)
|
||||
qmd search "keywords" # BM25 only (no LLM)
|
||||
qmd get "#abc123" # By docid
|
||||
qmd multi-get "journals/2026-*.md" -l 40 # Batch pull snippets by glob
|
||||
qmd multi-get notes/foo.md,notes/bar.md # Comma-separated list, preserves order
|
||||
qmd get "#abc123" # retrieve by docid
|
||||
qmd get qmd://concepts/ai-before-headcount.md --full
|
||||
qmd multi-get 'concepts/{ai-before-headcount.md,data-informed-not-metric-driven.md}' --md
|
||||
qmd multi-get 'sources/podcast-2025-*.md' -l 80
|
||||
```
|
||||
|
||||
## HTTP API
|
||||
Use `multi-get` when comparing several hits or gathering context across pages.
|
||||
Use `--full` when the exact source matters.
|
||||
|
||||
## Collection Filtering
|
||||
|
||||
```bash
|
||||
curl -X POST http://localhost:8181/query \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{"searches": [{"type": "lex", "query": "test"}]}'
|
||||
qmd search "headcount autonomous agents" -c concepts -n 10
|
||||
qmd query "merchant support product reality" -c concepts -c sources -n 10
|
||||
```
|
||||
|
||||
Omit `-c` / `collections` to search everything. Add collection filters when a
|
||||
broad query drifts into the wrong corpus.
|
||||
|
||||
## Query Craft
|
||||
|
||||
Good QMD searches mix three things:
|
||||
|
||||
1. **Title/alias anchors:** exact page titles, named entities, phrases.
|
||||
2. **Semantic paraphrase:** how a human would describe the idea.
|
||||
3. **Negative space:** enough intent to avoid nearby-but-wrong concepts.
|
||||
|
||||
Examples:
|
||||
|
||||
```bash
|
||||
# Exact-ish title lookup
|
||||
qmd search '"arm the rebels" merchants tools big companies' -c concepts
|
||||
|
||||
# Semantic concept lookup
|
||||
qmd query $'intent: Find the customer proximity concept, not generic customer delight.\nlex: support pseudonymous merchant customer interviews\nvec: founder stays close to merchant reality through support and product use'
|
||||
|
||||
# Source lookup
|
||||
qmd search "six-week cadence WhatsApp merchant relationships Shawn Ryan" -c sources -n 10
|
||||
```
|
||||
|
||||
## Setup
|
||||
@ -142,3 +165,28 @@ npm install -g @tobilu/qmd
|
||||
qmd collection add ~/notes --name notes
|
||||
qmd embed
|
||||
```
|
||||
|
||||
Only add collections or generate embeddings when the user asked for setup or
|
||||
index maintenance. Searching and retrieving are safe; collection/index mutation is
|
||||
not a casual first step.
|
||||
|
||||
## MCP Setup
|
||||
|
||||
See `references/mcp-setup.md` for Claude Code, Claude Desktop, OpenClaw, and HTTP
|
||||
server configuration.
|
||||
|
||||
## Pitfalls
|
||||
|
||||
- **Do not stop at snippets.** Fetch documents before making claims.
|
||||
- **Do not overuse semantic search.** If you know exact titles or terms, BM25 is
|
||||
faster and often better.
|
||||
- **Do not mutate indexes casually.** `qmd collection add`, `qmd update`, and
|
||||
`qmd embed` change local state and can be expensive.
|
||||
- **Model-backed commands can be environment-sensitive.** If `qmd query`,
|
||||
`qmd vsearch`, or reranking fails because local models/GPU are unavailable,
|
||||
use `qmd search` and stronger lexical/structured terms.
|
||||
- **Ambiguous user wording needs intent.** Add `intent:` rather than hoping query
|
||||
expansion guesses the right domain.
|
||||
- **Collection names matter.** Search `concepts` for synthesized wiki pages,
|
||||
`sources` for transcripts/raw source pages, and docs collections for code/project
|
||||
documentation.
|
||||
|
||||
@ -78,7 +78,7 @@ import {
|
||||
type ReindexResult,
|
||||
type ChunkStrategy,
|
||||
} from "../store.js";
|
||||
import { disposeDefaultLlamaCpp, getDefaultLlamaCpp, setDefaultLlamaCpp, LlamaCpp, withLLMSession, pullModels, DEFAULT_EMBED_MODEL_URI, DEFAULT_GENERATE_MODEL_URI, DEFAULT_RERANK_MODEL_URI, DEFAULT_MODEL_CACHE_DIR } from "../llm.js";
|
||||
import { disposeDefaultLlamaCpp, getDefaultLlamaCpp, setDefaultLlamaCpp, LlamaCpp, withLLMSession, pullModels, DEFAULT_MODEL_CACHE_DIR, resolveEmbedModel, resolveGenerateModel, resolveRerankModel, resolveModels } from "../llm.js";
|
||||
import {
|
||||
formatSearchResults,
|
||||
formatDocuments,
|
||||
@ -311,8 +311,8 @@ function formatETA(seconds: number): string {
|
||||
|
||||
|
||||
// Check index health and print warnings/tips
|
||||
function checkIndexHealth(db: Database): void {
|
||||
const { needsEmbedding, totalDocs, daysStale } = getIndexHealth(db);
|
||||
function checkIndexHealth(db: Database, model: string = resolveEmbedModelForCli()): void {
|
||||
const { needsEmbedding, totalDocs, daysStale } = getIndexHealth(db, model);
|
||||
|
||||
// Warn if many docs need embedding
|
||||
if (needsEmbedding > 0) {
|
||||
@ -410,7 +410,8 @@ async function showStatus(): Promise<void> {
|
||||
// Overall stats
|
||||
const totalDocs = db.prepare(`SELECT COUNT(*) as count FROM documents WHERE active = 1`).get() as { count: number };
|
||||
const vectorCount = db.prepare(`SELECT COUNT(*) as count FROM content_vectors`).get() as { count: number };
|
||||
const needsEmbedding = getHashesNeedingEmbedding(db);
|
||||
const statusEmbedModel = resolveEmbedModelForCli();
|
||||
const needsEmbedding = getHashesNeedingEmbedding(db, undefined, statusEmbedModel);
|
||||
|
||||
// Most recent update across all collections
|
||||
const mostRecent = db.prepare(`SELECT MAX(modified_at) as latest FROM documents WHERE active = 1`).get() as { latest: string | null };
|
||||
@ -536,18 +537,26 @@ async function showStatus(): Promise<void> {
|
||||
const match = uri.match(/^hf:([^/]+\/[^/]+)\//);
|
||||
return match ? `https://huggingface.co/${match[1]}` : uri;
|
||||
};
|
||||
const activeModels = resolveModelsForCli();
|
||||
console.log(`\n${c.bold}Models${c.reset}`);
|
||||
console.log(` Embedding: ${hfLink(DEFAULT_EMBED_MODEL_URI)}`);
|
||||
console.log(` Reranking: ${hfLink(DEFAULT_RERANK_MODEL_URI)}`);
|
||||
console.log(` Generation: ${hfLink(DEFAULT_GENERATE_MODEL_URI)}`);
|
||||
console.log(` Embedding: ${hfLink(activeModels.embed)}`);
|
||||
console.log(` Reranking: ${hfLink(activeModels.rerank)}`);
|
||||
console.log(` Generation: ${hfLink(activeModels.generate)}`);
|
||||
}
|
||||
|
||||
// Device / GPU info
|
||||
// Important: probing node-llama-cpp can abort the whole process on machines with
|
||||
// incompatible GPU drivers (for example Vulkan loader present but no usable driver).
|
||||
// Keep `qmd status` safe by default and make the expensive/native probe opt-in.
|
||||
if (process.env.QMD_STATUS_DEVICE_PROBE === "1") {
|
||||
console.log(`\n${c.bold}Device${c.reset}`);
|
||||
// Keep the native probe opt-in, but always show how QMD is configured and how to probe.
|
||||
console.log(`\n${c.bold}Device${c.reset}`);
|
||||
const configuredGpuMode = process.env.QMD_FORCE_CPU && !["false", "off", "none", "disable", "disabled", "0"].includes(process.env.QMD_FORCE_CPU.trim().toLowerCase())
|
||||
? "CPU forced (QMD_FORCE_CPU)"
|
||||
: (process.env.QMD_LLAMA_GPU?.trim() || "auto");
|
||||
console.log(` Mode: ${configuredGpuMode}`);
|
||||
if (process.env.QMD_STATUS_DEVICE_PROBE !== "1") {
|
||||
console.log(` Status: ${c.dim}not probed${c.reset} (set QMD_STATUS_DEVICE_PROBE=1 to test GPU/CPU backend)`);
|
||||
} else {
|
||||
console.log(` Status: probing native llama backend...`);
|
||||
try {
|
||||
const llm = getDefaultLlamaCpp();
|
||||
const device = await llm.getDeviceInfo({ allowBuild: false });
|
||||
@ -1794,7 +1803,35 @@ function parseChunkStrategy(value: unknown): ChunkStrategy | undefined {
|
||||
}
|
||||
|
||||
/**
 * Resolve the embedding model identifier the CLI should use.
 *
 * The model hints from the loaded config (`loadConfig().models`) are handed to
 * `resolveEmbedModel`, which applies its own fallbacks when no hint is set.
 *
 * @returns The embedding model identifier chosen by `resolveEmbedModel`.
 */
export function resolveEmbedModelForCli(): string {
  try {
    return resolveEmbedModel(loadConfig().models);
  } catch {
    // Best effort: if the config cannot be loaded, resolve without any
    // config hints instead of failing the command.
    return resolveEmbedModel();
  }
}
|
||||
|
||||
/**
 * Resolve the generation model identifier the CLI should use.
 *
 * Passes the loaded config's model hints to `resolveGenerateModel`; if that
 * attempt throws, resolves again without config hints.
 *
 * @returns The generation model identifier chosen by `resolveGenerateModel`.
 */
export function resolveGenerateModelForCli(): string {
  let model: string;
  try {
    model = resolveGenerateModel(loadConfig().models);
  } catch {
    // Config could not be used; fall back to resolution without hints.
    model = resolveGenerateModel();
  }
  return model;
}
|
||||
|
||||
/**
 * Resolve the reranking model identifier the CLI should use.
 *
 * Passes the loaded config's model hints to `resolveRerankModel`; if that
 * attempt throws, resolves again without config hints.
 *
 * @returns The reranking model identifier chosen by `resolveRerankModel`.
 */
export function resolveRerankModelForCli(): string {
  let model: string;
  try {
    model = resolveRerankModel(loadConfig().models);
  } catch {
    // Config could not be used; fall back to resolution without hints.
    model = resolveRerankModel();
  }
  return model;
}
|
||||
|
||||
/**
 * Resolve all three model identifiers (embed, generate, rerank) for the CLI.
 *
 * Passes the loaded config's model hints to `resolveModels`; if that attempt
 * throws, resolves again without config hints.
 *
 * @returns The embed, generate, and rerank model identifiers.
 */
function resolveModelsForCli(): { embed: string; generate: string; rerank: string } {
  let models: { embed: string; generate: string; rerank: string };
  try {
    models = resolveModels(loadConfig().models);
  } catch {
    // Config could not be used; fall back to resolution without hints.
    models = resolveModels();
  }
  return models;
}
|
||||
|
||||
async function vectorIndex(
|
||||
@ -3531,10 +3568,11 @@ if (isMain) {
|
||||
|
||||
case "pull": {
|
||||
const refresh = cli.values.refresh === undefined ? false : Boolean(cli.values.refresh);
|
||||
const activeModels = resolveModelsForCli();
|
||||
const models = [
|
||||
DEFAULT_EMBED_MODEL_URI,
|
||||
DEFAULT_GENERATE_MODEL_URI,
|
||||
DEFAULT_RERANK_MODEL_URI,
|
||||
activeModels.embed,
|
||||
activeModels.generate,
|
||||
activeModels.rerank,
|
||||
];
|
||||
console.log(`${c.bold}Pulling models${c.reset}`);
|
||||
const results = await pullModels(models, {
|
||||
|
||||
@ -23,7 +23,6 @@ import {
|
||||
structuredSearch,
|
||||
extractSnippet,
|
||||
addLineNumbers,
|
||||
DEFAULT_EMBED_MODEL,
|
||||
DEFAULT_MULTI_GET_MAX_BYTES,
|
||||
reindexCollection,
|
||||
generateEmbeddings,
|
||||
@ -423,7 +422,7 @@ export async function createStore(options: StoreOptions): Promise<QMDStore> {
|
||||
});
|
||||
},
|
||||
searchLex: async (q, opts) => internal.searchFTS(q, opts?.limit, opts?.collection),
|
||||
searchVector: async (q, opts) => internal.searchVec(q, DEFAULT_EMBED_MODEL, opts?.limit, opts?.collection),
|
||||
searchVector: async (q, opts) => internal.searchVec(q, llm.embedModelName, opts?.limit, opts?.collection),
|
||||
expandQuery: async (q, opts) => internal.expandQuery(q, undefined, opts?.intent),
|
||||
get: async (pathOrDocid, opts) => internal.findDocument(pathOrDocid, opts),
|
||||
getDocumentBody: async (pathOrDocid, opts) => {
|
||||
|
||||
51
src/llm.ts
51
src/llm.ts
@ -31,6 +31,7 @@ async function loadNodeLlamaCpp(): Promise<NodeLlamaCppModule> {
|
||||
/**
 * Test hook: install a replacement node-llama-cpp module, or clear it.
 *
 * Also resets the module-level GPU bookkeeping (the set of failed GPU init
 * modes and the "no GPU acceleration" warning flag).
 *
 * @param module Replacement module, or `null` to clear the stored import.
 */
export function setNodeLlamaCppModuleForTest(module: NodeLlamaCppModule | null): void {
  if (module) {
    nodeLlamaCppImport = Promise.resolve(module);
  } else {
    nodeLlamaCppImport = null;
  }
  noGpuAccelerationWarningShown = false;
  failedGpuInitModes.clear();
}
|
||||
|
||||
type StdoutWrite = typeof process.stdout.write;
|
||||
@ -83,7 +84,7 @@ export function isQwen3EmbeddingModel(modelUri: string): boolean {
|
||||
* Uses Qwen3-Embedding instruct format when a Qwen embedding model is active.
|
||||
*/
|
||||
export function formatQueryForEmbedding(query: string, modelUri?: string): string {
|
||||
const uri = modelUri ?? process.env.QMD_EMBED_MODEL ?? DEFAULT_EMBED_MODEL;
|
||||
const uri = modelUri ?? resolveEmbedModel();
|
||||
if (isQwen3EmbeddingModel(uri)) {
|
||||
return `Instruct: Retrieve relevant documents for the given query\nQuery: ${query}`;
|
||||
}
|
||||
@ -96,7 +97,7 @@ export function formatQueryForEmbedding(query: string, modelUri?: string): strin
|
||||
* Qwen3-Embedding encodes documents as raw text without special prefixes.
|
||||
*/
|
||||
export function formatDocForEmbedding(text: string, title?: string, modelUri?: string): string {
|
||||
const uri = modelUri ?? process.env.QMD_EMBED_MODEL ?? DEFAULT_EMBED_MODEL;
|
||||
const uri = modelUri ?? resolveEmbedModel();
|
||||
if (isQwen3EmbeddingModel(uri)) {
|
||||
// Qwen3-Embedding: documents are raw text, no task prefix
|
||||
return title ? `${title}\n${text}` : text;
|
||||
@ -255,6 +256,32 @@ export const DEFAULT_EMBED_MODEL_URI = DEFAULT_EMBED_MODEL;
|
||||
export const DEFAULT_RERANK_MODEL_URI = DEFAULT_RERANK_MODEL;
|
||||
export const DEFAULT_GENERATE_MODEL_URI = DEFAULT_GENERATE_MODEL;
|
||||
|
||||
/**
 * Optional per-role model overrides consumed by the `resolve*Model` helpers.
 * Any field left unset defers to that helper's fallbacks.
 */
export type ModelResolutionConfig = {
  /** Embedding model override. */
  embed?: string;
  /** Generation model override. */
  generate?: string;
  /** Reranking model override. */
  rerank?: string;
};
|
||||
|
||||
/**
 * Pick the embedding model identifier.
 *
 * Precedence: `config.embed`, then the `QMD_EMBED_MODEL` environment
 * variable, then `DEFAULT_EMBED_MODEL`. Empty values are treated as unset.
 *
 * @param config Optional model overrides.
 * @returns The first non-empty candidate in the order above.
 */
export function resolveEmbedModel(config?: ModelResolutionConfig): string {
  const fromConfig = config?.embed;
  if (fromConfig) {
    return fromConfig;
  }
  const fromEnv = process.env.QMD_EMBED_MODEL;
  return fromEnv ? fromEnv : DEFAULT_EMBED_MODEL;
}
|
||||
|
||||
/**
 * Pick the generation model identifier.
 *
 * Precedence: `config.generate`, then the `QMD_GENERATE_MODEL` environment
 * variable, then `DEFAULT_GENERATE_MODEL`. Empty values are treated as unset.
 *
 * @param config Optional model overrides.
 * @returns The first non-empty candidate in the order above.
 */
export function resolveGenerateModel(config?: ModelResolutionConfig): string {
  const fromConfig = config?.generate;
  if (fromConfig) {
    return fromConfig;
  }
  const fromEnv = process.env.QMD_GENERATE_MODEL;
  return fromEnv ? fromEnv : DEFAULT_GENERATE_MODEL;
}
|
||||
|
||||
/**
 * Pick the reranking model identifier.
 *
 * Precedence: `config.rerank`, then the `QMD_RERANK_MODEL` environment
 * variable, then `DEFAULT_RERANK_MODEL`. Empty values are treated as unset.
 *
 * @param config Optional model overrides.
 * @returns The first non-empty candidate in the order above.
 */
export function resolveRerankModel(config?: ModelResolutionConfig): string {
  const fromConfig = config?.rerank;
  if (fromConfig) {
    return fromConfig;
  }
  const fromEnv = process.env.QMD_RERANK_MODEL;
  return fromEnv ? fromEnv : DEFAULT_RERANK_MODEL;
}
|
||||
|
||||
/**
 * Resolve the embed, generate, and rerank model identifiers in one call.
 *
 * Each role is delegated to its dedicated resolver with the same `config`.
 *
 * @param config Optional model overrides shared by all three resolvers.
 * @returns An object with every role populated.
 */
export function resolveModels(config?: ModelResolutionConfig): Required<ModelResolutionConfig> {
  const embed = resolveEmbedModel(config);
  const generate = resolveGenerateModel(config);
  const rerank = resolveRerankModel(config);
  return { embed, generate, rerank };
}
|
||||
|
||||
// Local model cache directory
|
||||
const MODEL_CACHE_DIR = process.env.XDG_CACHE_HOME
|
||||
? join(process.env.XDG_CACHE_HOME, "qmd", "models")
|
||||
@ -579,6 +606,7 @@ function resolveExpandContextSize(configValue?: number): number {
|
||||
}
|
||||
|
||||
const failedGpuInitModes = new Set<LlamaGpuMode>();
|
||||
let noGpuAccelerationWarningShown = false;
|
||||
|
||||
export class LlamaCpp implements LLM {
|
||||
private readonly _ciMode = !!process.env.CI;
|
||||
@ -610,9 +638,9 @@ export class LlamaCpp implements LLM {
|
||||
|
||||
|
||||
constructor(config: LlamaCppConfig = {}) {
|
||||
this.embedModelUri = config.embedModel || process.env.QMD_EMBED_MODEL || DEFAULT_EMBED_MODEL;
|
||||
this.generateModelUri = config.generateModel || process.env.QMD_GENERATE_MODEL || DEFAULT_GENERATE_MODEL;
|
||||
this.rerankModelUri = config.rerankModel || process.env.QMD_RERANK_MODEL || DEFAULT_RERANK_MODEL;
|
||||
this.embedModelUri = resolveEmbedModel({ embed: config.embedModel });
|
||||
this.generateModelUri = resolveGenerateModel({ generate: config.generateModel });
|
||||
this.rerankModelUri = resolveRerankModel({ rerank: config.rerankModel });
|
||||
this.modelCacheDir = config.modelCacheDir || MODEL_CACHE_DIR;
|
||||
this.expandContextSize = resolveExpandContextSize(config.expandContextSize);
|
||||
this.inactivityTimeoutMs = config.inactivityTimeoutMs ?? DEFAULT_INACTIVITY_TIMEOUT_MS;
|
||||
@ -623,6 +651,14 @@ export class LlamaCpp implements LLM {
|
||||
return this.embedModelUri;
|
||||
}
|
||||
|
||||
/** Generation model URI held by this instance. */
get generateModelName(): string {
  const { generateModelUri } = this;
  return generateModelUri;
}
|
||||
|
||||
/** Reranking model URI held by this instance. */
get rerankModelName(): string {
  const { rerankModelUri } = this;
  return rerankModelUri;
}
|
||||
|
||||
/**
|
||||
* Reset the inactivity timer. Called after each model operation.
|
||||
* When timer fires, models are unloaded to free memory (if no active sessions).
|
||||
@ -760,9 +796,10 @@ export class LlamaCpp implements LLM {
|
||||
}
|
||||
}
|
||||
|
||||
if (llama.gpu === false) {
|
||||
if (llama.gpu === false && !noGpuAccelerationWarningShown) {
|
||||
noGpuAccelerationWarningShown = true;
|
||||
process.stderr.write(
|
||||
"QMD Warning: no GPU acceleration, running on CPU (slow). Run 'qmd status' for details.\n"
|
||||
"QMD Warning: no GPU acceleration, running on CPU (slow). Run 'QMD_STATUS_DEVICE_PROBE=1 qmd status' for device details.\n"
|
||||
);
|
||||
}
|
||||
this.llama = llama;
|
||||
|
||||
13
src/store.ts
13
src/store.ts
@ -25,6 +25,9 @@ import {
|
||||
formatQueryForEmbedding,
|
||||
formatDocForEmbedding,
|
||||
withLLMSessionForLlm,
|
||||
DEFAULT_EMBED_MODEL_URI,
|
||||
DEFAULT_RERANK_MODEL_URI,
|
||||
DEFAULT_GENERATE_MODEL_URI,
|
||||
type RerankDocument,
|
||||
type ILLMSession,
|
||||
} from "./llm.js";
|
||||
@ -39,9 +42,9 @@ import type {
|
||||
// Configuration
|
||||
// =============================================================================
|
||||
|
||||
export const DEFAULT_EMBED_MODEL = "embeddinggemma";
|
||||
export const DEFAULT_RERANK_MODEL = "ExpedientFalcon/qwen3-reranker:0.6b-q8_0";
|
||||
export const DEFAULT_QUERY_MODEL = "Qwen/Qwen3-1.7B";
|
||||
export const DEFAULT_EMBED_MODEL = DEFAULT_EMBED_MODEL_URI;
|
||||
export const DEFAULT_RERANK_MODEL = DEFAULT_RERANK_MODEL_URI;
|
||||
export const DEFAULT_QUERY_MODEL = DEFAULT_GENERATE_MODEL_URI;
|
||||
export const DEFAULT_GLOB = "**/*.md";
|
||||
export const DEFAULT_MULTI_GET_MAX_BYTES = 10 * 1024; // 10KB
|
||||
export const DEFAULT_EMBED_MAX_DOCS_PER_BATCH = 64;
|
||||
@ -1749,8 +1752,8 @@ export function createStore(dbPath?: string): Store {
|
||||
searchVec: (query: string, model: string, limit?: number, collectionName?: string, session?: ILLMSession, precomputedEmbedding?: number[]) => searchVec(db, query, model, limit, collectionName, session, precomputedEmbedding),
|
||||
|
||||
// Query expansion & reranking
|
||||
expandQuery: (query: string, model?: string, intent?: string) => expandQuery(query, model, db, intent, store.llm),
|
||||
rerank: (query: string, documents: { file: string; text: string }[], model?: string, intent?: string) => rerank(query, documents, model, db, intent, store.llm),
|
||||
expandQuery: (query: string, model?: string, intent?: string) => expandQuery(query, model ?? store.llm?.generateModelName ?? DEFAULT_QUERY_MODEL, db, intent, store.llm),
|
||||
rerank: (query: string, documents: { file: string; text: string }[], model?: string, intent?: string) => rerank(query, documents, model ?? store.llm?.rerankModelName ?? DEFAULT_RERANK_MODEL, db, intent, store.llm),
|
||||
|
||||
// Document retrieval
|
||||
findDocument: (filename: string, options?: { includeBody?: boolean }) => findDocument(db, filename, options),
|
||||
|
||||
@ -27,14 +27,15 @@ let testCounter = 0; // Unique counter for each test run
|
||||
const thisDir = dirname(fileURLToPath(import.meta.url));
|
||||
const projectRoot = join(thisDir, "..");
|
||||
const qmdScript = join(projectRoot, "src", "cli", "qmd.ts");
|
||||
// Locate the tsx binary: prefer the project's own node_modules/.bin, and fall
// back to node_modules/.bin under the current working directory otherwise.
const tsxBin = (() => {
  const projectLocal = join(projectRoot, "node_modules", ".bin", "tsx");
  return existsSync(projectLocal)
    ? projectLocal
    : join(process.cwd(), "node_modules", ".bin", "tsx");
})();
|
||||
// True when a `Bun` global is present, i.e. the tests are running under Bun.
const isBunRuntime = typeof (globalThis as { Bun?: unknown }).Bun !== "undefined";
// Path to the tsx CLI entry point inside the project's node_modules.
const tsxCli = join(projectRoot, "node_modules", "tsx", "dist", "cli.mjs");
// How to launch the qmd script: always through the current runtime's
// executable; under Bun the script is passed directly, otherwise the tsx CLI
// is placed in front of it.
const qmdCommand = {
  command: process.execPath,
  args: isBunRuntime ? [qmdScript] : [tsxCli, qmdScript],
};
|
||||
|
||||
/**
 * Build the command and argument list for spawning qmd with extra arguments.
 *
 * @param args CLI arguments to append after the base launcher arguments.
 * @returns The launcher command plus a new array of base args followed by `args`.
 */
function qmdRunnerArgs(args: string[]): { command: string; args: string[] } {
  const { command, args: baseArgs } = qmdCommand;
  return { command, args: baseArgs.concat(args) };
}
|
||||
|
||||
// Helper to run qmd command with test database
|
||||
async function runQmd(
|
||||
@ -44,7 +45,8 @@ async function runQmd(
|
||||
const workingDir = options.cwd || fixturesDir;
|
||||
const dbPath = options.dbPath || testDbPath;
|
||||
const configDir = options.configDir || testConfigDir;
|
||||
const proc = spawn(tsxBin, [qmdScript, ...args], {
|
||||
const runner = qmdRunnerArgs(args);
|
||||
const proc = spawn(runner.command, runner.args, {
|
||||
cwd: workingDir,
|
||||
env: {
|
||||
...process.env,
|
||||
@ -252,15 +254,15 @@ describe("CLI Skills", () => {
|
||||
expect(stderr).toBe("");
|
||||
expect(exitCode).toBe(0);
|
||||
expect(stdout).toContain("qmd");
|
||||
expect(stdout).toContain("Search markdown knowledge bases");
|
||||
expect(stdout).toContain("Search local markdown knowledge bases");
|
||||
});
|
||||
|
||||
test("gets version-matched runtime skill content", async () => {
|
||||
const { stdout, stderr, exitCode } = await runQmd(["skills", "get", "qmd"]);
|
||||
expect(stderr).toBe("");
|
||||
expect(exitCode).toBe(0);
|
||||
expect(stdout).toContain("# QMD - Quick Markdown Search");
|
||||
expect(stdout).toContain("## MCP: `query`");
|
||||
expect(stdout).toContain("# QMD - Query Markdown Documents");
|
||||
expect(stdout).toContain("## MCP Tool: `query`");
|
||||
expect(stdout).not.toContain("This file is a discovery stub");
|
||||
});
|
||||
|
||||
@ -268,7 +270,7 @@ describe("CLI Skills", () => {
|
||||
const { stdout, stderr, exitCode } = await runQmd(["skills", "get", "qmd", "--full"]);
|
||||
expect(stderr).toBe("");
|
||||
expect(exitCode).toBe(0);
|
||||
expect(stdout).toContain("# QMD - Quick Markdown Search");
|
||||
expect(stdout).toContain("# QMD - Query Markdown Documents");
|
||||
expect(stdout).toContain("--- references/mcp-setup.md ---");
|
||||
expect(stdout).toContain("# QMD MCP Server Setup");
|
||||
});
|
||||
@ -284,8 +286,8 @@ describe("CLI Skills", () => {
|
||||
const { stdout, stderr, exitCode } = await runQmd(["skill", "show"]);
|
||||
expect(stderr).toBe("");
|
||||
expect(exitCode).toBe(0);
|
||||
expect(stdout).toContain("# QMD - Quick Markdown Search");
|
||||
expect(stdout).toContain("## MCP: `query`");
|
||||
expect(stdout).toContain("# QMD - Query Markdown Documents");
|
||||
expect(stdout).toContain("## MCP Tool: `query`");
|
||||
expect(stdout).not.toContain("This file is a discovery stub");
|
||||
});
|
||||
|
||||
@ -300,8 +302,8 @@ describe("CLI Skills", () => {
|
||||
|
||||
const installedSkillDir = join(installDir, ".agents", "skills", "qmd");
|
||||
const installed = readFileSync(join(installedSkillDir, "SKILL.md"), "utf8");
|
||||
expect(installed).toContain("# QMD - Quick Markdown Search");
|
||||
expect(installed).toContain("## MCP: `query`");
|
||||
expect(installed).toContain("# QMD - Query Markdown Documents");
|
||||
expect(installed).toContain("## MCP Tool: `query`");
|
||||
expect(installed).not.toContain("This file is a discovery stub");
|
||||
expect(readFileSync(join(installedSkillDir, "references", "mcp-setup.md"), "utf8")).toContain("# QMD MCP Server Setup");
|
||||
});
|
||||
@ -370,7 +372,7 @@ describe("CLI Skill Commands", () => {
|
||||
expect(exitCode).toBe(0);
|
||||
|
||||
const skillDir = join(projectDir, ".agents", "skills", "qmd");
|
||||
expect(readFileSync(join(skillDir, "SKILL.md"), "utf-8")).toContain("# QMD - Quick Markdown Search");
|
||||
expect(readFileSync(join(skillDir, "SKILL.md"), "utf-8")).toContain("# QMD - Query Markdown Documents");
|
||||
expect(existsSync(join(projectDir, ".claude", "skills", "qmd"))).toBe(false);
|
||||
expect(stdout).toContain(`✓ Installed QMD skill to ${skillDir}`);
|
||||
expect(stdout).toContain("Tip: create a Claude symlink manually");
|
||||
@ -388,9 +390,9 @@ describe("CLI Skill Commands", () => {
|
||||
const skillDir = join(fakeHome, ".agents", "skills", "qmd");
|
||||
const claudeLink = join(fakeHome, ".claude", "skills", "qmd");
|
||||
|
||||
expect(readFileSync(join(skillDir, "SKILL.md"), "utf-8")).toContain("# QMD - Quick Markdown Search");
|
||||
expect(readFileSync(join(skillDir, "SKILL.md"), "utf-8")).toContain("# QMD - Query Markdown Documents");
|
||||
expect(lstatSync(claudeLink).isSymbolicLink()).toBe(true);
|
||||
expect(readFileSync(join(claudeLink, "SKILL.md"), "utf-8")).toContain("# QMD - Quick Markdown Search");
|
||||
expect(readFileSync(join(claudeLink, "SKILL.md"), "utf-8")).toContain("# QMD - Query Markdown Documents");
|
||||
expect(stdout).toContain(`✓ Installed QMD skill to ${skillDir}`);
|
||||
expect(stdout).toContain(`✓ Linked Claude skill at ${claudeLink}`);
|
||||
});
|
||||
@ -408,7 +410,7 @@ describe("CLI Skill Commands", () => {
|
||||
|
||||
const skillDir = join(fakeHome, ".agents", "skills", "qmd");
|
||||
expect(lstatSync(skillDir).isSymbolicLink()).toBe(false);
|
||||
expect(readFileSync(join(skillDir, "SKILL.md"), "utf-8")).toContain("# QMD - Quick Markdown Search");
|
||||
expect(readFileSync(join(skillDir, "SKILL.md"), "utf-8")).toContain("# QMD - Query Markdown Documents");
|
||||
expect(stdout).toContain(`✓ Claude already sees the skill via ${join(fakeHome, ".claude", "skills")}`);
|
||||
});
|
||||
|
||||
@ -470,10 +472,13 @@ describe("CLI Status Command", () => {
|
||||
expect(stdout).toContain("Collection");
|
||||
});
|
||||
|
||||
test("skips device probing by default", async () => {
|
||||
test("shows device mode without native probing by default", async () => {
|
||||
const { stdout, exitCode } = await runQmd(["status"]);
|
||||
expect(exitCode).toBe(0);
|
||||
expect(stdout).not.toContain("Device");
|
||||
expect(stdout).toContain("Device");
|
||||
expect(stdout).toContain("Mode:");
|
||||
expect(stdout).toContain("not probed");
|
||||
expect(stdout).toContain("QMD_STATUS_DEVICE_PROBE=1");
|
||||
});
|
||||
});
|
||||
|
||||
@ -1577,7 +1582,8 @@ describe("mcp http daemon", () => {
|
||||
port: number,
|
||||
options: { args?: string[]; env?: Record<string, string> } = {},
|
||||
): import("child_process").ChildProcess {
|
||||
const proc = spawn(tsxBin, [qmdScript, ...(options.args ?? []), "mcp", "--http", "--port", String(port)], {
|
||||
const runner = qmdRunnerArgs([...(options.args ?? []), "mcp", "--http", "--port", String(port)]);
|
||||
const proc = spawn(runner.command, runner.args, {
|
||||
cwd: fixturesDir,
|
||||
env: {
|
||||
...process.env,
|
||||
|
||||
@ -17,6 +17,10 @@ import {
|
||||
withNativeStdoutRedirectedToStderr,
|
||||
resolveParallelismOverride,
|
||||
resolveSafeParallelism,
|
||||
resolveEmbedModel,
|
||||
resolveGenerateModel,
|
||||
resolveRerankModel,
|
||||
resolveModels,
|
||||
withLLMSession,
|
||||
canUnloadLLM,
|
||||
SessionReleasedError,
|
||||
@ -24,6 +28,63 @@ import {
|
||||
type ILLMSession,
|
||||
} from "../src/llm.js";
|
||||
|
||||
describe("model name resolution", () => {
|
||||
function withModelEnv(env: Record<string, string | undefined>, fn: () => void): void {
|
||||
const previous = {
|
||||
QMD_EMBED_MODEL: process.env.QMD_EMBED_MODEL,
|
||||
QMD_GENERATE_MODEL: process.env.QMD_GENERATE_MODEL,
|
||||
QMD_RERANK_MODEL: process.env.QMD_RERANK_MODEL,
|
||||
};
|
||||
try {
|
||||
for (const [key, value] of Object.entries(env)) {
|
||||
if (value === undefined) delete process.env[key];
|
||||
else process.env[key] = value;
|
||||
}
|
||||
fn();
|
||||
} finally {
|
||||
for (const [key, value] of Object.entries(previous)) {
|
||||
if (value === undefined) delete process.env[key];
|
||||
else process.env[key] = value;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
test("all model roles resolve config hints before env fallbacks", () => {
|
||||
withModelEnv({
|
||||
QMD_EMBED_MODEL: "env-embed",
|
||||
QMD_GENERATE_MODEL: "env-generate",
|
||||
QMD_RERANK_MODEL: "env-rerank",
|
||||
}, () => {
|
||||
const config = {
|
||||
embed: "config-embed",
|
||||
generate: "config-generate",
|
||||
rerank: "config-rerank",
|
||||
};
|
||||
expect(resolveEmbedModel(config)).toBe("config-embed");
|
||||
expect(resolveGenerateModel(config)).toBe("config-generate");
|
||||
expect(resolveRerankModel(config)).toBe("config-rerank");
|
||||
expect(resolveModels(config)).toEqual(config);
|
||||
});
|
||||
});
|
||||
|
||||
test("LlamaCpp constructor uses the same resolver as status/embed/query helpers", () => {
|
||||
withModelEnv({
|
||||
QMD_EMBED_MODEL: "env-embed",
|
||||
QMD_GENERATE_MODEL: "env-generate",
|
||||
QMD_RERANK_MODEL: "env-rerank",
|
||||
}, () => {
|
||||
const llm = new LlamaCpp({
|
||||
embedModel: "config-embed",
|
||||
generateModel: "config-generate",
|
||||
rerankModel: "config-rerank",
|
||||
});
|
||||
expect(llm.embedModelName).toBe(resolveEmbedModel({ embed: "config-embed" }));
|
||||
expect(llm.generateModelName).toBe(resolveGenerateModel({ generate: "config-generate" }));
|
||||
expect(llm.rerankModelName).toBe(resolveRerankModel({ rerank: "config-rerank" }));
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
// =============================================================================
|
||||
// Singleton Tests (no model loading required)
|
||||
// =============================================================================
|
||||
@ -178,6 +239,40 @@ describe("native llama stdout containment", () => {
|
||||
else process.env.QMD_FORCE_CPU = prevForceCpu;
|
||||
}
|
||||
});
|
||||
|
||||
test("warns about CPU fallback only once per process", async () => {
|
||||
const prevGpu = process.env.QMD_LLAMA_GPU;
|
||||
const prevForceCpu = process.env.QMD_FORCE_CPU;
|
||||
process.env.QMD_LLAMA_GPU = "false";
|
||||
delete process.env.QMD_FORCE_CPU;
|
||||
|
||||
setNodeLlamaCppModuleForTest({
|
||||
LlamaLogLevel: { error: "error" },
|
||||
resolveModelFile: vi.fn(),
|
||||
LlamaChatSession: vi.fn() as any,
|
||||
getLlama: vi.fn(async () => ({ gpu: false, cpuMathCores: 4 }) as any),
|
||||
});
|
||||
|
||||
const stderrSpy = vi.spyOn(process.stderr, "write").mockReturnValue(true);
|
||||
try {
|
||||
const first = new LlamaCpp();
|
||||
const second = new LlamaCpp();
|
||||
|
||||
await (first as any).ensureLlama();
|
||||
await (second as any).ensureLlama();
|
||||
|
||||
const stderr = String(stderrSpy.mock.calls.map(call => call[0]).join(""));
|
||||
expect(stderr.match(/no GPU acceleration/g)?.length).toBe(1);
|
||||
expect(stderr).toContain("QMD_STATUS_DEVICE_PROBE=1 qmd status");
|
||||
} finally {
|
||||
stderrSpy.mockRestore();
|
||||
setNodeLlamaCppModuleForTest(null);
|
||||
if (prevGpu === undefined) delete process.env.QMD_LLAMA_GPU;
|
||||
else process.env.QMD_LLAMA_GPU = prevGpu;
|
||||
if (prevForceCpu === undefined) delete process.env.QMD_FORCE_CPU;
|
||||
else process.env.QMD_FORCE_CPU = prevForceCpu;
|
||||
}
|
||||
});
|
||||
});
|
||||
|
||||
describe("LLM context parallelism safety", () => {
|
||||
|
||||
@ -5,6 +5,17 @@ import { tmpdir } from "node:os";
|
||||
import { afterEach, describe, expect, test } from "vitest";
|
||||
import { findLocalConfigPath, getLocalDbPath } from "../src/collections.js";
|
||||
|
||||
function cliCommandArgs(command: string): { bin: string; args: string[] } {
|
||||
const cliPath = join(process.cwd(), "src/cli/qmd.ts");
|
||||
if (process.versions.bun) {
|
||||
return { bin: process.execPath, args: [cliPath, command] };
|
||||
}
|
||||
return {
|
||||
bin: process.execPath,
|
||||
args: [join(process.cwd(), "node_modules/tsx/dist/cli.mjs"), cliPath, command],
|
||||
};
|
||||
}
|
||||
|
||||
const roots: string[] = [];
|
||||
|
||||
function tempProject(): string {
|
||||
@ -56,12 +67,11 @@ describe("local .qmd project config", () => {
|
||||
mkdirSync(join(root, ".qmd"), { recursive: true });
|
||||
mkdirSync(join(root, "docs"), { recursive: true });
|
||||
writeFileSync(join(root, "docs", "a.md"), "# A\n\nLocal test document.\n");
|
||||
writeFileSync(join(root, ".qmd", "index.yaml"), `collections:\n docs:\n path: ${JSON.stringify(join(root, "docs"))}\n pattern: "**/*.md"\n context:\n /: Local test docs\n`);
|
||||
writeFileSync(join(root, ".qmd", "index.yaml"), `collections:\n docs:\n path: ${JSON.stringify(join(root, "docs"))}\n pattern: "**/*.md"\n context:\n /: Local test docs\nmodels:\n embed: local-embed-model\n rerank: local-rerank-model\n generate: local-generate-model\n`);
|
||||
|
||||
const home = join(root, "home");
|
||||
const tsxBin = join(process.cwd(), "node_modules", ".bin", "tsx");
|
||||
const runner = existsSync(tsxBin) ? tsxBin : "bun";
|
||||
const output = execFileSync(runner, [join(process.cwd(), "src/cli/qmd.ts"), "status"], {
|
||||
const { bin, args } = cliCommandArgs("status");
|
||||
const output = execFileSync(bin, args, {
|
||||
cwd: root,
|
||||
encoding: "utf-8",
|
||||
env: {
|
||||
@ -69,12 +79,19 @@ describe("local .qmd project config", () => {
|
||||
HOME: home,
|
||||
XDG_CONFIG_HOME: join(home, ".config"),
|
||||
XDG_CACHE_HOME: join(home, ".cache"),
|
||||
QMD_EMBED_MODEL: "env-embed-model",
|
||||
QMD_RERANK_MODEL: "env-rerank-model",
|
||||
QMD_GENERATE_MODEL: "env-generate-model",
|
||||
},
|
||||
});
|
||||
|
||||
const localIndex = join(root, ".qmd", "index.sqlite");
|
||||
expect(output).toContain(`Index: ${realpathSync(localIndex)}`);
|
||||
expect(output).toContain("docs (qmd://docs/)");
|
||||
expect(output).toContain("Embedding: local-embed-model");
|
||||
expect(output).toContain("Reranking: local-rerank-model");
|
||||
expect(output).toContain("Generation: local-generate-model");
|
||||
expect(output).not.toContain("env-embed-model");
|
||||
expect(existsSync(localIndex)).toBe(true);
|
||||
expect(existsSync(join(home, ".cache", "qmd", "index.sqlite"))).toBe(false);
|
||||
});
|
||||
|
||||
@ -186,7 +186,7 @@ function seedTestData(db: Database): void {
|
||||
for (let i = 0; i < 768; i++) embedding[i] = Math.random();
|
||||
|
||||
for (const doc of docs.slice(0, 4)) { // Skip large file for embeddings
|
||||
db.prepare(`INSERT INTO content_vectors (hash, seq, pos, model, embedded_at) VALUES (?, 0, 0, 'embeddinggemma', ?)`).run(doc.hash, now);
|
||||
db.prepare(`INSERT INTO content_vectors (hash, seq, pos, model, embedded_at) VALUES (?, 0, 0, ?, ?)`).run(doc.hash, DEFAULT_EMBED_MODEL, now);
|
||||
db.prepare(`INSERT INTO vectors_vec (hash_seq, embedding) VALUES (?, ?)`).run(`${doc.hash}_0`, embedding);
|
||||
}
|
||||
}
|
||||
|
||||
@ -20,8 +20,8 @@ describe("package grammar distribution", () => {
|
||||
expect(pkg.files, "published package files").toContain("scripts/check-package-grammars.mjs");
|
||||
expect(pkg.files, "published package files").toContain("skills/");
|
||||
const qmdSkill = readFileSync(new URL("skills/qmd/SKILL.md", root), "utf8");
|
||||
expect(qmdSkill).toContain("# QMD - Quick Markdown Search");
|
||||
expect(qmdSkill).toContain("## MCP: `query`");
|
||||
expect(qmdSkill).toContain("# QMD - Query Markdown Documents");
|
||||
expect(qmdSkill).toContain("## MCP Tool: `query`");
|
||||
expect(qmdSkill).not.toContain("This file is a discovery stub");
|
||||
|
||||
const scriptPath = join(root.pathname, "scripts", "check-package-grammars.mjs");
|
||||
|
||||
Loading…
Reference in New Issue
Block a user