qmd/llm.ts
Tobi Lutke bab46dacb2
Refactor: extract store, LLM, and formatter modules with comprehensive tests
- Extract store.ts: database operations, search, document retrieval
  - createStore() factory pattern for clean DB lifecycle management
  - Unified DocumentResult type with optional body loading
  - Snippet extraction with diff-style headers (@@ -line,count @@)

- Extract llm.ts: LLM abstraction layer with Ollama implementation
  - Clean interface for embed, generate, rerank operations
  - High-level rerankerLogprobsCheck with logprob-based scoring
  - Query expansion support

- Extract formatter.ts: output formatting utilities
  - Support for CLI, JSON, CSV, MD, XML formats
  - MCP-specific CSV formatting

- Extract mcp.ts: MCP server using createStore() pattern
  - Single DB connection for server lifetime (fixes closed DB errors)
  - URL-decode resource paths for proper space/special char handling

- Add comprehensive test suites (215 tests total)
  - store.test.ts: 96 tests covering all store operations
  - llm.test.ts: 60 tests for LLM abstraction
  - mcp.test.ts: 59 tests for MCP endpoints and resources
  - All tests use mocked Ollama (errors on unmocked calls)

- Add bun run inspector script for MCP debugging

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-09 16:33:32 -05:00

540 lines
14 KiB
TypeScript

/**
* llm.ts - LLM abstraction layer for QMD
*
* Provides a clean interface for LLM operations with an Ollama implementation.
* All raw fetch calls to LLM APIs should go through this module.
*/
// =============================================================================
// Types
// =============================================================================
/**
 * A single generated token paired with its log probability.
 *
 * Ollama.generate builds these from the logprob data of a generate response.
 */
export type TokenLogProb = {
// Text of the token as reported by the backend.
token: string;
// Log probability of the token; the reranker turns it into a probability with Math.exp.
logprob: number;
};
/**
 * Result of a successful embedding request.
 */
export type EmbeddingResult = {
// The embedding vector (Ollama.embed returns the first entry of the response's `embeddings`).
embedding: number[];
// Name of the model the request was sent to.
model: string;
};
/**
 * Result of a text generation request, with optional per-token logprobs.
 */
export type GenerateResult = {
// Generated text; Ollama.generate uses "" when the response carries no text.
text: string;
// Name of the model the request was sent to.
model: string;
// Per-token log probabilities; only set when the response included them.
logprobs?: TokenLogProb[];
// Completion flag from the backend; Ollama.generate defaults it to true when absent.
done: boolean;
};
/**
 * Relevance judgment for a single document.
 *
 * Field semantics below describe how the Ollama implementation fills them in.
 */
export type RerankDocumentResult = {
// The `file` of the RerankDocument this judgment belongs to.
file: string;
// True only when the model's answer starts with "yes".
relevant: boolean;
// Math.exp(logprob); 0 when generation failed.
confidence: number;
// Ranking score: 0.5 + 0.5 * confidence for "yes", 0.5 * (1 - confidence) for "no",
// 0.3 for any other answer, 0 when generation failed.
score: number;
// First logprob token if available, otherwise the lowercased, trimmed answer text ("" on failure).
rawToken: string;
// Log probability of the first generated token; 0 when unavailable.
logprob: number;
};
/**
 * Result of reranking a batch of documents.
 */
export type RerankResult = {
// One entry per input document; Ollama.rerank orders them by descending score.
results: RerankDocumentResult[];
// Name of the model used for reranking.
model: string;
};
/**
 * Availability information about a model.
 */
export type ModelInfo = {
// Model name that was queried.
name: string;
// Whether the backend reported the model; Ollama.modelExists returns false on any request failure.
exists: boolean;
// Taken from the `size` field of the backend response, when present.
size?: number;
// Taken from the `modified_at` field of the backend response, when present.
modifiedAt?: string;
};
/**
 * Options for an embedding request.
 */
export type EmbedOptions = {
// Model to embed with; the Ollama implementation falls back to its default embed model when this is empty.
model: string;
// When true the text is wrapped with formatQueryForEmbedding, otherwise with formatDocForEmbedding.
isQuery?: boolean;
// Document title passed to formatDocForEmbedding; not used when isQuery is true.
title?: string;
};
/**
 * Options for a text generation request.
 *
 * Defaults mentioned below are the ones applied by Ollama.generate.
 */
export type GenerateOptions = {
// Model to generate with; falls back to the default generate model when empty.
model: string;
// Sent as `num_predict`; 150 when omitted.
maxTokens?: number;
// Sampling temperature; 0 when omitted.
temperature?: number;
// When truthy, `logprobs: true` is added to the request.
logprobs?: boolean;
// When truthy, `raw: true` is added to the request.
raw?: boolean;
// Stop sequences, forwarded as `options.stop` when provided.
stop?: string[];
};
/**
 * Options for a rerank request.
 */
export type RerankOptions = {
// Model to rerank with; the Ollama implementation falls back to its default rerank model when empty.
model: string;
// Number of documents scored concurrently per batch; the Ollama implementation uses 5 when omitted or 0.
batchSize?: number;
};
/**
 * A document submitted for reranking.
 */
export type RerankDocument = {
// Identifier echoed back in the result; its last path segment (minus ".md") is the title fallback.
file: string;
// Document body; the Ollama reranker only sends the first 4000 characters to the model.
text: string;
// Optional title shown to the model; derived from `file` when missing.
title?: string;
};
// =============================================================================
// LLM Interface
// =============================================================================
/**
 * Backend-agnostic LLM contract - implement this for each backend.
 *
 * The low-level methods (embed, generate, modelExists, pullModel) map to
 * individual backend calls; the high-level methods build on them.
 */
export interface LLM {
/**
 * Compute an embedding for `text`.
 *
 * @returns the embedding, or `null` when none is available (the Ollama
 *          implementation returns `null` on any request failure).
 */
embed(text: string, options: EmbedOptions): Promise<EmbeddingResult | null>;
/**
 * Generate a text completion for `prompt`.
 *
 * @returns the completion, or `null` when generation failed.
 */
generate(prompt: string, options: GenerateOptions): Promise<GenerateResult | null>;
/**
 * Report whether `model` is available on the backend.
 */
modelExists(model: string): Promise<ModelInfo>;
/**
 * Pull (download) a model.
 *
 * @param onProgress optional progress callback; the Ollama implementation
 *                   calls it once with 100 after a successful pull.
 * @returns true when the pull succeeded.
 */
pullModel(model: string, onProgress?: (progress: number) => void): Promise<boolean>;
// ==========================================================================
// High-level abstractions
// ==========================================================================
/**
 * Expand a search query into multiple variations.
 *
 * The Ollama implementation returns the original query first, followed by
 * up to `numVariations` (default 2) generated alternatives.
 */
expandQuery(query: string, model: string, numVariations?: number): Promise<string[]>;
/**
 * Rerank documents by relevance to a query.
 *
 * Returns every document with a relevance score and a boolean judgment;
 * the Ollama implementation orders them by descending score.
 */
rerank(query: string, documents: RerankDocument[], options: RerankOptions): Promise<RerankResult>;
/**
 * Judge each document's relevance from the logprob of a yes/no answer.
 *
 * The Ollama implementation returns the judgments in input order, without
 * the sorting and model wrapper that rerank() adds.
 */
rerankerLogprobsCheck(query: string, documents: RerankDocument[], options: RerankOptions): Promise<RerankDocumentResult[]>;
}
// =============================================================================
// Ollama Implementation
// =============================================================================
/**
 * Construction options for the Ollama client. Every field is optional;
 * missing values are filled in by the Ollama constructor.
 */
export type OllamaConfig = {
// Base URL of the Ollama server; falls back to the OLLAMA_URL env var, then DEFAULT_OLLAMA_URL.
baseUrl?: string;
// Model used by embed() when the call does not name one.
defaultEmbedModel?: string;
// Model used by generate() / expandQuery() when the call does not name one.
defaultGenerateModel?: string;
// Model used by the rerank methods when the call does not name one.
defaultRerankModel?: string;
};
// Fallbacks applied by the Ollama constructor when neither the config
// (nor, for the URL, the OLLAMA_URL environment variable) supplies a value.
const DEFAULT_OLLAMA_URL = "http://localhost:11434";
const DEFAULT_EMBED_MODEL = "embeddinggemma";
const DEFAULT_GENERATE_MODEL = "qwen3:0.6b";
const DEFAULT_RERANK_MODEL = "ExpedientFalcon/qwen3-reranker:0.6b-q8_0";
/**
 * Wrap a search query in the prompt prefix used when embedding queries.
 *
 * @param query raw search query
 * @returns the query prefixed with the search-task marker
 */
export function formatQueryForEmbedding(query: string): string {
  const prefix = "task: search result | query: ";
  return prefix + query;
}
/**
 * Wrap document text (and its title) in the prompt layout used when
 * embedding documents.
 *
 * @param text  document body
 * @param title optional title; a missing or empty title is written as "none"
 * @returns the formatted `title: … | text: …` string
 */
export function formatDocForEmbedding(text: string, title?: string): string {
  const label = title ? title : "none";
  return ["title: " + label, "text: " + text].join(" | ");
}
/**
 * Ollama-backed implementation of the LLM interface.
 *
 * Every method talks to the Ollama HTTP API under `baseUrl` using `fetch`.
 * Transport errors and non-OK responses are never thrown: the core methods
 * report them as `null` / `false` / `exists: false`, and the high-level
 * helpers degrade to a neutral result (the bare query, or a zero score).
 */
export class Ollama implements LLM {
  /** Batch size used by the reranker when the caller supplies no usable value. */
  private static readonly DEFAULT_RERANK_BATCH_SIZE = 5;

  private baseUrl: string;
  private defaultEmbedModel: string;
  private defaultGenerateModel: string;
  private defaultRerankModel: string;

  /**
   * @param config optional overrides; the base URL falls back to the
   *               OLLAMA_URL environment variable and then to the built-in
   *               default, the model names fall back to the built-in defaults.
   */
  constructor(config: OllamaConfig = {}) {
    this.baseUrl = config.baseUrl || process.env.OLLAMA_URL || DEFAULT_OLLAMA_URL;
    this.defaultEmbedModel = config.defaultEmbedModel || DEFAULT_EMBED_MODEL;
    this.defaultGenerateModel = config.defaultGenerateModel || DEFAULT_GENERATE_MODEL;
    this.defaultRerankModel = config.defaultRerankModel || DEFAULT_RERANK_MODEL;
  }

  /**
   * Get the base URL for this Ollama instance
   */
  getBaseUrl(): string {
    return this.baseUrl;
  }

  // ==========================================================================
  // Core API methods
  // ==========================================================================

  /**
   * Embed `text` via POST /api/embed.
   *
   * The text is wrapped with the query or document prompt format first,
   * depending on `options.isQuery`.
   *
   * @returns the first embedding of the response, or `null` when the request
   *          fails, is not OK, or carries no embedding.
   */
  async embed(text: string, options: EmbedOptions): Promise<EmbeddingResult | null> {
    const model = options.model || this.defaultEmbedModel;
    const formatted = options.isQuery
      ? formatQueryForEmbedding(text)
      : formatDocForEmbedding(text, options.title);

    try {
      const response = await fetch(`${this.baseUrl}/api/embed`, {
        method: "POST",
        headers: { "Content-Type": "application/json" },
        body: JSON.stringify({ model, input: formatted }),
      });
      if (!response.ok) {
        return null;
      }

      const data = (await response.json()) as { embeddings?: number[][] };
      const embedding = data.embeddings?.[0];
      if (!embedding) {
        return null;
      }
      return { embedding, model };
    } catch {
      // Best effort: callers treat null as "no embedding available".
      return null;
    }
  }

  /**
   * Generate a non-streaming completion via POST /api/generate.
   *
   * Defaults: `num_predict` 150 and `temperature` 0. `logprobs`, `raw` and
   * `stop` are only sent when requested.
   *
   * @returns the completion, or `null` when the request fails or is not OK.
   */
  async generate(prompt: string, options: GenerateOptions): Promise<GenerateResult | null> {
    const model = options.model || this.defaultGenerateModel;

    const modelOptions: Record<string, unknown> = {
      num_predict: options.maxTokens ?? 150,
      temperature: options.temperature ?? 0,
    };
    if (options.stop) {
      modelOptions.stop = options.stop;
    }

    const requestBody: Record<string, unknown> = {
      model,
      prompt,
      stream: false,
      options: modelOptions,
    };
    if (options.logprobs) {
      requestBody.logprobs = true;
    }
    if (options.raw) {
      requestBody.raw = true;
    }

    try {
      const response = await fetch(`${this.baseUrl}/api/generate`, {
        method: "POST",
        headers: { "Content-Type": "application/json" },
        body: JSON.stringify(requestBody),
      });
      if (!response.ok) {
        return null;
      }

      const data = (await response.json()) as {
        response?: string;
        done?: boolean;
        logprobs?: unknown;
      };

      return {
        text: data.response || "",
        model,
        logprobs: Ollama.parseLogprobs(data.logprobs),
        done: data.done ?? true,
      };
    } catch {
      // Best effort: callers treat null as "generation failed".
      return null;
    }
  }

  /**
   * Check whether `model` is known to the server via POST /api/show.
   *
   * @returns `exists: false` when the request fails or is not OK; otherwise
   *          `exists: true` plus the `size` / `modified_at` fields of the
   *          response when present.
   */
  async modelExists(model: string): Promise<ModelInfo> {
    try {
      const response = await fetch(`${this.baseUrl}/api/show`, {
        method: "POST",
        headers: { "Content-Type": "application/json" },
        body: JSON.stringify({ name: model }),
      });
      if (!response.ok) {
        return { name: model, exists: false };
      }

      const data = (await response.json()) as {
        size?: number;
        modified_at?: string;
      };
      return {
        name: model,
        exists: true,
        size: data.size,
        modifiedAt: data.modified_at,
      };
    } catch {
      return { name: model, exists: false };
    }
  }

  /**
   * Pull `model` via POST /api/pull (non-streaming).
   *
   * @param onProgress called once with 100 after the pull response was read.
   * @returns true on success, false when the request fails or is not OK.
   */
  async pullModel(model: string, onProgress?: (progress: number) => void): Promise<boolean> {
    try {
      const response = await fetch(`${this.baseUrl}/api/pull`, {
        method: "POST",
        headers: { "Content-Type": "application/json" },
        body: JSON.stringify({ name: model, stream: false }),
      });
      if (!response.ok) {
        return false;
      }
      // For non-streaming, we just wait for completion
      await response.json();
      onProgress?.(100);
      return true;
    } catch {
      return false;
    }
  }

  // ==========================================================================
  // High-level abstractions
  // ==========================================================================

  /**
   * Ask the model for alternative phrasings of a search query.
   *
   * @param query         the original query
   * @param model         model to use; defaults to the default generate model
   * @param numVariations number of alternatives to request (default 2)
   * @returns the original query followed by at most `numVariations`
   *          alternatives; just `[query]` when generation fails or yields
   *          nothing usable.
   */
  async expandQuery(query: string, model?: string, numVariations: number = 2): Promise<string[]> {
    const useModel = model || this.defaultGenerateModel;
    const prompt = `You are a search query expander. Given a search query, generate ${numVariations} alternative queries that would help find relevant documents.
Rules:
- Use synonyms and related terminology (e.g., "craft" → "craftsmanship", "quality", "excellence")
- Rephrase to capture different angles (e.g., "engineering culture" → "technical excellence", "developer practices")
- Keep proper nouns and named concepts exactly as written (e.g., "Build a Business", "Stripe", "Shopify")
- Each variation should be 3-8 words, natural search terms
- Do NOT just append words like "search" or "find" or "documents"
Query: "${query}"
Output exactly ${numVariations} variations, one per line, no numbering or bullets:`;

    const result = await this.generate(prompt, {
      model: useModel,
      maxTokens: 150,
      temperature: 0,
    });
    if (!result) {
      return [query];
    }

    // Drop reasoning blocks. The `|$` alternative also removes a block that was
    // cut off by the token limit before `</think>` was emitted; otherwise the
    // model's reasoning lines would be returned as query variations.
    const cleanText = result.text.replace(/<think>[\s\S]*?(?:<\/think>|$)/g, "").trim();
    const lines = cleanText
      .split("\n")
      .map((l) => l.trim())
      .filter((l) => l.length > 2 && l.length < 100 && !l.startsWith("<"));

    // Clamp at zero: a negative end index would make slice() count from the
    // end of the list and return variations nobody asked for.
    return [query, ...lines.slice(0, Math.max(0, numVariations))];
  }

  /**
   * Score every document against the query and order by descending score.
   */
  async rerank(
    query: string,
    documents: RerankDocument[],
    options: RerankOptions
  ): Promise<RerankResult> {
    const results = await this.rerankerLogprobsCheck(query, documents, options);
    return {
      results: results.sort((a, b) => b.score - a.score),
      model: options.model || this.defaultRerankModel,
    };
  }

  /**
   * Judge each document with a one-token yes/no generation.
   *
   * Documents are processed in batches of `options.batchSize`; the requests
   * of one batch run concurrently, batches run one after another.
   *
   * @returns one result per document, in input order.
   */
  async rerankerLogprobsCheck(
    query: string,
    documents: RerankDocument[],
    options: RerankOptions
  ): Promise<RerankDocumentResult[]> {
    const model = options.model || this.defaultRerankModel;
    const batchSize = Ollama.normalizeBatchSize(options.batchSize);
    const results: RerankDocumentResult[] = [];

    // Process in batches
    for (let i = 0; i < documents.length; i += batchSize) {
      const batch = documents.slice(i, i + batchSize);
      const batchResults = await Promise.all(
        batch.map((doc) => this.rerankSingle(query, doc, model))
      );
      results.push(...batchResults);
    }
    return results;
  }

  /**
   * Rerank a single document - internal helper.
   *
   * Builds the raw chat prompt, requests a single token with logprobs and
   * converts the answer; a failed generation yields an all-zero result.
   */
  private async rerankSingle(
    query: string,
    doc: RerankDocument,
    model: string
  ): Promise<RerankDocumentResult> {
    const systemPrompt = `Judge whether the Document meets the requirements based on the Query and the Instruct provided. Note that the answer can only be "yes" or "no".`;
    const instruct = `Given a search query, determine if the following document is relevant to the query. Consider both direct matches and related concepts.`;
    // Title fallback: last path segment of the file without a ".md" suffix.
    const docTitle = doc.title || doc.file.split("/").pop()?.replace(/\.md$/, "") || doc.file;
    // Only the first 4000 characters are shown to the model.
    const docPreview = doc.text.length > 4000 ? doc.text.substring(0, 4000) + "..." : doc.text;

    // Qwen3-reranker prompt format with empty think tags
    const prompt = `<|im_start|>system
${systemPrompt}<|im_end|>
<|im_start|>user
<Instruct>: ${instruct}
<Query>: ${query}
<Document Title>: ${docTitle}
<Document>: ${docPreview}<|im_end|>
<|im_start|>assistant
<think>
</think>
`;

    const result = await this.generate(prompt, {
      model,
      maxTokens: 1,
      temperature: 0,
      logprobs: true,
      raw: true,
    });
    if (!result) {
      return {
        file: doc.file,
        relevant: false,
        confidence: 0,
        score: 0,
        rawToken: "",
        logprob: 0,
      };
    }
    return this.parseRerankResponse(doc.file, result);
  }

  /**
   * Parse rerank response into structured result.
   *
   * `confidence` is exp(logprob) of the first token (logprob 0 when none was
   * returned). "yes" maps to a score in [0.5, 1], "no" to a score in
   * [0, 0.5], anything else to a fixed 0.3.
   */
  private parseRerankResponse(file: string, result: GenerateResult): RerankDocumentResult {
    const token = result.text.toLowerCase().trim();
    const first = result.logprobs?.[0];
    const logprob = first?.logprob ?? 0;
    const confidence = Math.exp(logprob);

    let relevant: boolean;
    let score: number;
    if (token.startsWith("yes")) {
      relevant = true;
      // Score: 0.5 base + up to 0.5 from confidence
      score = 0.5 + 0.5 * confidence;
    } else if (token.startsWith("no")) {
      relevant = false;
      // Score: up to 0.5 based on uncertainty (1 - confidence)
      score = 0.5 * (1 - confidence);
    } else {
      // Unknown token - neutral score
      relevant = false;
      score = 0.3;
    }

    return {
      file,
      relevant,
      confidence,
      score,
      rawToken: first?.token ?? token,
      logprob,
    };
  }

  /**
   * Turn a caller-supplied batch size into a usable loop step.
   *
   * Missing, NaN, zero, fractional-below-one and negative values fall back to
   * the default; a negative step would otherwise keep the batching loop from
   * ever terminating. Other fractional values are rounded down.
   */
  private static normalizeBatchSize(requested: number | undefined): number {
    if (requested === undefined || Number.isNaN(requested)) {
      return Ollama.DEFAULT_RERANK_BATCH_SIZE;
    }
    const whole = Math.floor(requested);
    return whole >= 1 ? whole : Ollama.DEFAULT_RERANK_BATCH_SIZE;
  }

  /**
   * Convert the `logprobs` field of a generate response into TokenLogProb[].
   *
   * Two shapes are accepted:
   *  - parallel arrays `{ tokens: string[], token_logprobs: number[] }`,
   *    paired index by index up to the shorter array;
   *  - an array of `{ token, logprob }` objects.
   * NOTE(review): the per-token array form is accepted because current Ollama
   * API docs appear to describe logprobs that way - confirm against the
   * server version in use.
   *
   * Entries whose token is not a string or whose logprob is not a number are
   * skipped. Returns `undefined` when the field has neither shape.
   */
  private static parseLogprobs(raw: unknown): TokenLogProb[] | undefined {
    if (typeof raw !== "object" || raw === null) {
      return undefined;
    }

    const parsed: TokenLogProb[] = [];
    const pushIfValid = (token: unknown, logprob: unknown): void => {
      if (typeof token === "string" && typeof logprob === "number") {
        parsed.push({ token, logprob });
      }
    };

    if (Array.isArray(raw)) {
      for (const entry of raw as unknown[]) {
        if (typeof entry === "object" && entry !== null) {
          const { token, logprob } = entry as { token?: unknown; logprob?: unknown };
          pushIfValid(token, logprob);
        }
      }
      return parsed;
    }

    const { tokens, token_logprobs: values } = raw as {
      tokens?: unknown;
      token_logprobs?: unknown;
    };
    if (!Array.isArray(tokens) || !Array.isArray(values)) {
      return undefined;
    }
    const count = Math.min(tokens.length, values.length);
    for (let i = 0; i < count; i++) {
      pushIfValid(tokens[i], values[i]);
    }
    return parsed;
  }
}
// =============================================================================
// Singleton for default Ollama instance
// =============================================================================
// Module-level shared instance: null until getDefaultOllama() creates one
// or setDefaultOllama() installs one.
let defaultOllama: Ollama | null = null;
/**
 * Return the shared Ollama client, constructing it with default settings on
 * first use. Later calls return the same instance until it is replaced via
 * setDefaultOllama().
 */
export function getDefaultOllama(): Ollama {
  const instance = defaultOllama ?? new Ollama();
  defaultOllama = instance;
  return instance;
}
/**
 * Replace the shared Ollama instance (useful for testing).
 *
 * @param ollama instance to install; passing `null` clears the current one,
 *               so the next getDefaultOllama() call creates a fresh default.
 */
export function setDefaultOllama(ollama: Ollama | null): void {
defaultOllama = ollama;
}