Simplify disposal: let llama cascade to children, remove test dispose calls

- dispose() now just calls llama.dispose() which cascades to models/contexts
  per node-llama-cpp lifecycle docs
- Remove disposeDefaultLlamaCpp calls from tests - they don't help with
  the Metal cleanup crash
- Use singleton getDefaultLlamaCpp() in llm tests for consistency

The Metal backend crash at process exit is a known llama.cpp issue:
https://github.com/ggml-org/llama.cpp/pull/17869

All tests pass - the abort happens after test completion.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
Tobi Lutke 2025-12-21 13:47:55 -04:00
parent edc294db4f
commit 10c5ec016f
No known key found for this signature in database
4 changed files with 18 additions and 43 deletions

View File

@ -34,7 +34,7 @@ import {
DEFAULT_EMBED_MODEL,
type RankedResult,
} from "./store";
import { getDefaultLlamaCpp, disposeDefaultLlamaCpp, formatDocForEmbedding } from "./llm";
import { getDefaultLlamaCpp, formatDocForEmbedding } from "./llm";
// Eval queries with expected documents
const evalQueries: {
@ -393,6 +393,5 @@ describe("Hybrid Search (RRF)", () => {
// =============================================================================
afterAll(() => {
// Don't dispose llama - let process exit handle Metal cleanup naturally
rmSync(tempDir, { recursive: true, force: true });
});

View File

@ -12,7 +12,6 @@ import {
LlamaCpp,
getDefaultLlamaCpp,
setDefaultLlamaCpp,
disposeDefaultLlamaCpp,
type RerankDocument,
} from "./llm.js";
@ -59,7 +58,7 @@ describe("Default LlamaCpp Singleton", () => {
describe("LlamaCpp.modelExists", () => {
test("returns exists:true for HuggingFace model URIs", async () => {
const llm = new LlamaCpp();
const llm = getDefaultLlamaCpp();
const result = await llm.modelExists("hf:org/repo/model.gguf");
expect(result.exists).toBe(true);
@ -67,7 +66,7 @@ describe("LlamaCpp.modelExists", () => {
});
test("returns exists:false for non-existent local paths", async () => {
const llm = new LlamaCpp();
const llm = getDefaultLlamaCpp();
const result = await llm.modelExists("/nonexistent/path/model.gguf");
expect(result.exists).toBe(false);
@ -80,13 +79,8 @@ describe("LlamaCpp.modelExists", () => {
// =============================================================================
describe("LlamaCpp Integration", () => {
let llm: LlamaCpp;
beforeAll(() => {
llm = new LlamaCpp();
});
// Don't dispose - let process exit handle Metal cleanup naturally
// Use the singleton to avoid multiple Metal contexts
const llm = getDefaultLlamaCpp();
describe("embed", () => {
test("returns embedding with correct dimensions", async () => {
@ -339,3 +333,4 @@ describe("LlamaCpp Integration", () => {
});
});
});

View File

@ -704,38 +704,20 @@ Generate the structured expansion:`;
this.inactivityTimer = null;
}
// Dispose in order: contexts -> models -> llama
// Contexts depend on models, models depend on llama
if (this.embedContext) {
await this.embedContext.dispose();
this.embedContext = null;
}
if (this.generateContext) {
await this.generateContext.dispose();
this.generateContext = null;
}
if (this.rerankContext) {
await this.rerankContext.dispose();
this.rerankContext = null;
}
if (this.embedModel) {
await this.embedModel.dispose();
this.embedModel = null;
}
if (this.generateModel) {
await this.generateModel.dispose();
this.generateModel = null;
}
if (this.rerankModel) {
await this.rerankModel.dispose();
this.rerankModel = null;
}
// Disposing llama cascades to models and contexts automatically
// See: https://node-llama-cpp.withcat.ai/guide/objects-lifecycle
if (this.llama) {
await this.llama.dispose();
this.llama = null;
}
// Clear references
this.embedContext = null;
this.generateContext = null;
this.rerankContext = null;
this.embedModel = null;
this.generateModel = null;
this.rerankModel = null;
this.llama = null;
}
}

View File

@ -10,7 +10,7 @@ import { Database } from "bun:sqlite";
import * as sqliteVec from "sqlite-vec";
import { McpServer, ResourceTemplate } from "@modelcontextprotocol/sdk/server/mcp.js";
import { z } from "zod";
import { setDefaultLlamaCpp, disposeDefaultLlamaCpp, LlamaCpp } from "./llm";
import { setDefaultLlamaCpp, LlamaCpp } from "./llm";
import { mkdtemp, writeFile, readdir, unlink, rmdir } from "node:fs/promises";
import { join } from "node:path";
import { tmpdir } from "node:os";
@ -225,7 +225,6 @@ describe("MCP Server", () => {
});
afterAll(async () => {
// Don't dispose llama - let process exit handle Metal cleanup naturally
testDb.close();
try {
require("fs").unlinkSync(testDbPath);