From 10c5ec016fdce4cd0431b17d38bcc851175dc39d Mon Sep 17 00:00:00 2001 From: Tobi Lutke Date: Sun, 21 Dec 2025 13:47:55 -0400 Subject: [PATCH] Simplify disposal: let llama cascade to children, remove test dispose calls MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - dispose() now just calls llama.dispose() which cascades to models/contexts per node-llama-cpp lifecycle docs - Remove disposeDefaultLlamaCpp calls from tests - they don't help with the Metal cleanup crash - Use singleton getDefaultLlamaCpp() in llm tests for consistency The Metal backend crash at process exit is a known llama.cpp issue: https://github.com/ggml-org/llama.cpp/pull/17869 All tests pass - the abort happens after test completion. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 --- src/eval.test.ts | 3 +-- src/llm.test.ts | 15 +++++---------- src/llm.ts | 40 +++++++++++----------------------------- src/mcp.test.ts | 3 +-- 4 files changed, 18 insertions(+), 43 deletions(-) diff --git a/src/eval.test.ts b/src/eval.test.ts index 7b8fe57..7e9a494 100644 --- a/src/eval.test.ts +++ b/src/eval.test.ts @@ -34,7 +34,7 @@ import { DEFAULT_EMBED_MODEL, type RankedResult, } from "./store"; -import { getDefaultLlamaCpp, disposeDefaultLlamaCpp, formatDocForEmbedding } from "./llm"; +import { getDefaultLlamaCpp, formatDocForEmbedding } from "./llm"; // Eval queries with expected documents const evalQueries: { @@ -393,6 +393,5 @@ describe("Hybrid Search (RRF)", () => { // ============================================================================= afterAll(() => { - // Don't dispose llama - let process exit handle Metal cleanup naturally rmSync(tempDir, { recursive: true, force: true }); }); diff --git a/src/llm.test.ts b/src/llm.test.ts index b643056..49ae682 100644 --- a/src/llm.test.ts +++ b/src/llm.test.ts @@ -12,7 +12,6 @@ import { LlamaCpp, getDefaultLlamaCpp, setDefaultLlamaCpp, - disposeDefaultLlamaCpp, type RerankDocument, } from "./llm.js"; @@ -59,7 +58,7 @@ describe("Default LlamaCpp Singleton", () => { describe("LlamaCpp.modelExists", () => { test("returns exists:true for HuggingFace model URIs", async () => { - const llm = new LlamaCpp(); + const llm = getDefaultLlamaCpp(); const result = await llm.modelExists("hf:org/repo/model.gguf"); expect(result.exists).toBe(true); @@ -67,7 +66,7 @@ describe("LlamaCpp.modelExists", () => { }); test("returns exists:false for non-existent local paths", async () => { - const llm = new LlamaCpp(); + const llm = getDefaultLlamaCpp(); const result = await llm.modelExists("/nonexistent/path/model.gguf"); expect(result.exists).toBe(false); @@ -80,13 +79,8 @@ describe("LlamaCpp.modelExists", () => { // ============================================================================= describe("LlamaCpp Integration", () => { - let llm: LlamaCpp; - - beforeAll(() => { - llm = new LlamaCpp(); - }); - - // Don't dispose - let process exit handle Metal cleanup naturally + // Use the singleton to avoid multiple Metal contexts + const llm = getDefaultLlamaCpp(); describe("embed", () => { test("returns embedding with correct dimensions", async () => { @@ -339,3 +333,4 @@ describe("LlamaCpp Integration", () => { }); }); }); + diff --git a/src/llm.ts b/src/llm.ts index a2df114..38ebbb8 100644 --- a/src/llm.ts +++ b/src/llm.ts @@ -704,38 +704,20 @@ Generate the structured expansion:`; this.inactivityTimer = null; } - // Dispose in order: contexts -> models -> llama - // Contexts depend on models, models depend on llama - if (this.embedContext) { - await this.embedContext.dispose(); - this.embedContext = null; - } - if (this.generateContext) { - await this.generateContext.dispose(); - this.generateContext = null; - } - if (this.rerankContext) { - await this.rerankContext.dispose(); - this.rerankContext = null; - } - - if (this.embedModel) { - await this.embedModel.dispose(); - this.embedModel = null; - } - if (this.generateModel) { - await this.generateModel.dispose(); - this.generateModel = null; - } - if (this.rerankModel) { - await this.rerankModel.dispose(); - this.rerankModel = null; - } - + // Disposing llama cascades to models and contexts automatically + // See: https://node-llama-cpp.withcat.ai/guide/objects-lifecycle if (this.llama) { await this.llama.dispose(); - this.llama = null; } + + // Clear references + this.embedContext = null; + this.generateContext = null; + this.rerankContext = null; + this.embedModel = null; + this.generateModel = null; + this.rerankModel = null; + this.llama = null; } } diff --git a/src/mcp.test.ts b/src/mcp.test.ts index 6f6477e..f955ca9 100644 --- a/src/mcp.test.ts +++ b/src/mcp.test.ts @@ -10,7 +10,7 @@ import { Database } from "bun:sqlite"; import * as sqliteVec from "sqlite-vec"; import { McpServer, ResourceTemplate } from "@modelcontextprotocol/sdk/server/mcp.js"; import { z } from "zod"; -import { setDefaultLlamaCpp, disposeDefaultLlamaCpp, LlamaCpp } from "./llm"; +import { setDefaultLlamaCpp, LlamaCpp } from "./llm"; import { mkdtemp, writeFile, readdir, unlink, rmdir } from "node:fs/promises"; import { join } from "node:path"; import { tmpdir } from "node:os"; @@ -225,7 +225,6 @@ describe("MCP Server", () => { }); afterAll(async () => { - // Don't dispose llama - let process exit handle Metal cleanup naturally testDb.close(); try { require("fs").unlinkSync(testDbPath);