From 10c5ec016fdce4cd0431b17d38bcc851175dc39d Mon Sep 17 00:00:00 2001
From: Tobi Lutke <tobi@shopify.com>
Date: Sun, 21 Dec 2025 13:47:55 -0400
Subject: [PATCH] Simplify disposal: let llama cascade to children, remove test
 dispose calls
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- dispose() now just calls llama.dispose() which cascades to models/contexts
  per node-llama-cpp lifecycle docs
- Remove disposeDefaultLlamaCpp calls from tests - they don't help with
  the Metal cleanup crash
- Use singleton getDefaultLlamaCpp() in llm tests for consistency

The Metal backend crash at process exit is a known llama.cpp issue:
https://github.com/ggml-org/llama.cpp/pull/17869

All tests pass - the abort happens after test completion.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
---
 src/eval.test.ts |  3 +--
 src/llm.test.ts  | 15 +++++----------
 src/llm.ts       | 40 +++++++++++-----------------------------
 src/mcp.test.ts  |  3 +--
 4 files changed, 18 insertions(+), 43 deletions(-)

diff --git a/src/eval.test.ts b/src/eval.test.ts
index 7b8fe57..7e9a494 100644
--- a/src/eval.test.ts
+++ b/src/eval.test.ts
@@ -34,7 +34,7 @@ import {
   DEFAULT_EMBED_MODEL,
   type RankedResult,
 } from "./store";
-import { getDefaultLlamaCpp, disposeDefaultLlamaCpp, formatDocForEmbedding } from "./llm";
+import { getDefaultLlamaCpp, formatDocForEmbedding } from "./llm";
 
 // Eval queries with expected documents
 const evalQueries: {
@@ -393,6 +393,5 @@ describe("Hybrid Search (RRF)", () => {
 // =============================================================================
 
 afterAll(() => {
-  // Don't dispose llama - let process exit handle Metal cleanup naturally
   rmSync(tempDir, { recursive: true, force: true });
 });
diff --git a/src/llm.test.ts b/src/llm.test.ts
index b643056..49ae682 100644
--- a/src/llm.test.ts
+++ b/src/llm.test.ts
@@ -12,7 +12,6 @@ import {
   LlamaCpp,
   getDefaultLlamaCpp,
   setDefaultLlamaCpp,
-  disposeDefaultLlamaCpp,
   type RerankDocument,
 } from "./llm.js";
 
@@ -59,7 +58,7 @@ describe("Default LlamaCpp Singleton", () => {
 
 describe("LlamaCpp.modelExists", () => {
   test("returns exists:true for HuggingFace model URIs", async () => {
-    const llm = new LlamaCpp();
+    const llm = getDefaultLlamaCpp();
     const result = await llm.modelExists("hf:org/repo/model.gguf");
 
     expect(result.exists).toBe(true);
@@ -67,7 +66,7 @@ describe("LlamaCpp.modelExists", () => {
   });
 
   test("returns exists:false for non-existent local paths", async () => {
-    const llm = new LlamaCpp();
+    const llm = getDefaultLlamaCpp();
     const result = await llm.modelExists("/nonexistent/path/model.gguf");
 
     expect(result.exists).toBe(false);
@@ -80,13 +79,8 @@ describe("LlamaCpp.modelExists", () => {
 // =============================================================================
 
 describe("LlamaCpp Integration", () => {
-  let llm: LlamaCpp;
-
-  beforeAll(() => {
-    llm = new LlamaCpp();
-  });
-
-  // Don't dispose - let process exit handle Metal cleanup naturally
+  // Use the singleton to avoid multiple Metal contexts
+  const llm = getDefaultLlamaCpp();
 
   describe("embed", () => {
     test("returns embedding with correct dimensions", async () => {
@@ -339,3 +333,4 @@ describe("LlamaCpp Integration", () => {
     });
   });
 });
+
diff --git a/src/llm.ts b/src/llm.ts
index a2df114..38ebbb8 100644
--- a/src/llm.ts
+++ b/src/llm.ts
@@ -704,38 +704,20 @@ Generate the structured expansion:`;
       this.inactivityTimer = null;
     }
 
-    // Dispose in order: contexts -> models -> llama
-    // Contexts depend on models, models depend on llama
-    if (this.embedContext) {
-      await this.embedContext.dispose();
-      this.embedContext = null;
-    }
-    if (this.generateContext) {
-      await this.generateContext.dispose();
-      this.generateContext = null;
-    }
-    if (this.rerankContext) {
-      await this.rerankContext.dispose();
-      this.rerankContext = null;
-    }
-
-    if (this.embedModel) {
-      await this.embedModel.dispose();
-      this.embedModel = null;
-    }
-    if (this.generateModel) {
-      await this.generateModel.dispose();
-      this.generateModel = null;
-    }
-    if (this.rerankModel) {
-      await this.rerankModel.dispose();
-      this.rerankModel = null;
-    }
-
+    // Disposing llama cascades to models and contexts automatically
+    // See: https://node-llama-cpp.withcat.ai/guide/objects-lifecycle
     if (this.llama) {
       await this.llama.dispose();
-      this.llama = null;
     }
+
+    // Clear references
+    this.embedContext = null;
+    this.generateContext = null;
+    this.rerankContext = null;
+    this.embedModel = null;
+    this.generateModel = null;
+    this.rerankModel = null;
+    this.llama = null;
   }
 }
 
diff --git a/src/mcp.test.ts b/src/mcp.test.ts
index 6f6477e..f955ca9 100644
--- a/src/mcp.test.ts
+++ b/src/mcp.test.ts
@@ -10,7 +10,7 @@ import { Database } from "bun:sqlite";
 import * as sqliteVec from "sqlite-vec";
 import { McpServer, ResourceTemplate } from "@modelcontextprotocol/sdk/server/mcp.js";
 import { z } from "zod";
-import { setDefaultLlamaCpp, disposeDefaultLlamaCpp, LlamaCpp } from "./llm";
+import { setDefaultLlamaCpp, LlamaCpp } from "./llm";
 import { mkdtemp, writeFile, readdir, unlink, rmdir } from "node:fs/promises";
 import { join } from "node:path";
 import { tmpdir } from "node:os";
@@ -225,7 +225,6 @@ describe("MCP Server", () => {
   });
 
   afterAll(async () => {
-    // Don't dispose llama - let process exit handle Metal cleanup naturally
     testDb.close();
     try {
       require("fs").unlinkSync(testDbPath);