Simplify disposal: let llama cascade to children, remove test dispose calls

- dispose() now just calls llama.dispose() which cascades to models/contexts per node-llama-cpp lifecycle docs - Remove disposeDefaultLlamaCpp calls from tests - they don't help with the Metal cleanup crash - Use singleton getDefaultLlamaCpp() in llm tests for consistency The Metal backend crash at process exit is a known llama.cpp issue: https://github.com/ggml-org/llama.cpp/pull/17869 All tests pass - the abort happens after test completion. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-21 13:47:55 -04:00 · 2025-12-21 13:47:55 -04:00 · 10c5ec016f
commit 10c5ec016f
parent edc294db4f
4 changed files with 18 additions and 43 deletions
--- a/src/eval.test.ts
+++ b/src/eval.test.ts
@ -34,7 +34,7 @@ import {
  DEFAULT_EMBED_MODEL,
  type RankedResult,
 } from "./store";
-import { getDefaultLlamaCpp, disposeDefaultLlamaCpp, formatDocForEmbedding } from "./llm";
+import { getDefaultLlamaCpp, formatDocForEmbedding } from "./llm";

 // Eval queries with expected documents
 const evalQueries: {
@ -393,6 +393,5 @@ describe("Hybrid Search (RRF)", () => {
 // =============================================================================

 afterAll(() => {
-  // Don't dispose llama - let process exit handle Metal cleanup naturally
  rmSync(tempDir, { recursive: true, force: true });
 });
--- a/src/llm.test.ts
+++ b/src/llm.test.ts
@ -12,7 +12,6 @@ import {
  LlamaCpp,
  getDefaultLlamaCpp,
  setDefaultLlamaCpp,
-  disposeDefaultLlamaCpp,
  type RerankDocument,
 } from "./llm.js";

@ -59,7 +58,7 @@ describe("Default LlamaCpp Singleton", () => {

 describe("LlamaCpp.modelExists", () => {
  test("returns exists:true for HuggingFace model URIs", async () => {
-    const llm = new LlamaCpp();
+    const llm = getDefaultLlamaCpp();
    const result = await llm.modelExists("hf:org/repo/model.gguf");

    expect(result.exists).toBe(true);
@ -67,7 +66,7 @@ describe("LlamaCpp.modelExists", () => {
  });

  test("returns exists:false for non-existent local paths", async () => {
-    const llm = new LlamaCpp();
+    const llm = getDefaultLlamaCpp();
    const result = await llm.modelExists("/nonexistent/path/model.gguf");

    expect(result.exists).toBe(false);
@ -80,13 +79,8 @@ describe("LlamaCpp.modelExists", () => {
 // =============================================================================

 describe("LlamaCpp Integration", () => {
-  let llm: LlamaCpp;
-
-  beforeAll(() => {
-    llm = new LlamaCpp();
-  });
-
-  // Don't dispose - let process exit handle Metal cleanup naturally
+  // Use the singleton to avoid multiple Metal contexts
+  const llm = getDefaultLlamaCpp();

  describe("embed", () => {
    test("returns embedding with correct dimensions", async () => {
@ -339,3 +333,4 @@ describe("LlamaCpp Integration", () => {
    });
  });
 });
+
--- a/src/llm.ts
+++ b/src/llm.ts
@ -704,38 +704,20 @@ Generate the structured expansion:`;
      this.inactivityTimer = null;
    }

-    // Dispose in order: contexts -> models -> llama
-    // Contexts depend on models, models depend on llama
-    if (this.embedContext) {
-      await this.embedContext.dispose();
-      this.embedContext = null;
-    }
-    if (this.generateContext) {
-      await this.generateContext.dispose();
-      this.generateContext = null;
-    }
-    if (this.rerankContext) {
-      await this.rerankContext.dispose();
-      this.rerankContext = null;
-    }
-
-    if (this.embedModel) {
-      await this.embedModel.dispose();
-      this.embedModel = null;
-    }
-    if (this.generateModel) {
-      await this.generateModel.dispose();
-      this.generateModel = null;
-    }
-    if (this.rerankModel) {
-      await this.rerankModel.dispose();
-      this.rerankModel = null;
-    }
-
+    // Disposing llama cascades to models and contexts automatically
+    // See: https://node-llama-cpp.withcat.ai/guide/objects-lifecycle
    if (this.llama) {
      await this.llama.dispose();
-      this.llama = null;
    }
+
+    // Clear references
+    this.embedContext = null;
+    this.generateContext = null;
+    this.rerankContext = null;
+    this.embedModel = null;
+    this.generateModel = null;
+    this.rerankModel = null;
+    this.llama = null;
  }
 }

--- a/src/mcp.test.ts
+++ b/src/mcp.test.ts
@ -10,7 +10,7 @@ import { Database } from "bun:sqlite";
 import * as sqliteVec from "sqlite-vec";
 import { McpServer, ResourceTemplate } from "@modelcontextprotocol/sdk/server/mcp.js";
 import { z } from "zod";
-import { setDefaultLlamaCpp, disposeDefaultLlamaCpp, LlamaCpp } from "./llm";
+import { setDefaultLlamaCpp, LlamaCpp } from "./llm";
 import { mkdtemp, writeFile, readdir, unlink, rmdir } from "node:fs/promises";
 import { join } from "node:path";
 import { tmpdir } from "node:os";
@ -225,7 +225,6 @@ describe("MCP Server", () => {
  });

  afterAll(async () => {
-    // Don't dispose llama - let process exit handle Metal cleanup naturally
    testDb.close();
    try {
      require("fs").unlinkSync(testDbPath);