Simplify disposal: let llama cascade to children, remove test dispose calls
- dispose() now just calls llama.dispose() which cascades to models/contexts per node-llama-cpp lifecycle docs - Remove disposeDefaultLlamaCpp calls from tests - they don't help with the Metal cleanup crash - Use singleton getDefaultLlamaCpp() in llm tests for consistency The Metal backend crash at process exit is a known llama.cpp issue: https://github.com/ggml-org/llama.cpp/pull/17869 All tests pass - the abort happens after test completion. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
parent
edc294db4f
commit
10c5ec016f
@ -34,7 +34,7 @@ import {
|
||||
DEFAULT_EMBED_MODEL,
|
||||
type RankedResult,
|
||||
} from "./store";
|
||||
import { getDefaultLlamaCpp, disposeDefaultLlamaCpp, formatDocForEmbedding } from "./llm";
|
||||
import { getDefaultLlamaCpp, formatDocForEmbedding } from "./llm";
|
||||
|
||||
// Eval queries with expected documents
|
||||
const evalQueries: {
|
||||
@ -393,6 +393,5 @@ describe("Hybrid Search (RRF)", () => {
|
||||
// =============================================================================
|
||||
|
||||
afterAll(() => {
|
||||
// Don't dispose llama - let process exit handle Metal cleanup naturally
|
||||
rmSync(tempDir, { recursive: true, force: true });
|
||||
});
|
||||
|
||||
@ -12,7 +12,6 @@ import {
|
||||
LlamaCpp,
|
||||
getDefaultLlamaCpp,
|
||||
setDefaultLlamaCpp,
|
||||
disposeDefaultLlamaCpp,
|
||||
type RerankDocument,
|
||||
} from "./llm.js";
|
||||
|
||||
@ -59,7 +58,7 @@ describe("Default LlamaCpp Singleton", () => {
|
||||
|
||||
describe("LlamaCpp.modelExists", () => {
|
||||
test("returns exists:true for HuggingFace model URIs", async () => {
|
||||
const llm = new LlamaCpp();
|
||||
const llm = getDefaultLlamaCpp();
|
||||
const result = await llm.modelExists("hf:org/repo/model.gguf");
|
||||
|
||||
expect(result.exists).toBe(true);
|
||||
@ -67,7 +66,7 @@ describe("LlamaCpp.modelExists", () => {
|
||||
});
|
||||
|
||||
test("returns exists:false for non-existent local paths", async () => {
|
||||
const llm = new LlamaCpp();
|
||||
const llm = getDefaultLlamaCpp();
|
||||
const result = await llm.modelExists("/nonexistent/path/model.gguf");
|
||||
|
||||
expect(result.exists).toBe(false);
|
||||
@ -80,13 +79,8 @@ describe("LlamaCpp.modelExists", () => {
|
||||
// =============================================================================
|
||||
|
||||
describe("LlamaCpp Integration", () => {
|
||||
let llm: LlamaCpp;
|
||||
|
||||
beforeAll(() => {
|
||||
llm = new LlamaCpp();
|
||||
});
|
||||
|
||||
// Don't dispose - let process exit handle Metal cleanup naturally
|
||||
// Use the singleton to avoid multiple Metal contexts
|
||||
const llm = getDefaultLlamaCpp();
|
||||
|
||||
describe("embed", () => {
|
||||
test("returns embedding with correct dimensions", async () => {
|
||||
@ -339,3 +333,4 @@ describe("LlamaCpp Integration", () => {
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
|
||||
40
src/llm.ts
40
src/llm.ts
@ -704,38 +704,20 @@ Generate the structured expansion:`;
|
||||
this.inactivityTimer = null;
|
||||
}
|
||||
|
||||
// Dispose in order: contexts -> models -> llama
|
||||
// Contexts depend on models, models depend on llama
|
||||
if (this.embedContext) {
|
||||
await this.embedContext.dispose();
|
||||
this.embedContext = null;
|
||||
}
|
||||
if (this.generateContext) {
|
||||
await this.generateContext.dispose();
|
||||
this.generateContext = null;
|
||||
}
|
||||
if (this.rerankContext) {
|
||||
await this.rerankContext.dispose();
|
||||
this.rerankContext = null;
|
||||
}
|
||||
|
||||
if (this.embedModel) {
|
||||
await this.embedModel.dispose();
|
||||
this.embedModel = null;
|
||||
}
|
||||
if (this.generateModel) {
|
||||
await this.generateModel.dispose();
|
||||
this.generateModel = null;
|
||||
}
|
||||
if (this.rerankModel) {
|
||||
await this.rerankModel.dispose();
|
||||
this.rerankModel = null;
|
||||
}
|
||||
|
||||
// Disposing llama cascades to models and contexts automatically
|
||||
// See: https://node-llama-cpp.withcat.ai/guide/objects-lifecycle
|
||||
if (this.llama) {
|
||||
await this.llama.dispose();
|
||||
this.llama = null;
|
||||
}
|
||||
|
||||
// Clear references
|
||||
this.embedContext = null;
|
||||
this.generateContext = null;
|
||||
this.rerankContext = null;
|
||||
this.embedModel = null;
|
||||
this.generateModel = null;
|
||||
this.rerankModel = null;
|
||||
this.llama = null;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -10,7 +10,7 @@ import { Database } from "bun:sqlite";
|
||||
import * as sqliteVec from "sqlite-vec";
|
||||
import { McpServer, ResourceTemplate } from "@modelcontextprotocol/sdk/server/mcp.js";
|
||||
import { z } from "zod";
|
||||
import { setDefaultLlamaCpp, disposeDefaultLlamaCpp, LlamaCpp } from "./llm";
|
||||
import { setDefaultLlamaCpp, LlamaCpp } from "./llm";
|
||||
import { mkdtemp, writeFile, readdir, unlink, rmdir } from "node:fs/promises";
|
||||
import { join } from "node:path";
|
||||
import { tmpdir } from "node:os";
|
||||
@ -225,7 +225,6 @@ describe("MCP Server", () => {
|
||||
});
|
||||
|
||||
afterAll(async () => {
|
||||
// Don't dispose llama - let process exit handle Metal cleanup naturally
|
||||
testDb.close();
|
||||
try {
|
||||
require("fs").unlinkSync(testDbPath);
|
||||
|
||||
Loading…
Reference in New Issue
Block a user