The libggml-metal static destructor asserts on a non-empty residency-set collection during __cxa_finalize_ranges, dumping a multi-kB GGML backtrace after successful output (ggml-org/llama.cpp#22593, one-line fix open as PR #22595). The assertion only trips when process.exit() skips Node's beforeExit hook — which is exactly the hook node-llama-cpp registers to auto-dispose its native handles. Primary fix: finishSuccessfulCliCommand now sets process.exitCode = 0 and returns instead of calling process.exit(0). The event loop drains, beforeExit fires, native Metal resources tear down in order, and the process exits cleanly even without the workaround env var. Defense-in-depth retained: bin/qmd and scripts/test-all.mjs still export GGML_METAL_NO_RESIDENCY=1 on darwin for error paths and tests that terminate via process.exit(). Opt back in with QMD_METAL_KEEP_RESIDENCY=1. Also: correct upstream issue refs (was #17869 → now #22593/#22595). Add scripts/repro-metal-rsets-crash.mjs minimal reproduction.
129 lines
5.4 KiB
TypeScript
129 lines
5.4 KiB
TypeScript
import { describe, expect, test } from "vitest";
|
|
import { finishSuccessfulCliCommand } from "../src/cli/qmd.ts";
|
|
import { LlamaCpp, isDarwinMetalMitigationActive } from "../src/llm.ts";
|
|
|
|
/**
 * Lifecycle tests for the CLI's "finish after successful output" path and
 * for the LlamaCpp teardown it relies on.
 *
 * The stream stand-ins used below only implement `write(chunk, cb)`; each
 * one reports the write to a recorder, invokes the callback (if any)
 * immediately, and returns `true`.
 */
describe("CLI successful-exit lifecycle", () => {
  type WriteCallback = (error?: Error | null) => void;

  /** Builds a write-only stream stub that forwards every chunk to `onWrite`. */
  const fakeStream = (onWrite: (chunk: string | Uint8Array) => void) => ({
    write: (chunk: string | Uint8Array, cb?: WriteCallback) => {
      onWrite(chunk);
      cb?.();
      return true;
    },
  });

  test("exits 0 after successful output when post-output LLM cleanup fails", async () => {
    const observedExitCodes: number[] = [];
    const stderrChunks: string[] = [];
    const stdoutChunks: string[] = [];

    await finishSuccessfulCliCommand({
      command: "query",
      format: "json",
      // Simulate the native teardown blowing up after output was produced.
      cleanup: async () => {
        throw new Error("ggml_metal_device_free abort simulation");
      },
      exit: (code) => {
        observedExitCodes.push(code);
      },
      stdout: fakeStream((chunk) => {
        stdoutChunks.push(String(chunk));
      }),
      stderr: fakeStream((chunk) => {
        stderrChunks.push(String(chunk));
      }),
    });

    // A failed cleanup must still end in exit code 0, with a warning on stderr
    // and exactly one (empty) flush write on stdout.
    expect(observedExitCodes).toEqual([0]);
    expect(stderrChunks.join("")).toContain("QMD Warning: cleanup after successful output failed");
    expect(stdoutChunks).toEqual([""]);
  });

  test("flushes stdout, runs cleanup, flushes stderr, then exits (when exit is provided)", async () => {
    // Passing an explicit `exit` keeps the legacy ordering and gives the test
    // an observable terminating step to record.
    const timeline: string[] = [];
    const mark = (label: string) => () => {
      timeline.push(label);
    };

    await finishSuccessfulCliCommand({
      command: "query",
      format: "json",
      cleanup: async () => {
        timeline.push("cleanup");
      },
      exit: (code) => {
        timeline.push(`exit:${code}`);
      },
      stdout: fakeStream(mark("stdout-flush")),
      stderr: fakeStream(mark("stderr-flush")),
    });

    expect(timeline).toEqual(["stdout-flush", "cleanup", "stderr-flush", "exit:0"]);
  });

  test("production path: sets process.exitCode=0 and returns instead of calling process.exit", async () => {
    // Without an `exit` option (as in the real CLI), finishSuccessfulCliCommand
    // is expected to assign process.exitCode and return. That lets Node emit
    // `beforeExit`, so node-llama-cpp's auto-dispose runs before the native
    // static destructors. process.exit() would skip `beforeExit`, which is the
    // condition that trips the libggml-metal assertion
    // (ggml-org/llama.cpp#22593) even with explicit dispose.
    const originalExitCode = process.exitCode;
    // Start from a non-zero value so the assertion proves the code was set.
    process.exitCode = 1;

    try {
      const timeline: string[] = [];
      const mark = (label: string) => () => {
        timeline.push(label);
      };

      await finishSuccessfulCliCommand({
        command: "query",
        format: "json",
        cleanup: async () => {
          timeline.push("cleanup");
        },
        stdout: fakeStream(mark("stdout-flush")),
        stderr: fakeStream(mark("stderr-flush")),
      });

      expect(timeline).toEqual(["stdout-flush", "cleanup", "stderr-flush"]);
      expect(process.exitCode).toBe(0);
    } finally {
      // Never leak the mutated exit code into the test runner.
      process.exitCode = originalExitCode;
    }
  });

  test("darwin Metal mitigation reflects launcher-exported env on darwin", () => {
    // bin/qmd is what actually applies the mitigation: it exports
    // GGML_METAL_NO_RESIDENCY=1 before Node loads the llama.cpp native binding.
    // The JS predicate only reports whether that variable is set and not
    // overridden by QMD_METAL_KEEP_RESIDENCY; off darwin it returns false.
    const onDarwin = process.platform === "darwin";
    const keepResidencyRequested = process.env.QMD_METAL_KEEP_RESIDENCY === "1";
    const residencyDisabled = process.env.GGML_METAL_NO_RESIDENCY === "1";

    expect(isDarwinMetalMitigationActive()).toBe(
      onDarwin && !keepResidencyRequested && residencyDisabled,
    );
  });

  test("QMD_METAL_KEEP_RESIDENCY=1 disables the mitigation even when GGML_METAL_NO_RESIDENCY is set", () => {
    const envKeys = ["QMD_METAL_KEEP_RESIDENCY", "GGML_METAL_NO_RESIDENCY"] as const;
    // Snapshot both variables so the finally block can put them back exactly.
    const snapshot = envKeys.map((key) => [key, process.env[key]] as const);

    try {
      for (const key of envKeys) {
        process.env[key] = "1";
      }
      expect(isDarwinMetalMitigationActive()).toBe(false);
    } finally {
      for (const [key, previous] of snapshot) {
        if (previous === undefined) {
          // The variable did not exist before the test; remove it again.
          delete process.env[key];
        } else {
          process.env[key] = previous;
        }
      }
    }
  });

  test("disposes Llama resources in dependency order before CLI exit", async () => {
    const disposalOrder: string[] = [];
    const llm = new LlamaCpp({ inactivityTimeoutMs: 0 });

    /** Creates a stub whose `dispose()` records `label` in call order. */
    function tracked(label: string) {
      return {
        dispose: async () => {
          disposalOrder.push(label);
        },
      };
    }

    // Overwrite the instance's internal handles with recording stubs.
    const stubbedHandles = {
      embedContexts: [tracked("embed-context")],
      rerankContexts: [tracked("rerank-context")],
      embedModel: tracked("embed-model"),
      generateModel: tracked("generate-model"),
      rerankModel: tracked("rerank-model"),
      llama: tracked("llama"),
    };
    Object.assign(llm as unknown as Record<string, unknown>, stubbedHandles);

    await llm.dispose();

    // Contexts first, then models, then the llama instance itself.
    expect(disposalOrder).toEqual([
      "embed-context",
      "rerank-context",
      "embed-model",
      "generate-model",
      "rerank-model",
      "llama",
    ]);
  });
});
|