From 60c75cb3327df40f930d23c12a4c98c4a0f79a97 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tobi=20L=C3=BCtke?=
Date: Sat, 16 May 2026 17:20:21 +0000
Subject: [PATCH 1/2] fix: avoid macOS Metal cleanup abort after JSON query

---
 CHANGELOG.md                    |  1 +
 src/cli/qmd.ts                  | 76 ++++++++++++++++++++++++++++++-
 src/llm.ts                      | 60 ++++++++++++++++++------
 test/cli-exit-lifecycle.test.ts | 81 +++++++++++++++++++++++++++++++++
 4 files changed, 202 insertions(+), 16 deletions(-)
 create mode 100644 test/cli-exit-lifecycle.test.ts

diff --git a/CHANGELOG.md b/CHANGELOG.md
index ac69601..39b811a 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -15,6 +15,7 @@
   flag. Previously it returned only the best matching chunk (~3.6KB max per
   result). Output payload for `--full` queries is now proportional to total
   document size.
+- macOS Metal: `qmd query --json` now flushes successful JSON output and uses a safe immediate-exit path on Darwin to avoid ggml Metal finalizer aborts; other commands still dispose LLM contexts/models before the llama runtime. #368
 - Embedding: `qmd embed -c <collection>` now scopes pending-doc selection to
   the requested collection instead of embedding global pending work.
   Scoped `--force` clears only collection-owned vectors, preserves shared
diff --git a/src/cli/qmd.ts b/src/cli/qmd.ts
index 40bc0dd..df73f36 100755
--- a/src/cli/qmd.ts
+++ b/src/cli/qmd.ts
@@ -207,6 +207,76 @@ const cursor = {
   show() { process.stderr.write('\x1b[?25h'); },
 };
 
+type CliLifecycleWritable = {
+  write(chunk: string | Uint8Array, callback?: (error?: Error | null) => void): boolean;
+};
+
+type FinishSuccessfulCliCommandOptions = {
+  command: string;
+  format?: OutputFormat;
+  cleanup?: () => Promise<void>;
+  exit?: (code: number) => void;
+  immediateExit?: (code: number) => void;
+  stdout?: CliLifecycleWritable;
+  stderr?: CliLifecycleWritable;
+  platform?: NodeJS.Platform;
+};
+
+async function flushWritable(stream: CliLifecycleWritable): Promise<void> {
+  await new Promise<void>((resolve) => {
+    stream.write("", () => resolve());
+  });
+}
+
+function shouldBypassNativeCleanup(options: FinishSuccessfulCliCommandOptions): boolean {
+  return (
+    (options.platform ?? process.platform) === "darwin" &&
+    options.command === "query" &&
+    options.format === "json" &&
+    process.env.QMD_DISABLE_DARWIN_QUERY_JSON_SAFE_EXIT !== "1"
+  );
+}
+
+function immediateProcessExit(code: number): void {
+  const processWithReallyExit = process as NodeJS.Process & { reallyExit?: (code?: number) => void };
+  if (typeof processWithReallyExit.reallyExit === "function") {
+    processWithReallyExit.reallyExit(code);
+    return;
+  }
+  process.exit(code);
+}
+
+/**
+ * Finish a successful CLI command after output has been flushed. On macOS JSON
+ * query runs, skip normal native teardown and use Node/Bun's immediate exit path:
+ * ggml Metal can abort from C++ finalizers after valid JSON has already been
+ * produced (#368). This wrapper is only reached after the command completed, so
+ * real query failures still exit through the normal error path before this runs.
+ */
+export async function finishSuccessfulCliCommand(options: FinishSuccessfulCliCommandOptions): Promise<void> {
+  const stderr = options.stderr ?? process.stderr;
+  const exit = options.exit ?? ((code: number) => process.exit(code));
+  const immediateExit = options.immediateExit ?? immediateProcessExit;
+
+  await flushWritable(options.stdout ?? process.stdout);
+
+  if (shouldBypassNativeCleanup(options)) {
+    await flushWritable(stderr);
+    immediateExit(0);
+    return;
+  }
+
+  try {
+    await (options.cleanup ?? disposeDefaultLlamaCpp)();
+  } catch (error) {
+    stderr.write(
+      `QMD Warning: cleanup after successful output failed (${error instanceof Error ? error.message : String(error)}); exiting 0 because command output completed.\n`
+    );
+  }
+  await flushWritable(stderr);
+  exit(0);
+}
+
 // Ensure cursor is restored on exit
 process.on('SIGINT', () => { cursor.show(); process.exit(130); });
 process.on('SIGTERM', () => { cursor.show(); process.exit(143); });
@@ -3415,8 +3485,10 @@ if (isMain) {
   }
 
   if (cli.command !== "mcp") {
-    await disposeDefaultLlamaCpp();
-    process.exit(0);
+    await finishSuccessfulCliCommand({
+      command: cli.command,
+      format: cli.opts.format,
+    });
   }
 
 } // end if (main module)
diff --git a/src/llm.ts b/src/llm.ts
index d469d36..f7ec2fd 100644
--- a/src/llm.ts
+++ b/src/llm.ts
@@ -497,6 +497,23 @@ export function resolveLlamaGpuMode(envValue = process.env.QMD_LLAMA_GPU): Llama
   return "auto";
 }
 
+async function disposeWithTimeout(resourceName: string, dispose: () => Promise<void>, timeoutMs = 1000): Promise<void> {
+  const timeoutPromise = new Promise<"timeout">((resolve) => {
+    setTimeout(() => resolve("timeout"), timeoutMs).unref();
+  });
+
+  try {
+    const result = await Promise.race([dispose(), timeoutPromise]);
+    if (result === "timeout") {
+      process.stderr.write(`QMD Warning: timed out disposing ${resourceName}; continuing shutdown.\n`);
+    }
+  } catch (error) {
+    process.stderr.write(
+      `QMD Warning: failed to dispose ${resourceName} (${error instanceof Error ? error.message : String(error)}); continuing shutdown.\n`
+    );
+  }
+}
+
 function resolveExpandContextSize(configValue?: number): number {
   if (configValue !== undefined) {
     if (!Number.isInteger(configValue) || configValue <= 0) {
@@ -1413,22 +1430,37 @@ export class LlamaCpp implements LLM {
       this.inactivityTimer = null;
     }
 
-    // Disposing llama cascades to models and contexts automatically
-    // See: https://node-llama-cpp.withcat.ai/guide/objects-lifecycle
-    // Note: llama.dispose() can hang indefinitely, so we use a timeout
-    if (this.llama) {
-      const disposePromise = this.llama.dispose();
-      const timeoutPromise = new Promise((resolve) => setTimeout(resolve, 1000));
-      await Promise.race([disposePromise, timeoutPromise]);
+    // Explicitly dispose in dependency order: contexts first, then models, then llama.
+    // Relying only on llama.dispose() leaves Metal resource sets alive until process
+    // finalization on Apple Silicon, where ggml_metal_device_free can abort after
+    // otherwise-successful CLI output (#368).
+    for (const ctx of this.embedContexts) {
+      await disposeWithTimeout("embedding context", () => ctx.dispose());
+    }
+    this.embedContexts = [];
+
+    for (const ctx of this.rerankContexts) {
+      await disposeWithTimeout("rerank context", () => ctx.dispose());
+    }
+    this.rerankContexts = [];
+
+    if (this.embedModel) {
+      await disposeWithTimeout("embedding model", () => this.embedModel!.dispose());
+      this.embedModel = null;
+    }
+    if (this.generateModel) {
+      await disposeWithTimeout("generation model", () => this.generateModel!.dispose());
+      this.generateModel = null;
+    }
+    if (this.rerankModel) {
+      await disposeWithTimeout("rerank model", () => this.rerankModel!.dispose());
+      this.rerankModel = null;
     }
 
-    // Clear references
-    this.embedContexts = [];
-    this.rerankContexts = [];
-    this.embedModel = null;
-    this.generateModel = null;
-    this.rerankModel = null;
-    this.llama = null;
+    if (this.llama) {
+      await disposeWithTimeout("llama runtime", () => this.llama!.dispose());
+      this.llama = null;
+    }
 
     // Clear any in-flight load/create promises
     this.embedModelLoadPromise = null;
diff --git a/test/cli-exit-lifecycle.test.ts b/test/cli-exit-lifecycle.test.ts
new file mode 100644
index 0000000..b9328ed
--- /dev/null
+++ b/test/cli-exit-lifecycle.test.ts
@@ -0,0 +1,81 @@
+import { describe, expect, test } from "vitest";
+import { finishSuccessfulCliCommand } from "../src/cli/qmd.ts";
+import { LlamaCpp } from "../src/llm.ts";
+
+describe("CLI successful-exit lifecycle", () => {
+  test("exits 0 after successful JSON output when post-output LLM cleanup fails", async () => {
+    const exitCodes: number[] = [];
+    const stderr: string[] = [];
+    const flushed: string[] = [];
+
+    await finishSuccessfulCliCommand({
+      command: "query",
+      format: "json",
+      cleanup: async () => {
+        throw new Error("ggml_metal_device_free abort simulation");
+      },
+      exit: (code) => {
+        exitCodes.push(code);
+      },
+      stdout: { write: (chunk: string | Uint8Array, cb?: (error?: Error | null) => void) => { flushed.push(String(chunk)); cb?.(); return true; } },
+      stderr: { write: (chunk: string | Uint8Array, cb?: (error?: Error | null) => void) => { stderr.push(String(chunk)); cb?.(); return true; } },
+    });
+
+    expect(exitCodes).toEqual([0]);
+    expect(stderr.join("")).toContain("QMD Warning: cleanup after successful output failed");
+    expect(flushed).toEqual([""]);
+  });
+
+  test("uses immediate exit for successful macOS JSON query after stdout flush", async () => {
+    const calls: string[] = [];
+
+    await finishSuccessfulCliCommand({
+      command: "query",
+      format: "json",
+      platform: "darwin",
+      cleanup: async () => {
+        calls.push("cleanup");
+      },
+      exit: (code) => {
+        calls.push(`exit:${code}`);
+      },
+      immediateExit: (code) => {
+        calls.push(`immediate-exit:${code}`);
+      },
+      stdout: { write: (_chunk: string | Uint8Array, cb?: (error?: Error | null) => void) => { calls.push("stdout-flush"); cb?.(); return true; } },
+      stderr: { write: (_chunk: string | Uint8Array, cb?: (error?: Error | null) => void) => { calls.push("stderr-flush"); cb?.(); return true; } },
+    });
+
+    expect(calls).toEqual(["stdout-flush", "stderr-flush", "immediate-exit:0"]);
+  });
+
+  test("disposes Llama resources in dependency order before CLI exit", async () => {
+    const calls: string[] = [];
+    const llm = new LlamaCpp({ inactivityTimeoutMs: 0 });
+    const disposable = (name: string) => ({
+      dispose: async () => {
+        calls.push(name);
+      },
+    });
+
+    Object.assign(llm as unknown as Record<string, unknown>, {
+      embedContexts: [disposable("embed-context")],
+      rerankContexts: [disposable("rerank-context")],
+      embedModel: disposable("embed-model"),
+      generateModel: disposable("generate-model"),
+      rerankModel: disposable("rerank-model"),
+      llama: disposable("llama"),
+    });
+
+    await llm.dispose();
+
+    expect(calls).toEqual([
+      "embed-context",
+      "rerank-context",
+      "embed-model",
+      "generate-model",
+      "rerank-model",
+      "llama",
+    ]);
+  });
+});
From b59ba6ab1ed35631b17ac914bfceeea588d67ef7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tobi=20L=C3=BCtke?=
Date: Sat, 16 May 2026 17:32:45 +0000
Subject: [PATCH 2/2] test: keep cleanup lifecycle regression portable

---
 test/cli-exit-lifecycle.test.ts | 1 +
 1 file changed, 1 insertion(+)

diff --git a/test/cli-exit-lifecycle.test.ts b/test/cli-exit-lifecycle.test.ts
index b9328ed..8558596 100644
--- a/test/cli-exit-lifecycle.test.ts
+++ b/test/cli-exit-lifecycle.test.ts
@@ -11,6 +11,7 @@ describe("CLI successful-exit lifecycle", () => {
     await finishSuccessfulCliCommand({
       command: "query",
       format: "json",
+      platform: "linux",
       cleanup: async () => {
         throw new Error("ggml_metal_device_free abort simulation");
       },