diff --git a/CHANGELOG.md b/CHANGELOG.md
index fedaa0f..fbfcde6 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -12,6 +12,9 @@
 - CLI: make `qmd status` skip native `node-llama-cpp` device probing by
   default so status stays safe on machines with broken or unsupported GPU
   drivers. Set `QMD_STATUS_DEVICE_PROBE=1` to opt in.
+- CLI: lazy-load `node-llama-cpp` so lightweight commands such as
+  `qmd status` do not import native ML dependencies or trigger llama.cpp
+  builds on ARM/no-GPU machines. #491
 
 ## [2.1.0] - 2026-04-05
 
diff --git a/src/llm.ts b/src/llm.ts
index 7cccc3f..7d2bbe0 100644
--- a/src/llm.ts
+++ b/src/llm.ts
@@ -4,16 +4,28 @@
  * Provides embeddings, text generation, and reranking using local GGUF models.
  */
 
-import {
-  getLlama,
-  resolveModelFile,
-  LlamaChatSession,
-  LlamaLogLevel,
-  type Llama,
-  type LlamaModel,
-  type LlamaEmbeddingContext,
-  type Token as LlamaToken,
+import type {
+  Llama,
+  LlamaModel,
+  LlamaEmbeddingContext,
+  Token as LlamaToken,
 } from "node-llama-cpp";
+
+/**
+ * Shape of the lazily loaded `node-llama-cpp` module.
+ *
+ * `typeof import(...)` is a type-only query: it is erased at compile time, so
+ * the package is still not loaded when this file is imported, while call
+ * sites stay checked against the library's own declarations.
+ */
+type NodeLlamaCppModule = typeof import("node-llama-cpp");
+
+let nodeLlamaCppImport: Promise<NodeLlamaCppModule> | null = null;
+/** Starts the `node-llama-cpp` import on first call; later calls reuse that same promise. */
+async function loadNodeLlamaCpp(): Promise<NodeLlamaCppModule> {
+  return (nodeLlamaCppImport ??= import("node-llama-cpp") as Promise<NodeLlamaCppModule>);
+}
+
 import { homedir } from "os";
 import { join } from "path";
 import { existsSync, mkdirSync, statSync, unlinkSync, readdirSync, readFileSync, writeFileSync, openSync, readSync, closeSync } from "fs";
@@ -344,6 +356,7 @@ export async function pullModels(
       }
     }
 
+    const { resolveModelFile } = await loadNodeLlamaCpp();
     const path = await resolveModelFile(model, cacheDir);
     validateGgufFile(path, model);
     const sizeBytes = existsSync(path) ? statSync(path).size : 0;
@@ -619,6 +632,7 @@ export class LlamaCpp implements LLM {
     if (!this.llama) {
       const gpuMode = resolveLlamaGpuMode();
 
+      const { getLlama, LlamaLogLevel } = await loadNodeLlamaCpp();
       const loadLlama = async (gpu: LlamaGpuMode) =>
         await getLlama({
           build: allowBuild ? "autoAttempt" : "never",
@@ -661,6 +675,7 @@ export class LlamaCpp implements LLM {
   private async resolveModel(modelUri: string): Promise<string> {
     this.ensureModelCacheDir();
     // resolveModelFile handles HF URIs and downloads to the cache dir
+    const { resolveModelFile } = await loadNodeLlamaCpp();
     const modelPath = await resolveModelFile(modelUri, this.modelCacheDir);
     validateGgufFile(modelPath, modelUri);
     return modelPath;
@@ -1079,6 +1094,7 @@ export class LlamaCpp implements LLM {
     // Create fresh context -> sequence -> session for each call
     const context = await this.generateModel!.createContext();
     const sequence = context.getSequence();
+    const { LlamaChatSession } = await loadNodeLlamaCpp();
     const session = new LlamaChatSession({ contextSequence: sequence });
 
     const maxTokens = options.maxTokens ?? 150;
@@ -1158,6 +1174,7 @@ export class LlamaCpp implements LLM {
       contextSize: this.expandContextSize,
     });
     const sequence = genContext.getSequence();
+    const { LlamaChatSession } = await loadNodeLlamaCpp();
     const session = new LlamaChatSession({ contextSequence: sequence });
 
     try {
diff --git a/test/cli-lazy-llm-import.test.ts b/test/cli-lazy-llm-import.test.ts
new file mode 100644
index 0000000..5df3a09
--- /dev/null
+++ b/test/cli-lazy-llm-import.test.ts
@@ -0,0 +1,31 @@
+import { describe, expect, test } from "vitest";
+import { readFileSync } from "fs";
+import { join } from "path";
+
+// Guards the lazy-loading contract of src/llm.ts: the native `node-llama-cpp`
+// package may only be pulled in through a dynamic `import()`.
+describe("LLM module loading", () => {
+  test("node-llama-cpp is only dynamically imported by LLM operations", () => {
+    // Inspects the source text; resolved from the working directory, so the
+    // suite is expected to run from the repository root.
+    const source = readFileSync(join(process.cwd(), "src", "llm.ts"), "utf-8");
+
+    // Reject any static value import of the package, including a bare
+    // side-effect `import "node-llama-cpp"`. `[^;]*?` keeps the match inside a
+    // single import statement, so a type-only import that follows other
+    // imports is not reported as a violation.
+    expect(source).not.toMatch(
+      /^\s*import\s+(?!type\b)(?:[^;]*?\bfrom\s+)?["']node-llama-cpp["']/m,
+    );
+    expect(source).toContain('import("node-llama-cpp")');
+  });
+
+  test("importing the CLI for lightweight commands succeeds", async () => {
+    // Smoke check: the CLI module evaluates and exposes its helper functions.
+    const mod = await import("../src/cli/qmd.ts");
+    expect(mod).toMatchObject({
+      buildEditorUri: expect.any(Function),
+      termLink: expect.any(Function),
+    });
+  });
+});