Merge pull request #530 from kuishou68/fix-status-no-build-probe
This commit is contained in:
commit
171e9e3e65
@ -461,10 +461,10 @@ async function showStatus(): Promise<void> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Device / GPU info
|
// Device / GPU info
|
||||||
|
console.log(`\n${c.bold}Device${c.reset}`);
|
||||||
try {
|
try {
|
||||||
const llm = getDefaultLlamaCpp();
|
const llm = getDefaultLlamaCpp();
|
||||||
const device = await llm.getDeviceInfo();
|
const device = await llm.getDeviceInfo({ allowBuild: false });
|
||||||
console.log(`\n${c.bold}Device${c.reset}`);
|
|
||||||
if (device.gpu) {
|
if (device.gpu) {
|
||||||
console.log(` GPU: ${c.green}${device.gpu}${c.reset} (offloading: ${device.gpuOffloading ? 'yes' : 'no'})`);
|
console.log(` GPU: ${c.green}${device.gpu}${c.reset} (offloading: ${device.gpuOffloading ? 'yes' : 'no'})`);
|
||||||
if (device.gpuDevices.length > 0) {
|
if (device.gpuDevices.length > 0) {
|
||||||
@ -486,8 +486,11 @@ async function showStatus(): Promise<void> {
|
|||||||
console.log(` ${c.dim}Tip: Install CUDA, Vulkan, or Metal support for GPU acceleration.${c.reset}`);
|
console.log(` ${c.dim}Tip: Install CUDA, Vulkan, or Metal support for GPU acceleration.${c.reset}`);
|
||||||
}
|
}
|
||||||
console.log(` CPU: ${device.cpuCores} math cores`);
|
console.log(` CPU: ${device.cpuCores} math cores`);
|
||||||
} catch {
|
} catch (error) {
|
||||||
// Don't fail status if LLM init fails
|
console.log(` Status: ${c.dim}skipped${c.reset} (status probe does not build llama.cpp backends)`);
|
||||||
|
if (error instanceof Error && error.message) {
|
||||||
|
console.log(` ${c.dim}${error.message}${c.reset}`);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Tips section
|
// Tips section
|
||||||
|
|||||||
@ -562,15 +562,16 @@ export class LlamaCpp implements LLM {
|
|||||||
/**
|
/**
|
||||||
* Initialize the llama instance (lazy)
|
* Initialize the llama instance (lazy)
|
||||||
*/
|
*/
|
||||||
private async ensureLlama(): Promise<Llama> {
|
private async ensureLlama(allowBuild = true): Promise<Llama> {
|
||||||
if (!this.llama) {
|
if (!this.llama) {
|
||||||
const gpuMode = resolveLlamaGpuMode();
|
const gpuMode = resolveLlamaGpuMode();
|
||||||
|
|
||||||
const loadLlama = async (gpu: LlamaGpuMode) =>
|
const loadLlama = async (gpu: LlamaGpuMode) =>
|
||||||
await getLlama({
|
await getLlama({
|
||||||
build: "autoAttempt",
|
build: allowBuild ? "autoAttempt" : "never",
|
||||||
logLevel: LlamaLogLevel.error,
|
logLevel: LlamaLogLevel.error,
|
||||||
gpu,
|
gpu,
|
||||||
|
skipDownload: !allowBuild,
|
||||||
});
|
});
|
||||||
|
|
||||||
let llama: Llama;
|
let llama: Llama;
|
||||||
@ -1254,14 +1255,14 @@ export class LlamaCpp implements LLM {
|
|||||||
* Get device/GPU info for status display.
|
* Get device/GPU info for status display.
|
||||||
* Initializes llama if not already done.
|
* Initializes llama if not already done.
|
||||||
*/
|
*/
|
||||||
async getDeviceInfo(): Promise<{
|
async getDeviceInfo(options: { allowBuild?: boolean } = {}): Promise<{
|
||||||
gpu: string | false;
|
gpu: string | false;
|
||||||
gpuOffloading: boolean;
|
gpuOffloading: boolean;
|
||||||
gpuDevices: string[];
|
gpuDevices: string[];
|
||||||
vram?: { total: number; used: number; free: number };
|
vram?: { total: number; used: number; free: number };
|
||||||
cpuCores: number;
|
cpuCores: number;
|
||||||
}> {
|
}> {
|
||||||
const llama = await this.ensureLlama();
|
const llama = await this.ensureLlama(options.allowBuild ?? true);
|
||||||
const gpuDevices = await llama.getGpuDeviceNames();
|
const gpuDevices = await llama.getGpuDeviceNames();
|
||||||
let vram: { total: number; used: number; free: number } | undefined;
|
let vram: { total: number; used: number; free: number } | undefined;
|
||||||
if (llama.gpu) {
|
if (llama.gpu) {
|
||||||
|
|||||||
@ -226,6 +226,32 @@ describe("LlamaCpp rerank deduping", () => {
|
|||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
|
describe("LlamaCpp.getDeviceInfo", () => {
|
||||||
|
test("can skip build attempts for status probes", async () => {
|
||||||
|
const llm = new LlamaCpp({}) as any;
|
||||||
|
const fakeLlama = {
|
||||||
|
gpu: "metal",
|
||||||
|
supportsGpuOffloading: true,
|
||||||
|
cpuMathCores: 8,
|
||||||
|
getGpuDeviceNames: vi.fn().mockResolvedValue(["Apple GPU"]),
|
||||||
|
getVramState: vi.fn().mockResolvedValue({ total: 1024, used: 256, free: 768 }),
|
||||||
|
};
|
||||||
|
|
||||||
|
llm.ensureLlama = vi.fn().mockResolvedValue(fakeLlama);
|
||||||
|
|
||||||
|
const device = await llm.getDeviceInfo({ allowBuild: false });
|
||||||
|
|
||||||
|
expect(llm.ensureLlama).toHaveBeenCalledWith(false);
|
||||||
|
expect(device).toEqual({
|
||||||
|
gpu: "metal",
|
||||||
|
gpuOffloading: true,
|
||||||
|
gpuDevices: ["Apple GPU"],
|
||||||
|
vram: { total: 1024, used: 256, free: 768 },
|
||||||
|
cpuCores: 8,
|
||||||
|
});
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
// =============================================================================
|
// =============================================================================
|
||||||
// Integration Tests (require actual models)
|
// Integration Tests (require actual models)
|
||||||
// =============================================================================
|
// =============================================================================
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user