407 lines
16 KiB
TypeScript
407 lines
16 KiB
TypeScript
import fs from "fs/promises"
|
|
import { realpathSync } from "node:fs"
|
|
import path from "path"
|
|
import { describe, expect, test } from "bun:test"
|
|
import { Effect, Layer } from "effect"
|
|
import { ChildProcess } from "effect/unstable/process"
|
|
import { FSUtil } from "@opencode-ai/core/fs-util"
|
|
import { Config } from "@opencode-ai/core/config"
|
|
import { Location } from "@opencode-ai/core/location"
|
|
import { LocationMutation } from "@opencode-ai/core/location-mutation"
|
|
import { PermissionV2 } from "@opencode-ai/core/permission"
|
|
import { AppProcess } from "@opencode-ai/core/process"
|
|
import { AbsolutePath } from "@opencode-ai/core/schema"
|
|
import { SessionV2 } from "@opencode-ai/core/session"
|
|
import { BashTool } from "@opencode-ai/core/tool/bash"
|
|
import { ToolOutputStore } from "@opencode-ai/core/tool-output-store"
|
|
import { ToolRegistry } from "@opencode-ai/core/tool-registry"
|
|
import { location } from "./fixture/location"
|
|
import { tmpdir } from "./fixture/tmpdir"
|
|
import { testEffect } from "./lib/effect"
|
|
|
|
const sessionID = SessionV2.ID.make("ses_bash_tool_test")
|
|
const assertions: PermissionV2.AssertInput[] = []
|
|
const runs: Array<{
|
|
readonly command: string
|
|
readonly cwd?: string
|
|
readonly shell?: string | boolean
|
|
readonly options?: AppProcess.RunOptions
|
|
}> = []
|
|
const truncations: ToolOutputStore.TruncateInput[] = []
|
|
let denyAction: string | undefined
|
|
let result: AppProcess.RunResult = {
|
|
command: "mock",
|
|
exitCode: 0,
|
|
stdout: Buffer.from("hello\n"),
|
|
stderr: Buffer.alloc(0),
|
|
stdoutTruncated: false,
|
|
stderrTruncated: false,
|
|
}
|
|
let runFailure: AppProcess.AppProcessError | undefined
|
|
let truncate = (input: ToolOutputStore.TruncateInput): Effect.Effect<ToolOutputStore.TruncateResult> =>
|
|
Effect.succeed({ content: input.content, truncated: false })
|
|
|
|
const permission = Layer.succeed(
|
|
PermissionV2.Service,
|
|
PermissionV2.Service.of({
|
|
assert: (input) =>
|
|
Effect.sync(() => assertions.push(input)).pipe(
|
|
Effect.andThen(
|
|
input.action === denyAction ? Effect.fail(new PermissionV2.DeniedError({ rules: [] })) : Effect.void,
|
|
),
|
|
),
|
|
ask: () => Effect.die("unused"),
|
|
reply: () => Effect.die("unused"),
|
|
get: () => Effect.die("unused"),
|
|
forSession: () => Effect.die("unused"),
|
|
list: () => Effect.die("unused"),
|
|
}),
|
|
)
|
|
const appProcess = Layer.succeed(
|
|
AppProcess.Service,
|
|
AppProcess.Service.of({
|
|
run: (command: ChildProcess.Command, options?: AppProcess.RunOptions) =>
|
|
Effect.suspend(() => {
|
|
if (command._tag !== "StandardCommand") throw new Error("expected standard command")
|
|
runs.push({ command: command.command, cwd: command.options.cwd, shell: command.options.shell, options })
|
|
return runFailure ? Effect.fail(runFailure) : Effect.succeed(result)
|
|
}),
|
|
} as unknown as AppProcess.Interface),
|
|
)
|
|
const resources = Layer.succeed(
|
|
ToolOutputStore.Service,
|
|
ToolOutputStore.Service.of({
|
|
limits: () => Effect.die("unused"),
|
|
write: () => Effect.die("unused"),
|
|
truncate: (input) => Effect.sync(() => truncations.push(input)).pipe(Effect.andThen(truncate(input))),
|
|
read: () => Effect.die("unused"),
|
|
cleanup: () => Effect.die("unused"),
|
|
}),
|
|
)
|
|
const config = Layer.succeed(
|
|
Config.Service,
|
|
Config.Service.of({
|
|
entries: () => Effect.succeed([]),
|
|
}),
|
|
)
|
|
|
|
const reset = () => {
|
|
assertions.length = 0
|
|
runs.length = 0
|
|
truncations.length = 0
|
|
denyAction = undefined
|
|
runFailure = undefined
|
|
result = {
|
|
command: "mock",
|
|
exitCode: 0,
|
|
stdout: Buffer.from("hello\n"),
|
|
stderr: Buffer.alloc(0),
|
|
stdoutTruncated: false,
|
|
stderrTruncated: false,
|
|
}
|
|
truncate = (input) => Effect.succeed({ content: input.content, truncated: false })
|
|
}
|
|
|
|
const withTool = <A, E, R>(
|
|
directory: string,
|
|
body: (registry: ToolRegistry.Interface) => Effect.Effect<A, E, R>,
|
|
processLayer: Layer.Layer<AppProcess.Service> = appProcess,
|
|
) => {
|
|
const filesystem = FSUtil.defaultLayer
|
|
const activeLocation = Layer.succeed(
|
|
Location.Service,
|
|
Location.Service.of(location({ directory: AbsolutePath.make(directory) })),
|
|
)
|
|
const mutation = LocationMutation.layer.pipe(Layer.provide(filesystem), Layer.provide(activeLocation))
|
|
const registry = ToolRegistry.layer.pipe(Layer.provide(permission))
|
|
const bash = BashTool.layer.pipe(
|
|
Layer.provide(registry),
|
|
Layer.provide(permission),
|
|
Layer.provide(mutation),
|
|
Layer.provide(processLayer),
|
|
Layer.provide(resources),
|
|
Layer.provide(config),
|
|
)
|
|
return Effect.gen(function* () {
|
|
return yield* body(yield* ToolRegistry.Service)
|
|
}).pipe(Effect.provide(Layer.mergeAll(registry, bash)))
|
|
}
|
|
|
|
const call = (input: typeof BashTool.Parameters.Type, id = "call-bash") => ({
|
|
sessionID,
|
|
call: { type: "tool-call" as const, id, name: "bash", input },
|
|
})
|
|
|
|
const it = testEffect(Layer.empty)
|
|
|
|
describe("BashTool", () => {
|
|
it.live("registers and returns structured successful output from the active Location", () =>
|
|
Effect.acquireUseRelease(
|
|
Effect.promise(() => tmpdir()),
|
|
(tmp) => {
|
|
reset()
|
|
return withTool(tmp.path, (registry) =>
|
|
Effect.gen(function* () {
|
|
const definitions = yield* registry.definitions()
|
|
expect(definitions.map((tool) => tool.name)).toEqual(["bash"])
|
|
expect(definitions[0]?.inputSchema).not.toHaveProperty("properties.background")
|
|
expect(yield* registry.settle(call({ command: "pwd", description: "Print working directory" }))).toEqual({
|
|
result: { type: "text", value: "hello\n\n\nCommand exited with code 0." },
|
|
output: {
|
|
structured: {
|
|
command: "pwd",
|
|
cwd: realpathSync(tmp.path),
|
|
exitCode: 0,
|
|
output: "hello\n",
|
|
truncated: false,
|
|
},
|
|
content: [{ type: "text", text: "hello\n\n\nCommand exited with code 0." }],
|
|
},
|
|
})
|
|
expect(runs).toMatchObject([{ command: "pwd", cwd: realpathSync(tmp.path) }])
|
|
expect(runs[0]?.options).toMatchObject({
|
|
maxOutputBytes: BashTool.MAX_CAPTURE_BYTES,
|
|
maxErrorBytes: BashTool.MAX_CAPTURE_BYTES,
|
|
})
|
|
expect(assertions).toEqual([{ sessionID, action: "bash", resources: ["pwd"], save: ["pwd"] }])
|
|
}),
|
|
)
|
|
},
|
|
(tmp) => Effect.promise(() => tmp[Symbol.asyncDispose]()),
|
|
),
|
|
)
|
|
|
|
it.live("resolves a relative workdir from the active Location", () =>
|
|
Effect.acquireUseRelease(
|
|
Effect.promise(() => tmpdir()),
|
|
(tmp) => {
|
|
reset()
|
|
return Effect.promise(() => fs.mkdir(path.join(tmp.path, "src"))).pipe(
|
|
Effect.andThen(withTool(tmp.path, (registry) => registry.execute(call({ command: "pwd", workdir: "src" })))),
|
|
Effect.andThen(
|
|
Effect.sync(() => expect(runs).toMatchObject([{ cwd: realpathSync(path.join(tmp.path, "src")) }])),
|
|
),
|
|
)
|
|
},
|
|
(tmp) => Effect.promise(() => tmp[Symbol.asyncDispose]()),
|
|
),
|
|
)
|
|
|
|
if (process.platform !== "win32") {
|
|
it.live("executes a real shell command through AppProcess", () =>
|
|
Effect.acquireUseRelease(
|
|
Effect.promise(() => tmpdir()),
|
|
(tmp) => {
|
|
reset()
|
|
return withTool(
|
|
tmp.path,
|
|
(registry) => registry.settle(call({ command: "printf core-bash" })),
|
|
AppProcess.defaultLayer,
|
|
).pipe(
|
|
Effect.andThen((settled) =>
|
|
Effect.sync(() => {
|
|
expect(settled.result).toEqual({ type: "text", value: "core-bash\n\nCommand exited with code 0." })
|
|
expect(settled.output?.structured).toMatchObject({
|
|
command: "printf core-bash",
|
|
cwd: realpathSync(tmp.path),
|
|
exitCode: 0,
|
|
output: "core-bash",
|
|
})
|
|
}),
|
|
),
|
|
)
|
|
},
|
|
(tmp) => Effect.promise(() => tmp[Symbol.asyncDispose]()),
|
|
),
|
|
)
|
|
}
|
|
|
|
it.live("approves an explicit external workdir before bash execution", () =>
|
|
Effect.acquireUseRelease(
|
|
Effect.promise(() => Promise.all([tmpdir(), tmpdir()])),
|
|
([active, outside]) => {
|
|
reset()
|
|
return withTool(active.path, (registry) =>
|
|
registry.execute(call({ command: "pwd", workdir: outside.path })),
|
|
).pipe(
|
|
Effect.andThen(
|
|
Effect.sync(() => {
|
|
expect(assertions.map((item) => item.action)).toEqual(["external_directory", "bash"])
|
|
expect(assertions[0]).toMatchObject({
|
|
resources: [path.join(realpathSync(outside.path), "*").replaceAll("\\", "/")],
|
|
})
|
|
expect(runs).toHaveLength(1)
|
|
}),
|
|
),
|
|
)
|
|
},
|
|
([active, outside]) =>
|
|
Effect.promise(() =>
|
|
Promise.all([active[Symbol.asyncDispose](), outside[Symbol.asyncDispose]()]).then(() => undefined),
|
|
),
|
|
),
|
|
)
|
|
|
|
it.live("does not execute after external-directory or bash denial", () =>
|
|
Effect.acquireUseRelease(
|
|
Effect.promise(() => Promise.all([tmpdir(), tmpdir()])),
|
|
([active, outside]) =>
|
|
Effect.gen(function* () {
|
|
reset()
|
|
denyAction = "external_directory"
|
|
yield* withTool(active.path, (registry) => registry.execute(call({ command: "pwd", workdir: outside.path })))
|
|
expect(assertions.map((item) => item.action)).toEqual(["external_directory"])
|
|
expect(runs).toEqual([])
|
|
|
|
reset()
|
|
denyAction = "bash"
|
|
yield* withTool(active.path, (registry) => registry.execute(call({ command: "pwd" })))
|
|
expect(assertions.map((item) => item.action)).toEqual(["bash"])
|
|
expect(runs).toEqual([])
|
|
}),
|
|
([active, outside]) =>
|
|
Effect.promise(() =>
|
|
Promise.all([active[Symbol.asyncDispose](), outside[Symbol.asyncDispose]()]).then(() => undefined),
|
|
),
|
|
),
|
|
)
|
|
|
|
it.live("reports external command arguments as advisory warnings without enforcing approval", () =>
|
|
Effect.acquireUseRelease(
|
|
Effect.promise(() => Promise.all([tmpdir(), tmpdir()])),
|
|
([active, outside]) => {
|
|
reset()
|
|
denyAction = "external_directory"
|
|
const target = path.join(outside.path, "secret.txt")
|
|
return withTool(active.path, (registry) => registry.settle(call({ command: `cat ${target}` }))).pipe(
|
|
Effect.andThen((settled) =>
|
|
Effect.sync(() => {
|
|
expect(assertions.map((item) => item.action)).toEqual(["bash"])
|
|
expect(runs).toHaveLength(1)
|
|
expect(settled.output?.structured).toMatchObject({
|
|
warnings: [
|
|
`Command argument references external directory ${path.join(realpathSync(outside.path), "*").replaceAll("\\", "/")}. Bash runs with host-user filesystem, process, and network authority; this scan is advisory only.`,
|
|
],
|
|
})
|
|
expect(settled.result).toMatchObject({ type: "text", value: expect.stringContaining("Warnings:") })
|
|
}),
|
|
),
|
|
)
|
|
},
|
|
([active, outside]) =>
|
|
Effect.promise(() =>
|
|
Promise.all([active[Symbol.asyncDispose](), outside[Symbol.asyncDispose]()]).then(() => undefined),
|
|
),
|
|
),
|
|
)
|
|
|
|
it.live("keeps non-zero exits useful and exposes managed overflow by opaque URI", () =>
|
|
Effect.acquireUseRelease(
|
|
Effect.promise(() => tmpdir()),
|
|
(tmp) => {
|
|
reset()
|
|
result = { ...result, exitCode: 7, stdout: Buffer.from("HEAD full output TAIL") }
|
|
truncate = (input) =>
|
|
Effect.succeed({
|
|
content: "HEAD\n\n... output truncated; full content available as tool-output://opaque ...\n\nTAIL",
|
|
truncated: true,
|
|
resource: new ToolOutputStore.Resource({
|
|
uri: "tool-output://opaque",
|
|
mime: "text/plain",
|
|
size: input.content.length,
|
|
}),
|
|
})
|
|
return withTool(tmp.path, (registry) => registry.settle(call({ command: "false" }, "call-overflow"))).pipe(
|
|
Effect.andThen((settled) =>
|
|
Effect.sync(() => {
|
|
expect(settled.result).toMatchObject({
|
|
type: "text",
|
|
value: expect.stringContaining("Command exited with code 7"),
|
|
})
|
|
expect(settled.output?.structured).toMatchObject({
|
|
command: "false",
|
|
cwd: realpathSync(tmp.path),
|
|
exitCode: 7,
|
|
truncated: true,
|
|
resource: { uri: "tool-output://opaque" },
|
|
})
|
|
expect(truncations).toMatchObject([
|
|
{ sessionID, toolCallID: "call-overflow", content: "HEAD full output TAIL" },
|
|
])
|
|
expect(JSON.stringify(settled)).not.toContain(tmp.path + path.sep + "tool-output")
|
|
}),
|
|
),
|
|
)
|
|
},
|
|
(tmp) => Effect.promise(() => tmp[Symbol.asyncDispose]()),
|
|
),
|
|
)
|
|
|
|
it.live("surfaces bounded process-capture truncation", () =>
|
|
Effect.acquireUseRelease(
|
|
Effect.promise(() => tmpdir()),
|
|
(tmp) => {
|
|
reset()
|
|
result = { ...result, stdoutTruncated: true }
|
|
return withTool(tmp.path, (registry) => registry.settle(call({ command: "verbose" }))).pipe(
|
|
Effect.andThen((settled) =>
|
|
Effect.sync(() => {
|
|
expect(settled.output?.structured).toMatchObject({ truncated: true, stdoutTruncated: true })
|
|
expect(settled.result).toMatchObject({
|
|
type: "text",
|
|
value: expect.stringContaining("stdout capture truncated"),
|
|
})
|
|
expect(settled.output?.structured).not.toHaveProperty("resource")
|
|
}),
|
|
),
|
|
)
|
|
},
|
|
(tmp) => Effect.promise(() => tmp[Symbol.asyncDispose]()),
|
|
),
|
|
)
|
|
|
|
it.live("returns a useful timeout settlement", () =>
|
|
Effect.acquireUseRelease(
|
|
Effect.promise(() => tmpdir()),
|
|
(tmp) => {
|
|
reset()
|
|
runFailure = new AppProcess.AppProcessError({ command: "sleep", cause: new Error("Timed out") })
|
|
return withTool(tmp.path, (registry) => registry.settle(call({ command: "sleep 60", timeout: 10 }))).pipe(
|
|
Effect.andThen((settled) =>
|
|
Effect.sync(() => {
|
|
expect(settled.result).toMatchObject({
|
|
type: "text",
|
|
value: expect.stringContaining("Command timed out"),
|
|
})
|
|
expect(settled.output?.structured).toMatchObject({
|
|
command: "sleep 60",
|
|
timedOut: true,
|
|
truncated: false,
|
|
})
|
|
}),
|
|
),
|
|
)
|
|
},
|
|
(tmp) => Effect.promise(() => tmp[Symbol.asyncDispose]()),
|
|
),
|
|
)
|
|
})
|
|
|
|
test("keeps locked deferred parity TODOs visible", async () => {
|
|
const source = await fs.readFile(new URL("../src/tool/bash.ts", import.meta.url), "utf8")
|
|
for (const todo of [
|
|
"Port tree-sitter bash / PowerShell parser-based approval reduction.",
|
|
"Port BashArity reusable command-prefix approvals.",
|
|
"Replace token-based command-argument external-directory advisories with parser-based detection.",
|
|
"Restore PowerShell and cmd-specific invocation/path handling on Windows.",
|
|
"Add plugin shell.env environment augmentation once V2 plugin hooks exist.",
|
|
"Add durable/live progress metadata streaming for long-running commands once V2 tool invocation progress context is wired.",
|
|
"Persist background job status and define restart recovery before exposing remote observation.",
|
|
"Revisit process-group cleanup and platform coverage with shell-specific tests if current AppProcess semantics do not fully cover it.",
|
|
"Revisit binary output handling if stdout/stderr decoding is text-only.",
|
|
"Stream full shell output into managed storage while retaining only a bounded in-memory preview.",
|
|
]) {
|
|
expect(source).toContain(`TODO: ${todo}`)
|
|
}
|
|
})
|