diff --git a/packages/core/src/tool/read-filesystem.ts b/packages/core/src/tool/read-filesystem.ts index c27bdae6d..ef0c4df17 100644 --- a/packages/core/src/tool/read-filesystem.ts +++ b/packages/core/src/tool/read-filesystem.ts @@ -13,23 +13,61 @@ export const MAX_MEDIA_INGEST_BYTES = 20 * 1024 * 1024 const MAX_LINE_LENGTH = 2_000 const MAX_LINE_SUFFIX = `... (line truncated to ${MAX_LINE_LENGTH} chars)` -export class BinaryFileError extends Error { - constructor(readonly resource: string) { - super(`Cannot read binary file: ${resource}`) - this.name = "BinaryFileError" +export class BinaryFileError extends Schema.TaggedErrorClass()("ReadTool.BinaryFileError", { + resource: Schema.String, +}) { + override get message() { + return `Cannot read binary file: ${this.resource}` } } -export class MediaIngestLimitError extends Error { - constructor( - readonly resource: string, - readonly maximumBytes: number, - ) { - super(`Media exceeds ${maximumBytes} byte ingestion limit: ${resource}`) - this.name = "MediaIngestLimitError" +export class MediaIngestLimitError extends Schema.TaggedErrorClass()( + "ReadTool.MediaIngestLimitError", + { + resource: Schema.String, + maximumBytes: Schema.Number, + }, +) { + override get message() { + return `Media exceeds ${this.maximumBytes} byte ingestion limit: ${this.resource}` } } +export class MalformedUtf8Error extends Schema.TaggedErrorClass()("ReadTool.MalformedUtf8Error", { + resource: Schema.String, +}) { + override get message() { + return `File is not valid UTF-8: ${this.resource}` + } +} + +export class OffsetOutOfRangeError extends Schema.TaggedErrorClass()( + "ReadTool.OffsetOutOfRangeError", + { offset: Schema.Number }, +) { + override get message() { + return `Offset ${this.offset} is out of range` + } +} + +export class PathKindError extends Schema.TaggedErrorClass()("ReadTool.PathKindError", { + resource: Schema.String, + expected: Schema.Literals(["a file", "a file or directory"]), +}) { + override get message() { + return `Path is not ${this.expected}: ${this.resource}` + } +} + +export type InspectError = FSUtil.Error | PathKindError +export type ReadError = + | FSUtil.Error + | BinaryFileError + | MediaIngestLimitError + | MalformedUtf8Error + | OffsetOutOfRangeError + | PathKindError + export const PageInput = Schema.Struct({ offset: PositiveInt.pipe(Schema.optional), limit: PositiveInt.check(Schema.isLessThanOrEqualTo(MAX_READ_LINES)).pipe(Schema.optional), @@ -52,13 +90,13 @@ export class ListPage extends Schema.Class("ReadTool.ListPage")({ }) {} export interface Interface { - readonly inspect: (path: AbsolutePath) => Effect.Effect<"file" | "directory"> + readonly inspect: (path: AbsolutePath) => Effect.Effect<"file" | "directory", InspectError> readonly read: ( path: AbsolutePath, resource: string, page?: PageInput, - ) => Effect.Effect - readonly list: (path: AbsolutePath, page?: PageInput) => Effect.Effect + ) => Effect.Effect + readonly list: (path: AbsolutePath, page?: PageInput) => Effect.Effect } export class Service extends Context.Service()("@opencode/ReadToolFileSystem") {} @@ -111,11 +149,21 @@ const binary = (resource: string, bytes: Uint8Array) => { } return nonPrintable / bytes.length > 0.3 } +const decodeUtf8 = (resource: string, decoder: TextDecoder, bytes?: Uint8Array) => + Effect.try({ + try: () => decoder.decode(bytes, { stream: bytes !== undefined }), + catch: (error) => { + if (error instanceof TypeError) return new MalformedUtf8Error({ resource }) + throw error + }, + }) +const decodeChunk = (resource: string, decoder: TextDecoder, bytes: Uint8Array) => + bytes.includes(0) ? Effect.fail(new BinaryFileError({ resource })) : decodeUtf8(resource, decoder, bytes) export const inspect = Effect.fn("ReadTool.inspect")(function* (fs: FSUtil.Interface, input: string) { - const info = yield* fs.stat(input).pipe(Effect.orDie) + const info = yield* fs.stat(input) const type = info.type === "File" ? "file" : info.type === "Directory" ? "directory" : undefined - if (!type) return yield* Effect.die(new Error("Path is not a file or directory")) + if (!type) return yield* Effect.fail(new PathKindError({ resource: input, expected: "a file or directory" })) return type }) @@ -125,32 +173,30 @@ export const read = Effect.fn("ReadTool.read")(function* ( resource: string, page: PageInput = {}, ) { - const real = yield* fs.realPath(input).pipe(Effect.orDie) + const real = yield* fs.realPath(input) return yield* Effect.scoped( Effect.gen(function* () { - const file = yield* fs.open(real, { flag: "r" }).pipe(Effect.orDie) - const info = yield* file.stat.pipe(Effect.orDie) - if (info.type !== "File") return yield* Effect.die(new Error("Path is not a file")) + const file = yield* fs.open(real, { flag: "r" }) + const info = yield* file.stat + if (info.type !== "File") return yield* Effect.fail(new PathKindError({ resource, expected: "a file" })) const first = Option.getOrElse( - yield* file.readAlloc(Math.min(64 * 1024, Number(info.size) || 4 * 1024)).pipe(Effect.orDie), + yield* file.readAlloc(Math.min(64 * 1024, Number(info.size) || 4 * 1024)), () => new Uint8Array(), ) const mime = imageMime(first) if (mime) { if (info.size > MAX_MEDIA_INGEST_BYTES) - return yield* Effect.die(new MediaIngestLimitError(resource, MAX_MEDIA_INGEST_BYTES)) + return yield* Effect.fail(new MediaIngestLimitError({ resource, maximumBytes: MAX_MEDIA_INGEST_BYTES })) const chunks = [first] let total = first.length while (total <= MAX_MEDIA_INGEST_BYTES) { - const chunk = yield* file - .readAlloc(Math.min(64 * 1024, MAX_MEDIA_INGEST_BYTES + 1 - total)) - .pipe(Effect.orDie) + const chunk = yield* file.readAlloc(Math.min(64 * 1024, MAX_MEDIA_INGEST_BYTES + 1 - total)) if (Option.isNone(chunk)) break chunks.push(chunk.value) total += chunk.value.length } if (total > MAX_MEDIA_INGEST_BYTES) - return yield* Effect.die(new MediaIngestLimitError(resource, MAX_MEDIA_INGEST_BYTES)) + return yield* Effect.fail(new MediaIngestLimitError({ resource, maximumBytes: MAX_MEDIA_INGEST_BYTES })) return { uri: pathToFileURL(real).href, name: path.basename(real), @@ -162,19 +208,19 @@ export const read = Effect.fn("ReadTool.read")(function* ( mime, } } - if (startsWith(first, [0x25, 0x50, 0x44, 0x46]) || binary(resource, first)) - return yield* Effect.die(new BinaryFileError(resource)) + if (startsWith(first, [0x25, 0x50, 0x44, 0x46]) || extensions.has(path.extname(resource).toLowerCase())) + return yield* Effect.fail(new BinaryFileError({ resource })) const paged = info.size > MAX_READ_BYTES || page.offset !== undefined || page.limit !== undefined if (!paged) { + if (binary(resource, first)) return yield* Effect.fail(new BinaryFileError({ resource })) const decoder = new TextDecoder("utf-8", { fatal: true }) - const text = [yield* Effect.sync(() => decoder.decode(first, { stream: true }))] + const text = [yield* decodeUtf8(resource, decoder, first)] while (true) { - const chunk = yield* file.readAlloc(64 * 1024).pipe(Effect.orDie) + const chunk = yield* file.readAlloc(64 * 1024) if (Option.isNone(chunk)) break - if (chunk.value.includes(0)) return yield* Effect.die(new BinaryFileError(resource)) - text.push(yield* Effect.sync(() => decoder.decode(chunk.value, { stream: true }))) + text.push(yield* decodeChunk(resource, decoder, chunk.value)) } - text.push(yield* Effect.sync(() => decoder.decode())) + text.push(yield* decodeUtf8(resource, decoder)) return { uri: pathToFileURL(real).href, name: path.basename(real), @@ -191,34 +237,29 @@ export const read = Effect.fn("ReadTool.read")(function* ( let discard = false let line = 1 let bytes = 0 - let found = false - let truncated = false let next: number | undefined const append = (input: string) => { if (line < offset) { line++ - return + return true } if (lines.length >= limit || bytes >= MAX_READ_BYTES) { - truncated = true - next ??= line++ - return + next = line + return false } - found = true const text = input.length > MAX_LINE_LENGTH ? input.slice(0, MAX_LINE_LENGTH) + MAX_LINE_SUFFIX : input const size = Buffer.byteLength(text, "utf-8") + (lines.length > 0 ? 1 : 0) if (bytes + size > MAX_READ_BYTES) { - truncated = true - next ??= line++ - return + next = line + return false } lines.push(text) bytes += size line++ + return true } - const consume = (chunk: Uint8Array) => { - if (chunk.includes(0)) throw new BinaryFileError(resource) - let text = decoder.decode(chunk, { stream: true }) + const consume = (input: string) => { + let text = input while (true) { const index = text.indexOf("\n") if (index === -1) { @@ -235,25 +276,44 @@ export const read = Effect.fn("ReadTool.read")(function* ( pending = "" discard = false text = text.slice(index + 1) - append(current.endsWith("\r") ? current.slice(0, -1) : current) + if (!append(current.endsWith("\r") ? current.slice(0, -1) : current)) return false } + return true } - yield* Effect.sync(() => consume(first)) - while (true) { - const chunk = yield* file.readAlloc(64 * 1024).pipe(Effect.orDie) + const consumeChunk = Effect.fnUntraced(function* (chunk: Uint8Array) { + let start = 0 + while (start < chunk.length) { + if (lines.length >= limit || bytes >= MAX_READ_BYTES) { + next = line + return false + } + const newline = chunk.indexOf(10, start) + const end = newline === -1 ? chunk.length : newline + 1 + const segment = chunk.subarray(start, end) + if (binary(resource, segment)) return yield* Effect.fail(new BinaryFileError({ resource })) + if (!consume(yield* decodeUtf8(resource, decoder, segment))) return false + start = end + } + return true + }) + let done = !(yield* consumeChunk(first)) + while (!done) { + const chunk = yield* file.readAlloc(64 * 1024) if (Option.isNone(chunk)) break - yield* Effect.sync(() => consume(chunk.value)) + done = !(yield* consumeChunk(chunk.value)) } - const tail = yield* Effect.sync(() => decoder.decode()) - if (!discard) pending += tail - if (pending) append(pending.endsWith("\r") ? pending.slice(0, -1) : pending) - if (!found && offset !== 1) return yield* Effect.die(new Error(`Offset ${offset} is out of range`)) + if (!done) { + const tail = yield* decodeUtf8(resource, decoder) + if (!discard) pending += tail + if (pending) append(pending.endsWith("\r") ? pending.slice(0, -1) : pending) + } + if (lines.length === 0 && offset !== 1) return yield* Effect.fail(new OffsetOutOfRangeError({ offset })) return new TextPage({ type: "text-page", content: lines.join("\n"), mime: FSUtil.mimeType(real), offset, - truncated, + truncated: next !== undefined, ...(next === undefined ? {} : { next }), }) }), @@ -261,8 +321,8 @@ export const read = Effect.fn("ReadTool.read")(function* ( }) export const list = Effect.fn("ReadTool.list")(function* (fs: FSUtil.Interface, input: string, page: PageInput = {}) { - const real = yield* fs.realPath(input).pipe(Effect.orDie) - const items = yield* fs.readDirectoryEntries(real).pipe(Effect.orDie) + const real = yield* fs.realPath(input) + const items = yield* fs.readDirectoryEntries(real) const offset = page.offset ?? 1 const limit = Math.min(page.limit ?? MAX_READ_LINES, MAX_READ_LINES) const entries = yield* Effect.forEach( diff --git a/packages/core/src/tool/read.ts b/packages/core/src/tool/read.ts index 22ec57b94..2635e4653 100644 --- a/packages/core/src/tool/read.ts +++ b/packages/core/src/tool/read.ts @@ -83,7 +83,7 @@ export const layer = Layer.effectDiscard( .pipe(Effect.catchTag("Image.ResizerUnavailableError", () => Effect.succeed(content))) } if ("encoding" in content && content.encoding === "base64") - return yield* Effect.fail(new ReadToolFileSystem.BinaryFileError(resource)) + return yield* Effect.fail(new ReadToolFileSystem.BinaryFileError({ resource })) return content }).pipe( Effect.mapError((error) => { diff --git a/packages/core/test/tool-read-filesystem.test.ts b/packages/core/test/tool-read-filesystem.test.ts new file mode 100644 index 000000000..2bc175416 --- /dev/null +++ b/packages/core/test/tool-read-filesystem.test.ts @@ -0,0 +1,117 @@ +import { describe, expect } from "bun:test" +import { NodeFileSystem } from "@effect/platform-node" +import path from "path" +import { Effect, FileSystem, Layer } from "effect" +import { FSUtil } from "@opencode-ai/core/fs-util" +import { ReadToolFileSystem } from "@opencode-ai/core/tool/read-filesystem" +import { testEffect } from "./lib/effect" + +const it = testEffect(FSUtil.layer.pipe(Layer.provideMerge(NodeFileSystem.layer))) +const fixture = Effect.gen(function* () { + const fs = yield* FSUtil.Service + const files = yield* FileSystem.FileSystem + const directory = yield* files.makeTempDirectoryScoped() + return { fs, files, directory } +}) + +describe("ReadToolFileSystem", () => { + it.effect("fails with a typed filesystem error when a resolved file disappears", () => + Effect.gen(function* () { + const { fs, directory } = yield* fixture + const file = path.join(directory, "missing.txt") + + const error = yield* ReadToolFileSystem.read(fs, file, "missing.txt").pipe(Effect.flip) + + expect(error).toMatchObject({ _tag: "PlatformError" }) + }), + ) + + it.effect("fails when a file becomes the wrong path kind", () => + Effect.gen(function* () { + const { fs, directory } = yield* fixture + + const error = yield* ReadToolFileSystem.read(fs, directory, "folder").pipe(Effect.flip) + + expect(error).toBeInstanceOf(ReadToolFileSystem.PathKindError) + }), + ) + + it.effect("fails with a typed filesystem error when directory listing fails", () => + Effect.gen(function* () { + const { fs, files, directory } = yield* fixture + const file = path.join(directory, "file.txt") + yield* files.writeFileString(file, "hello") + + const error = yield* ReadToolFileSystem.list(fs, file).pipe(Effect.flip) + + expect(error).toBeInstanceOf(FSUtil.FileSystemError) + if (error instanceof FSUtil.FileSystemError) expect(error.method).toBe("readDirectoryEntries") + }), + ) + + it.effect("reports binary and malformed UTF-8 content as typed errors", () => + Effect.gen(function* () { + const { fs, files, directory } = yield* fixture + const binary = path.join(directory, "archive.dat") + const malformed = path.join(directory, "malformed.txt") + yield* files.writeFile(binary, Uint8Array.of(0, 1, 2, 3)) + const malformedContent = new Uint8Array(64 * 1024 + 1).fill(97) + malformedContent[64 * 1024] = 0x80 + yield* files.writeFile(malformed, malformedContent) + + const binaryError = yield* ReadToolFileSystem.read(fs, binary, "archive.dat").pipe(Effect.flip) + const malformedError = yield* ReadToolFileSystem.read(fs, malformed, "malformed.txt").pipe(Effect.flip) + + expect(binaryError).toBeInstanceOf(ReadToolFileSystem.BinaryFileError) + expect(binaryError.message).toBe("Cannot read binary file: archive.dat") + expect(malformedError).toBeInstanceOf(ReadToolFileSystem.MalformedUtf8Error) + }), + ) + + it.effect("reports out-of-range pagination as a typed error", () => + Effect.gen(function* () { + const { fs, files, directory } = yield* fixture + const file = path.join(directory, "short.txt") + yield* files.writeFileString(file, "one\n") + + const error = yield* ReadToolFileSystem.read(fs, file, "short.txt", { offset: 2 }).pipe(Effect.flip) + + expect(error).toBeInstanceOf(ReadToolFileSystem.OffsetOutOfRangeError) + expect(error.message).toBe("Offset 2 is out of range") + }), + ) + + it.effect("stops reading after the requested page is complete", () => + Effect.gen(function* () { + const { fs, files, directory } = yield* fixture + const prefix = new TextEncoder().encode("one\n") + for (const [name, trailing] of [ + ["malformed.txt", 0x80], + ["nul.txt", 0], + ] as const) { + const file = path.join(directory, name) + yield* files.writeFile(file, Uint8Array.from([...prefix, trailing])) + + const result = yield* ReadToolFileSystem.read(fs, file, name, { limit: 1 }) + + expect(result).toMatchObject({ type: "text-page", content: "one", truncated: true, next: 2 }) + } + }), + ) + + it.effect("preserves the media ingestion limit message", () => + Effect.gen(function* () { + const { fs, files, directory } = yield* fixture + const file = path.join(directory, "oversized.png") + yield* files.writeFile(file, Uint8Array.of(0x89, 0x50, 0x4e, 0x47, 0x0d, 0x0a, 0x1a, 0x0a)) + yield* files.truncate(file, ReadToolFileSystem.MAX_MEDIA_INGEST_BYTES + 1) + + const error = yield* ReadToolFileSystem.read(fs, file, "oversized.png").pipe(Effect.flip) + + expect(error).toBeInstanceOf(ReadToolFileSystem.MediaIngestLimitError) + expect(error.message).toBe( + `Media exceeds ${ReadToolFileSystem.MAX_MEDIA_INGEST_BYTES} byte ingestion limit: oversized.png`, + ) + }), + ) +}) diff --git a/packages/core/test/tool-read.test.ts b/packages/core/test/tool-read.test.ts index 1d9553c77..c85b7e359 100644 --- a/packages/core/test/tool-read.test.ts +++ b/packages/core/test/tool-read.test.ts @@ -34,7 +34,7 @@ let readResult: FileSystem.Content | ReadToolFileSystem.TextPage = { encoding: "utf8", mime: "text/plain", } -let readFailure: unknown +let readFailure: ReadToolFileSystem.ReadError | undefined let configEntries: Config.Entry[] = [] const reader = Layer.succeed( ReadToolFileSystem.Service, @@ -42,7 +42,7 @@ const reader = Layer.succeed( inspect: () => (resolveFailure === undefined ? Effect.succeed(resolvedType) : Effect.die(resolveFailure)), read: (input, _resource, page = {}) => { readCalls.push({ input, page }) - if (readFailure !== undefined) return Effect.die(readFailure) + if (readFailure !== undefined) return Effect.fail(readFailure) return Effect.succeed(readResult) }, list: (_path, input = {}) => @@ -431,9 +431,32 @@ describe("ReadTool", () => { }), ) + it.effect("returns expected filesystem failures to the model", () => + Effect.gen(function* () { + readFailure = new ReadToolFileSystem.BinaryFileError({ resource: "archive.dat" }) + const registry = yield* ToolRegistry.Service + + expect( + yield* executeTool(registry, { + sessionID, + ...toolIdentity, + call: { + type: "tool-call", + id: "call-binary", + name: "read", + input: { path: "archive.dat", offset: 2, limit: 1 }, + }, + }), + ).toEqual({ type: "error", value: "Cannot read binary file: archive.dat" }) + expect(readCalls).toEqual([ + { input: AbsolutePath.make(`${process.cwd()}/archive.dat`), page: { offset: 2, limit: 1 } }, + ]) + }), + ) + it.effect("preserves unexpected filesystem defects", () => Effect.gen(function* () { - readFailure = new ReadToolFileSystem.BinaryFileError("archive.dat") + resolveFailure = new Error("unexpected") const registry = yield* ToolRegistry.Service expect( @@ -441,18 +464,10 @@ describe("ReadTool", () => { yield* executeTool(registry, { sessionID, ...toolIdentity, - call: { - type: "tool-call", - id: "call-binary", - name: "read", - input: { path: "archive.dat", offset: 2, limit: 1 }, - }, + call: { type: "tool-call", id: "call-defect", name: "read", input: { path: "README.md" } }, }).pipe(Effect.exit), ), ).toBe(true) - expect(readCalls).toEqual([ - { input: AbsolutePath.make(`${process.cwd()}/archive.dat`), page: { offset: 2, limit: 1 } }, - ]) }), )