From c6083a474c6a6fce5df631b3e1464e12740238e7 Mon Sep 17 00:00:00 2001 From: Luke Parker <10430890+Hona@users.noreply.github.com> Date: Fri, 19 Jun 2026 19:39:02 +0200 Subject: [PATCH] test(app): add manual performance diagnostics (#32937) --- packages/app/AGENTS.md | 5 + packages/app/e2e/performance/AGENTS.md | 13 + packages/app/e2e/performance/README.md | 77 +++ packages/app/e2e/performance/benchmark.ts | 144 +++++ packages/app/e2e/performance/chrome-trace.ts | 95 +++ .../app/e2e/performance/playwright.config.ts | 20 + .../performance/playwright.uncapped.config.ts | 13 + .../timeline/session-tab-flash.spec.ts | 49 ++ .../timeline/session-tab-repaint-probe.ts | 251 ++++++++ .../session-tab-switch-benchmark.spec.ts | 79 +++ .../timeline/session-tab-switch-metrics.ts | 46 ++ .../timeline/session-tab-switch-probe.ts | 152 +++++ .../session-timeline-benchmark.fixture.ts | 488 ++++++++++++++++ .../session-timeline-benchmark.spec.ts | 85 +++ .../timeline/session-timeline-profile.ts | 40 ++ .../timeline/session-timeline-stream-probe.ts | 547 ++++++++++++++++++ .../session-timeline-stress.fixture.ts | 335 +++++++++++ .../timeline/timeline-test-helpers.ts | 67 +++ .../unit/chrome-trace-write.test.ts | 15 + .../unit/session-tab-repaint-probe.test.ts | 42 ++ .../unit/session-tab-switch-metrics.test.ts | 54 ++ .../session-timeline-stream-probe.test.ts | 14 + .../session-timeline-visual-tracking.test.ts | 16 + packages/app/e2e/utils/mock-server.ts | 8 +- packages/app/package.json | 3 +- packages/app/playwright.config.ts | 1 + 26 files changed, 2656 insertions(+), 3 deletions(-) create mode 100644 packages/app/e2e/performance/AGENTS.md create mode 100644 packages/app/e2e/performance/README.md create mode 100644 packages/app/e2e/performance/benchmark.ts create mode 100644 packages/app/e2e/performance/chrome-trace.ts create mode 100644 packages/app/e2e/performance/playwright.config.ts create mode 100644 packages/app/e2e/performance/playwright.uncapped.config.ts create mode 100644 
packages/app/e2e/performance/timeline/session-tab-flash.spec.ts create mode 100644 packages/app/e2e/performance/timeline/session-tab-repaint-probe.ts create mode 100644 packages/app/e2e/performance/timeline/session-tab-switch-benchmark.spec.ts create mode 100644 packages/app/e2e/performance/timeline/session-tab-switch-metrics.ts create mode 100644 packages/app/e2e/performance/timeline/session-tab-switch-probe.ts create mode 100644 packages/app/e2e/performance/timeline/session-timeline-benchmark.fixture.ts create mode 100644 packages/app/e2e/performance/timeline/session-timeline-benchmark.spec.ts create mode 100644 packages/app/e2e/performance/timeline/session-timeline-profile.ts create mode 100644 packages/app/e2e/performance/timeline/session-timeline-stream-probe.ts create mode 100644 packages/app/e2e/performance/timeline/session-timeline-stress.fixture.ts create mode 100644 packages/app/e2e/performance/timeline/timeline-test-helpers.ts create mode 100644 packages/app/e2e/performance/unit/chrome-trace-write.test.ts create mode 100644 packages/app/e2e/performance/unit/session-tab-repaint-probe.test.ts create mode 100644 packages/app/e2e/performance/unit/session-tab-switch-metrics.test.ts create mode 100644 packages/app/e2e/performance/unit/session-timeline-stream-probe.test.ts create mode 100644 packages/app/e2e/performance/unit/session-timeline-visual-tracking.test.ts diff --git a/packages/app/AGENTS.md b/packages/app/AGENTS.md index 765e960c8..2e56066e7 100644 --- a/packages/app/AGENTS.md +++ b/packages/app/AGENTS.md @@ -1,3 +1,8 @@ +## Priorities + +- Prioritise, in this order: stability, simplicity, performance. +- Before changing session or timeline code, record a production benchmark baseline and compare it after the change. + ## Debugging - NEVER try to restart the app, or the server process, EVER. 
diff --git a/packages/app/e2e/performance/AGENTS.md b/packages/app/e2e/performance/AGENTS.md new file mode 100644 index 000000000..e69c91a98 --- /dev/null +++ b/packages/app/e2e/performance/AGENTS.md @@ -0,0 +1,13 @@ +- Prioritize stability, then simplicity, then measurement overhead. +- Use Playwright for scenario control, isolation, and completion checks. +- Use Chrome Performance traces for generic browser profiling. +- Use Electron `contentTracing` for packaged multi-process profiling. +- Keep custom probes only for product-specific measurements. +- Do not duplicate measurements across the harness, probes, and traces. +- Run benchmarks serially to avoid cross-test contention. +- Run benchmarks against production builds. +- Keep detailed profiling opt-in when it changes workload behavior. +- Preserve raw diagnostic data or use lossless representations. +- Do not enforce machine-dependent performance thresholds. +- Assert scenario completion and metric collection only. +- Keep normal test discovery free of manual benchmarks. diff --git a/packages/app/e2e/performance/README.md b/packages/app/e2e/performance/README.md new file mode 100644 index 000000000..afa3108d5 --- /dev/null +++ b/packages/app/e2e/performance/README.md @@ -0,0 +1,77 @@ +# Manual app performance suite + +The app's high-volume performance diagnostics live under `packages/app/e2e/performance` and are excluded from normal local and CI Playwright discovery. The benchmark config builds the app and serves the production bundle before running scenarios serially. + +Run the suite explicitly from `packages/app`: + +```sh +bun run test:bench +``` + +PowerShell: + +```powershell +$env:PLAYWRIGHT_WORKERS = "1" +bun run test:bench +``` + +The suite contains: + +- cold and hot session-tab timing +- cached session repaint and mutation tracing +- streaming timeline throughput, RAF-gap, long-task, geometry, and remount diagnostics + +All benchmarks import the shared `benchmark` fixture. 
Pages created through Playwright's `page` fixture automatically capture main-frame navigation history and emit a Chrome trace when `OPENCODE_PERFORMANCE_TRACE_DIR` is set. Benchmarks that need isolated browser contexts use `withBenchmarkPage`, which owns the context and the same diagnostics lifecycle. + +New benchmarks should look like normal Playwright tests: + +```ts +import { benchmark, expect } from "../benchmark" + +benchmark("measures one interaction", async ({ page, report }) => { + // Only scenario-specific setup and interaction belong here. + report({ durationMs: 42 }) +}) +``` + +The fixture requires every benchmark to call `report()`, automatically names and closes traces, captures navigation history, attaches that history when a test fails, and emits metrics as a consistent `BENCHMARK` JSON line. + +```text +BENCHMARK {"name":"...","context":{"project":"chromium","platform":"darwin"},"metrics":{...}} +``` + +Every observed page also emits `BENCHMARK_PAGE` with the same run ID, navigation history, and optional trace path before the final status-bearing `BENCHMARK` record. Chrome traces are browser-wide page-lifetime diagnostics; scenario metrics use narrower explicitly named observation windows. + +This follows the stack's own guidance: [Electron recommends repeated Chrome DevTools and Chrome Tracing measurement](https://www.electronjs.org/docs/latest/tutorial/performance), [Chrome DevTools recommends Performance recordings for runtime work](https://developer.chrome.com/docs/devtools/performance), and [Playwright uses traces for test debugging rather than renderer profiling](https://playwright.dev/docs/trace-viewer). + +These Playwright benchmarks profile the shared app renderer in Chromium. 
A future packaged Electron benchmark that needs main-process and multi-process attribution should use Electron's official [`contentTracing`](https://www.electronjs.org/docs/latest/api/content-tracing/) API rather than extending this renderer harness with bespoke process instrumentation. + +CPU and high-volume visual profiling are disabled by default. Set `TIMELINE_CPU_PROFILE=1` to enable both, or additionally set `TIMELINE_VISUAL_PROFILE=0` for CPU-only profiling. + +The streaming scenario's 30x CPU throttle is a deterministic stress profile, not a simulated end-user device. + +Benchmarks do not assert machine-dependent performance budgets. Streaming processes 160 deltas by default and reports renderer-observed completion time, throughput, RAF callback-gap distributions, frame-budget equivalents, and long tasks through final geometry settlement. Delta count and delivery batch are included in result context when overridden. These are main-thread callback diagnostics, not compositor presentation or dropped-frame measurements. Visual-only and geometry metrics are `null` when their probes are disabled. Tab metrics describe sampled DOM observations. Assertions verify scenario and metric collection completion. Repeated repaint states are run-length grouped, but every original observation timestamp is retained alongside raw mutation batches and layout shifts. + +Committed smoke and regression tests continue to own correctness coverage for pagination, tab paint, context resize, collapse state, and composer spacing. + +## Chrome traces + +Set `OPENCODE_PERFORMANCE_TRACE_DIR` to emit a standard Chrome DevTools trace for every benchmark page automatically: + +```sh +OPENCODE_PERFORMANCE_TRACE_DIR=/tmp/opencode-performance-traces \ +bunx playwright test --config e2e/performance/playwright.config.ts \ + timeline/session-tab-switch-benchmark.spec.ts +``` + +The emitted JSON is a standard Chrome trace and can be loaded directly into the Chrome DevTools Performance panel. 
`devtools-tracing` can optionally inspect it from the command line, using the command shown below, without adding package scripts or dependencies. + +Trace capture mirrors [Puppeteer's official tracing defaults and lifecycle](https://pptr.dev/api/puppeteer.tracing), using Chrome's `ReturnAsStream` transfer mode and failing when Chromium reports trace data loss. + +```sh +bunx devtools-tracing stats +``` + +INP analysis requires a trace with a supported navigation/interaction insight. Selector statistics require a trace captured with `OPENCODE_PERFORMANCE_SELECTOR_TRACE=1`. + +`e2e/performance/playwright.uncapped.config.ts` disables Chromium frame-rate limiting for explicit uncapped diagnostics. Native product benchmarks should use the default Playwright configuration. diff --git a/packages/app/e2e/performance/benchmark.ts b/packages/app/e2e/performance/benchmark.ts new file mode 100644 index 000000000..b9f8ea434 --- /dev/null +++ b/packages/app/e2e/performance/benchmark.ts @@ -0,0 +1,144 @@ +import { expect, test as base, type Browser, type Page, type TestInfo } from "@playwright/test" +import { startChromeTrace } from "./chrome-trace" + +type BenchmarkFixtures = { + report: (metrics: Record, context?: Record) => void + reportState: { payload?: { metrics: Record; context: Record } } + benchmarkResult: void +} + +export type PerformancePageDiagnostics = { + navigations: string[] + stop: () => Promise +} + +const pages = new WeakMap() + +export const benchmark = base.extend({ + reportState: async ({}, use) => use({}), + report: async ({ reportState }, use) => { + await use((metrics, context = {}) => { + if (reportState.payload) throw new Error("Benchmark reported metrics more than once") + reportState.payload = { metrics, context } + }) + }, + benchmarkResult: [ + async ({ reportState }, use, testInfo) => { + await use() + const missing = !reportState.payload + console.log( + `BENCHMARK ${JSON.stringify({ + schemaVersion: 2, + runID: process.env.OPENCODE_PERFORMANCE_RUN_ID, + name: 
benchmarkName(testInfo), + status: missing ? "failed" : testInfo.status, + expectedStatus: testInfo.expectedStatus, + retry: testInfo.retry, + repeatEachIndex: testInfo.repeatEachIndex, + context: { + project: testInfo.project.name, + platform: process.platform, + ...reportState.payload?.context, + }, + metrics: reportState.payload?.metrics ?? null, + error: missing ? "Benchmark did not report metrics" : undefined, + })}`, + ) + if (missing && testInfo.status === testInfo.expectedStatus) + throw new Error(`Benchmark did not report metrics: ${benchmarkName(testInfo)}`) + }, + { auto: true }, + ], + page: async ({ page }, use, testInfo) => { + const name = benchmarkName(testInfo) + const diagnostics = await observePerformancePage(page, name) + try { + await use(page) + } finally { + try { + await reportPerformancePage(name, diagnostics, testInfo) + } finally { + if (testInfo.status !== testInfo.expectedStatus) { + await testInfo.attach("performance-navigations", { + body: JSON.stringify(diagnostics.navigations, null, 2), + contentType: "application/json", + }) + } + } + } + }, +}) + +function benchmarkName(testInfo: TestInfo) { + return testInfo.titlePath.slice(1).join(" > ") +} + +export { expect } + +async function observePerformancePage(page: Page, name: string) { + const navigations: string[] = [] + const onNavigation = (frame: ReturnType) => { + if (frame === page.mainFrame()) navigations.push(frame.url()) + } + page.on("framenavigated", onNavigation) + const stopTrace = await startChromeTrace(page, name).catch((error) => { + page.off("framenavigated", onNavigation) + throw error + }) + let stopping: Promise | undefined + const diagnostics: PerformancePageDiagnostics = { + navigations, + stop() { + page.off("framenavigated", onNavigation) + return (stopping ??= stopTrace?.() ?? 
Promise.resolve(undefined)) + }, + } + pages.set(page, diagnostics) + return diagnostics +} + +export async function withBenchmarkPage( + browser: Browser, + name: string, + run: (page: Page) => Promise, + testInfo?: TestInfo, +) { + const context = await browser.newContext() + try { + const page = await context.newPage() + const diagnostics = await observePerformancePage(page, name) + try { + return await run(page) + } finally { + await reportPerformancePage(name, diagnostics, testInfo) + } + } finally { + await context.close() + } +} + +async function reportPerformancePage(name: string, diagnostics: PerformancePageDiagnostics, testInfo?: TestInfo) { + const trace = await diagnostics.stop() + console.log( + `BENCHMARK_PAGE ${JSON.stringify({ + schemaVersion: 2, + runID: process.env.OPENCODE_PERFORMANCE_RUN_ID, + name, + test: testInfo ? benchmarkName(testInfo) : undefined, + retry: testInfo?.retry, + repeatEachIndex: testInfo?.repeatEachIndex, + context: { + platform: process.platform, + trace, + selectorTrace: process.env.OPENCODE_PERFORMANCE_SELECTOR_TRACE === "1", + }, + navigations: diagnostics.navigations, + })}`, + ) +} + +export function benchmarkDiagnostics(page: Page) { + const diagnostics = pages.get(page) + if (!diagnostics) throw new Error("Performance diagnostics are not installed for this page") + return diagnostics +} diff --git a/packages/app/e2e/performance/chrome-trace.ts b/packages/app/e2e/performance/chrome-trace.ts new file mode 100644 index 000000000..343526e25 --- /dev/null +++ b/packages/app/e2e/performance/chrome-trace.ts @@ -0,0 +1,95 @@ +import type { CDPSession, Page } from "@playwright/test" +import path from "node:path" +import { mkdir, open, rename } from "node:fs/promises" +import { Buffer } from "node:buffer" +import { createHash, randomUUID } from "node:crypto" + +const categories = [ + "-*", + "devtools.timeline", + "v8.execute", + "disabled-by-default-devtools.timeline", + "disabled-by-default-devtools.timeline.frame", + 
"toplevel", + "blink.console", + "blink.user_timing", + "latencyInfo", + "disabled-by-default-devtools.timeline.stack", + "disabled-by-default-v8.cpu_profiler", +] + +export async function startChromeTrace(page: Page, name: string) { + const directory = process.env.OPENCODE_PERFORMANCE_TRACE_DIR + if (!directory) return + + const selectors = process.env.OPENCODE_PERFORMANCE_SELECTOR_TRACE === "1" + const file = await prepareChromeTrace(directory, name, selectors) + const session = await page.context().newCDPSession(page) + try { + await session.send("Tracing.start", { + transferMode: "ReturnAsStream", + traceConfig: { + excludedCategories: categories + .filter((category) => category.startsWith("-")) + .map((category) => category.slice(1)), + includedCategories: [ + ...categories.filter((category) => !category.startsWith("-")), + ...(selectors + ? ["disabled-by-default-blink.debug", "disabled-by-default-devtools.timeline.invalidationTracking"] + : []), + ], + }, + }) + } catch (error) { + await Promise.allSettled([session.detach()]) + throw error + } + let stopping: Promise | undefined + + return () => + (stopping ??= (async () => { + try { + const complete = new Promise<{ stream?: string; dataLossOccurred: boolean }>((resolve) => + session.once("Tracing.tracingComplete", resolve), + ) + await session.send("Tracing.end") + const result = await complete + if (!result.stream) throw new Error(`Chrome trace stream missing: ${file}`) + const partial = `${file}.partial` + await writeProtocolStream(session, result.stream, partial) + if (result.dataLossOccurred) throw new Error(`Chrome trace lost data; partial capture retained: ${partial}`) + await rename(partial, file) + return file + } finally { + await Promise.allSettled([session.detach()]) + } + })()) +} + +export async function prepareChromeTrace( + directory: string, + name: string, + selectors: boolean, + nonce = randomUUID().slice(0, 8), +) { + await mkdir(directory, { recursive: true }) + const run = 
process.env.OPENCODE_PERFORMANCE_RUN_ID ?? "manual" + const hash = createHash("sha256").update(name).digest("hex").slice(0, 8) + return path.join( + directory, + `${run}-${name.replace(/[^a-zA-Z0-9_-]/g, "-")}-${hash}-${nonce}${selectors ? "-selectors" : ""}.json`, + ) +} + +async function writeProtocolStream(session: CDPSession, handle: string, file: string) { + const output = await open(file, "wx") + try { + while (true) { + const chunk = await session.send("IO.read", { handle }) + await output.write(chunk.base64Encoded ? Buffer.from(chunk.data, "base64") : chunk.data) + if (chunk.eof) break + } + } finally { + await Promise.allSettled([output.close(), session.send("IO.close", { handle })]) + } +} diff --git a/packages/app/e2e/performance/playwright.config.ts b/packages/app/e2e/performance/playwright.config.ts new file mode 100644 index 000000000..d4793daee --- /dev/null +++ b/packages/app/e2e/performance/playwright.config.ts @@ -0,0 +1,20 @@ +import config from "../../playwright.config" + +const port = Number(process.env.PLAYWRIGHT_PORT ?? 
3000) +process.env.PLAYWRIGHT_SERVER_PORT = String(port) +process.env.OPENCODE_PERFORMANCE_RUN_ID ??= `${new Date().toISOString().replace(/[:.]/g, "-")}-${process.pid}` + +export default { + ...config, + testDir: ".", + testIgnore: "unit/**", + outputDir: "../test-results/performance", + fullyParallel: false, + workers: 1, + reporter: [["html", { outputFolder: "../playwright-report/performance", open: "never" }], ["line"]], + webServer: { + ...config.webServer, + command: `bun run build && bun run serve -- --host 0.0.0.0 --port ${port} --strictPort`, + reuseExistingServer: false, + }, +} diff --git a/packages/app/e2e/performance/playwright.uncapped.config.ts b/packages/app/e2e/performance/playwright.uncapped.config.ts new file mode 100644 index 000000000..9097c11f1 --- /dev/null +++ b/packages/app/e2e/performance/playwright.uncapped.config.ts @@ -0,0 +1,13 @@ +import config from "./playwright.config" + +export default { + ...config, + outputDir: "../test-results/performance-uncapped", + reporter: [["html", { outputFolder: "../playwright-report/performance-uncapped", open: "never" }], ["line"]], + use: { + ...config.use, + launchOptions: { + args: ["--disable-frame-rate-limit", "--disable-gpu-vsync"], + }, + }, +} diff --git a/packages/app/e2e/performance/timeline/session-tab-flash.spec.ts b/packages/app/e2e/performance/timeline/session-tab-flash.spec.ts new file mode 100644 index 000000000..741084751 --- /dev/null +++ b/packages/app/e2e/performance/timeline/session-tab-flash.spec.ts @@ -0,0 +1,49 @@ +import { benchmark, expect } from "../benchmark" +import { expectSessionTitle } from "../../utils/waits" +import { fixture } from "./session-timeline-stress.fixture" +import { + collectCachedRepaintTrace, + compressCachedRepaintTrace, + installCachedRepaintProbe, + waitForCachedRepaintWindow, +} from "./session-tab-repaint-probe" +import { waitForStableTimeline } from "./session-tab-switch-probe" +import { + installStressSessionTabs, + installTimelineSettings, + 
mockStressTimeline, + stressSessionHref, +} from "./timeline-test-helpers" + +benchmark("samples cached session repaint after the click", async ({ page, report }) => { + benchmark.setTimeout(120_000) + await mockStressTimeline(page) + await installStressSessionTabs(page) + await installTimelineSettings(page) + await page.goto(stressSessionHref(fixture.targetID)) + await expectSessionTitle(page, fixture.expected.targetTitle) + await waitForStableTimeline(page, fixture.expected.targetMessageIDs.at(-1)!) + await page + .locator(`[data-slot="titlebar-tabs"] a[href="${stressSessionHref(fixture.sourceID)}"]`) + .first() + .click() + await expectSessionTitle(page, fixture.expected.sourceTitle) + await waitForStableTimeline(page, fixture.expected.sourceMessageIDs.at(-1)!) + + await installCachedRepaintProbe(page, { + targetHref: stressSessionHref(fixture.targetID), + destination: fixture.messages[fixture.targetID].map((message) => message.info.id), + source: fixture.messages[fixture.sourceID].map((message) => message.info.id), + last: fixture.expected.targetMessageIDs.at(-1)!, + windowMs: 1_000, + }) + + await page + .locator(`[data-slot="titlebar-tabs"] a[href="${stressSessionHref(fixture.targetID)}"]`) + .first() + .click() + await Promise.all([expectSessionTitle(page, fixture.expected.targetTitle), waitForCachedRepaintWindow(page, 1_000)]) + const result = await collectCachedRepaintTrace(page) + report(compressCachedRepaintTrace(result)) + expect(result.samples.length).toBeGreaterThan(0) +}) diff --git a/packages/app/e2e/performance/timeline/session-tab-repaint-probe.ts b/packages/app/e2e/performance/timeline/session-tab-repaint-probe.ts new file mode 100644 index 000000000..862e080f1 --- /dev/null +++ b/packages/app/e2e/performance/timeline/session-tab-repaint-probe.ts @@ -0,0 +1,251 @@ +import type { Page } from "@playwright/test" + +type CachedRepaintTrace = { + timeOriginEpochMs: number + startedAtPerformanceMs: number + samples: { + observedAtMs: number + root: 
number | undefined + scrollTop: number + scrollHeight: number + bottomErrorPx: number | undefined + last: boolean + rows: { key: string | undefined; node: number; top: number; bottom: number }[] + mounted: number + center: string | undefined + destination: string[] + source: string[] + }[] + mutations: { observedAtMs: number; changed: { type: string; node: number }[] }[] + shifts: { occurredAtMs: number; value: number }[] + windowMs: number + running: boolean + stop: () => void +} + +export async function installCachedRepaintProbe( + page: Page, + input: { targetHref: string; destination: string[]; source: string[]; last: string; windowMs: number }, +) { + await page.evaluate(({ targetHref, destination, source, last, windowMs }) => { + const destinationIDs = new Set(destination) + const sourceIDs = new Set(source) + const nodeIDs = new WeakMap() + let nextNodeID = 1 + const id = (node: Node) => { + const current = nodeIDs.get(node) + if (current) return current + nodeIDs.set(node, nextNodeID) + return nextNodeID++ + } + const state: CachedRepaintTrace = { + timeOriginEpochMs: performance.timeOrigin, + startedAtPerformanceMs: 0, + samples: [], + mutations: [], + shifts: [], + windowMs, + running: false, + stop: () => {}, + } + const recordShifts = (entries: PerformanceEntry[]) => { + if (!state.running) return + state.shifts.push( + ...entries + .map((entry) => { + if ( + entry.startTime < state.startedAtPerformanceMs || + entry.startTime > state.startedAtPerformanceMs + state.windowMs + ) + return + return { + occurredAtMs: entry.startTime - state.startedAtPerformanceMs, + value: (entry as PerformanceEntry & { value: number }).value, + } + }) + .filter((entry): entry is { occurredAtMs: number; value: number } => entry !== undefined), + ) + } + const shiftObserver = new PerformanceObserver((entries) => recordShifts(entries.getEntries())) + shiftObserver.observe({ type: "layout-shift" }) + const recordMutations = (entries: MutationRecord[]) => { + if (!state.running) 
return + const observedAtMs = performance.now() - state.startedAtPerformanceMs + if (observedAtMs > state.windowMs) return + const changed = entries.flatMap((entry) => [ + ...[...entry.addedNodes].map((node) => ({ type: "add", node: id(node) })), + ...[...entry.removedNodes].map((node) => ({ type: "remove", node: id(node) })), + ]) + if (changed.length) state.mutations.push({ observedAtMs, changed }) + } + const mutationObserver = new MutationObserver(recordMutations) + mutationObserver.observe(document.documentElement, { childList: true, subtree: true }) + state.stop = () => { + recordShifts(shiftObserver.takeRecords()) + recordMutations(mutationObserver.takeRecords()) + state.running = false + shiftObserver.disconnect() + mutationObserver.disconnect() + } + const sample = () => { + if (!state.running) return + setTimeout(() => { + if (!state.running) return + const observedAtMs = performance.now() - state.startedAtPerformanceMs + if (observedAtMs > state.windowMs) return + const root = [...document.querySelectorAll(".scroll-view__viewport")].find((element) => + element.querySelector("[data-timeline-row]"), + ) + if (root) { + const view = root.getBoundingClientRect() + const rows = [...root.querySelectorAll("[data-timeline-key]")] + .map((element) => ({ + key: element.dataset.timelineKey, + node: id(element), + rect: element.getBoundingClientRect(), + })) + .filter((item) => item.rect.bottom > view.top && item.rect.top < view.bottom) + .map((item) => ({ + key: item.key, + node: item.node, + top: item.rect.top - view.top, + bottom: item.rect.bottom - view.top, + })) + const messages = [...root.querySelectorAll("[data-message-id]")] + .filter((element) => { + const rect = element.getBoundingClientRect() + return rect.bottom > view.top && rect.top < view.bottom + }) + .map((element) => element.dataset.messageId!) 
+ const spacer = root.querySelector('[data-timeline-row="bottom-spacer"]')?.getBoundingClientRect() + state.samples.push({ + observedAtMs, + root: id(root), + scrollTop: root.scrollTop, + scrollHeight: root.scrollHeight, + bottomErrorPx: spacer ? spacer.bottom - view.bottom : undefined, + last: messages.includes(last), + rows, + mounted: root.querySelectorAll("[data-timeline-key]").length, + center: document + .elementFromPoint(view.left + view.width / 2, view.top + view.height / 2) + ?.textContent?.slice(0, 80), + destination: messages.filter((messageID) => destinationIDs.has(messageID)), + source: messages.filter((messageID) => sourceIDs.has(messageID)), + }) + } else { + state.samples.push({ + observedAtMs, + root: undefined, + scrollTop: 0, + scrollHeight: 0, + bottomErrorPx: undefined, + last: false, + rows: [], + mounted: 0, + center: document.elementFromPoint(innerWidth / 2, innerHeight / 2)?.textContent?.slice(0, 80), + destination: [], + source: [], + }) + } + requestAnimationFrame(sample) + }, 0) + } + document.addEventListener( + "click", + (event) => { + const link = event.target instanceof Element ? 
event.target.closest("a") : undefined + if (link?.getAttribute("href") !== targetHref) return + state.startedAtPerformanceMs = performance.now() + state.running = true + requestAnimationFrame(sample) + }, + { capture: true, once: true }, + ) + ;(window as Window & { __cachedFlash?: CachedRepaintTrace }).__cachedFlash = state + }, input) +} + +export function layoutShiftSample(entry: Pick & { value: number }, started: number) { + if (entry.startTime < started) return + return { occurredAtMs: entry.startTime - started, value: entry.value } +} + +export async function waitForCachedRepaintWindow(page: Page, durationMs: number) { + await page.waitForFunction((durationMs) => { + const state = (window as Window & { __cachedFlash?: CachedRepaintTrace }).__cachedFlash + return !!state?.running && performance.now() - state.startedAtPerformanceMs >= durationMs + }, durationMs) +} + +export async function collectCachedRepaintTrace(page: Page) { + return page.evaluate(() => { + const state = (window as Window & { __cachedFlash?: CachedRepaintTrace }).__cachedFlash! + state.stop() + return state + }) +} + +export function summarizeCachedRepaintTrace(trace: CachedRepaintTrace) { + const roots = trace.samples.map((sample) => sample.root) + const bottomErrors = trace.samples.flatMap((sample) => + sample.bottomErrorPx === undefined ? [] : [Math.abs(sample.bottomErrorPx)], + ) + const category = (sample: CachedRepaintTrace["samples"][number]) => { + if (sample.source.length) return "source" + if (sample.root === undefined || sample.rows.length === 0) return "blank" + if (!sample.destination.length) return "unknown" + if (sample.last && Math.abs(sample.bottomErrorPx ?? Infinity) <= 1) return "correct" + return "wrongDestination" + } + return { + samples: trace.samples.length, + durationMs: trace.samples.at(-1)?.observedAtMs ?? 0, + firstSampleObservedMs: trace.samples[0]?.observedAtMs, + firstSampleCorrect: trace.samples[0] ? 
category(trace.samples[0]) === "correct" : false, + blankSamples: trace.samples.filter((sample) => category(sample) === "blank").length, + sourceSamples: trace.samples.filter((sample) => category(sample) === "source").length, + wrongDestinationSamples: trace.samples.filter((sample) => category(sample) === "wrongDestination").length, + unknownSamples: trace.samples.filter((sample) => category(sample) === "unknown").length, + rootChanges: roots.slice(1).filter((root, index) => root !== roots[index]).length, + mountedMin: trace.samples.length ? Math.min(...trace.samples.map((sample) => sample.mounted)) : 0, + mountedMax: Math.max(...trace.samples.map((sample) => sample.mounted)), + maxBottomErrorPx: Math.max(0, ...bottomErrors), + mutationBatches: trace.mutations.length, + addedNodes: trace.mutations.reduce( + (sum, batch) => sum + batch.changed.filter((change) => change.type === "add").length, + 0, + ), + removedNodes: trace.mutations.reduce( + (sum, batch) => sum + batch.changed.filter((change) => change.type === "remove").length, + 0, + ), + layoutShiftValueSum: trace.shifts.reduce((sum, shift) => sum + shift.value, 0), + maxLayoutShiftValue: Math.max(0, ...trace.shifts.map((shift) => shift.value)), + } +} + +export function compressCachedRepaintTrace(trace: CachedRepaintTrace) { + const samples: { + observedAtMs: number[] + state: Omit + }[] = [] + for (const sample of trace.samples) { + const { observedAtMs, ...state } = sample + const previous = samples.at(-1) + if (previous && JSON.stringify(previous.state) === JSON.stringify(state)) { + previous.observedAtMs.push(observedAtMs) + continue + } + samples.push({ observedAtMs: [observedAtMs], state }) + } + return { + timeOriginEpochMs: trace.timeOriginEpochMs, + startedAtPerformanceMs: trace.startedAtPerformanceMs, + windowMs: trace.windowMs, + summary: summarizeCachedRepaintTrace(trace), + samples, + mutations: trace.mutations, + shifts: trace.shifts, + } +} diff --git 
a/packages/app/e2e/performance/timeline/session-tab-switch-benchmark.spec.ts b/packages/app/e2e/performance/timeline/session-tab-switch-benchmark.spec.ts new file mode 100644 index 000000000..2e80d7038 --- /dev/null +++ b/packages/app/e2e/performance/timeline/session-tab-switch-benchmark.spec.ts @@ -0,0 +1,79 @@ +import type { Page } from "@playwright/test" +import { expectSessionTitle } from "../../utils/waits" +import { benchmark, expect, withBenchmarkPage } from "../benchmark" +import { fixture } from "./session-timeline-stress.fixture" +import { installStressSessionTabs, mockStressTimeline, stressSessionHref } from "./timeline-test-helpers" +import { measureSessionSwitch, waitForStableTimeline } from "./session-tab-switch-probe" + +type Result = Awaited> + +benchmark("benchmarks cold and hot session tab switching", async ({ browser, report }, testInfo) => { + benchmark.setTimeout(180_000) + const results = { cold: [] as Result[], hot: [] as Result[] } + for (const mode of ["cold", "hot"] as const) { + for (let run = 0; run < 5; run++) { + results[mode].push( + await withBenchmarkPage(browser, `session-tab-switch-${mode}-${run}`, (page) => trial(page, mode), testInfo), + ) + } + } + report({ results, summary: summarize(results) }) +}) + +async function trial(page: Page, mode: "cold" | "hot") { + await mockStressTimeline(page) + await installStressSessionTabs(page) + if (mode === "hot") { + await page.goto(stressSessionHref(fixture.targetID)) + await expectSessionTitle(page, fixture.expected.targetTitle) + await waitForStableTimeline(page, fixture.expected.targetMessageIDs.at(-1)!) + await switchSession(page, fixture.sourceID, fixture.expected.sourceTitle) + } else { + await page.goto(stressSessionHref(fixture.sourceID)) + await expectSessionTitle(page, fixture.expected.sourceTitle) + } + await waitForStableTimeline(page, fixture.expected.sourceMessageIDs.at(-1)!) 
+ + const destinationIDs = fixture.messages[fixture.targetID].map((message) => message.info.id) + const sourceIDs = fixture.messages[fixture.sourceID].map((message) => message.info.id) + const lastID = fixture.expected.targetMessageIDs.at(-1)! + const href = stressSessionHref(fixture.targetID) + const result = await measureSessionSwitch(page, { + destinationIDs, + sourceIDs, + lastID, + href, + switch: () => switchSession(page, fixture.targetID, fixture.expected.targetTitle), + }) + return result +} + +function summarize(results: Record<"cold" | "hot", Result[]>) { + const stats = (values: (number | null)[]) => { + const sorted = values.filter((value): value is number => value !== null).sort((a, b) => a - b) + return { + min: sorted[0] ?? null, + median: sorted[Math.floor(sorted.length / 2)] ?? null, + max: sorted.at(-1) ?? null, + missing: values.length - sorted.length, + } + } + return Object.fromEntries( + Object.entries(results).map(([mode, values]) => [ + mode, + { + firstDestinationObservedMs: stats(values.map((value) => value.firstDestinationObservedMs)), + firstCorrectObservedMs: stats(values.map((value) => value.firstCorrectObservedMs)), + stableObservedMs: stats(values.map((value) => value.stableObservedMs)), + }, + ]), + ) +} + +async function switchSession(page: Page, sessionID: string, title: string) { + const href = stressSessionHref(sessionID) + const tab = page.locator(`[data-slot="titlebar-tabs"] a[href="${href}"]`).first() + await expect(tab).toBeVisible() + await tab.click() + await expectSessionTitle(page, title) +} diff --git a/packages/app/e2e/performance/timeline/session-tab-switch-metrics.ts b/packages/app/e2e/performance/timeline/session-tab-switch-metrics.ts new file mode 100644 index 000000000..e315c2ad4 --- /dev/null +++ b/packages/app/e2e/performance/timeline/session-tab-switch-metrics.ts @@ -0,0 +1,46 @@ +export type SessionSwitchSample = { + observedAtMs: number + destination: string[] + source: string[] + hasVisibleRows: boolean + 
last: boolean + bottomErrorPx?: number +} + +export function classifySessionSwitch(samples: SessionSwitchSample[]) { + const firstDestination = samples.findIndex((sample) => sample.destination.length > 0) + const firstCorrect = samples.findIndex(isCorrectDestination) + const stable = samples.findIndex((_, index) => isStableSessionSwitch(samples.slice(index, index + 3))) + return { + firstDestinationObservedMs: samples[firstDestination]?.observedAtMs ?? null, + firstCorrectObservedMs: samples[firstCorrect]?.observedAtMs ?? null, + stableObservedMs: samples[stable + 2]?.observedAtMs ?? null, + wrongDestinationSamples: samples + .slice(firstDestination) + .filter((sample) => sample.destination.length > 0 && !sample.last).length, + blankSamples: samples.filter((sample) => !sample.hasVisibleRows).length, + unknownSamples: samples.filter( + (sample) => sample.hasVisibleRows && sample.destination.length === 0 && sample.source.length === 0, + ).length, + sourceSamples: samples.filter((sample) => sample.source.length > 0).length, + } +} + +export function isCorrectDestination(sample: SessionSwitchSample) { + return ( + sample.destination.length > 0 && + sample.source.length === 0 && + sample.last && + Math.abs(sample.bottomErrorPx ?? Infinity) <= 1 + ) +} + +export function isStableSessionSwitch(samples: SessionSwitchSample[]) { + return samples.length === 3 && samples.every(isCorrectDestination) +} + +export function isStableDestination(samples: Pick[]) { + return ( + samples.length === 3 && samples.every((sample) => sample.last && Math.abs(sample.bottomErrorPx ?? 
Infinity) <= 1) + ) +} diff --git a/packages/app/e2e/performance/timeline/session-tab-switch-probe.ts b/packages/app/e2e/performance/timeline/session-tab-switch-probe.ts new file mode 100644 index 000000000..14f9d2d00 --- /dev/null +++ b/packages/app/e2e/performance/timeline/session-tab-switch-probe.ts @@ -0,0 +1,152 @@ +import { expect, type Page } from "@playwright/test" +import { classifySessionSwitch, isStableDestination, type SessionSwitchSample } from "./session-tab-switch-metrics" + +type SessionSwitchProbe = { + samples: SessionSwitchSample[] + stop: () => void +} + +async function installSessionSwitchProbe( + page: Page, + input: { destinationIDs: string[]; sourceIDs: string[]; lastID: string; href: string }, +) { + await page.evaluate(({ destinationIDs, sourceIDs, lastID, href }) => { + const destination = new Set(destinationIDs) + const source = new Set(sourceIDs) + const samples: SessionSwitchSample[] = [] + let started: number | undefined + let running = true + const sample = () => { + if (!running || started === undefined) return + setTimeout(() => { + if (!running || started === undefined) return + const observedAtMs = performance.now() - started + const root = [...document.querySelectorAll(".scroll-view__viewport")].find((element) => + element.querySelector("[data-timeline-row]"), + ) + if (root) { + const view = root.getBoundingClientRect() + const visible = [...root.querySelectorAll("[data-message-id]")] + .filter((element) => { + const rect = element.getBoundingClientRect() + return rect.bottom > view.top && rect.top < view.bottom + }) + .map((element) => element.dataset.messageId!) 
+ const hasVisibleRows = [...root.querySelectorAll("[data-timeline-key]")].some((element) => { + const rect = element.getBoundingClientRect() + return rect.bottom > view.top && rect.top < view.bottom + }) + const spacer = root.querySelector('[data-timeline-row="bottom-spacer"]')?.getBoundingClientRect() + samples.push({ + observedAtMs, + destination: visible.filter((id) => destination.has(id)), + source: visible.filter((id) => source.has(id)), + hasVisibleRows, + last: visible.includes(lastID), + bottomErrorPx: spacer ? spacer.bottom - view.bottom : undefined, + }) + } else { + samples.push({ observedAtMs, destination: [], source: [], hasVisibleRows: false, last: false }) + } + requestAnimationFrame(sample) + }, 0) + } + document.addEventListener( + "click", + (event) => { + const link = event.target instanceof Element ? event.target.closest("a") : undefined + if (link?.getAttribute("href") !== href) return + started = performance.now() + requestAnimationFrame(sample) + }, + { capture: true, once: true }, + ) + ;(window as Window & { __sessionSwitchProbe?: SessionSwitchProbe }).__sessionSwitchProbe = { + samples, + stop: () => { + running = false + }, + } + }, input) +} + +async function waitForStableSessionSwitch(page: Page) { + await page.waitForFunction(() => { + const samples = (window as Window & { __sessionSwitchProbe?: SessionSwitchProbe }).__sessionSwitchProbe?.samples + if (!samples) return false + return samples.some((_, index) => { + const stable = samples.slice(index, index + 3) + return ( + stable.length === 3 && + stable.every( + (sample) => + sample.destination.length > 0 && + sample.source.length === 0 && + sample.last && + Math.abs(sample.bottomErrorPx ?? Infinity) <= 1, + ) + ) + }) + }) +} + +async function collectSessionSwitchResult(page: Page) { + const samples = await page.evaluate(() => { + const probe = (window as Window & { __sessionSwitchProbe?: SessionSwitchProbe }).__sessionSwitchProbe! 
+ probe.stop() + return probe.samples + }) + return classifySessionSwitch(samples) +} + +export async function measureSessionSwitch( + page: Page, + input: { destinationIDs: string[]; sourceIDs: string[]; lastID: string; href: string; switch: () => Promise }, +) { + const { switch: run, ...probe } = input + await installSessionSwitchProbe(page, probe) + await run() + await waitForStableSessionSwitch(page) + return collectSessionSwitchResult(page) +} + +export async function waitForStableTimeline(page: Page, lastID: string) { + const samples: Pick[] = [] + await expect + .poll( + async () => { + samples.push( + await page.evaluate( + (lastID) => + new Promise>((resolve) => { + requestAnimationFrame(() => + setTimeout(() => { + const root = [...document.querySelectorAll(".scroll-view__viewport")].find((element) => + element.querySelector("[data-timeline-row]"), + ) + if (!root) { + resolve({ last: false }) + return + } + const view = root.getBoundingClientRect() + const last = [...root.querySelectorAll("[data-message-id]")].some((element) => { + if (element.dataset.messageId !== lastID) return false + const rect = element.getBoundingClientRect() + return rect.bottom > view.top && rect.top < view.bottom + }) + const spacer = root + .querySelector('[data-timeline-row="bottom-spacer"]') + ?.getBoundingClientRect() + resolve({ last, bottomErrorPx: spacer ? 
spacer.bottom - view.bottom : undefined }) + }, 0), + ) + }), + lastID, + ), + ) + return isStableDestination(samples.slice(-3)) + }, + { timeout: 30_000, intervals: [0] }, + ) + .toBe(true) +} diff --git a/packages/app/e2e/performance/timeline/session-timeline-benchmark.fixture.ts b/packages/app/e2e/performance/timeline/session-timeline-benchmark.fixture.ts new file mode 100644 index 000000000..6353416d5 --- /dev/null +++ b/packages/app/e2e/performance/timeline/session-timeline-benchmark.fixture.ts @@ -0,0 +1,488 @@ +import { base64Encode } from "@opencode-ai/core/util/encode" +import type { Page } from "@playwright/test" +import { mockOpenCodeServer } from "../../utils/mock-server" +import { expectAppVisible, expectSessionTitle } from "../../utils/waits" +import { expect } from "../benchmark" + +const directory = "C:/OpenCode/TimelineStateRegression" +const projectID = "proj_timeline_state_regression" +const sessionID = "ses_timeline_state_regression" +const userMessageID = "msg_user_regression" +const assistantMessageID = "msg_assistant_regression" +const editPartID = "prt_0001_edit" +export const textPartID = "prt_9999_text" +const title = "Timeline collapse state regression" +const model = { providerID: "opencode", modelID: "claude-opus-4-6", variant: "max" } + +type EventPayload = { + directory: string + payload: Record +} + +const userMessage = { + info: { + id: userMessageID, + sessionID, + role: "user", + time: { created: 1700000000000 }, + summary: { diffs: [] }, + agent: "build", + model, + }, + parts: [ + { + id: "prt_user_text", + sessionID, + messageID: userMessageID, + type: "text", + text: "Please edit the file.", + }, + ], +} + +const editPart = { + id: editPartID, + sessionID, + messageID: assistantMessageID, + type: "tool", + callID: "call_edit_regression", + tool: "edit", + state: { + status: "completed", + input: { filePath: "src/regression.ts" }, + output: "Edited src/regression.ts", + title: "src/regression.ts", + metadata: { + filediff: { + 
file: "src/regression.ts", + additions: 1, + deletions: 1, + before: "export const value = 'before'\n", + after: "export const value = 'after'\n", + }, + diff: "diff --git a/src/regression.ts b/src/regression.ts\n-export const value = 'before'\n+export const value = 'after'\n", + }, + time: { start: 1700000001000, end: 1700000002000 }, + }, +} + +const streamedTextPart = { + id: textPartID, + sessionID, + messageID: assistantMessageID, + type: "text", + text: "Streaming added a later assistant text part.", +} + +const assistantMessage = { + info: { + id: assistantMessageID, + sessionID, + role: "assistant", + time: { created: 1700000001000 }, + parentID: userMessageID, + modelID: model.modelID, + providerID: model.providerID, + mode: "build", + agent: "build", + path: { cwd: directory, root: directory }, + cost: 0.01, + tokens: { input: 100, output: 200, reasoning: 0, cache: { read: 0, write: 0 } }, + variant: "max", + }, + parts: [editPart], +} + +export async function setupTimelineBenchmark(page: Page, options: { historyTurns: number; eventBatch: number }) { + const events: EventPayload[] = [] + let eventBatch = options.eventBatch + await mockOpenCodeServer(page, { + directory, + project: project(), + provider: provider(), + sessions: [session()], + pageMessages: () => ({ + items: [ + ...Array.from({ length: options.historyTurns }, (_, index) => performanceTurn(index)).flat(), + userMessage, + assistantMessage, + ], + }), + events: () => events.splice(0, eventBatch), + eventRetry: 16, + }) + await page.addInitScript(() => { + localStorage.setItem( + "settings.v3", + JSON.stringify({ + general: { + editToolPartsExpanded: true, + shellToolPartsExpanded: true, + showReasoningSummaries: true, + showSessionProgressBar: true, + }, + }), + ) + }) + await page.setViewportSize({ width: 1366, height: 768 }) + const scroller = page.locator(".scroll-view__viewport", { has: page.locator("[data-timeline-row]") }) + const text = 
page.locator(`[data-timeline-part-id="${textPartID}"]`).first() + await page.goto(`/${base64Encode(directory)}/session/${sessionID}`) + await expectSessionTitle(page, title) + await expectAppVisible(scroller) + return { + scroller, + text, + transport: { + enqueue(payload: EventPayload | EventPayload[]) { + events.push(...(Array.isArray(payload) ? payload : [payload])) + }, + pendingCount() { + return events.length + }, + releaseAll() { + eventBatch = events.length + }, + }, + async scrollToBottom() { + await scroller.evaluate((element) => { + element.scrollTop = element.scrollHeight + }) + }, + async waitForStableGeometry() { + await expect + .poll(() => scroller.evaluate((element) => element.scrollHeight - element.clientHeight - element.scrollTop)) + .toBeLessThanOrEqual(1) + await page.waitForFunction((partID) => { + const root = [...document.querySelectorAll(".scroll-view__viewport")].find((element) => + element.querySelector(`[data-timeline-part-id="${partID}"]`), + ) + if (!root) return false + return new Promise((resolve) => { + const height = root.scrollHeight + requestAnimationFrame(() => + requestAnimationFrame(() => + resolve(root.scrollHeight === height && root.scrollHeight - root.clientHeight - root.scrollTop <= 1), + ), + ) + }) + }, textPartID) + }, + } +} + +export function buildInitialStreamEvent(deltaCount: number): EventPayload { + return { + directory, + payload: { + type: "message.part.updated", + properties: { + part: { + ...streamedTextPart, + text: `Streaming${streamChunk(0, deltaCount + 1)}\n\n\`\`\`ts\nconst initial = true\n\`\`\``, + }, + }, + }, + } +} + +export function buildStreamDeltaEvents(deltaCount: number): EventPayload[] { + return Array.from({ length: deltaCount }, (_, index) => ({ + directory, + payload: { + type: "message.part.delta", + properties: { + messageID: assistantMessageID, + partID: textPartID, + field: "text", + delta: streamChunk(index + 1, deltaCount + 1), + }, + }, + })) +} + +function performanceTurn(index: 
number) { + const suffix = String(index).padStart(4, "0") + const userID = `msg_0000_${suffix}_a_user` + const assistantID = `msg_0000_${suffix}_b_assistant` + const before = historicalSource(index, false) + const after = historicalSource(index, true) + const parts = [ + ...(index % 5 === 0 + ? [ + { + id: `prt_0000_${suffix}_reasoning`, + sessionID, + messageID: assistantID, + type: "reasoning", + text: `Reviewing the existing implementation. ${"constraint analysis ".repeat(20)}`, + time: { start: 1690000001000 + index * 2_000, end: 1690000001200 + index * 2_000 }, + }, + ] + : []), + { + id: `prt_0000_${suffix}_assistant`, + sessionID, + messageID: assistantID, + type: "text", + text: historicalMarkdown(index), + }, + ...(index % 8 === 0 + ? [ + { + id: `prt_0000_${suffix}_edit`, + sessionID, + messageID: assistantID, + type: "tool", + callID: `call_0000_${suffix}_edit`, + tool: "edit", + state: { + status: "completed", + input: { filePath: `src/history-${index}.ts` }, + output: `Edited src/history-${index}.ts`, + title: `src/history-${index}.ts`, + metadata: { + filediff: { file: `src/history-${index}.ts`, additions: 48, deletions: 48, before, after }, + }, + time: { start: 1690000001200 + index * 2_000, end: 1690000001400 + index * 2_000 }, + }, + }, + ] + : []), + ...(index % 12 === 0 + ? [ + { + id: `prt_0000_${suffix}_write`, + sessionID, + messageID: assistantID, + type: "tool", + callID: `call_0000_${suffix}_write`, + tool: "write", + state: { + status: "completed", + input: { filePath: `src/generated-${index}.tsx`, content: after }, + output: `Wrote src/generated-${index}.tsx`, + title: `src/generated-${index}.tsx`, + metadata: { + filediff: { file: `src/generated-${index}.tsx`, additions: 32, deletions: 0, before: "", after }, + }, + time: { start: 1690000001400 + index * 2_000, end: 1690000001500 + index * 2_000 }, + }, + }, + ] + : []), + ...(index % 16 === 0 + ? 
[ + { + id: `prt_0000_${suffix}_patch`, + sessionID, + messageID: assistantID, + type: "tool", + callID: `call_0000_${suffix}_patch`, + tool: "apply_patch", + state: { + status: "completed", + input: { patchText: realisticPatch(index) }, + output: "Success. Updated src/components/SessionCard.tsx", + title: "src/components/SessionCard.tsx", + metadata: { + files: [ + { + filePath: "src/components/SessionCard.tsx", + relativePath: "src/components/SessionCard.tsx", + type: "update", + additions: 8, + deletions: 3, + patch: realisticPatch(index), + before, + after, + }, + ], + }, + time: { start: 1690000001500 + index * 2_000, end: 1690000001700 + index * 2_000 }, + }, + }, + ] + : []), + ] + return [ + { + info: { + id: userID, + sessionID, + role: "user", + time: { created: 1690000000000 + index * 2_000 }, + summary: { diffs: [] }, + agent: "build", + model, + }, + parts: [ + { + id: `prt_0000_${suffix}_user`, + sessionID, + messageID: userID, + type: "text", + text: `Historical prompt ${index}`, + }, + ], + }, + { + info: { + id: assistantID, + sessionID, + role: "assistant", + time: { created: 1690000001000 + index * 2_000, completed: 1690000001500 + index * 2_000 }, + parentID: userID, + modelID: model.modelID, + providerID: model.providerID, + mode: "build", + agent: "build", + path: { cwd: directory, root: directory }, + cost: 0.01, + tokens: { input: 100, output: 200, reasoning: 0, cache: { read: 0, write: 0 } }, + variant: "max", + finish: "stop", + }, + parts, + }, + ] +} + +function historicalMarkdown(index: number) { + const code = `import { For, Show, createSignal } from "solid-js" + +type SessionRow = { id: string; title: string; active: boolean } + +export function SessionList(props: { rows: SessionRow[] }) { + const [selected, setSelected] = createSignal() + return ( +
+ {(row) => ( + + )} +
+ ) +}` + return `## Session renderer review ${index} + +The active session keeps **semantic row identity** while reconciling measured content. See [Solid documentation](https://docs.solidjs.com/) and the inline \`measureElement(node)\` call. + +| Concern | Current behavior | Verification | +| --- | --- | --- | +| streaming | appends Markdown blocks | painted frames | +| geometry | anchors visible rows | DOM coordinates | +| tools | preserves expanded state | keyed remount probe | + +> Long sessions combine Markdown, syntax highlighting, tool output, and asynchronously rendered diffs. + +${index % 4 === 0 ? `\`\`\`tsx\n${code}\n\`\`\`\n\n\`\`\`bash\nbun typecheck\nbun test --preload ./happydom.ts ./src/pages/session\ngit diff --check\n\`\`\`` : "- preserve the viewport anchor\n- avoid replacing stable Markdown nodes\n- process provider deltas without blocking input"}` +} + +function historicalSource(index: number, updated: boolean) { + const method = updated ? "toLocaleUpperCase(props.locale)" : "toUpperCase()" + const limit = updated ? 24 : 20 + return `import { createMemo, For } from "solid-js" + +type Message = { + id: string + role: "user" | "assistant" + text: string + tokens: { input: number; output: number } +} + +export function MessageSummary(props: { messages: Message[]; locale: string }) { + const visible = createMemo(() => props.messages.filter((message) => message.text.trim()).slice(-${limit})) + const total = createMemo(() => visible().reduce((sum, message) => sum + message.tokens.output, 0)) + return ( +
+
{total().toLocaleString(props.locale)} output tokens
+ {(message) =>

{message.text.${method}}

}
+
+ ) +} +` +} + +function realisticPatch(index: number) { + return `*** Begin Patch +*** Update File: src/components/SessionCard.tsx +@@ +-const title = props.session.title.toUpperCase() +-const messages = props.messages.slice(-20) ++const title = props.session.title.toLocaleUpperCase(props.locale) ++const messages = props.messages.filter((message) => message.text.trim()).slice(-24) ++const outputTokens = messages.reduce((sum, message) => sum + message.tokens.output, 0) +@@ +-

{title}

++

{title}

++ {outputTokens.toLocaleString(props.locale)} output tokens +*** End Patch` +} + +export function streamChunk(index: number, count: number) { + if (index === 0) return `\n\n## Implementation plan\n\nStreaming **bold analysis` + if (index === count - 1) + return `\n\`\`\`\n\n## Verification\n\n- **Typecheck:** passed\n- **Timeline geometry:** stable\n- **Streaming output:** benchmark-complete ` + + const section = Math.floor(index / 18) + 1 + const fragments = [ + ` continues across three`, + ` or four word`, + ` provider deltas and`, + ` closes in this fragment**. \n\n`, + `| Concern | State`, + ` | Verification |\n|`, + ` --- | ---`, + ` | --- |\n|`, + ` markdown | incremental |`, + ` painted frames | \n\n`, + `\`\`\`tsx\nconst row: SessionRow`, + ` = rows[index] ??`, + ` fallback\nconst title =`, + ` row.title.toLocaleUpperCase(locale)\n`, + `const selected = createMemo(()`, + ` => row.id ===`, + ` activeID()) // stream-${index}\n`, + `// stream-${index}\n\`\`\`\n\n### Iteration ${section}\n\nStreaming **bold analysis`, + ] + return fragments[(index - 1) % fragments.length]! 
+} + +function project() { + return { + id: projectID, + worktree: directory, + vcs: "git", + name: "timeline-state-regression", + time: { created: 1700000000000, updated: 1700000000000 }, + sandboxes: [], + } +} + +function session() { + return { + id: sessionID, + slug: "timeline-state-regression", + projectID, + directory, + title, + version: "dev", + time: { created: 1700000000000, updated: 1700000000000 }, + } +} + +function provider() { + return { + all: [ + { + id: "opencode", + name: "OpenCode", + models: { "claude-opus-4-6": { id: "claude-opus-4-6", name: "Claude Opus 4.6", limit: { context: 200_000 } } }, + }, + ], + connected: ["opencode"], + default: { providerID: "opencode", modelID: "claude-opus-4-6" }, + } +} diff --git a/packages/app/e2e/performance/timeline/session-timeline-benchmark.spec.ts b/packages/app/e2e/performance/timeline/session-timeline-benchmark.spec.ts new file mode 100644 index 000000000..64d79283f --- /dev/null +++ b/packages/app/e2e/performance/timeline/session-timeline-benchmark.spec.ts @@ -0,0 +1,85 @@ +import { benchmark, benchmarkDiagnostics, expect } from "../benchmark" +import { + buildInitialStreamEvent, + buildStreamDeltaEvents, + setupTimelineBenchmark, + textPartID, +} from "./session-timeline-benchmark.fixture" +import { startTimelineProfile } from "./session-timeline-profile" +import { + collectTimelineStreamMetrics, + installTimelineStreamProbe, + startTimelineStreamProbe, +} from "./session-timeline-stream-probe" + +benchmark.describe("performance: session timeline streaming", () => { + benchmark("streams assistant text without remounting or oscillating", async ({ page, report }) => { + benchmark.setTimeout(480_000) + const cpuThrottle = Number(process.env.TIMELINE_CPU_THROTTLE ?? 30) + const deltaCount = Number(process.env.TIMELINE_DELTA_COUNT ?? 160) + const historyTurns = Number(process.env.TIMELINE_HISTORY_TURNS ?? 320) + const eventBatch = Number(process.env.TIMELINE_EVENT_BATCH ?? 
1) + const minimal = process.env.TIMELINE_MINIMAL === "1" + const profileCPU = process.env.TIMELINE_CPU_PROFILE === "1" + const profileVisual = !minimal && profileCPU && process.env.TIMELINE_VISUAL_PROFILE !== "0" + const fixture = await setupTimelineBenchmark(page, { + historyTurns, + eventBatch, + }) + + fixture.transport.enqueue(buildInitialStreamEvent(deltaCount)) + const contentStart = performance.now() + await expect(fixture.text).toBeVisible() + await expect(fixture.text).toContainText("Implementation plan") + const initialContentObservedMs = performance.now() - contentStart + await fixture.scrollToBottom() + await fixture.waitForStableGeometry() + + const profile = await startTimelineProfile(page, { cpuThrottle, profileCPU }) + await installTimelineStreamProbe(page, { textPartID, finalIndex: deltaCount, profileVisual, minimal }) + const deltas = buildStreamDeltaEvents(deltaCount) + await startTimelineStreamProbe(page) + fixture.transport.enqueue(deltas) + + await page.waitForFunction( + (finalIndex) => + ( + window as Window & { + __timelineStreamBenchmark?: { applied: { index: number }[] } + } + ).__timelineStreamBenchmark?.applied.some((value) => value.index === finalIndex), + deltaCount, + { timeout: 420_000 }, + ) + await expect(fixture.text).toContainText("benchmark-complete") + await expect(fixture.text).toContainText("Streaming") + await fixture.waitForStableGeometry() + const metrics = await collectTimelineStreamMetrics(page, { + textPartID, + finalIndex: deltaCount, + navigations: benchmarkDiagnostics(page).navigations, + }) + const delivered = deltas.length - fixture.transport.pendingCount() + await profile.stop() + + report( + { + endToEndInitialContentObservedMs: initialContentObservedMs, + ...metrics, + deliveredDeltas: delivered, + pendingDeltas: fixture.transport.pendingCount(), + }, + { + cpuThrottle, + profileCPU, + profileVisual, + minimal, + queuedDeltas: deltas.length, + historyTurns, + eventBatch, + }, + ) + + await profile.reset() + }) 
+}) diff --git a/packages/app/e2e/performance/timeline/session-timeline-profile.ts b/packages/app/e2e/performance/timeline/session-timeline-profile.ts new file mode 100644 index 000000000..e1689498c --- /dev/null +++ b/packages/app/e2e/performance/timeline/session-timeline-profile.ts @@ -0,0 +1,40 @@ +import type { CDPSession, Page } from "@playwright/test" + +export async function startTimelineProfile(page: Page, options: { cpuThrottle: number; profileCPU: boolean }) { + const cdp = await page.context().newCDPSession(page) + if (options.cpuThrottle > 1) await cdp.send("Emulation.setCPUThrottlingRate", { rate: options.cpuThrottle }) + if (options.profileCPU) { + await cdp.send("Profiler.enable") + await cdp.send("Profiler.setSamplingInterval", { interval: 100 }) + await cdp.send("Profiler.start") + } + return { + async stop() { + if (!options.profileCPU) return + const result = await cdp.send("Profiler.stop") + const self = new Map() + result.profile.samples?.forEach((id, index) => { + const duration = (result.profile.timeDeltas?.[index] ?? 0) / 1_000 + self.set(id, (self.get(id) ?? 0) + duration) + }) + console.log( + "timeline cpu profile", + JSON.stringify( + result.profile.nodes + .map((node) => ({ + function: node.callFrame.functionName || "(anonymous)", + url: node.callFrame.url, + line: node.callFrame.lineNumber + 1, + selfMs: self.get(node.id) ?? 
0, + })) + .filter((node) => node.selfMs > 1) + .sort((a, b) => b.selfMs - a.selfMs) + .slice(0, 40), + ), + ) + }, + async reset() { + if (options.cpuThrottle > 1) await cdp.send("Emulation.setCPUThrottlingRate", { rate: 1 }) + }, + } +} diff --git a/packages/app/e2e/performance/timeline/session-timeline-stream-probe.ts b/packages/app/e2e/performance/timeline/session-timeline-stream-probe.ts new file mode 100644 index 000000000..a3cd698cd --- /dev/null +++ b/packages/app/e2e/performance/timeline/session-timeline-stream-probe.ts @@ -0,0 +1,547 @@ +import type { Page } from "@playwright/test" + +const STREAM_MARKER_PATTERN = "stream-(\\d+)" +const STREAM_FRAGMENT_COUNT = 18 + +type TimelineProbeState = { + started: number + ended: number + profileVisual: boolean + minimal: boolean + frames: number[] + frameAt: number[] + applied: { at: number; index: number }[] + geometry: { + scrollTop: number + scrollHeight: number + clientHeight: number + distance: number + virtualHeight: number + headerHeight: number + }[] + blanks: number + longTasks: number[] + layoutShifts: number[] + visibleMounts: number + visibleUnmounts: number + visibleRows: Set + visibleSubtreeMounts: string[] + visibleSubtreeUnmounts: string[] + visibleSubtreeReplacements: number + visibleSubtreeDropouts: string[] + visibleSubtrees: Map + subtreeKeys: WeakMap + maxOverlap: number + maxGap: number + maxPartTopMovement: number + previousPartTop: number + slowFrames: { + duration: number + index: number + phase: "stream" | "boundary" | "complete" | "unknown" + tokenSpans: number + blocks: number + codeBlocks: number + height: number + distance: number + }[] + scroll: { + calls: number + callNoops: number + sameFrameCalls: number + assignments: number + assignmentNoops: number + lastCallFrame: number + frame: number + } + row: HTMLElement + markdown: HTMLElement + running: boolean + previous: number + cleanup: () => void + start: () => void +} + +export async function installTimelineStreamProbe( + page: 
Page, + options: { textPartID: string; finalIndex: number; profileVisual: boolean; minimal: boolean }, +) { + await page.evaluate( + ({ textPartID, finalIndex, profileVisual, minimal, markerPattern, fragmentCount }) => { + const part = document.querySelector(`[data-timeline-part-id="${textPartID}"]`) + const row = part?.closest("[data-timeline-row]") + const markdown = part?.querySelector('[data-component="markdown"]') + const root = part?.closest(".scroll-view__viewport") + if (!part || !row || !markdown || !root) throw new Error("missing streaming benchmark nodes") + const viewport = root.getBoundingClientRect() + const state: TimelineProbeState = { + started: 0, + ended: Infinity, + profileVisual, + minimal, + frames: [], + frameAt: [], + applied: [], + geometry: [], + blanks: 0, + longTasks: [], + layoutShifts: [], + visibleMounts: 0, + visibleUnmounts: 0, + visibleRows: new Set( + [...root.querySelectorAll("[data-timeline-key]")].filter((element) => { + const rect = element.getBoundingClientRect() + return rect.bottom > viewport.top && rect.top < viewport.bottom + }), + ), + visibleSubtreeMounts: [], + visibleSubtreeUnmounts: [], + visibleSubtreeReplacements: 0, + visibleSubtreeDropouts: [], + visibleSubtrees: new Map(), + subtreeKeys: new WeakMap(), + maxOverlap: 0, + maxGap: 0, + maxPartTopMovement: 0, + previousPartTop: part.getBoundingClientRect().top, + slowFrames: [], + scroll: { + calls: 0, + callNoops: 0, + sameFrameCalls: 0, + assignments: 0, + assignmentNoops: 0, + lastCallFrame: -1, + frame: 0, + }, + row, + markdown, + running: false, + previous: 0, + cleanup: () => {}, + start: () => {}, + } + ;(window as Window & { __timelineStreamBenchmark?: TimelineProbeState }).__timelineStreamBenchmark = state + const scrollTo = Element.prototype.scrollTo + const scrollTop = Object.getOwnPropertyDescriptor(Element.prototype, "scrollTop")! 
+ if (profileVisual) { + Element.prototype.scrollTo = function (...args) { + state.scroll.calls += 1 + const top = typeof args[0] === "object" ? args[0]?.top : args[1] + if (typeof top === "number") { + const target = Math.min(top, this.scrollHeight - this.clientHeight) + if (Math.abs(this.scrollTop - target) < 1) state.scroll.callNoops += 1 + } + if (state.scroll.lastCallFrame === state.scroll.frame) state.scroll.sameFrameCalls += 1 + state.scroll.lastCallFrame = state.scroll.frame + return scrollTo.apply(this, args) + } + Object.defineProperty(Element.prototype, "scrollTop", { + configurable: true, + get: scrollTop.get, + set(value) { + state.scroll.assignments += 1 + if (Math.abs(this.scrollTop - value) < 1) state.scroll.assignmentNoops += 1 + scrollTop.set!.call(this, value) + }, + }) + } + + const recordLongTasks = (entries: PerformanceEntry[]) => { + if (!state.running) return + state.longTasks.push( + ...entries + .filter((entry) => entry.startTime >= state.started && entry.startTime <= state.ended) + .map((entry) => entry.duration), + ) + } + const longTaskObserver = new PerformanceObserver((list) => recordLongTasks(list.getEntries())) + longTaskObserver.observe({ type: "longtask" }) + const recordLayoutShifts = (entries: PerformanceEntry[]) => { + if (!state.running) return + state.layoutShifts.push( + ...entries + .map((entry) => { + const shift = entry as LayoutShiftEntry + if (shift.startTime < state.started || shift.hadRecentInput) return + return shift.value + }) + .filter((value): value is number => value !== undefined), + ) + } + const layoutShiftObserver = profileVisual + ? 
new PerformanceObserver((list) => recordLayoutShifts(list.getEntries())) + : undefined + layoutShiftObserver?.observe({ type: "layout-shift", buffered: true }) + + const visible = (element: Element) => { + const rect = element.getBoundingClientRect() + const viewport = root.getBoundingClientRect() + const style = getComputedStyle(element) + return ( + element.isConnected && + rect.width > 0 && + rect.height > 0 && + rect.bottom > viewport.top && + rect.top < viewport.bottom && + style.display !== "none" && + style.visibility !== "hidden" && + Number(style.opacity) > 0 + ) + } + const critical = [ + "[data-timeline-part-id]", + '[data-component="edit-content"]', + '[data-component="apply-patch-file-diff"]', + '[data-component="file"]', + '[data-component="markdown-code"]', + "[data-markdown-block]", + ].join(",") + const describe = (element: Element) => { + const cached = state.subtreeKeys.get(element) + if (!element.isConnected && cached) return cached + const part = element.closest("[data-timeline-part-id]")?.dataset.timelinePartId ?? "unknown" + const block = element + .closest("[data-markdown-key]") + ?.dataset.markdownKey?.replace(/:(?:code|full|live)$/, "") + const component = + element.getAttribute("data-component") ?? element.getAttribute("data-markdown-block") ?? element.tagName + const key = `${part}:${block ?? 
"root"}:${component}` + state.subtreeKeys.set(element, key) + return key + } + const recordMutations = (records: MutationRecord[]) => { + if (!state.running) return + records.forEach((record) => { + record.addedNodes.forEach((node) => { + if (node instanceof HTMLElement && node.matches("[data-timeline-key]") && visible(node)) { + state.visibleMounts += 1 + state.visibleRows.add(node) + } + if (!(node instanceof Element)) return + const added = [node, ...node.querySelectorAll(critical)].filter((element) => element.matches(critical)) + added.forEach((element) => { + if (visible(element)) state.visibleSubtreeMounts.push(describe(element)) + }) + }) + record.removedNodes.forEach((node) => { + if (node instanceof HTMLElement && node.matches("[data-timeline-key]") && state.visibleRows.delete(node)) + state.visibleUnmounts += 1 + if (!(node instanceof Element)) return + const removed = [node, ...node.querySelectorAll(critical)].filter((element) => element.matches(critical)) + removed.forEach((element) => { + const key = describe(element) + if (state.visibleSubtrees.get(key) === element) state.visibleSubtreeUnmounts.push(key) + }) + }) + }) + } + const mutationObserver = profileVisual ? new MutationObserver(recordMutations) : undefined + mutationObserver?.observe(root, { childList: true, subtree: true }) + const currentPart = () => root.querySelector(`[data-timeline-part-id="${textPartID}"]`) + const observeProgress = (at: number) => { + if (!state.running) return + const content = currentPart()?.textContent ?? "" + const index = content.includes("benchmark-complete") + ? finalIndex + : Number(content.match(new RegExp(markerPattern, "g"))?.at(-1)?.match(/\d+/)?.[0] ?? 
-1) + if (index >= 0 && index !== state.applied.at(-1)?.index) state.applied.push({ at, index }) + } + const progressObserver = new MutationObserver(() => observeProgress(performance.now())) + progressObserver.observe(root, { characterData: true, childList: true, subtree: true }) + state.cleanup = () => { + recordLongTasks(longTaskObserver.takeRecords()) + recordLayoutShifts(layoutShiftObserver?.takeRecords() ?? []) + recordMutations(mutationObserver?.takeRecords() ?? []) + if (progressObserver.takeRecords().length) observeProgress(performance.now()) + longTaskObserver.disconnect() + layoutShiftObserver?.disconnect() + mutationObserver?.disconnect() + progressObserver.disconnect() + if (!profileVisual) return + Element.prototype.scrollTo = scrollTo + Object.defineProperty(Element.prototype, "scrollTop", scrollTop) + } + + const sample = (now: number) => { + if (!state.running) return + state.frameAt.push(now) + observeProgress(now) + if (minimal) { + state.frames.push(now - state.previous) + state.previous = now + requestAnimationFrame(sample) + return + } + setTimeout(() => { + if (!state.running) return + state.scroll.frame += 1 + const duration = now - state.previous + state.frames.push(duration) + state.previous = now + const virtualRoot = root.querySelector("[data-timeline-virtual-content]") + const header = root.querySelector("[data-session-title]") + state.geometry.push({ + scrollTop: root.scrollTop, + scrollHeight: root.scrollHeight, + clientHeight: root.clientHeight, + distance: root.scrollHeight - root.clientHeight - root.scrollTop, + virtualHeight: virtualRoot?.getBoundingClientRect().height ?? 0, + headerHeight: header?.getBoundingClientRect().height ?? 
0, + }) + const viewport = root.getBoundingClientRect() + if (profileVisual) { + const visibleRows = [...root.querySelectorAll("[data-timeline-key]")] + .map((element) => ({ element, rect: element.getBoundingClientRect() })) + .filter((item) => item.rect.bottom > viewport.top && item.rect.top < viewport.bottom) + .sort((a, b) => a.rect.top - b.rect.top) + state.visibleRows = new Set(visibleRows.map((item) => item.element)) + const rows = visibleRows.map((item) => item.rect) + rows.slice(1).forEach((rect, index) => { + const previous = rows[index]! + state.maxOverlap = Math.max(state.maxOverlap, previous.bottom - rect.top) + state.maxGap = Math.max(state.maxGap, rect.top - previous.bottom) + }) + const partTop = part.getBoundingClientRect().top + state.maxPartTopMovement = Math.max(state.maxPartTopMovement, Math.abs(partTop - state.previousPartTop)) + state.previousPartTop = partTop + } + const visibleRow = [...root.querySelectorAll("[data-timeline-row]")].some((element) => { + const rect = element.getBoundingClientRect() + return rect.bottom > viewport.top && rect.top < viewport.bottom + }) + if (!visibleRow) state.blanks += 1 + if (profileVisual) { + const subtrees = new Map() + const visibleSubtrees = new Map() + root.querySelectorAll(critical).forEach((element) => { + const key = describe(element) + const rect = element.getBoundingClientRect() + const style = getComputedStyle(element) + const rendered = + element.isConnected && + rect.width > 0 && + rect.height > 0 && + style.display !== "none" && + style.visibility !== "hidden" && + Number(style.opacity) > 0 + subtrees.set(key, { element, rendered }) + if (rendered && rect.bottom > viewport.top && rect.top < viewport.bottom) { + const previous = state.visibleSubtrees.get(key) + if (previous && previous !== element && key.startsWith(`${textPartID}:`)) + state.visibleSubtreeReplacements += 1 + visibleSubtrees.set(key, element) + } + }) + state.visibleSubtrees.forEach((element, key) => { + const current = 
subtrees.get(key) + if (key.startsWith(`${textPartID}:`) && !current?.rendered) { + const markdown = part.querySelector('[data-component="markdown"]') + state.visibleSubtreeDropouts.push( + `${key}:projection=${markdown?.dataset.markdownProjectionLength}/${markdown?.dataset.markdownProjectionBlocks}:result=${markdown?.dataset.markdownResultLength}/${markdown?.dataset.markdownResultBlocks}:applied=${markdown?.dataset.markdownAppliedBlocks}:dom=${markdown?.children.length}`, + ) + } + if (element.matches('[data-component="file"]')) { + const hadLines = element.hasAttribute("data-profiler-had-lines") + const hasLines = element.shadowRoot?.querySelector("[data-line]") != null + if (hasLines) element.setAttribute("data-profiler-had-lines", "") + if (hadLines && !hasLines) state.visibleSubtreeDropouts.push(`${key}:shadow-lines`) + } + }) + state.visibleSubtrees = visibleSubtrees + } + if (profileVisual && duration > 33.34) { + const livePart = currentPart() + const content = livePart?.textContent ?? "" + const complete = content.includes("benchmark-complete") + const index = complete + ? finalIndex + : Number(content.match(new RegExp(markerPattern, "g"))?.at(-1)?.match(/\d+/)?.[0] ?? -1) + state.slowFrames.push({ + duration, + index, + phase: complete + ? "complete" + : index >= 0 && index % fragmentCount === 0 + ? "boundary" + : index >= 0 + ? "stream" + : "unknown", + tokenSpans: livePart?.querySelectorAll(".shiki span").length ?? 0, + blocks: livePart?.querySelectorAll("[data-markdown-block]").length ?? 0, + codeBlocks: livePart?.querySelectorAll('[data-component="markdown-code"]').length ?? 0, + height: livePart?.getBoundingClientRect().height ?? 
/** Fields the probes read from a `layout-shift` performance entry. */
type LayoutShiftEntry = PerformanceEntry & { value: number; hadRecentInput?: boolean }

/**
 * Returns the score of a layout shift that counts toward the probe window.
 *
 * @param entry - The `startTime`, `value` and optional `hadRecentInput` of a layout-shift entry.
 * @param start - Start of the probe window, on the same clock as `entry.startTime`.
 * @returns `entry.value`, or `undefined` when the shift started before `start`
 *          or is flagged as following recent user input.
 */
export function layoutShiftValue(
  entry: Pick<LayoutShiftEntry, "startTime" | "value" | "hadRecentInput">,
  start: number,
): number | undefined {
  if (entry.startTime < start || entry.hadRecentInput) return undefined
  return entry.value
}

/**
 * Removes `row` from the set of rows currently tracked as visible.
 *
 * @returns `true` when the row was tracked (and is now removed), `false` when it
 *          was not in the set. Callers use the result to decide whether a removal
 *          counts as a visible unmount.
 */
export function removeVisibleRow<T>(visible: Set<T>, row: T): boolean {
  return visible.delete(row)
}
/**
 * Stops the in-page stream probe and summarises everything it recorded.
 *
 * The callback runs inside the page via `page.evaluate`. It reads the probe state
 * stored on `window.__timelineStreamBenchmark`, stamps `ended`, calls `cleanup()`,
 * and derives the metrics from the recorded samples.
 *
 * @param page - Playwright page on which the probe was installed and started.
 * @param options.textPartID - `data-timeline-part-id` of the streamed text part.
 * @param options.finalIndex - Progress index that marks the stream as complete.
 * @param options.navigations - Only echoed in the error message when the state is missing.
 * @returns A plain metrics object. `visual` is `null` unless the probe ran with
 *          `profileVisual`; `geometry` is `null` when it ran in `minimal` mode.
 * @throws Inside the page, when no probe state is present on `window`.
 */
export async function collectTimelineStreamMetrics(
  page: Page,
  options: { textPartID: string; finalIndex: number; navigations: string[] },
) {
  return page.evaluate(({ textPartID, finalIndex, navigations }) => {
    const state = (window as Window & { __timelineStreamBenchmark?: TimelineProbeState }).__timelineStreamBenchmark
    if (!state) throw new Error(`missing streaming benchmark state after navigation: ${JSON.stringify(navigations)}`)

    state.ended = performance.now()
    // cleanup() has to run while `running` is still true: the probe's record
    // callbacks return early once `running` is false, so clearing it first
    // would drop whatever the observers still have queued.
    state.cleanup()
    state.running = false

    // Helpers live inside the callback because it is serialised into the page.
    const slowGapMs = 33.34
    const sum = (values: number[]) => values.reduce((total, value) => total + value, 0)
    const ascending = (values: number[]) => values.slice().sort((a, b) => a - b)
    const percentile = (values: number[], fraction: number) => values[Math.floor(values.length * fraction)]

    const part = document.querySelector(`[data-timeline-part-id="${textPartID}"]`)
    const row = part?.closest("[data-timeline-row]")
    const markdown = part?.querySelector('[data-component="markdown"]')

    const sorted = ascending(state.frames)
    const duration = sum(state.frames)
    const longestSlowStreak = state.frames.reduce(
      (result, value) => {
        const current = value > slowGapMs ? result.current + 1 : 0
        return { current, longest: Math.max(result.longest, current) }
      },
      { current: 0, longest: 0 },
    ).longest

    // "Busy" window: from the first observed progress marker to the completion marker.
    const busyStart = state.applied.at(0)?.at
    const completion = state.applied.find((value) => value.index === finalIndex)
    const busyEnd = completion?.at
    const busyFrames =
      busyStart === undefined || busyEnd === undefined
        ? []
        : state.frames.filter((_, index) => state.frameAt[index]! >= busyStart && state.frameAt[index]! <= busyEnd)
    const busySorted = ascending(busyFrames)
    const busyDuration = sum(busyFrames)
    // NaN when completion was never observed; reported as-is and guarded below.
    const completionObservedMs = (completion?.at ?? NaN) - state.started

    const visual = state.profileVisual
      ? {
          layoutShiftValueSum: sum(state.layoutShifts),
          maxLayoutShiftValue: Math.max(0, ...state.layoutShifts),
          visibleMounts: state.visibleMounts,
          visibleUnmounts: state.visibleUnmounts,
          visibleSubtreeMounts: state.visibleSubtreeMounts,
          visibleSubtreeUnmounts: [...new Set(state.visibleSubtreeUnmounts)],
          visibleSubtreeReplacements: state.visibleSubtreeReplacements,
          visibleSubtreeDropouts: [...new Set(state.visibleSubtreeDropouts)],
          maxOverlapPx: state.maxOverlap,
          maxGapPx: state.maxGap,
          maxPartTopMovementPx: state.maxPartTopMovement,
          // Copy before sorting so the recorded order in the probe state is not changed.
          slowestRafGaps: state.slowFrames
            .slice()
            .sort((a, b) => b.duration - a.duration)
            .slice(0, 20)
            .map((frame) => ({
              durationMs: frame.duration,
              index: frame.index,
              phase: frame.phase,
              tokenSpans: frame.tokenSpans,
              blocks: frame.blocks,
              codeBlocks: frame.codeBlocks,
              heightPx: frame.height,
              distancePx: frame.distance,
            })),
          slowRafGapPhases: Object.fromEntries(
            ["stream", "boundary", "complete", "unknown"].map((phase) => {
              const frames = state.slowFrames.filter((frame) => frame.phase === phase)
              return [
                phase,
                {
                  count: frames.length,
                  totalMs: frames.reduce((total, frame) => total + frame.duration, 0),
                  maxMs: Math.max(0, ...frames.map((frame) => frame.duration)),
                },
              ]
            }),
          ),
          scroll: state.scroll,
        }
      : null

    const geometry = state.minimal
      ? null
      : {
          maxDistancePx: Math.max(0, ...state.geometry.map((sample) => sample.distance)),
          finalDistancePx: state.geometry.at(-1)?.distance ?? 0,
          final: state.geometry.at(-1),
          // Rounded distances with consecutive duplicates collapsed.
          distanceTransitionsPx: state.geometry
            .map((sample) => Math.round(sample.distance))
            .filter((value, index, values) => index === 0 || value !== values[index - 1]),
          // Counts samples that moved from within 1px of the bottom to more than 1px away.
          bottomDriftTransitions: state.geometry.slice(1).filter((value, index) => {
            const previous = state.geometry[index]?.distance ?? 0
            return previous <= 1 && value.distance > 1
          }).length,
          blankSamples: state.blanks,
        }

    return {
      capabilities: { visual: state.profileVisual, geometry: !state.minimal },
      completionObservedMs,
      // Require a positive elapsed time so a zero-length window cannot yield Infinity.
      deltasPerSecond:
        Number.isFinite(completionObservedMs) && completionObservedMs > 0
          ? finalIndex / (completionObservedMs / 1_000)
          : null,
      rafGapSamples: state.frames.length,
      rafCallbackRate: duration ? (state.frames.length * 1000) / duration : 0,
      observedProgressWindowRafCallbackRate: busyDuration ? (busyFrames.length * 1000) / busyDuration : null,
      observedProgressWindowRafGapP95Ms: percentile(busySorted, 0.95) ?? null,
      observedProgressWindowRafGaps: busyFrames.length,
      maxObservedProgressIndex: Math.max(-1, ...state.applied.map((value) => value.index)),
      observedProgressTransitions: state.applied.length,
      rafGapP50Ms: percentile(sorted, 0.5) ?? 0,
      rafGapP95Ms: percentile(sorted, 0.95) ?? 0,
      rafGapP99Ms: percentile(sorted, 0.99) ?? 0,
      maxRafGapMs: sorted.at(-1) ?? 0,
      rafGapsOver33Ms: state.frames.filter((value) => value > slowGapMs).length,
      rafGapsOver50Ms: state.frames.filter((value) => value > 50).length,
      missedFrameBudgetEquivalents: state.frames.reduce(
        (total, value) => total + Math.max(0, Math.round(value / 16.67) - 1),
        0,
      ),
      longestRafGapOver33MsStreak: longestSlowStreak,
      longTaskCount: state.longTasks.length,
      longTaskTimeMs: sum(state.longTasks),
      visual,
      geometry,
      // True when the node found now differs from the one captured at install time.
      rowReplaced: row !== state.row,
      markdownReplaced: markdown !== state.markdown,
      domTextCharacters: part?.textContent?.length ?? 0,
    }
  }, options)
}
" " : "") + word + if (i % 17 === 0) out += ".\n\n" + i += 7 + } + return out.slice(0, length) +} + +function id(prefix: string, value: number) { + return `${prefix}_smoke_${String(value).padStart(4, "0")}` +} + +function userMessage(sessionID: string, index: number, textLength: number, diffs: unknown[] = []): Message { + const messageID = id("msg_user", index) + return { + info: { + id: messageID, + sessionID, + role: "user", + time: { created: 1700000000000 + index * 10_000 }, + summary: { diffs }, + agent: "build", + model, + }, + parts: [ + { + id: id("prt_user_text", index), + sessionID, + messageID, + type: "text", + text: lorem(index, textLength), + }, + ], + } +} + +function assistantMessage(sessionID: string, index: number, parentID: string, parts: MessagePart[]): Message { + const messageID = id("msg_assistant", index) + return { + info: { + id: messageID, + sessionID, + role: "assistant", + time: { created: 1700000000000 + index * 10_000 + 1_000, completed: 1700000000000 + index * 10_000 + 8_000 }, + parentID, + modelID: model.modelID, + providerID: model.providerID, + mode: "build", + agent: "build", + path: { cwd: directory, root: directory }, + cost: 0.01, + tokens: { input: 100, output: 200, reasoning: 0, cache: { read: 0, write: 0 } }, + variant: "max", + finish: "stop", + }, + parts: parts.map((part) => ({ + ...part, + sessionID, + messageID, + })), + } +} + +function textPart(index: number, partIndex: number, length: number): MessagePart { + const prose = lorem(index * 13 + partIndex, length) + const text = + index % 12 === 0 + ? `${prose}\n\n\`\`\`ts\n${code(index, 80)}\n\`\`\`` + : index % 5 === 0 + ? `${prose}\n\n\`\`\`ts\nexport const value = "${lorem(index, 220)}"\n\`\`\`` + : index % 7 === 0 + ? 
/**
 * Builds a synthetic reasoning part whose text is deterministic filler.
 *
 * @param index - Turn index; drives the part id, the text seed and the timestamps.
 * @param partIndex - Position used in the part id and mixed into the text seed.
 * @param length - Number of characters of filler text.
 */
function reasoningPart(index: number, partIndex: number, length: number): MessagePart {
  const startedAt = 1700000000000 + index * 10_000
  return {
    id: id(`prt_reasoning_${partIndex}`, index),
    type: "reasoning",
    text: lorem(index * 19 + partIndex, length),
    time: { start: startedAt, end: startedAt + 500 },
  }
}

/**
 * Builds a synthetic, already-completed tool call part.
 *
 * The metadata depends on the tool: `apply_patch` carries two patch files,
 * `edit`/`write` carry a file diff plus patch text, `question` carries canned
 * answers, and every other tool gets an empty object.
 *
 * @param index - Turn index; drives ids, seeds and timestamps.
 * @param partIndex - Position used in the part id and call id.
 * @param tool - Tool name stored on the part.
 * @param input - Tool input, stored verbatim; `filePath`, `path` and `pattern` are
 *                also used to derive the title.
 * @param outputLength - Length of the filler output (and of the `edit`/`write` diff).
 */
function toolPart(
  index: number,
  partIndex: number,
  tool: string,
  input: Record<string, unknown>,
  outputLength = 160,
): MessagePart {
  const startedAt = 1700000000000 + index * 10_000

  let metadata: Record<string, unknown> = {}
  if (tool === "apply_patch") {
    metadata = { files: [patchFile(index, "update"), patchFile(index + 1, index % 2 === 0 ? "add" : "delete")] }
  } else if (tool === "edit" || tool === "write") {
    metadata = {
      filediff: fileDiff(String(input.filePath ?? `src/generated/file-${index}.ts`), index),
      diff: patch(index, outputLength),
      preview: patch(index + 1, 420),
    }
  } else if (tool === "question") {
    metadata = { answers: [["Proceed"], ["Keep sample output"]] }
  }

  return {
    id: id(`prt_tool_${tool}_${partIndex}`, index),
    type: "tool",
    callID: id("call", index * 10 + partIndex),
    tool,
    state: {
      status: "completed",
      input,
      output: lorem(index * 23 + partIndex, outputLength),
      // `||` on purpose: an empty string falls through to the next candidate.
      title: tool === "bash" ? "Verify generated output" : input.filePath || input.path || input.pattern || "completed",
      metadata,
      time: { start: startedAt, end: startedAt + 400 },
    },
  }
}
/**
 * Produces a synthetic before/after pair for `file`.
 *
 * The seed picks one of three sizes: 300 lines (fully regenerated), 2 lines
 * (one identifier renamed) or 38 lines (two identifiers renamed). Seeds
 * divisible by 10 use wide lines.
 */
function fileDiff(file: string, seed: number) {
  let lines = 38
  if (seed % 12 === 0) lines = 300
  else if (seed % 8 === 0) lines = 2

  const width = seed % 10 === 0 ? 280 : 32
  const before = code(seed, lines, width)

  let after: string
  let changed: number
  if (lines === 2) {
    after = before.replace("value1", "updatedValue1")
    changed = 1
  } else if (lines === 300) {
    after = code(seed + 1, lines, width)
    changed = 300
  } else {
    after = before.replace("value4", "updatedValue4").replace("value20", "updatedValue20")
    changed = 2
  }

  return {
    file,
    additions: changed,
    deletions: changed,
    before,
    after,
  }
}

/** Renders filler text as an all-additions patch for `src/generated/file-<seed>.ts`. */
function patch(seed: number, length: number) {
  const added = lorem(seed, length).replace(/\n/g, "\n+")
  return `diff --git a/src/generated/file-${seed}.ts b/src/generated/file-${seed}.ts\n+${added}`
}

/** Generates `lines` export statements whose string values are filler text of `width` characters. */
function code(seed: number, lines: number, width = 32) {
  const statement = (_: unknown, index: number) => `export const value${index} = "${lorem(seed + index, width)}"`
  return Array.from({ length: lines }, statement).join("\n")
}
/**
 * Reports whether a part is expected to be rendered in the timeline.
 *
 * Not rendered: `todowrite` tool calls, text/reasoning parts whose text is
 * missing or whitespace-only, and `step-start`, `step-finish` and `patch` parts.
 */
function renderable(part: MessagePart) {
  if (part.type === "tool" && part.tool === "todowrite") return false
  // `text` is optional on MessagePart; a missing value counts as empty instead of throwing.
  if (part.type === "text" || part.type === "reasoning") return Boolean(part.text?.trim())
  return part.type !== "step-start" && part.type !== "step-finish" && part.type !== "patch"
}

/** Returns a copy of the message's parts sorted by id; the message itself is not modified. */
function orderedParts(message: Message) {
  return message.parts.slice().sort((a, b) => a.id.localeCompare(b.id))
}
/**
 * Returns one page of a fixture session's messages, paging backwards from the end.
 *
 * @param sessionID - Session to read; an unknown id yields an empty page.
 * @param limit - Maximum number of messages in the page. Values below 1 give an
 *                empty page; fractional values are rounded down.
 * @param before - Id of the message the page must end before. When omitted the
 *                 page ends at the newest message. An id that is not found gives
 *                 an empty page.
 * @returns `items` in stored order, plus `cursor`: the id of the first returned
 *          message when older messages remain, otherwise `undefined`.
 */
export function pageMessages(sessionID: string, limit: number, before?: string) {
  const messages = fixture.messages[sessionID as keyof typeof fixture.messages] ?? []
  const end = before
    ? Math.max(
        0,
        messages.findIndex((message) => message.info.id === before),
      )
    : messages.length
  // Clamp so a zero, negative or fractional limit cannot push `start` past `end`.
  const size = Math.max(0, Math.floor(limit))
  const start = Math.max(0, end - size)
  return {
    items: messages.slice(start, end),
    // `start` can equal `messages.length` for an empty page, so do not assume an entry exists.
    cursor: start > 0 ? messages[start]?.info.id : undefined,
  }
}
"4096"}` + await page.addInitScript( + ({ directory, sourceID, targetID, dirBase64, server }) => { + localStorage.setItem( + "opencode.global.dat:server", + JSON.stringify({ + projects: { local: [{ worktree: directory, expanded: true }] }, + lastProject: { local: directory }, + }), + ) + localStorage.setItem( + "opencode.global.dat:tabs", + JSON.stringify( + [sourceID, targetID].map((sessionId) => ({ + type: "session", + server, + dirBase64, + sessionId, + })), + ), + ) + }, + { + directory: fixture.directory, + sourceID: fixture.sourceID, + targetID: fixture.targetID, + dirBase64: base64Encode(fixture.directory), + server, + }, + ) +} + +export function stressSessionHref(sessionID: string) { + return `/${base64Encode(fixture.directory)}/session/${sessionID}` +} diff --git a/packages/app/e2e/performance/unit/chrome-trace-write.test.ts b/packages/app/e2e/performance/unit/chrome-trace-write.test.ts new file mode 100644 index 000000000..456020ff3 --- /dev/null +++ b/packages/app/e2e/performance/unit/chrome-trace-write.test.ts @@ -0,0 +1,15 @@ +import { expect, test } from "bun:test" +import { mkdtemp, rm } from "node:fs/promises" +import path from "node:path" +import os from "node:os" +import { prepareChromeTrace } from "../chrome-trace" + +test("creates the configured trace directory", async () => { + const root = await mkdtemp(path.join(os.tmpdir(), "opencode-trace-")) + try { + const file = await prepareChromeTrace(path.join(root, "nested", "traces"), "session/tab", false, "test") + expect(file).toEndWith("-session-tab-458ed9e3-test.json") + } finally { + await rm(root, { recursive: true, force: true }) + } +}) diff --git a/packages/app/e2e/performance/unit/session-tab-repaint-probe.test.ts b/packages/app/e2e/performance/unit/session-tab-repaint-probe.test.ts new file mode 100644 index 000000000..5d20c03a0 --- /dev/null +++ b/packages/app/e2e/performance/unit/session-tab-repaint-probe.test.ts @@ -0,0 +1,42 @@ +import { expect, test } from "bun:test" +import { 
compressCachedRepaintTrace, layoutShiftSample } from "../timeline/session-tab-repaint-probe" + +test("compresses repeated repaint states without losing frame samples", () => { + const state = { + root: 1, + scrollTop: 10, + scrollHeight: 20, + bottomErrorPx: 0, + last: true, + rows: [{ key: "row", node: 2, top: 0, bottom: 10 }], + mounted: 1, + center: "content", + } + const trace = { + timeOriginEpochMs: 1_000, + startedAtPerformanceMs: 100, + samples: [ + { observedAtMs: 16, ...state, destination: ["target"], source: [] }, + { observedAtMs: 32, ...state, destination: ["target"], source: [] }, + { observedAtMs: 48, ...state, scrollTop: 11, destination: ["target"], source: [] }, + ], + mutations: [{ observedAtMs: 20, changed: [{ type: "add", node: 2 }] }], + shifts: [{ occurredAtMs: 24, value: 0.1 }], + windowMs: 1_000, + running: false, + stop() {}, + } + const compressed = compressCachedRepaintTrace(trace) + const samples = compressed.samples.flatMap((group) => + group.observedAtMs.map((observedAtMs) => ({ observedAtMs, ...group.state })), + ) + + expect(samples).toEqual(trace.samples) + expect(compressed.mutations).toEqual(trace.mutations) + expect(compressed.shifts).toEqual(trace.shifts) +}) + +test("records layout shifts at occurrence time within the probe window", () => { + expect(layoutShiftSample({ startTime: 99, value: 0.1 }, 100)).toBeUndefined() + expect(layoutShiftSample({ startTime: 124, value: 0.2 }, 100)).toEqual({ occurredAtMs: 24, value: 0.2 }) +}) diff --git a/packages/app/e2e/performance/unit/session-tab-switch-metrics.test.ts b/packages/app/e2e/performance/unit/session-tab-switch-metrics.test.ts new file mode 100644 index 000000000..dd771b7d5 --- /dev/null +++ b/packages/app/e2e/performance/unit/session-tab-switch-metrics.test.ts @@ -0,0 +1,54 @@ +import { expect, test } from "bun:test" +import { classifySessionSwitch } from "../timeline/session-tab-switch-metrics" + +test("counts source and blank samples before the destination is observed", () 
=> { + const result = classifySessionSwitch([ + { observedAtMs: 16, destination: [], source: ["source"], hasVisibleRows: true, last: false }, + { observedAtMs: 32, destination: [], source: [], hasVisibleRows: false, last: false }, + { observedAtMs: 48, destination: ["destination"], source: [], hasVisibleRows: true, last: true, bottomErrorPx: 0 }, + { observedAtMs: 64, destination: ["destination"], source: [], hasVisibleRows: true, last: true, bottomErrorPx: 0 }, + { observedAtMs: 80, destination: ["destination"], source: [], hasVisibleRows: true, last: true, bottomErrorPx: 0 }, + ]) + + expect(result.blankSamples).toBe(1) + expect(result.sourceSamples).toBe(1) + expect(result.unknownSamples).toBe(0) + expect(result.firstDestinationObservedMs).toBe(48) + expect(result.stableObservedMs).toBe(80) +}) + +test("does not classify mixed source and destination content as correct", () => { + const result = classifySessionSwitch([ + { + observedAtMs: 16, + destination: ["destination"], + source: ["source"], + hasVisibleRows: true, + last: true, + bottomErrorPx: 0, + }, + { observedAtMs: 32, destination: ["destination"], source: [], hasVisibleRows: true, last: true, bottomErrorPx: 0 }, + { observedAtMs: 48, destination: ["destination"], source: [], hasVisibleRows: true, last: true, bottomErrorPx: 0 }, + { observedAtMs: 64, destination: ["destination"], source: [], hasVisibleRows: true, last: true, bottomErrorPx: 0 }, + ]) + + expect(result.firstCorrectObservedMs).toBe(32) + expect(result.stableObservedMs).toBe(64) +}) + +test("reports missing correctness without throwing", () => { + const result = classifySessionSwitch([ + { + observedAtMs: 16, + destination: ["destination"], + source: ["source"], + hasVisibleRows: true, + last: true, + bottomErrorPx: 0, + }, + ]) + + expect(result.firstDestinationObservedMs).toBe(16) + expect(result.firstCorrectObservedMs).toBeNull() + expect(result.stableObservedMs).toBeNull() +}) diff --git 
a/packages/app/e2e/performance/unit/session-timeline-stream-probe.test.ts b/packages/app/e2e/performance/unit/session-timeline-stream-probe.test.ts new file mode 100644 index 000000000..f8fca1adb --- /dev/null +++ b/packages/app/e2e/performance/unit/session-timeline-stream-probe.test.ts @@ -0,0 +1,14 @@ +import { expect, test } from "bun:test" +import { streamChunk } from "../timeline/session-timeline-benchmark.fixture" +import { streamProgress } from "../timeline/session-timeline-stream-probe" + +test("classifies emitted stream markers using the fixture cycle", () => { + expect(streamProgress("before stream-17 after stream-18")).toEqual({ index: 18, phase: "boundary" }) + expect(streamProgress("before stream-18 after stream-19")).toEqual({ index: 19, phase: "stream" }) + expect(streamProgress("benchmark-complete stream-36")).toEqual({ index: 36, phase: "complete" }) + expect(streamProgress("no marker")).toEqual({ index: -1, phase: "unknown" }) +}) + +test("emits progress markers at fixture boundaries", () => { + expect(streamProgress(streamChunk(18, 160))).toEqual({ index: 18, phase: "boundary" }) +}) diff --git a/packages/app/e2e/performance/unit/session-timeline-visual-tracking.test.ts b/packages/app/e2e/performance/unit/session-timeline-visual-tracking.test.ts new file mode 100644 index 000000000..c0215c5cb --- /dev/null +++ b/packages/app/e2e/performance/unit/session-timeline-visual-tracking.test.ts @@ -0,0 +1,16 @@ +import { expect, test } from "bun:test" +import { layoutShiftValue, removeVisibleRow } from "../timeline/session-timeline-stream-probe" + +test("excludes layout shifts before the probe window and recent input", () => { + expect(layoutShiftValue({ startTime: 9, value: 0.1 }, 10)).toBeUndefined() + expect(layoutShiftValue({ startTime: 10, value: 0.2, hadRecentInput: true }, 10)).toBeUndefined() + expect(layoutShiftValue({ startTime: 11, value: 0.3 }, 10)).toBe(0.3) +}) + +test("classifies removed rows from their last painted visibility", () => { + 
const row = {} + const visible = new Set([row]) + + expect(removeVisibleRow(visible, row)).toBe(true) + expect(removeVisibleRow(visible, row)).toBe(false) +}) diff --git a/packages/app/e2e/utils/mock-server.ts b/packages/app/e2e/utils/mock-server.ts index c4ef9f6cc..cf0c95243 100644 --- a/packages/app/e2e/utils/mock-server.ts +++ b/packages/app/e2e/utils/mock-server.ts @@ -44,7 +44,10 @@ export async function mockOpenCodeServer(page: Page, config: MockServerConfig) { await page.route("**/*", async (route) => { const url = new URL(route.request().url()) const targetPort = process.env.PLAYWRIGHT_SERVER_PORT ?? "4096" - if (url.port !== targetPort) return route.fallback() + const appPort = new URL( + process.env.PLAYWRIGHT_BASE_URL ?? `http://127.0.0.1:${process.env.PLAYWRIGHT_PORT ?? "3000"}`, + ).port + if (url.port !== targetPort && url.port !== appPort) return route.fallback() const path = url.pathname if (path === "/global/event" || path === "/event") return sse(route, config.events?.(), config.eventRetry) @@ -72,7 +75,8 @@ export async function mockOpenCodeServer(page: Page, config: MockServerConfig) { return json(route, pageData.items, pageData.cursor ? 
{ "x-next-cursor": pageData.cursor } : undefined) } - return json(route, {}) + if (url.port === targetPort && targetPort !== appPort) return json(route, {}) + return route.fallback() }) } diff --git a/packages/app/package.json b/packages/app/package.json index 0b46ec028..b572e7308 100644 --- a/packages/app/package.json +++ b/packages/app/package.json @@ -24,7 +24,8 @@ "test:e2e": "playwright test", "test:e2e:local": "playwright test", "test:e2e:ui": "playwright test --ui", - "test:e2e:report": "playwright show-report e2e/playwright-report" + "test:e2e:report": "playwright show-report e2e/playwright-report", + "test:bench": "bun test ./e2e/performance/unit && playwright test --config e2e/performance/playwright.config.ts" }, "license": "MIT", "devDependencies": { diff --git a/packages/app/playwright.config.ts b/packages/app/playwright.config.ts index d9648a88b..f68652363 100644 --- a/packages/app/playwright.config.ts +++ b/packages/app/playwright.config.ts @@ -9,6 +9,7 @@ const reuse = !process.env.CI const workers = Number(process.env.PLAYWRIGHT_WORKERS ?? (process.env.CI ? 5 : 0)) || undefined export default defineConfig({ testDir: "./e2e", + testIgnore: process.env.OPENCODE_PERFORMANCE === "1" ? "performance/**/*.test.ts" : "performance/**", outputDir: "./e2e/test-results", timeout: 60_000, expect: {