test(app): add manual performance diagnostics (#32937)

This commit is contained in:
Luke Parker 2026-06-19 19:39:02 +02:00 committed by GitHub
parent 10ec856ff2
commit c6083a474c
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
26 changed files with 2656 additions and 3 deletions

View File

@ -1,3 +1,8 @@
## Priorities
- Prioritise, in this order: stability, simplicity, performance.
- Before changing session or timeline code, record a production benchmark baseline and compare it after the change.
## Debugging
- NEVER try to restart the app, or the server process, EVER.

View File

@ -0,0 +1,13 @@
- Prioritize stability, then simplicity, then low measurement overhead.
- Use Playwright for scenario control, isolation, and completion checks.
- Use Chrome Performance traces for generic browser profiling.
- Use Electron `contentTracing` for packaged multi-process profiling.
- Keep custom probes only for product-specific measurements.
- Do not duplicate measurements across the harness, probes, and traces.
- Run benchmarks serially to avoid cross-test contention.
- Run benchmarks against production builds.
- Keep detailed profiling opt-in when it changes workload behavior.
- Preserve raw diagnostic data or use lossless representations.
- Do not enforce machine-dependent performance thresholds.
- Assert scenario completion and metric collection only.
- Keep normal test discovery free of manual benchmarks.

View File

@ -0,0 +1,77 @@
# Manual app performance suite
The app's high-volume performance diagnostics live under `packages/app/e2e/performance` and are excluded from normal local and CI Playwright discovery. The benchmark config builds the app and serves the production bundle before running scenarios serially.
Run the suite explicitly from `packages/app`:
```sh
bun run test:bench
```
PowerShell:
```powershell
$env:PLAYWRIGHT_WORKERS = "1"
bun run test:bench
```
The suite contains:
- cold and hot session-tab timing
- cached session repaint and mutation tracing
- streaming timeline throughput, RAF-gap, long-task, geometry, and remount diagnostics
All benchmarks import the shared `benchmark` fixture. Pages created through Playwright's `page` fixture automatically capture main-frame navigation history and emit a Chrome trace when `OPENCODE_PERFORMANCE_TRACE_DIR` is set. Benchmarks that need isolated browser contexts use `withBenchmarkPage`, which owns the context and the same diagnostics lifecycle.
New benchmarks should look like normal Playwright tests:
```ts
import { benchmark, expect } from "../benchmark"
benchmark("measures one interaction", async ({ page, report }) => {
// Only scenario-specific setup and interaction belong here.
report({ durationMs: 42 })
})
```
The fixture requires every benchmark to call `report()` exactly once, automatically names and closes traces, captures navigation history, attaches that history when a test fails, and emits metrics as a consistent `BENCHMARK` JSON line.
```text
BENCHMARK {"name":"...","context":{"project":"chromium","platform":"darwin"},"metrics":{...}}
```
Every observed page also emits `BENCHMARK_PAGE` with the same run ID, navigation history, and optional trace path before the final status-bearing `BENCHMARK` record. Chrome traces are browser-wide page-lifetime diagnostics; scenario metrics use narrower explicitly named observation windows.
This follows the stack's own guidance: [Electron recommends repeated Chrome DevTools and Chrome Tracing measurement](https://www.electronjs.org/docs/latest/tutorial/performance), [Chrome DevTools recommends Performance recordings for runtime work](https://developer.chrome.com/docs/devtools/performance), and [Playwright uses traces for test debugging rather than renderer profiling](https://playwright.dev/docs/trace-viewer).
These Playwright benchmarks profile the shared app renderer in Chromium. A future packaged Electron benchmark that needs main-process and multi-process attribution should use Electron's official [`contentTracing`](https://www.electronjs.org/docs/latest/api/content-tracing/) API rather than extending this renderer harness with bespoke process instrumentation.
CPU and high-volume visual profiling are disabled by default. Set `TIMELINE_CPU_PROFILE=1` to enable both, or additionally set `TIMELINE_VISUAL_PROFILE=0` for CPU-only profiling.
The streaming scenario's 30x CPU throttle is a deterministic stress profile, not a simulated end-user device.
Benchmarks do not assert machine-dependent performance budgets. Streaming processes 160 deltas by default and reports renderer-observed completion time, throughput, RAF callback-gap distributions, frame-budget equivalents, and long tasks through final geometry settlement. Delta count and delivery batch are included in result context when overridden. These are main-thread callback diagnostics, not compositor presentation or dropped-frame measurements. Visual-only and geometry metrics are `null` when their probes are disabled. Tab metrics describe sampled DOM observations. Assertions verify scenario and metric collection completion. Repeated repaint states are run-length grouped, but every original observation timestamp is retained alongside raw mutation batches and layout shifts.
Committed smoke and regression tests continue to own correctness coverage for pagination, tab paint, context resize, collapse state, and composer spacing.
## Chrome traces
Set `OPENCODE_PERFORMANCE_TRACE_DIR` to emit a standard Chrome DevTools trace for every benchmark page automatically:
```sh
OPENCODE_PERFORMANCE_TRACE_DIR=/tmp/opencode-performance-traces \
bunx playwright test --config e2e/performance/playwright.config.ts \
timeline/session-tab-switch-benchmark.spec.ts
```
The emitted JSON is a standard Chrome trace and can be loaded directly into the Chrome DevTools Performance panel. `devtools-tracing` can optionally inspect it from the command line without adding package scripts or dependencies (see the command below).
Trace capture mirrors [Puppeteer's official tracing defaults and lifecycle](https://pptr.dev/api/puppeteer.tracing), using Chrome's `ReturnAsStream` transfer mode and failing when Chromium reports trace data loss.
```sh
bunx devtools-tracing stats <trace-path-from-BENCHMARK_PAGE>
```
INP analysis requires a trace with a supported navigation/interaction insight. Selector statistics require a trace captured with `OPENCODE_PERFORMANCE_SELECTOR_TRACE=1`.
`e2e/performance/playwright.uncapped.config.ts` disables Chromium frame-rate limiting for explicit uncapped diagnostics. Native product benchmarks should use the default Playwright configuration.

View File

@ -0,0 +1,144 @@
import { expect, test as base, type Browser, type Page, type TestInfo } from "@playwright/test"
import { startChromeTrace } from "./chrome-trace"
/** Fixtures that the shared `benchmark` test object adds on top of Playwright's base fixtures. */
type BenchmarkFixtures = {
/** Records the metrics, and optional extra context, for the current benchmark. */
report: (metrics: Record<string, unknown>, context?: Record<string, unknown>) => void
/** Mutable holder shared by `report` and `benchmarkResult`; `payload` stays unset until `report` is called. */
reportState: { payload?: { metrics: Record<string, unknown>; context: Record<string, unknown> } }
/** Value-less fixture whose teardown prints the final `BENCHMARK` record. */
benchmarkResult: void
}
/** Diagnostics handle attached to a page observed by the benchmark harness. */
export type PerformancePageDiagnostics = {
/** URLs of the main frame, appended each time it navigates. */
navigations: string[]
/** Stops observing the page; resolves to the written trace path, or `undefined` when no trace was started. */
stop: () => Promise<string | undefined>
}
// Diagnostics registered per observed page; written by observePerformancePage and read by benchmarkDiagnostics.
const pages = new WeakMap<Page, PerformancePageDiagnostics>()
/**
 * Playwright test object used by the manual performance benchmarks.
 *
 * It extends the base test with:
 * - `report`: lets a benchmark hand over its metrics exactly once;
 * - `benchmarkResult`: an auto fixture that prints one `BENCHMARK` JSON line after the test body;
 * - `page`: the built-in page wrapped with navigation capture and optional Chrome tracing.
 */
export const benchmark = base.extend<BenchmarkFixtures>({
// Each test starts with an empty holder; `report` fills in `payload`.
reportState: async ({}, use) => use({}),
report: async ({ reportState }, use) => {
await use((metrics, context = {}) => {
// A second call is rejected so that one benchmark produces exactly one metrics payload.
if (reportState.payload) throw new Error("Benchmark reported metrics more than once")
reportState.payload = { metrics, context }
})
},
benchmarkResult: [
async ({ reportState }, use, testInfo) => {
// Everything after `use()` runs once the test body has finished.
await use()
const missing = !reportState.payload
console.log(
`BENCHMARK ${JSON.stringify({
schemaVersion: 2,
runID: process.env.OPENCODE_PERFORMANCE_RUN_ID,
name: benchmarkName(testInfo),
// A benchmark that never reported is always recorded as failed, whatever Playwright's own status says.
status: missing ? "failed" : testInfo.status,
expectedStatus: testInfo.expectedStatus,
retry: testInfo.retry,
repeatEachIndex: testInfo.repeatEachIndex,
context: {
project: testInfo.project.name,
platform: process.platform,
// Spread last, so context keys supplied by the benchmark override the defaults above.
...reportState.payload?.context,
},
metrics: reportState.payload?.metrics ?? null,
error: missing ? "Benchmark did not report metrics" : undefined,
})}`,
)
// Only raise the missing-report error when the test otherwise matched its expected status.
if (missing && testInfo.status === testInfo.expectedStatus)
throw new Error(`Benchmark did not report metrics: ${benchmarkName(testInfo)}`)
},
// `auto` makes Playwright set this fixture up for every benchmark, even though no test requests it.
{ auto: true },
],
page: async ({ page }, use, testInfo) => {
const name = benchmarkName(testInfo)
const diagnostics = await observePerformancePage(page, name)
try {
await use(page)
} finally {
try {
// Stops the trace and prints the BENCHMARK_PAGE record for this page.
await reportPerformancePage(name, diagnostics, testInfo)
} finally {
// Runs even if reporting threw: attach the navigation history when the status differs from the expected one.
if (testInfo.status !== testInfo.expectedStatus) {
await testInfo.attach("performance-navigations", {
body: JSON.stringify(diagnostics.navigations, null, 2),
contentType: "application/json",
})
}
}
}
},
})
/** Builds the benchmark's display name by joining the test's title path, minus its first element, with " > ". */
function benchmarkName(testInfo: TestInfo) {
  const [, ...titles] = testInfo.titlePath
  return titles.join(" > ")
}
export { expect }
/**
 * Starts observing a page: records every main-frame navigation URL and starts a Chrome trace
 * (`startChromeTrace` returns nothing when tracing is not configured).
 *
 * The returned diagnostics are also registered in `pages` so `benchmarkDiagnostics` can find them.
 * If the trace cannot be started, the navigation listener is removed and the error is rethrown.
 */
async function observePerformancePage(page: Page, name: string) {
  const history: string[] = []
  const recordNavigation = (frame: ReturnType<Page["mainFrame"]>) => {
    // Sub-frame navigations are ignored.
    if (frame !== page.mainFrame()) return
    history.push(frame.url())
  }
  const unsubscribe = () => {
    page.off("framenavigated", recordNavigation)
  }
  page.on("framenavigated", recordNavigation)

  let stopTrace: Awaited<ReturnType<typeof startChromeTrace>>
  try {
    stopTrace = await startChromeTrace(page, name)
  } catch (error) {
    unsubscribe()
    throw error
  }

  // Cached so repeated stop() calls share one result instead of stopping the trace twice.
  let pending: Promise<string | undefined> | undefined
  const diagnostics: PerformancePageDiagnostics = {
    navigations: history,
    stop() {
      unsubscribe()
      if (!pending) pending = stopTrace ? stopTrace() : Promise.resolve(undefined)
      return pending
    },
  }
  pages.set(page, diagnostics)
  return diagnostics
}
/**
 * Runs `run` against a page in a fresh browser context that this helper owns.
 *
 * The page is observed like a fixture page, its `BENCHMARK_PAGE` record is printed whether or not
 * `run` throws, and the context is always closed afterwards.
 *
 * @param browser Browser used to create the isolated context.
 * @param name Name given to the page's diagnostics and trace.
 * @param run Scenario to execute; its resolved value is returned.
 * @param testInfo Optional owning test, included in the page record.
 */
export async function withBenchmarkPage<T>(
  browser: Browser,
  name: string,
  run: (page: Page) => Promise<T>,
  testInfo?: TestInfo,
) {
  const isolated = await browser.newContext()
  try {
    const page = await isolated.newPage()
    const observed = await observePerformancePage(page, name)
    try {
      const outcome = await run(page)
      return outcome
    } finally {
      await reportPerformancePage(name, observed, testInfo)
    }
  } finally {
    await isolated.close()
  }
}
/**
 * Stops a page's diagnostics and prints its `BENCHMARK_PAGE` JSON line.
 *
 * The record carries the run ID from the environment, the page name, the owning test (when given),
 * the trace path returned by `stop()`, whether selector tracing was requested, and the navigation history.
 */
async function reportPerformancePage(name: string, diagnostics: PerformancePageDiagnostics, testInfo?: TestInfo) {
  const tracePath = await diagnostics.stop()
  const record = {
    schemaVersion: 2,
    runID: process.env.OPENCODE_PERFORMANCE_RUN_ID,
    name,
    test: testInfo ? benchmarkName(testInfo) : undefined,
    retry: testInfo?.retry,
    repeatEachIndex: testInfo?.repeatEachIndex,
    context: {
      platform: process.platform,
      trace: tracePath,
      selectorTrace: process.env.OPENCODE_PERFORMANCE_SELECTOR_TRACE === "1",
    },
    navigations: diagnostics.navigations,
  }
  console.log(`BENCHMARK_PAGE ${JSON.stringify(record)}`)
}
/**
 * Returns the diagnostics registered for `page`.
 *
 * @throws Error when the page was never observed by the benchmark harness.
 */
export function benchmarkDiagnostics(page: Page) {
  const registered = pages.get(page)
  if (registered) return registered
  throw new Error("Performance diagnostics are not installed for this page")
}

View File

@ -0,0 +1,95 @@
import type { CDPSession, Page } from "@playwright/test"
import path from "node:path"
import { mkdir, open, rename } from "node:fs/promises"
import { Buffer } from "node:buffer"
import { createHash, randomUUID } from "node:crypto"
// Trace categories requested from Chrome. An entry starting with "-" is sent as an excluded
// category (without the dash); every other entry is sent as an included category.
const categories = [
"-*",
"devtools.timeline",
"v8.execute",
"disabled-by-default-devtools.timeline",
"disabled-by-default-devtools.timeline.frame",
"toplevel",
"blink.console",
"blink.user_timing",
"latencyInfo",
"disabled-by-default-devtools.timeline.stack",
"disabled-by-default-v8.cpu_profiler",
]
/**
 * Starts a Chrome trace for `page` when `OPENCODE_PERFORMANCE_TRACE_DIR` is set.
 *
 * @returns `undefined` when no trace directory is configured; otherwise a stop function. The stop
 * function ends tracing, streams the data to `<file>.partial`, renames it to the final file, and
 * resolves to that path. Repeated calls share the same promise.
 * @throws From the stop function when Chrome returns no stream handle or reports trace data loss
 * (in the latter case the `.partial` file is left in place).
 */
export async function startChromeTrace(page: Page, name: string) {
  const traceDirectory = process.env.OPENCODE_PERFORMANCE_TRACE_DIR
  if (!traceDirectory) return
  const withSelectors = process.env.OPENCODE_PERFORMANCE_SELECTOR_TRACE === "1"
  const file = await prepareChromeTrace(traceDirectory, name, withSelectors)
  const session = await page.context().newCDPSession(page)

  // Split the category list: "-"-prefixed entries are exclusions, the rest are inclusions.
  const excluded: string[] = []
  const included: string[] = []
  for (const category of categories) {
    if (category.startsWith("-")) excluded.push(category.slice(1))
    else included.push(category)
  }
  if (withSelectors) {
    included.push("disabled-by-default-blink.debug", "disabled-by-default-devtools.timeline.invalidationTracking")
  }

  try {
    await session.send("Tracing.start", {
      transferMode: "ReturnAsStream",
      traceConfig: {
        excludedCategories: excluded,
        includedCategories: included,
      },
    })
  } catch (error) {
    // Best-effort detach; the original start failure is what gets reported.
    await Promise.allSettled([session.detach()])
    throw error
  }

  const finish = async () => {
    try {
      // Subscribe before sending Tracing.end so the completion event cannot be missed.
      const completed = new Promise<{ stream?: string; dataLossOccurred: boolean }>((resolve) =>
        session.once("Tracing.tracingComplete", resolve),
      )
      await session.send("Tracing.end")
      const { stream, dataLossOccurred } = await completed
      if (!stream) throw new Error(`Chrome trace stream missing: ${file}`)
      const partial = `${file}.partial`
      await writeProtocolStream(session, stream, partial)
      if (dataLossOccurred) throw new Error(`Chrome trace lost data; partial capture retained: ${partial}`)
      await rename(partial, file)
      return file
    } finally {
      await Promise.allSettled([session.detach()])
    }
  }

  let stopping: Promise<string> | undefined
  return () => {
    if (!stopping) stopping = finish()
    return stopping
  }
}
/**
 * Ensures the trace directory exists and builds the destination path for one trace file.
 *
 * The file name is `<run>-<sanitized name>-<hash>-<nonce>[-selectors].json`, where `<run>` is
 * `OPENCODE_PERFORMANCE_RUN_ID` (or "manual"), the name has every character outside
 * `[a-zA-Z0-9_-]` replaced by "-", and `<hash>` is the first 8 hex digits of the name's SHA-256.
 *
 * @param directory Directory to create (recursively) and place the trace in.
 * @param name Benchmark page name.
 * @param selectors Whether to append the "-selectors" suffix.
 * @param nonce Uniqueness suffix; defaults to the first 8 characters of a random UUID.
 * @returns The trace file path. The file itself is not created here.
 */
export async function prepareChromeTrace(
  directory: string,
  name: string,
  selectors: boolean,
  nonce = randomUUID().slice(0, 8),
) {
  await mkdir(directory, { recursive: true })
  const runID = process.env.OPENCODE_PERFORMANCE_RUN_ID ?? "manual"
  const slug = name.replace(/[^a-zA-Z0-9_-]/g, "-")
  const digest = createHash("sha256").update(name).digest("hex").slice(0, 8)
  const suffix = selectors ? "-selectors" : ""
  return path.join(directory, `${runID}-${slug}-${digest}-${nonce}${suffix}.json`)
}
/**
 * Copies a DevTools protocol stream into a new file.
 *
 * The file is opened with the "wx" flag, so the call fails if it already exists. Chunks are read
 * with `IO.read` until one reports `eof`; base64-encoded chunks are decoded before writing.
 * The file handle and the protocol stream are both closed afterwards, ignoring close failures.
 */
async function writeProtocolStream(session: CDPSession, handle: string, file: string) {
  const output = await open(file, "wx")
  try {
    let finished = false
    do {
      const { data, base64Encoded, eof } = await session.send("IO.read", { handle })
      await output.write(base64Encoded ? Buffer.from(data, "base64") : data)
      finished = eof
    } while (!finished)
  } finally {
    await Promise.allSettled([output.close(), session.send("IO.close", { handle })])
  }
}

View File

@ -0,0 +1,20 @@
import config from "../../playwright.config"
// Port the production bundle is served on; falls back to 3000 when PLAYWRIGHT_PORT is unset.
const port = Number(process.env.PLAYWRIGHT_PORT ?? 3000)
// Publish the chosen port through the environment.
// NOTE(review): presumably read by shared e2e helpers; this assignment runs after the base config import has been evaluated — confirm nothing in the base config needs it earlier.
process.env.PLAYWRIGHT_SERVER_PORT = String(port)
// Keep a caller-supplied run ID; otherwise derive one from the current ISO timestamp (":" and "." replaced by "-") and the process ID.
process.env.OPENCODE_PERFORMANCE_RUN_ID ??= `${new Date().toISOString().replace(/[:.]/g, "-")}-${process.pid}`
// Benchmark configuration: the base config with serial execution, its own output folders, and a freshly built server.
export default {
...config,
testDir: ".",
testIgnore: "unit/**",
outputDir: "../test-results/performance",
// Benchmarks run one at a time in a single worker.
fullyParallel: false,
workers: 1,
reporter: [["html", { outputFolder: "../playwright-report/performance", open: "never" }], ["line"]],
webServer: {
...config.webServer,
// Build first, then serve the built bundle on the selected port.
command: `bun run build && bun run serve -- --host 0.0.0.0 --port ${port} --strictPort`,
// Always start the server from this command instead of attaching to one that is already running.
reuseExistingServer: false,
},
}

View File

@ -0,0 +1,13 @@
import config from "./playwright.config"
// Variant of the performance config that launches Chromium with frame-rate limiting and GPU vsync disabled.
export default {
...config,
outputDir: "../test-results/performance-uncapped",
reporter: [["html", { outputFolder: "../playwright-report/performance-uncapped", open: "never" }], ["line"]],
use: {
...config.use,
// NOTE(review): this replaces `config.use.launchOptions` wholesale rather than merging with it — confirm the base config sets no launch options that should be kept.
launchOptions: {
args: ["--disable-frame-rate-limit", "--disable-gpu-vsync"],
},
},
}

View File

@ -0,0 +1,49 @@
import { benchmark, expect } from "../benchmark"
import { expectSessionTitle } from "../../utils/waits"
import { fixture } from "./session-timeline-stress.fixture"
import {
collectCachedRepaintTrace,
compressCachedRepaintTrace,
installCachedRepaintProbe,
waitForCachedRepaintWindow,
} from "./session-tab-repaint-probe"
import { waitForStableTimeline } from "./session-tab-switch-probe"
import {
installStressSessionTabs,
installTimelineSettings,
mockStressTimeline,
stressSessionHref,
} from "./timeline-test-helpers"
// Opens the target session, switches to the source session, then samples what the page shows for
// one second after clicking back to the (now cached) target tab.
benchmark("samples cached session repaint after the click", async ({ page, report }) => {
  benchmark.setTimeout(120_000)

  const windowMs = 1_000
  const targetHref = stressSessionHref(fixture.targetID)
  const sourceHref = stressSessionHref(fixture.sourceID)
  const lastTargetMessageID = fixture.expected.targetMessageIDs.at(-1)!
  const clickTab = (href: string) =>
    page.locator(`[data-slot="titlebar-tabs"] a[href="${href}"]`).first().click()

  await mockStressTimeline(page)
  await installStressSessionTabs(page)
  await installTimelineSettings(page)

  // Load the target session first so it is already rendered once before the measured switch.
  await page.goto(targetHref)
  await expectSessionTitle(page, fixture.expected.targetTitle)
  await waitForStableTimeline(page, lastTargetMessageID)

  await clickTab(sourceHref)
  await expectSessionTitle(page, fixture.expected.sourceTitle)
  await waitForStableTimeline(page, fixture.expected.sourceMessageIDs.at(-1)!)

  // The probe starts sampling on the click of the target tab.
  await installCachedRepaintProbe(page, {
    targetHref,
    destination: fixture.messages[fixture.targetID].map((message) => message.info.id),
    source: fixture.messages[fixture.sourceID].map((message) => message.info.id),
    last: lastTargetMessageID,
    windowMs,
  })
  await clickTab(targetHref)
  await Promise.all([
    expectSessionTitle(page, fixture.expected.targetTitle),
    waitForCachedRepaintWindow(page, windowMs),
  ])

  const trace = await collectCachedRepaintTrace(page)
  report(compressCachedRepaintTrace(trace))
  expect(trace.samples.length).toBeGreaterThan(0)
})

View File

@ -0,0 +1,251 @@
import type { Page } from "@playwright/test"
/** State collected in the page by the cached-repaint probe. Times ending in `AtMs` are relative to the click that started sampling. */
type CachedRepaintTrace = {
/** `performance.timeOrigin` of the page at install time. */
timeOriginEpochMs: number
/** `performance.now()` at the click that started sampling; 0 until then. */
startedAtPerformanceMs: number
/** One entry per sampling callback inside the observation window. */
samples: {
observedAtMs: number
/** Probe-assigned ID of the timeline scroll viewport, or `undefined` when none was found. */
root: number | undefined
scrollTop: number
scrollHeight: number
/** Bottom of the bottom-spacer row minus bottom of the viewport; `undefined` when there is no spacer. */
bottomErrorPx: number | undefined
/** Whether the expected last message is among the visible messages. */
last: boolean
/** Visible timeline rows, positioned relative to the viewport top. */
rows: { key: string | undefined; node: number; top: number; bottom: number }[]
/** Number of keyed timeline elements under the viewport, visible or not. */
mounted: number
/** First 80 characters of the text of the element at the viewport centre. */
center: string | undefined
/** Visible message IDs belonging to the destination session. */
destination: string[]
/** Visible message IDs belonging to the source session. */
source: string[]
}[]
/** Added/removed nodes per mutation callback, using probe-assigned node IDs. */
mutations: { observedAtMs: number; changed: { type: string; node: number }[] }[]
/** Layout-shift entries that fell inside the observation window. */
shifts: { occurredAtMs: number; value: number }[]
/** Length of the observation window in milliseconds. */
windowMs: number
/** True between the starting click and `stop()`. */
running: boolean
/** Flushes pending observer records, then disconnects the observers. */
stop: () => void
}
/**
 * Installs an in-page probe that samples the timeline after the tab link for `targetHref` is clicked.
 *
 * Sampling starts on the first click whose closest `<a>` has exactly that `href`, and continues for
 * `windowMs`. Each sample is taken in a zero-delay timeout scheduled from an animation-frame callback.
 * Layout shifts and added/removed nodes inside the same window are recorded too. The state is exposed
 * as `window.__cachedFlash`.
 *
 * Clicks on anything else are ignored and leave the probe armed. (The listener used to be registered
 * with `once: true`, so any unrelated first click removed it and the probe could never start.)
 *
 * @param page Page to install the probe in.
 * @param input.targetHref `href` of the tab link whose click starts sampling.
 * @param input.destination Message IDs of the session being switched to.
 * @param input.source Message IDs of the session being switched away from.
 * @param input.last Message ID expected to be visible once the destination has settled.
 * @param input.windowMs Length of the observation window in milliseconds.
 */
export async function installCachedRepaintProbe(
  page: Page,
  input: { targetHref: string; destination: string[]; source: string[]; last: string; windowMs: number },
) {
  // Everything inside this callback runs in the browser, so it cannot reference module-level helpers.
  await page.evaluate(({ targetHref, destination, source, last, windowMs }) => {
    const destinationIDs = new Set(destination)
    const sourceIDs = new Set(source)

    // Stable small integers stand in for DOM nodes so the trace can be serialized.
    const nodeIDs = new WeakMap<Node, number>()
    let nextNodeID = 1
    const id = (node: Node) => {
      const current = nodeIDs.get(node)
      if (current) return current
      nodeIDs.set(node, nextNodeID)
      return nextNodeID++
    }

    const state: CachedRepaintTrace = {
      timeOriginEpochMs: performance.timeOrigin,
      startedAtPerformanceMs: 0,
      samples: [],
      mutations: [],
      shifts: [],
      windowMs,
      running: false,
      stop: () => {},
    }

    const recordShifts = (entries: PerformanceEntry[]) => {
      if (!state.running) return
      for (const entry of entries) {
        // Keep only shifts that happened inside the observation window.
        if (
          entry.startTime < state.startedAtPerformanceMs ||
          entry.startTime > state.startedAtPerformanceMs + state.windowMs
        )
          continue
        state.shifts.push({
          occurredAtMs: entry.startTime - state.startedAtPerformanceMs,
          value: (entry as PerformanceEntry & { value: number }).value,
        })
      }
    }
    const shiftObserver = new PerformanceObserver((entries) => recordShifts(entries.getEntries()))
    shiftObserver.observe({ type: "layout-shift" })

    const recordMutations = (entries: MutationRecord[]) => {
      if (!state.running) return
      const observedAtMs = performance.now() - state.startedAtPerformanceMs
      if (observedAtMs > state.windowMs) return
      const changed = entries.flatMap((entry) => [
        ...[...entry.addedNodes].map((node) => ({ type: "add", node: id(node) })),
        ...[...entry.removedNodes].map((node) => ({ type: "remove", node: id(node) })),
      ])
      if (changed.length) state.mutations.push({ observedAtMs, changed })
    }
    const mutationObserver = new MutationObserver(recordMutations)
    mutationObserver.observe(document.documentElement, { childList: true, subtree: true })

    const sample = () => {
      if (!state.running) return
      // Sample in a task queued from the animation-frame callback rather than inside the callback itself.
      setTimeout(() => {
        if (!state.running) return
        const observedAtMs = performance.now() - state.startedAtPerformanceMs
        // Past the window: stop rescheduling.
        if (observedAtMs > state.windowMs) return
        const root = [...document.querySelectorAll<HTMLElement>(".scroll-view__viewport")].find((element) =>
          element.querySelector("[data-timeline-row]"),
        )
        if (root) {
          const view = root.getBoundingClientRect()
          const rows = [...root.querySelectorAll<HTMLElement>("[data-timeline-key]")]
            .map((element) => ({
              key: element.dataset.timelineKey,
              node: id(element),
              rect: element.getBoundingClientRect(),
            }))
            .filter((item) => item.rect.bottom > view.top && item.rect.top < view.bottom)
            .map((item) => ({
              key: item.key,
              node: item.node,
              top: item.rect.top - view.top,
              bottom: item.rect.bottom - view.top,
            }))
          const messages = [...root.querySelectorAll<HTMLElement>("[data-message-id]")]
            .filter((element) => {
              const rect = element.getBoundingClientRect()
              return rect.bottom > view.top && rect.top < view.bottom
            })
            .map((element) => element.dataset.messageId!)
          const spacer = root.querySelector<HTMLElement>('[data-timeline-row="bottom-spacer"]')?.getBoundingClientRect()
          state.samples.push({
            observedAtMs,
            root: id(root),
            scrollTop: root.scrollTop,
            scrollHeight: root.scrollHeight,
            bottomErrorPx: spacer ? spacer.bottom - view.bottom : undefined,
            last: messages.includes(last),
            rows,
            mounted: root.querySelectorAll("[data-timeline-key]").length,
            center: document
              .elementFromPoint(view.left + view.width / 2, view.top + view.height / 2)
              ?.textContent?.slice(0, 80),
            destination: messages.filter((messageID) => destinationIDs.has(messageID)),
            source: messages.filter((messageID) => sourceIDs.has(messageID)),
          })
        } else {
          // No timeline viewport in the document: record an empty sample.
          state.samples.push({
            observedAtMs,
            root: undefined,
            scrollTop: 0,
            scrollHeight: 0,
            bottomErrorPx: undefined,
            last: false,
            rows: [],
            mounted: 0,
            center: document.elementFromPoint(innerWidth / 2, innerHeight / 2)?.textContent?.slice(0, 80),
            destination: [],
            source: [],
          })
        }
        requestAnimationFrame(sample)
      }, 0)
    }

    const onClick = (event: MouseEvent) => {
      const link = event.target instanceof Element ? event.target.closest("a") : undefined
      // Unrelated click: stay armed for the real one.
      if (link?.getAttribute("href") !== targetHref) return
      document.removeEventListener("click", onClick, { capture: true })
      state.startedAtPerformanceMs = performance.now()
      state.running = true
      requestAnimationFrame(sample)
    }

    state.stop = () => {
      // Flush records the observers have queued but not yet delivered.
      recordShifts(shiftObserver.takeRecords())
      recordMutations(mutationObserver.takeRecords())
      state.running = false
      shiftObserver.disconnect()
      mutationObserver.disconnect()
      // Disarm the probe if the starting click never happened.
      document.removeEventListener("click", onClick, { capture: true })
    }

    document.addEventListener("click", onClick, { capture: true })
    ;(window as Window & { __cachedFlash?: CachedRepaintTrace }).__cachedFlash = state
  }, input)
}
/**
 * Converts a layout-shift entry into a sample relative to `started`.
 *
 * @param entry Entry providing `startTime` and the shift `value`.
 * @param started Reference time that `occurredAtMs` is measured from.
 * @returns `undefined` for entries that began before `started`; otherwise the offset and value.
 */
export function layoutShiftSample(entry: Pick<PerformanceEntry, "startTime"> & { value: number }, started: number) {
  const occurredAtMs = entry.startTime - started
  return entry.startTime < started ? undefined : { occurredAtMs, value: entry.value }
}
/**
 * Waits until the cached-repaint probe is running and at least `durationMs` have passed since the
 * click that started it.
 */
export async function waitForCachedRepaintWindow(page: Page, durationMs: number) {
  await page.waitForFunction((minimumElapsedMs) => {
    const probe = (window as Window & { __cachedFlash?: CachedRepaintTrace }).__cachedFlash
    if (!probe || !probe.running) return false
    return performance.now() - probe.startedAtPerformanceMs >= minimumElapsedMs
  }, durationMs)
}
/**
 * Stops the cached-repaint probe and returns the state it collected.
 * Expects the probe to be installed on the page already.
 */
export async function collectCachedRepaintTrace(page: Page) {
  return page.evaluate(() => {
    const { __cachedFlash: probe } = window as Window & { __cachedFlash?: CachedRepaintTrace }
    probe!.stop()
    return probe!
  })
}
/**
 * Reduces a cached-repaint trace to summary counters.
 *
 * Each sample falls into exactly one category, checked in this order:
 * - "source": any source-session message is visible;
 * - "blank": no viewport was found, or no timeline row is visible;
 * - "unknown": rows are visible but no destination message is;
 * - "correct": the last message is visible and the bottom error is within 1px;
 * - "wrongDestination": destination content is visible but not settled.
 *
 * For a trace without samples, `mountedMin` and `mountedMax` are both 0. (`mountedMax` used to be
 * `-Infinity` in that case, which turns into `null` when serialized as JSON.)
 *
 * @param trace Trace collected by the cached-repaint probe.
 * @returns Sample, mutation, and layout-shift summary values.
 */
export function summarizeCachedRepaintTrace(trace: CachedRepaintTrace) {
  type Sample = CachedRepaintTrace["samples"][number]
  type Category = "source" | "blank" | "unknown" | "correct" | "wrongDestination"
  const categorize = (sample: Sample): Category => {
    if (sample.source.length) return "source"
    if (sample.root === undefined || sample.rows.length === 0) return "blank"
    if (!sample.destination.length) return "unknown"
    if (sample.last && Math.abs(sample.bottomErrorPx ?? Infinity) <= 1) return "correct"
    return "wrongDestination"
  }

  // Classify every sample once instead of once per reported counter.
  const counts: Record<Category, number> = { source: 0, blank: 0, unknown: 0, correct: 0, wrongDestination: 0 }
  for (const sample of trace.samples) counts[categorize(sample)] += 1

  const first = trace.samples[0]
  const final = trace.samples[trace.samples.length - 1]
  const roots = trace.samples.map((sample) => sample.root)
  const mounted = trace.samples.map((sample) => sample.mounted)
  const bottomErrors = trace.samples.flatMap((sample) =>
    sample.bottomErrorPx === undefined ? [] : [Math.abs(sample.bottomErrorPx)],
  )

  let addedNodes = 0
  let removedNodes = 0
  for (const batch of trace.mutations) {
    for (const change of batch.changed) {
      if (change.type === "add") addedNodes += 1
      else if (change.type === "remove") removedNodes += 1
    }
  }

  return {
    samples: trace.samples.length,
    durationMs: final?.observedAtMs ?? 0,
    firstSampleObservedMs: first?.observedAtMs,
    firstSampleCorrect: first ? categorize(first) === "correct" : false,
    blankSamples: counts.blank,
    sourceSamples: counts.source,
    wrongDestinationSamples: counts.wrongDestination,
    unknownSamples: counts.unknown,
    // Number of consecutive sample pairs whose viewport identity differs.
    rootChanges: roots.slice(1).filter((root, index) => root !== roots[index]).length,
    mountedMin: mounted.length ? Math.min(...mounted) : 0,
    mountedMax: mounted.length ? Math.max(...mounted) : 0,
    maxBottomErrorPx: Math.max(0, ...bottomErrors),
    mutationBatches: trace.mutations.length,
    addedNodes,
    removedNodes,
    layoutShiftValueSum: trace.shifts.reduce((sum, shift) => sum + shift.value, 0),
    maxLayoutShiftValue: Math.max(0, ...trace.shifts.map((shift) => shift.value)),
  }
}
/**
 * Run-length groups consecutive samples whose state (everything except `observedAtMs`) serializes
 * to the same JSON, keeping every original timestamp in the group's `observedAtMs` list.
 *
 * @param trace Trace collected by the cached-repaint probe.
 * @returns The timing fields, a summary, the grouped samples, and the unmodified mutation and shift lists.
 */
export function compressCachedRepaintTrace(trace: CachedRepaintTrace) {
  type Group = {
    observedAtMs: number[]
    state: Omit<CachedRepaintTrace["samples"][number], "observedAtMs">
  }
  const groups: Group[] = []
  let open: Group | undefined
  let openKey: string | undefined
  for (const { observedAtMs, ...state } of trace.samples) {
    const key = JSON.stringify(state)
    if (open && key === openKey) {
      // Same state as the group in progress: only the timestamp is new.
      open.observedAtMs.push(observedAtMs)
    } else {
      open = { observedAtMs: [observedAtMs], state }
      openKey = key
      groups.push(open)
    }
  }
  return {
    timeOriginEpochMs: trace.timeOriginEpochMs,
    startedAtPerformanceMs: trace.startedAtPerformanceMs,
    windowMs: trace.windowMs,
    summary: summarizeCachedRepaintTrace(trace),
    samples: groups,
    mutations: trace.mutations,
    shifts: trace.shifts,
  }
}

View File

@ -0,0 +1,79 @@
import type { Page } from "@playwright/test"
import { expectSessionTitle } from "../../utils/waits"
import { benchmark, expect, withBenchmarkPage } from "../benchmark"
import { fixture } from "./session-timeline-stress.fixture"
import { installStressSessionTabs, mockStressTimeline, stressSessionHref } from "./timeline-test-helpers"
import { measureSessionSwitch, waitForStableTimeline } from "./session-tab-switch-probe"
/** Value that one `measureSessionSwitch` call resolves to. */
type Result = Awaited<ReturnType<typeof measureSessionSwitch>>
// Runs five cold and then five hot switch trials, each on its own isolated page, and reports the
// raw per-trial results together with a per-mode summary.
benchmark("benchmarks cold and hot session tab switching", async ({ browser, report }, testInfo) => {
  benchmark.setTimeout(180_000)
  const modes = ["cold", "hot"] as const
  const runsPerMode = 5
  const results: Record<(typeof modes)[number], Result[]> = { cold: [], hot: [] }
  for (const mode of modes) {
    for (let run = 0; run < runsPerMode; run += 1) {
      const outcome = await withBenchmarkPage(
        browser,
        `session-tab-switch-${mode}-${run}`,
        (page) => trial(page, mode),
        testInfo,
      )
      results[mode].push(outcome)
    }
  }
  report({ results, summary: summarize(results) })
})
/**
 * Runs one measured switch from the source session to the target session.
 *
 * In "hot" mode the target session is opened and allowed to settle first, and the page then
 * switches to the source session through its tab. In "cold" mode the page navigates straight to
 * the source session, so the target has not been shown on this page before the measured switch.
 */
async function trial(page: Page, mode: "cold" | "hot") {
  await mockStressTimeline(page)
  await installStressSessionTabs(page)

  if (mode === "cold") {
    await page.goto(stressSessionHref(fixture.sourceID))
    await expectSessionTitle(page, fixture.expected.sourceTitle)
  } else {
    await page.goto(stressSessionHref(fixture.targetID))
    await expectSessionTitle(page, fixture.expected.targetTitle)
    await waitForStableTimeline(page, fixture.expected.targetMessageIDs.at(-1)!)
    await switchSession(page, fixture.sourceID, fixture.expected.sourceTitle)
  }
  // Either way, start measuring from a settled source timeline.
  await waitForStableTimeline(page, fixture.expected.sourceMessageIDs.at(-1)!)

  const measurement = await measureSessionSwitch(page, {
    destinationIDs: fixture.messages[fixture.targetID].map((message) => message.info.id),
    sourceIDs: fixture.messages[fixture.sourceID].map((message) => message.info.id),
    lastID: fixture.expected.targetMessageIDs.at(-1)!,
    href: stressSessionHref(fixture.targetID),
    switch: () => switchSession(page, fixture.targetID, fixture.expected.targetTitle),
  })
  return measurement
}
/**
 * Computes min / median / max per mode for the three switch timings, ignoring `null` values and
 * counting them as `missing`.
 *
 * The median of an even number of values is the mean of the two middle values. (It used to be the
 * upper-middle value, which skewed the result whenever a trial had a missing timing.)
 *
 * @param results Per-mode trial results.
 * @returns An object keyed by mode, each holding the statistics for every timing.
 */
function summarize(results: Record<"cold" | "hot", Result[]>) {
  const stats = (values: (number | null)[]) => {
    const sorted = values.filter((value): value is number => value !== null).sort((a, b) => a - b)
    const middle = Math.floor(sorted.length / 2)
    const upper = sorted[middle]
    const lower = sorted[middle - 1]
    const median =
      upper === undefined ? null : sorted.length % 2 === 1 || lower === undefined ? upper : (lower + upper) / 2
    return {
      min: sorted[0] ?? null,
      median,
      max: sorted[sorted.length - 1] ?? null,
      missing: values.length - sorted.length,
    }
  }
  return Object.fromEntries(
    Object.entries(results).map(([mode, values]) => [
      mode,
      {
        firstDestinationObservedMs: stats(values.map((value) => value.firstDestinationObservedMs)),
        firstCorrectObservedMs: stats(values.map((value) => value.firstCorrectObservedMs)),
        stableObservedMs: stats(values.map((value) => value.stableObservedMs)),
      },
    ]),
  )
}
/** Clicks the first titlebar tab linking to `sessionID` once it is visible, then waits for `title` to be shown. */
async function switchSession(page: Page, sessionID: string, title: string) {
  const selector = `[data-slot="titlebar-tabs"] a[href="${stressSessionHref(sessionID)}"]`
  const tab = page.locator(selector).first()
  await expect(tab).toBeVisible()
  await tab.click()
  await expectSessionTitle(page, title)
}

View File

@ -0,0 +1,46 @@
/** One observation of the timeline taken after the measured tab click. */
export type SessionSwitchSample = {
  /** Milliseconds since the click that started the measurement. */
  observedAtMs: number
  /** Visible message IDs that belong to the destination session. */
  destination: string[]
  /** Visible message IDs that belong to the source session. */
  source: string[]
  /** Whether any timeline row intersects the viewport. */
  hasVisibleRows: boolean
  /** Whether the expected last message is visible. */
  last: boolean
  /** Bottom of the bottom-spacer row minus bottom of the viewport; absent when there is no spacer. */
  bottomErrorPx?: number
}

/** True when the last message is visible and the bottom spacer sits within 1px of the viewport bottom. */
function isSettledAtBottom(sample: Pick<SessionSwitchSample, "last" | "bottomErrorPx">) {
  return sample.last && Math.abs(sample.bottomErrorPx ?? Infinity) <= 1
}

/**
 * Derives switch timings and sample counters from an ordered list of samples.
 *
 * - `firstDestinationObservedMs`: first sample showing any destination message.
 * - `firstCorrectObservedMs`: first sample satisfying `isCorrectDestination`.
 * - `stableObservedMs`: time of the third sample of the first run of three consecutive correct
 *   samples.
 *
 * Each timing is `null` when no sample qualifies. (`stableObservedMs` used to report the second
 * sample's time when no stable run existed, because the "not found" index of -1 was offset by 2.)
 */
export function classifySessionSwitch(samples: SessionSwitchSample[]) {
  const firstDestination = samples.findIndex((sample) => sample.destination.length > 0)
  const firstCorrect = samples.findIndex(isCorrectDestination)
  const stableStart = samples.findIndex((_, index) => isStableSessionSwitch(samples.slice(index, index + 3)))
  return {
    firstDestinationObservedMs: samples[firstDestination]?.observedAtMs ?? null,
    firstCorrectObservedMs: samples[firstCorrect]?.observedAtMs ?? null,
    stableObservedMs: stableStart === -1 ? null : (samples[stableStart + 2]?.observedAtMs ?? null),
    // No sample before the first destination sample can match this filter, so the whole list is scanned.
    wrongDestinationSamples: samples.filter((sample) => sample.destination.length > 0 && !sample.last).length,
    blankSamples: samples.filter((sample) => !sample.hasVisibleRows).length,
    unknownSamples: samples.filter(
      (sample) => sample.hasVisibleRows && sample.destination.length === 0 && sample.source.length === 0,
    ).length,
    sourceSamples: samples.filter((sample) => sample.source.length > 0).length,
  }
}

/** True when only destination content is visible and it is settled at the bottom. */
export function isCorrectDestination(sample: SessionSwitchSample) {
  return sample.destination.length > 0 && sample.source.length === 0 && isSettledAtBottom(sample)
}

/** True for exactly three samples that are all correct destination samples. */
export function isStableSessionSwitch(samples: SessionSwitchSample[]) {
  return samples.length === 3 && samples.every(isCorrectDestination)
}

/** True for exactly three samples that are all settled at the bottom, regardless of which messages are visible. */
export function isStableDestination(samples: Pick<SessionSwitchSample, "last" | "bottomErrorPx">[]) {
  return samples.length === 3 && samples.every(isSettledAtBottom)
}

View File

@ -0,0 +1,152 @@
import { expect, type Page } from "@playwright/test"
import { classifySessionSwitch, isStableDestination, type SessionSwitchSample } from "./session-tab-switch-metrics"
/** Handle the in-page session-switch probe exposes as `window.__sessionSwitchProbe`. */
type SessionSwitchProbe = {
/** Samples recorded so far; the probe keeps appending to this array. */
samples: SessionSwitchSample[]
/** Makes the probe stop taking further samples. */
stop: () => void
}
/**
 * Installs an in-page probe that samples the timeline after the tab link for `href` is clicked.
 *
 * Sampling starts on the first click whose closest `<a>` has exactly that `href`, and repeats
 * (a zero-delay timeout scheduled from an animation-frame callback) until `stop()` is called. The
 * probe is exposed as `window.__sessionSwitchProbe`.
 *
 * Clicks on anything else are ignored and leave the probe armed. (The listener used to be registered
 * with `once: true`, so any unrelated first click removed it and sampling could never start.)
 *
 * @param page Page to install the probe in.
 * @param input.destinationIDs Message IDs of the session being switched to.
 * @param input.sourceIDs Message IDs of the session being switched away from.
 * @param input.lastID Message ID expected to be visible once the destination has settled.
 * @param input.href `href` of the tab link whose click starts the measurement.
 */
async function installSessionSwitchProbe(
  page: Page,
  input: { destinationIDs: string[]; sourceIDs: string[]; lastID: string; href: string },
) {
  // Everything inside this callback runs in the browser, so it cannot reference module-level helpers.
  await page.evaluate(({ destinationIDs, sourceIDs, lastID, href }) => {
    const destination = new Set(destinationIDs)
    const source = new Set(sourceIDs)
    const samples: SessionSwitchSample[] = []
    let started: number | undefined
    let running = true

    const sample = () => {
      if (!running || started === undefined) return
      // Sample in a task queued from the animation-frame callback rather than inside the callback itself.
      setTimeout(() => {
        if (!running || started === undefined) return
        const observedAtMs = performance.now() - started
        const root = [...document.querySelectorAll<HTMLElement>(".scroll-view__viewport")].find((element) =>
          element.querySelector("[data-timeline-row]"),
        )
        if (root) {
          const view = root.getBoundingClientRect()
          const visible = [...root.querySelectorAll<HTMLElement>("[data-message-id]")]
            .filter((element) => {
              const rect = element.getBoundingClientRect()
              return rect.bottom > view.top && rect.top < view.bottom
            })
            .map((element) => element.dataset.messageId!)
          const hasVisibleRows = [...root.querySelectorAll<HTMLElement>("[data-timeline-key]")].some((element) => {
            const rect = element.getBoundingClientRect()
            return rect.bottom > view.top && rect.top < view.bottom
          })
          const spacer = root.querySelector<HTMLElement>('[data-timeline-row="bottom-spacer"]')?.getBoundingClientRect()
          samples.push({
            observedAtMs,
            destination: visible.filter((id) => destination.has(id)),
            source: visible.filter((id) => source.has(id)),
            hasVisibleRows,
            last: visible.includes(lastID),
            bottomErrorPx: spacer ? spacer.bottom - view.bottom : undefined,
          })
        } else {
          // No timeline viewport in the document: record a blank sample.
          samples.push({ observedAtMs, destination: [], source: [], hasVisibleRows: false, last: false })
        }
        requestAnimationFrame(sample)
      }, 0)
    }

    const onClick = (event: MouseEvent) => {
      const link = event.target instanceof Element ? event.target.closest("a") : undefined
      // Unrelated click: stay armed for the real one.
      if (link?.getAttribute("href") !== href) return
      document.removeEventListener("click", onClick, { capture: true })
      started = performance.now()
      requestAnimationFrame(sample)
    }
    document.addEventListener("click", onClick, { capture: true })

    ;(window as Window & { __sessionSwitchProbe?: SessionSwitchProbe }).__sessionSwitchProbe = {
      samples,
      stop: () => {
        running = false
        // Disarm the probe if the starting click never happened.
        document.removeEventListener("click", onClick, { capture: true })
      },
    }
  }, input)
}
/**
 * Waits until the in-page probe has recorded three consecutive samples that
 * each show destination messages only, the last message visible, and a bottom
 * error of at most one pixel.
 *
 * The predicate is repeated inline because it runs inside the page.
 */
async function waitForStableSessionSwitch(page: Page) {
  await page.waitForFunction(() => {
    const host = window as Window & { __sessionSwitchProbe?: SessionSwitchProbe }
    const recorded = host.__sessionSwitchProbe?.samples
    if (!recorded) return false
    const settled = (entry: SessionSwitchSample) =>
      entry.destination.length > 0 &&
      entry.source.length === 0 &&
      entry.last &&
      Math.abs(entry.bottomErrorPx ?? Infinity) <= 1
    // Slide a three-sample window across everything recorded so far.
    for (let start = 0; start + 3 <= recorded.length; start += 1) {
      if (recorded.slice(start, start + 3).every(settled)) return true
    }
    return false
  })
}
/**
 * Stops the in-page probe, pulls its samples out of the page, and classifies
 * them with `classifySessionSwitch`.
 */
async function collectSessionSwitchResult(page: Page) {
  const recorded = await page.evaluate(() => {
    const host = window as Window & { __sessionSwitchProbe?: SessionSwitchProbe }
    const active = host.__sessionSwitchProbe!
    active.stop()
    return active.samples
  })
  return classifySessionSwitch(recorded)
}
/**
 * Measures one session switch end to end.
 *
 * Installs the probe, runs the caller-supplied `switch` action, waits for the
 * timeline to settle, then returns the classified samples.
 *
 * @param input.switch Action that performs the switch, for example clicking the link.
 */
export async function measureSessionSwitch(
  page: Page,
  input: { destinationIDs: string[]; sourceIDs: string[]; lastID: string; href: string; switch: () => Promise<void> },
) {
  // `switch` is a reserved word, so it is aliased while the rest goes to the probe.
  const { switch: performSwitch, ...probeInput } = input
  await installSessionSwitchProbe(page, probeInput)
  await performSwitch()
  await waitForStableSessionSwitch(page)
  return collectSessionSwitchResult(page)
}
/**
 * Polls the page until the three most recent observations all show message
 * `lastID` visible and the bottom spacer within one pixel of the viewport
 * bottom. Gives up after 30 seconds.
 */
export async function waitForStableTimeline(page: Page, lastID: string) {
  type Observation = Pick<SessionSwitchSample, "last" | "bottomErrorPx">
  const history: Observation[] = []
  // Runs inside the page: takes one observation from a zero-delay timer queued
  // in the next animation frame. It must stay self-contained.
  const observe = (targetID: string) =>
    new Promise<Observation>((resolve) => {
      requestAnimationFrame(() =>
        setTimeout(() => {
          const viewports = [...document.querySelectorAll<HTMLElement>(".scroll-view__viewport")]
          const root = viewports.find((element) => element.querySelector("[data-timeline-row]"))
          if (!root) {
            resolve({ last: false })
            return
          }
          const view = root.getBoundingClientRect()
          const messages = [...root.querySelectorAll<HTMLElement>("[data-message-id]")]
          const last = messages.some((element) => {
            if (element.dataset.messageId !== targetID) return false
            const rect = element.getBoundingClientRect()
            return rect.bottom > view.top && rect.top < view.bottom
          })
          const spacer = root
            .querySelector<HTMLElement>('[data-timeline-row="bottom-spacer"]')
            ?.getBoundingClientRect()
          resolve({ last, bottomErrorPx: spacer ? spacer.bottom - view.bottom : undefined })
        }, 0),
      )
    })
  const settled = async () => {
    history.push(await page.evaluate(observe, lastID))
    return isStableDestination(history.slice(-3))
  }
  await expect.poll(settled, { timeout: 30_000, intervals: [0] }).toBe(true)
}

View File

@ -0,0 +1,488 @@
import { base64Encode } from "@opencode-ai/core/util/encode"
import type { Page } from "@playwright/test"
import { mockOpenCodeServer } from "../../utils/mock-server"
import { expectAppVisible, expectSessionTitle } from "../../utils/waits"
import { expect } from "../benchmark"
// Fixed identifiers shared by every mocked entity in the timeline benchmark fixture.
const directory = "C:/OpenCode/TimelineStateRegression"
const projectID = "proj_timeline_state_regression"
const sessionID = "ses_timeline_state_regression"
const userMessageID = "msg_user_regression"
const assistantMessageID = "msg_assistant_regression"
const editPartID = "prt_0001_edit"
// ID of the assistant text part that the benchmark streams into.
export const textPartID = "prt_9999_text"
const title = "Timeline collapse state regression"
const model = { providerID: "opencode", modelID: "claude-opus-4-6", variant: "max" }
/** One server event queued for delivery to the app by the mocked transport. */
type EventPayload = {
directory: string
payload: Record<string, unknown>
}
// Final user turn of the mocked session.
const userMessage = {
info: {
id: userMessageID,
sessionID,
role: "user",
time: { created: 1700000000000 },
summary: { diffs: [] },
agent: "build",
model,
},
parts: [
{
id: "prt_user_text",
sessionID,
messageID: userMessageID,
type: "text",
text: "Please edit the file.",
},
],
}
// Completed `edit` tool call attached to the final assistant message.
const editPart = {
id: editPartID,
sessionID,
messageID: assistantMessageID,
type: "tool",
callID: "call_edit_regression",
tool: "edit",
state: {
status: "completed",
input: { filePath: "src/regression.ts" },
output: "Edited src/regression.ts",
title: "src/regression.ts",
metadata: {
filediff: {
file: "src/regression.ts",
additions: 1,
deletions: 1,
before: "export const value = 'before'\n",
after: "export const value = 'after'\n",
},
diff: "diff --git a/src/regression.ts b/src/regression.ts\n-export const value = 'before'\n+export const value = 'after'\n",
},
time: { start: 1700000001000, end: 1700000002000 },
},
}
// Base shape of the streamed text part; buildInitialStreamEvent overrides `text`.
const streamedTextPart = {
id: textPartID,
sessionID,
messageID: assistantMessageID,
type: "text",
text: "Streaming added a later assistant text part.",
}
// Final assistant turn; it starts with only the edit part.
const assistantMessage = {
info: {
id: assistantMessageID,
sessionID,
role: "assistant",
time: { created: 1700000001000 },
parentID: userMessageID,
modelID: model.modelID,
providerID: model.providerID,
mode: "build",
agent: "build",
path: { cwd: directory, root: directory },
cost: 0.01,
tokens: { input: 100, output: 200, reasoning: 0, cache: { read: 0, write: 0 } },
variant: "max",
},
parts: [editPart],
}
/**
 * Boots the app against a mocked OpenCode server holding one long session and
 * returns handles for driving the streaming benchmark.
 *
 * The mocked history is `options.historyTurns` generated turns followed by the
 * fixed user and assistant messages. Queued events are handed to the mock
 * server in batches of at most `eventBatch` per request.
 *
 * @param page Playwright page to drive.
 * @param options.historyTurns Number of generated historical turns.
 * @param options.eventBatch Maximum number of queued events released per poll.
 * @returns Locators for the timeline scroller and the streamed text part, the
 *   event transport controls, and geometry helpers.
 */
export async function setupTimelineBenchmark(page: Page, options: { historyTurns: number; eventBatch: number }) {
  const events: EventPayload[] = []
  let eventBatch = options.eventBatch
  await mockOpenCodeServer(page, {
    directory,
    project: project(),
    provider: provider(),
    sessions: [session()],
    pageMessages: () => ({
      items: [
        ...Array.from({ length: options.historyTurns }, (_, index) => performanceTurn(index)).flat(),
        userMessage,
        assistantMessage,
      ],
    }),
    // Each poll drains up to `eventBatch` events from the front of the queue.
    events: () => events.splice(0, eventBatch),
    eventRetry: 16,
  })
  // Seed the stored settings before any app code runs.
  await page.addInitScript(() => {
    localStorage.setItem(
      "settings.v3",
      JSON.stringify({
        general: {
          editToolPartsExpanded: true,
          shellToolPartsExpanded: true,
          showReasoningSummaries: true,
          showSessionProgressBar: true,
        },
      }),
    )
  })
  await page.setViewportSize({ width: 1366, height: 768 })
  const scroller = page.locator(".scroll-view__viewport", { has: page.locator("[data-timeline-row]") })
  const text = page.locator(`[data-timeline-part-id="${textPartID}"]`).first()
  await page.goto(`/${base64Encode(directory)}/session/${sessionID}`)
  await expectSessionTitle(page, title)
  await expectAppVisible(scroller)
  return {
    scroller,
    text,
    transport: {
      /** Appends one or more events to the delivery queue. */
      enqueue(payload: EventPayload | EventPayload[]) {
        events.push(...(Array.isArray(payload) ? payload : [payload]))
      },
      /** Number of events not yet handed to the mock server. */
      pendingCount() {
        return events.length
      },
      /** Raises the batch size so the next poll drains everything currently queued. */
      releaseAll() {
        // Never shrink the batch. With an empty queue, `events.length` is 0 and
        // assigning it directly would make every later poll deliver nothing.
        eventBatch = Math.max(eventBatch, events.length)
      },
    },
    /** Scrolls the timeline viewport to its maximum scroll offset. */
    async scrollToBottom() {
      await scroller.evaluate((element) => {
        element.scrollTop = element.scrollHeight
      })
    },
    /**
     * Waits until the scroller is within one pixel of the bottom and its scroll
     * height is unchanged across two consecutive animation frames.
     */
    async waitForStableGeometry() {
      await expect
        .poll(() => scroller.evaluate((element) => element.scrollHeight - element.clientHeight - element.scrollTop))
        .toBeLessThanOrEqual(1)
      await page.waitForFunction((partID) => {
        const root = [...document.querySelectorAll<HTMLElement>(".scroll-view__viewport")].find((element) =>
          element.querySelector(`[data-timeline-part-id="${partID}"]`),
        )
        if (!root) return false
        return new Promise<boolean>((resolve) => {
          const height = root.scrollHeight
          requestAnimationFrame(() =>
            requestAnimationFrame(() =>
              resolve(root.scrollHeight === height && root.scrollHeight - root.clientHeight - root.scrollTop <= 1),
            ),
          )
        })
      }, textPartID)
    },
  }
}
/**
 * Builds the `message.part.updated` event that creates the streamed text part.
 *
 * The text is "Streaming", then the opening stream chunk, then a small fenced
 * TypeScript block.
 *
 * @param deltaCount Number of delta events that will follow this one.
 */
export function buildInitialStreamEvent(deltaCount: number): EventPayload {
  const opening = streamChunk(0, deltaCount + 1)
  const part = {
    ...streamedTextPart,
    text: `Streaming${opening}\n\n\`\`\`ts\nconst initial = true\n\`\`\``,
  }
  return {
    directory,
    payload: { type: "message.part.updated", properties: { part } },
  }
}
/**
 * Builds `deltaCount` `message.part.delta` events for the streamed text part.
 *
 * Event `i` (zero-based) carries stream chunk `i + 1` out of `deltaCount + 1`,
 * because chunk 0 is already sent with the initial part.
 */
export function buildStreamDeltaEvents(deltaCount: number): EventPayload[] {
  const chunkTotal = deltaCount + 1
  const deltaEvent = (chunkIndex: number): EventPayload => ({
    directory,
    payload: {
      type: "message.part.delta",
      properties: {
        messageID: assistantMessageID,
        partID: textPartID,
        field: "text",
        delta: streamChunk(chunkIndex, chunkTotal),
      },
    },
  })
  return Array.from({ length: deltaCount }, (_, offset) => deltaEvent(offset + 1))
}
/**
 * Generates one historical turn: a user message followed by an assistant message.
 *
 * The assistant message always has a Markdown text part. It additionally gets a
 * reasoning part every 5th turn, an `edit` tool part every 8th, a `write` tool
 * part every 12th, and an `apply_patch` tool part every 16th.
 *
 * @param index Zero-based turn number; it drives IDs, timestamps, and content.
 * @returns A `[userMessage, assistantMessage]` pair.
 */
function performanceTurn(index: number) {
  const suffix = String(index).padStart(4, "0")
  const userID = `msg_0000_${suffix}_a_user`
  const assistantID = `msg_0000_${suffix}_b_assistant`
  const before = historicalSource(index, false)
  const after = historicalSource(index, true)
  // Turns are spaced two seconds apart; `offsetMs` positions an event within the turn.
  const at = (offsetMs: number) => 1690000000000 + offsetMs + index * 2_000
  const historyFile = `src/history-${index}.ts`
  const generatedFile = `src/generated-${index}.tsx`

  const reasoningParts =
    index % 5 === 0
      ? [
          {
            id: `prt_0000_${suffix}_reasoning`,
            sessionID,
            messageID: assistantID,
            type: "reasoning",
            text: `Reviewing the existing implementation. ${"constraint analysis ".repeat(20)}`,
            time: { start: at(1_000), end: at(1_200) },
          },
        ]
      : []

  const textPart = {
    id: `prt_0000_${suffix}_assistant`,
    sessionID,
    messageID: assistantID,
    type: "text",
    text: historicalMarkdown(index),
  }

  const editParts =
    index % 8 === 0
      ? [
          {
            id: `prt_0000_${suffix}_edit`,
            sessionID,
            messageID: assistantID,
            type: "tool",
            callID: `call_0000_${suffix}_edit`,
            tool: "edit",
            state: {
              status: "completed",
              input: { filePath: historyFile },
              output: `Edited ${historyFile}`,
              title: historyFile,
              metadata: {
                filediff: { file: historyFile, additions: 48, deletions: 48, before, after },
              },
              time: { start: at(1_200), end: at(1_400) },
            },
          },
        ]
      : []

  const writeParts =
    index % 12 === 0
      ? [
          {
            id: `prt_0000_${suffix}_write`,
            sessionID,
            messageID: assistantID,
            type: "tool",
            callID: `call_0000_${suffix}_write`,
            tool: "write",
            state: {
              status: "completed",
              input: { filePath: generatedFile, content: after },
              output: `Wrote ${generatedFile}`,
              title: generatedFile,
              metadata: {
                filediff: { file: generatedFile, additions: 32, deletions: 0, before: "", after },
              },
              time: { start: at(1_400), end: at(1_500) },
            },
          },
        ]
      : []

  const patchParts =
    index % 16 === 0
      ? [
          {
            id: `prt_0000_${suffix}_patch`,
            sessionID,
            messageID: assistantID,
            type: "tool",
            callID: `call_0000_${suffix}_patch`,
            tool: "apply_patch",
            state: {
              status: "completed",
              input: { patchText: realisticPatch(index) },
              output: "Success. Updated src/components/SessionCard.tsx",
              title: "src/components/SessionCard.tsx",
              metadata: {
                files: [
                  {
                    filePath: "src/components/SessionCard.tsx",
                    relativePath: "src/components/SessionCard.tsx",
                    type: "update",
                    additions: 8,
                    deletions: 3,
                    patch: realisticPatch(index),
                    before,
                    after,
                  },
                ],
              },
              time: { start: at(1_500), end: at(1_700) },
            },
          },
        ]
      : []

  const parts = [...reasoningParts, textPart, ...editParts, ...writeParts, ...patchParts]

  const userTurn = {
    info: {
      id: userID,
      sessionID,
      role: "user",
      time: { created: at(0) },
      summary: { diffs: [] },
      agent: "build",
      model,
    },
    parts: [
      {
        id: `prt_0000_${suffix}_user`,
        sessionID,
        messageID: userID,
        type: "text",
        text: `Historical prompt ${index}`,
      },
    ],
  }

  const assistantTurn = {
    info: {
      id: assistantID,
      sessionID,
      role: "assistant",
      time: { created: at(1_000), completed: at(1_500) },
      parentID: userID,
      modelID: model.modelID,
      providerID: model.providerID,
      mode: "build",
      agent: "build",
      path: { cwd: directory, root: directory },
      cost: 0.01,
      tokens: { input: 100, output: 200, reasoning: 0, cache: { read: 0, write: 0 } },
      variant: "max",
      finish: "stop",
    },
    parts,
  }

  return [userTurn, assistantTurn]
}
/**
 * Produces the Markdown body of a historical assistant message.
 *
 * Every body has a heading, a paragraph, a table, and a block quote. Every
 * fourth turn ends with fenced `tsx` and `bash` code blocks; the other turns
 * end with a three-item bullet list.
 */
function historicalMarkdown(index: number) {
  const code = `import { For, Show, createSignal } from "solid-js"
type SessionRow = { id: string; title: string; active: boolean }
export function SessionList(props: { rows: SessionRow[] }) {
const [selected, setSelected] = createSignal<string>()
return (
<section aria-label="Sessions">
<For each={props.rows}>{(row) => (
<button classList={{ active: row.active }} onClick={() => setSelected(row.id)}>
<Show when={selected() === row.id} fallback={row.title}>{row.title.toUpperCase()}</Show>
</button>
)}</For>
</section>
)
}`
  const withCodeFences = index % 4 === 0
  const closing = withCodeFences
    ? `\`\`\`tsx\n${code}\n\`\`\`\n\n\`\`\`bash\nbun typecheck\nbun test --preload ./happydom.ts ./src/pages/session\ngit diff --check\n\`\`\``
    : "- preserve the viewport anchor\n- avoid replacing stable Markdown nodes\n- process provider deltas without blocking input"
  return `## Session renderer review ${index}
The active session keeps **semantic row identity** while reconciling measured content. See [Solid documentation](https://docs.solidjs.com/) and the inline \`measureElement(node)\` call.
| Concern | Current behavior | Verification |
| --- | --- | --- |
| streaming | appends Markdown blocks | painted frames |
| geometry | anchors visible rows | DOM coordinates |
| tools | preserves expanded state | keyed remount probe |
> Long sessions combine Markdown, syntax highlighting, tool output, and asynchronously rendered diffs.
${closing}`
}
/**
 * Produces the source text used as the "before" or "after" side of historical diffs.
 *
 * @param index Turn number embedded in the `data-session-index` attribute.
 * @param updated When true, emit the edited variant: locale-aware upper-casing
 *   and a 24-message window instead of `toUpperCase()` and 20.
 */
function historicalSource(index: number, updated: boolean) {
  const variant = updated
    ? { method: "toLocaleUpperCase(props.locale)", limit: 24 }
    : { method: "toUpperCase()", limit: 20 }
  const { method, limit } = variant
  return `import { createMemo, For } from "solid-js"
type Message = {
id: string
role: "user" | "assistant"
text: string
tokens: { input: number; output: number }
}
export function MessageSummary(props: { messages: Message[]; locale: string }) {
const visible = createMemo(() => props.messages.filter((message) => message.text.trim()).slice(-${limit}))
const total = createMemo(() => visible().reduce((sum, message) => sum + message.tokens.output, 0))
return (
<article data-session-index="${index}">
<header>{total().toLocaleString(props.locale)} output tokens</header>
<For each={visible()}>{(message) => <p data-role={message.role}>{message.text.${method}}</p>}</For>
</article>
)
}
`
}
/**
 * Produces the patch text used by historical `apply_patch` tool parts.
 *
 * The patch updates `src/components/SessionCard.tsx` in two hunks and embeds
 * `index` in a `data-session-index` attribute.
 */
function realisticPatch(index: number) {
  const patchLines = [
    `*** Begin Patch`,
    `*** Update File: src/components/SessionCard.tsx`,
    `@@`,
    `-const title = props.session.title.toUpperCase()`,
    `-const messages = props.messages.slice(-20)`,
    `+const title = props.session.title.toLocaleUpperCase(props.locale)`,
    `+const messages = props.messages.filter((message) => message.text.trim()).slice(-24)`,
    `+const outputTokens = messages.reduce((sum, message) => sum + message.tokens.output, 0)`,
    `@@`,
    `- <h2>{title}</h2>`,
    `+ <h2 data-session-index="${index}">{title}</h2>`,
    `+ <span>{outputTokens.toLocaleString(props.locale)} output tokens</span>`,
    `*** End Patch`,
  ]
  return patchLines.join("\n")
}
/**
 * Returns the text of stream chunk `index` out of `count` chunks.
 *
 * Chunk 0 opens the document, chunk `count - 1` closes the open code fence and
 * appends the verification summary, and the chunks in between cycle through a
 * fixed list of Markdown fragments that split bold text, a table, and a code
 * block across several deltas. Some fragments carry a `stream-<index>` marker.
 */
export function streamChunk(index: number, count: number) {
  const isOpening = index === 0
  if (isOpening) {
    return `\n\n## Implementation plan\n\nStreaming **bold analysis`
  }
  const isClosing = index === count - 1
  if (isClosing) {
    return `\n\`\`\`\n\n## Verification\n\n- **Typecheck:** passed\n- **Timeline geometry:** stable\n- **Streaming output:** benchmark-complete <!-- stream-${index} -->`
  }
  // One-based number of the fragment cycle; it is used in the "Iteration" heading.
  const section = Math.floor(index / 18) + 1
  const cycle = [
    ` continues across three`,
    ` or four word`,
    ` provider deltas and`,
    ` closes in this fragment**. <!-- stream-${index} -->\n\n`,
    `| Concern | State`,
    ` | Verification |\n|`,
    ` --- | ---`,
    ` | --- |\n|`,
    ` markdown | incremental |`,
    ` painted frames | <!-- stream-${index} -->\n\n`,
    `\`\`\`tsx\nconst row: SessionRow`,
    ` = rows[index] ??`,
    ` fallback\nconst title =`,
    ` row.title.toLocaleUpperCase(locale)\n`,
    `const selected = createMemo(()`,
    ` => row.id ===`,
    ` activeID()) // stream-${index}\n`,
    `// stream-${index}\n\`\`\`\n\n### Iteration ${section}\n\nStreaming **bold analysis`,
  ]
  const slot = (index - 1) % cycle.length
  return cycle[slot]!
}
/** Builds the mocked project record rooted at the fixture directory. */
function project() {
  const createdAt = 1700000000000
  const record = {
    id: projectID,
    worktree: directory,
    vcs: "git",
    name: "timeline-state-regression",
    time: { created: createdAt, updated: createdAt },
    sandboxes: [],
  }
  return record
}
/** Builds the mocked session record that the benchmark navigates to. */
function session() {
  const createdAt = 1700000000000
  const record = {
    id: sessionID,
    slug: "timeline-state-regression",
    projectID,
    directory,
    title,
    version: "dev",
    time: { created: createdAt, updated: createdAt },
  }
  return record
}
/** Builds the mocked provider listing: one connected provider exposing one model. */
function provider() {
  const opus = { id: "claude-opus-4-6", name: "Claude Opus 4.6", limit: { context: 200_000 } }
  const opencode = {
    id: "opencode",
    name: "OpenCode",
    models: { "claude-opus-4-6": opus },
  }
  return {
    all: [opencode],
    connected: ["opencode"],
    default: { providerID: "opencode", modelID: "claude-opus-4-6" },
  }
}

View File

@ -0,0 +1,85 @@
import { benchmark, benchmarkDiagnostics, expect } from "../benchmark"
import {
buildInitialStreamEvent,
buildStreamDeltaEvents,
setupTimelineBenchmark,
textPartID,
} from "./session-timeline-benchmark.fixture"
import { startTimelineProfile } from "./session-timeline-profile"
import {
collectTimelineStreamMetrics,
installTimelineStreamProbe,
startTimelineStreamProbe,
} from "./session-timeline-stream-probe"
// Manual benchmark: streams text deltas into the last assistant message of a
// long session and reports the probe's metrics. It asserts scenario completion
// only and enforces no timing thresholds.
benchmark.describe("performance: session timeline streaming", () => {
benchmark("streams assistant text without remounting or oscillating", async ({ page, report }) => {
// The test budget is 8 minutes; the delta wait below allows up to 7 of them.
benchmark.setTimeout(480_000)
// Knobs read from the environment, each with a default.
const cpuThrottle = Number(process.env.TIMELINE_CPU_THROTTLE ?? 30)
const deltaCount = Number(process.env.TIMELINE_DELTA_COUNT ?? 160)
const historyTurns = Number(process.env.TIMELINE_HISTORY_TURNS ?? 320)
const eventBatch = Number(process.env.TIMELINE_EVENT_BATCH ?? 1)
const minimal = process.env.TIMELINE_MINIMAL === "1"
const profileCPU = process.env.TIMELINE_CPU_PROFILE === "1"
// Visual profiling needs CPU profiling on and minimal mode off; TIMELINE_VISUAL_PROFILE=0 disables it.
const profileVisual = !minimal && profileCPU && process.env.TIMELINE_VISUAL_PROFILE !== "0"
const fixture = await setupTimelineBenchmark(page, {
historyTurns,
eventBatch,
})
// Phase 1: deliver the initial streamed part and time how long it takes to appear.
fixture.transport.enqueue(buildInitialStreamEvent(deltaCount))
const contentStart = performance.now()
await expect(fixture.text).toBeVisible()
await expect(fixture.text).toContainText("Implementation plan")
const initialContentObservedMs = performance.now() - contentStart
await fixture.scrollToBottom()
await fixture.waitForStableGeometry()
// Phase 2: start profiling and the probe before the deltas are queued.
const profile = await startTimelineProfile(page, { cpuThrottle, profileCPU })
await installTimelineStreamProbe(page, { textPartID, finalIndex: deltaCount, profileVisual, minimal })
const deltas = buildStreamDeltaEvents(deltaCount)
await startTimelineStreamProbe(page)
fixture.transport.enqueue(deltas)
// Wait until the probe has observed the final delta index.
await page.waitForFunction(
(finalIndex) =>
(
window as Window & {
__timelineStreamBenchmark?: { applied: { index: number }[] }
}
).__timelineStreamBenchmark?.applied.some((value) => value.index === finalIndex),
deltaCount,
{ timeout: 420_000 },
)
await expect(fixture.text).toContainText("benchmark-complete")
await expect(fixture.text).toContainText("Streaming")
await fixture.waitForStableGeometry()
// Phase 3: collect metrics, stop the profile, and report.
const metrics = await collectTimelineStreamMetrics(page, {
textPartID,
finalIndex: deltaCount,
navigations: benchmarkDiagnostics(page).navigations,
})
const delivered = deltas.length - fixture.transport.pendingCount()
await profile.stop()
// First argument: measured values. Second argument: the configuration that produced them.
report(
{
endToEndInitialContentObservedMs: initialContentObservedMs,
...metrics,
deliveredDeltas: delivered,
pendingDeltas: fixture.transport.pendingCount(),
},
{
cpuThrottle,
profileCPU,
profileVisual,
minimal,
queuedDeltas: deltas.length,
historyTurns,
eventBatch,
},
)
// Restore the CPU throttling rate to 1 if it was raised.
await profile.reset()
})
})

View File

@ -0,0 +1,40 @@
import type { CDPSession, Page } from "@playwright/test"
/**
 * Opens a CDP session on the page, applies CPU throttling when the rate is
 * above 1, and starts the sampling CPU profiler when requested.
 *
 * @param options.cpuThrottle CPU throttling rate; values of 1 or less leave throttling untouched.
 * @param options.profileCPU Whether to run the CPU profiler.
 * @returns `stop()`, which ends profiling and logs the 40 nodes with the most
 *   self time above 1 ms, and `reset()`, which sets the throttling rate back to 1.
 */
export async function startTimelineProfile(page: Page, options: { cpuThrottle: number; profileCPU: boolean }) {
  const session = await page.context().newCDPSession(page)
  if (options.cpuThrottle > 1) {
    await session.send("Emulation.setCPUThrottlingRate", { rate: options.cpuThrottle })
  }
  if (options.profileCPU) {
    await session.send("Profiler.enable")
    await session.send("Profiler.setSamplingInterval", { interval: 100 })
    await session.send("Profiler.start")
  }

  const stop = async () => {
    if (!options.profileCPU) return
    const { profile } = await session.send("Profiler.stop")
    // Accumulate each sample's time delta onto the node it hit, converted by /1000.
    const selfMs = new Map<number, number>()
    for (const [position, nodeID] of (profile.samples ?? []).entries()) {
      const elapsed = (profile.timeDeltas?.[position] ?? 0) / 1_000
      selfMs.set(nodeID, (selfMs.get(nodeID) ?? 0) + elapsed)
    }
    const hottest = profile.nodes
      .map((node) => ({
        function: node.callFrame.functionName || "(anonymous)",
        url: node.callFrame.url,
        line: node.callFrame.lineNumber + 1,
        selfMs: selfMs.get(node.id) ?? 0,
      }))
      .filter((entry) => entry.selfMs > 1)
      .sort((left, right) => right.selfMs - left.selfMs)
      .slice(0, 40)
    console.log("timeline cpu profile", JSON.stringify(hottest))
  }

  const reset = async () => {
    if (options.cpuThrottle > 1) {
      await session.send("Emulation.setCPUThrottlingRate", { rate: 1 })
    }
  }

  return { stop, reset }
}

View File

@ -0,0 +1,547 @@
import type { Page } from "@playwright/test"
// Regex source for the `stream-<n>` markers looked up in the streamed text.
const STREAM_MARKER_PATTERN = "stream-(\\d+)"
// The probe labels a slow frame "boundary" when its index is a multiple of this value.
const STREAM_FRAGMENT_COUNT = 18
/** State of the in-page streaming probe, stored on `window.__timelineStreamBenchmark`. */
type TimelineProbeState = {
// `performance.now()` at start, and the upper bound used when filtering long tasks.
started: number
ended: number
// Mode flags copied from the install options.
profileVisual: boolean
minimal: boolean
// Per-frame intervals and the animation-frame timestamps they were taken at.
frames: number[]
frameAt: number[]
// Stream indexes observed in the text part, with the time each was first seen.
applied: { at: number; index: number }[]
// Scroll geometry of the timeline viewport, one entry per non-minimal sample.
geometry: {
scrollTop: number
scrollHeight: number
clientHeight: number
distance: number
virtualHeight: number
headerHeight: number
}[]
// Number of samples in which no `[data-timeline-row]` intersected the viewport.
blanks: number
// Durations of long tasks and values of layout shifts recorded while running.
longTasks: number[]
layoutShifts: number[]
// Counts of visible `[data-timeline-key]` rows added and removed, plus the current visible set.
visibleMounts: number
visibleUnmounts: number
visibleRows: Set<Element>
// Tracking of tracked subtrees, keyed by the probe's part:block:component key.
visibleSubtreeMounts: string[]
visibleSubtreeUnmounts: string[]
visibleSubtreeReplacements: number
visibleSubtreeDropouts: string[]
visibleSubtrees: Map<string, Element>
subtreeKeys: WeakMap<Element, string>
// Largest overlap and gap between adjacent visible rows, and largest per-sample
// change of the text part's top edge.
maxOverlap: number
maxGap: number
maxPartTopMovement: number
previousPartTop: number
// Details captured for frames whose interval exceeded 33.34 ms (visual profiling only).
slowFrames: {
duration: number
index: number
phase: "stream" | "boundary" | "complete" | "unknown"
tokenSpans: number
blocks: number
codeBlocks: number
height: number
distance: number
}[]
// Counters for `scrollTo` calls and `scrollTop` assignments (visual profiling only).
scroll: {
calls: number
callNoops: number
sameFrameCalls: number
assignments: number
assignmentNoops: number
lastCallFrame: number
frame: number
}
// Elements captured when the probe was installed.
row: HTMLElement
markdown: HTMLElement
// Whether sampling is active, and the timestamp of the previous frame.
running: boolean
previous: number
// Flushes and disconnects observers; restores patched prototypes.
cleanup: () => void
// Begins sampling.
start: () => void
}
/**
 * Installs the in-page streaming probe on `window.__timelineStreamBenchmark`.
 *
 * Nothing is sampled until the probe's `start()` runs. Once started it records
 * frame intervals, streamed-marker progress, and long tasks. Unless `minimal`
 * is set it also records scroll geometry and blank samples. With
 * `profileVisual` it additionally records scroll calls, layout shifts, row and
 * subtree mounts/unmounts, and slow-frame details.
 *
 * @param options.textPartID ID of the text part being streamed into.
 * @param options.finalIndex Index recorded once "benchmark-complete" appears in the part.
 * @param options.profileVisual Enables the prototype patches and DOM-level tracking.
 * @param options.minimal Restricts per-frame sampling to frame intervals and progress.
 * @throws Inside the page, when the part, its row, its Markdown node, or the viewport is missing.
 */
export async function installTimelineStreamProbe(
page: Page,
options: { textPartID: string; finalIndex: number; profileVisual: boolean; minimal: boolean },
) {
await page.evaluate(
({ textPartID, finalIndex, profileVisual, minimal, markerPattern, fragmentCount }) => {
const part = document.querySelector<HTMLElement>(`[data-timeline-part-id="${textPartID}"]`)
const row = part?.closest<HTMLElement>("[data-timeline-row]")
const markdown = part?.querySelector<HTMLElement>('[data-component="markdown"]')
const root = part?.closest<HTMLElement>(".scroll-view__viewport")
if (!part || !row || !markdown || !root) throw new Error("missing streaming benchmark nodes")
const viewport = root.getBoundingClientRect()
const state: TimelineProbeState = {
started: 0,
ended: Infinity,
profileVisual,
minimal,
frames: [],
frameAt: [],
applied: [],
geometry: [],
blanks: 0,
longTasks: [],
layoutShifts: [],
visibleMounts: 0,
visibleUnmounts: 0,
// Seed with the rows that intersect the viewport at install time.
visibleRows: new Set(
[...root.querySelectorAll("[data-timeline-key]")].filter((element) => {
const rect = element.getBoundingClientRect()
return rect.bottom > viewport.top && rect.top < viewport.bottom
}),
),
visibleSubtreeMounts: [],
visibleSubtreeUnmounts: [],
visibleSubtreeReplacements: 0,
visibleSubtreeDropouts: [],
visibleSubtrees: new Map<string, Element>(),
subtreeKeys: new WeakMap<Element, string>(),
maxOverlap: 0,
maxGap: 0,
maxPartTopMovement: 0,
previousPartTop: part.getBoundingClientRect().top,
slowFrames: [],
scroll: {
calls: 0,
callNoops: 0,
sameFrameCalls: 0,
assignments: 0,
assignmentNoops: 0,
lastCallFrame: -1,
frame: 0,
},
row,
markdown,
running: false,
previous: 0,
cleanup: () => {},
start: () => {},
}
;(window as Window & { __timelineStreamBenchmark?: TimelineProbeState }).__timelineStreamBenchmark = state
// Keep the originals so cleanup can restore them.
const scrollTo = Element.prototype.scrollTo
const scrollTop = Object.getOwnPropertyDescriptor(Element.prototype, "scrollTop")!
if (profileVisual) {
// Count scrollTo calls, calls that would move less than a pixel, and repeated calls in one sampled frame.
Element.prototype.scrollTo = function (...args) {
state.scroll.calls += 1
const top = typeof args[0] === "object" ? args[0]?.top : args[1]
if (typeof top === "number") {
const target = Math.min(top, this.scrollHeight - this.clientHeight)
if (Math.abs(this.scrollTop - target) < 1) state.scroll.callNoops += 1
}
if (state.scroll.lastCallFrame === state.scroll.frame) state.scroll.sameFrameCalls += 1
state.scroll.lastCallFrame = state.scroll.frame
return scrollTo.apply(this, args)
}
// Count scrollTop assignments and those that change the value by less than a pixel.
Object.defineProperty(Element.prototype, "scrollTop", {
configurable: true,
get: scrollTop.get,
set(value) {
state.scroll.assignments += 1
if (Math.abs(this.scrollTop - value) < 1) state.scroll.assignmentNoops += 1
scrollTop.set!.call(this, value)
},
})
}
// Keep durations of long tasks that started inside the [started, ended] window.
const recordLongTasks = (entries: PerformanceEntry[]) => {
if (!state.running) return
state.longTasks.push(
...entries
.filter((entry) => entry.startTime >= state.started && entry.startTime <= state.ended)
.map((entry) => entry.duration),
)
}
const longTaskObserver = new PerformanceObserver((list) => recordLongTasks(list.getEntries()))
longTaskObserver.observe({ type: "longtask" })
// Keep layout-shift values, skipping shifts before the start or flagged with recent input.
const recordLayoutShifts = (entries: PerformanceEntry[]) => {
if (!state.running) return
state.layoutShifts.push(
...entries
.map((entry) => {
const shift = entry as LayoutShiftEntry
if (shift.startTime < state.started || shift.hadRecentInput) return
return shift.value
})
.filter((value): value is number => value !== undefined),
)
}
const layoutShiftObserver = profileVisual
? new PerformanceObserver((list) => recordLayoutShifts(list.getEntries()))
: undefined
layoutShiftObserver?.observe({ type: "layout-shift", buffered: true })
// True when the element is connected, has a non-empty box that vertically
// intersects the viewport, and is not hidden via display, visibility, or opacity.
const visible = (element: Element) => {
const rect = element.getBoundingClientRect()
const viewport = root.getBoundingClientRect()
const style = getComputedStyle(element)
return (
element.isConnected &&
rect.width > 0 &&
rect.height > 0 &&
rect.bottom > viewport.top &&
rect.top < viewport.bottom &&
style.display !== "none" &&
style.visibility !== "hidden" &&
Number(style.opacity) > 0
)
}
// Selector for the subtrees whose mounts, unmounts, and dropouts are tracked.
const critical = [
"[data-timeline-part-id]",
'[data-component="edit-content"]',
'[data-component="apply-patch-file-diff"]',
'[data-component="file"]',
'[data-component="markdown-code"]',
"[data-markdown-block]",
].join(",")
// Builds a `part:block:component` key for an element. The key is cached so a
// detached element still reports the key computed while it was connected.
const describe = (element: Element) => {
const cached = state.subtreeKeys.get(element)
if (!element.isConnected && cached) return cached
const part = element.closest<HTMLElement>("[data-timeline-part-id]")?.dataset.timelinePartId ?? "unknown"
const block = element
.closest<HTMLElement>("[data-markdown-key]")
?.dataset.markdownKey?.replace(/:(?:code|full|live)$/, "")
const component =
element.getAttribute("data-component") ?? element.getAttribute("data-markdown-block") ?? element.tagName
const key = `${part}:${block ?? "root"}:${component}`
state.subtreeKeys.set(element, key)
return key
}
// Records visible row and tracked-subtree mounts and unmounts from mutation records.
const recordMutations = (records: MutationRecord[]) => {
if (!state.running) return
records.forEach((record) => {
record.addedNodes.forEach((node) => {
if (node instanceof HTMLElement && node.matches("[data-timeline-key]") && visible(node)) {
state.visibleMounts += 1
state.visibleRows.add(node)
}
if (!(node instanceof Element)) return
const added = [node, ...node.querySelectorAll(critical)].filter((element) => element.matches(critical))
added.forEach((element) => {
if (visible(element)) state.visibleSubtreeMounts.push(describe(element))
})
})
record.removedNodes.forEach((node) => {
// A removal counts only if the row was in the tracked visible set.
if (node instanceof HTMLElement && node.matches("[data-timeline-key]") && state.visibleRows.delete(node))
state.visibleUnmounts += 1
if (!(node instanceof Element)) return
const removed = [node, ...node.querySelectorAll(critical)].filter((element) => element.matches(critical))
removed.forEach((element) => {
const key = describe(element)
if (state.visibleSubtrees.get(key) === element) state.visibleSubtreeUnmounts.push(key)
})
})
})
}
const mutationObserver = profileVisual ? new MutationObserver(recordMutations) : undefined
mutationObserver?.observe(root, { childList: true, subtree: true })
// Re-query the part on every use instead of relying on the node captured at install time.
const currentPart = () => root.querySelector<HTMLElement>(`[data-timeline-part-id="${textPartID}"]`)
// Derives the applied stream index from the part's text: `finalIndex` once
// "benchmark-complete" is present, otherwise the number in the last marker.
// An entry is appended only when the index differs from the previous one.
const observeProgress = (at: number) => {
if (!state.running) return
const content = currentPart()?.textContent ?? ""
const index = content.includes("benchmark-complete")
? finalIndex
: Number(content.match(new RegExp(markerPattern, "g"))?.at(-1)?.match(/\d+/)?.[0] ?? -1)
if (index >= 0 && index !== state.applied.at(-1)?.index) state.applied.push({ at, index })
}
const progressObserver = new MutationObserver(() => observeProgress(performance.now()))
progressObserver.observe(root, { characterData: true, childList: true, subtree: true })
// Flush pending observer records, disconnect every observer, and restore the
// patched prototypes when they were patched.
state.cleanup = () => {
recordLongTasks(longTaskObserver.takeRecords())
recordLayoutShifts(layoutShiftObserver?.takeRecords() ?? [])
recordMutations(mutationObserver?.takeRecords() ?? [])
if (progressObserver.takeRecords().length) observeProgress(performance.now())
longTaskObserver.disconnect()
layoutShiftObserver?.disconnect()
mutationObserver?.disconnect()
progressObserver.disconnect()
if (!profileVisual) return
Element.prototype.scrollTo = scrollTo
Object.defineProperty(Element.prototype, "scrollTop", scrollTop)
}
// Animation-frame callback: records one sample and schedules the next frame.
const sample = (now: number) => {
if (!state.running) return
state.frameAt.push(now)
observeProgress(now)
// Minimal mode records only the frame interval and skips all DOM measurement.
if (minimal) {
state.frames.push(now - state.previous)
state.previous = now
requestAnimationFrame(sample)
return
}
// NOTE(review): the zero-delay timer presumably defers measurement until after
// the frame's rendering work — confirm.
setTimeout(() => {
if (!state.running) return
state.scroll.frame += 1
const duration = now - state.previous
state.frames.push(duration)
state.previous = now
const virtualRoot = root.querySelector<HTMLElement>("[data-timeline-virtual-content]")
const header = root.querySelector<HTMLElement>("[data-session-title]")
state.geometry.push({
scrollTop: root.scrollTop,
scrollHeight: root.scrollHeight,
clientHeight: root.clientHeight,
distance: root.scrollHeight - root.clientHeight - root.scrollTop,
virtualHeight: virtualRoot?.getBoundingClientRect().height ?? 0,
headerHeight: header?.getBoundingClientRect().height ?? 0,
})
const viewport = root.getBoundingClientRect()
if (profileVisual) {
// Refresh the visible row set, then track the worst overlap and gap between
// vertically adjacent rows.
const visibleRows = [...root.querySelectorAll<HTMLElement>("[data-timeline-key]")]
.map((element) => ({ element, rect: element.getBoundingClientRect() }))
.filter((item) => item.rect.bottom > viewport.top && item.rect.top < viewport.bottom)
.sort((a, b) => a.rect.top - b.rect.top)
state.visibleRows = new Set(visibleRows.map((item) => item.element))
const rows = visibleRows.map((item) => item.rect)
rows.slice(1).forEach((rect, index) => {
const previous = rows[index]!
state.maxOverlap = Math.max(state.maxOverlap, previous.bottom - rect.top)
state.maxGap = Math.max(state.maxGap, rect.top - previous.bottom)
})
// Uses the part node captured at install time, not `currentPart()`.
const partTop = part.getBoundingClientRect().top
state.maxPartTopMovement = Math.max(state.maxPartTopMovement, Math.abs(partTop - state.previousPartTop))
state.previousPartTop = partTop
}
// A sample is blank when no timeline row intersects the viewport.
const visibleRow = [...root.querySelectorAll<HTMLElement>("[data-timeline-row]")].some((element) => {
const rect = element.getBoundingClientRect()
return rect.bottom > viewport.top && rect.top < viewport.bottom
})
if (!visibleRow) state.blanks += 1
if (profileVisual) {
const subtrees = new Map<string, { element: Element; rendered: boolean }>()
const visibleSubtrees = new Map<string, Element>()
root.querySelectorAll(critical).forEach((element) => {
const key = describe(element)
const rect = element.getBoundingClientRect()
const style = getComputedStyle(element)
const rendered =
element.isConnected &&
rect.width > 0 &&
rect.height > 0 &&
style.display !== "none" &&
style.visibility !== "hidden" &&
Number(style.opacity) > 0
subtrees.set(key, { element, rendered })
if (rendered && rect.bottom > viewport.top && rect.top < viewport.bottom) {
// Same key, different element: count a replacement for the streamed part only.
const previous = state.visibleSubtrees.get(key)
if (previous && previous !== element && key.startsWith(`${textPartID}:`))
state.visibleSubtreeReplacements += 1
visibleSubtrees.set(key, element)
}
})
// Compare the previously visible subtrees with what is rendered now.
state.visibleSubtrees.forEach((element, key) => {
const current = subtrees.get(key)
if (key.startsWith(`${textPartID}:`) && !current?.rendered) {
const markdown = part.querySelector<HTMLElement>('[data-component="markdown"]')
state.visibleSubtreeDropouts.push(
`${key}:projection=${markdown?.dataset.markdownProjectionLength}/${markdown?.dataset.markdownProjectionBlocks}:result=${markdown?.dataset.markdownResultLength}/${markdown?.dataset.markdownResultBlocks}:applied=${markdown?.dataset.markdownAppliedBlocks}:dom=${markdown?.children.length}`,
)
}
// For file components, flag the case where shadow-DOM lines existed and are now gone.
if (element.matches('[data-component="file"]')) {
const hadLines = element.hasAttribute("data-profiler-had-lines")
const hasLines = element.shadowRoot?.querySelector("[data-line]") != null
if (hasLines) element.setAttribute("data-profiler-had-lines", "")
if (hadLines && !hasLines) state.visibleSubtreeDropouts.push(`${key}:shadow-lines`)
}
})
state.visibleSubtrees = visibleSubtrees
}
// Capture context for frames whose interval exceeded 33.34 ms.
if (profileVisual && duration > 33.34) {
const livePart = currentPart()
const content = livePart?.textContent ?? ""
const complete = content.includes("benchmark-complete")
const index = complete
? finalIndex
: Number(content.match(new RegExp(markerPattern, "g"))?.at(-1)?.match(/\d+/)?.[0] ?? -1)
state.slowFrames.push({
duration,
index,
phase: complete
? "complete"
: index >= 0 && index % fragmentCount === 0
? "boundary"
: index >= 0
? "stream"
: "unknown",
tokenSpans: livePart?.querySelectorAll(".shiki span").length ?? 0,
blocks: livePart?.querySelectorAll("[data-markdown-block]").length ?? 0,
codeBlocks: livePart?.querySelectorAll('[data-component="markdown-code"]').length ?? 0,
height: livePart?.getBoundingClientRect().height ?? 0,
distance: root.scrollHeight - root.clientHeight - root.scrollTop,
})
}
requestAnimationFrame(sample)
}, 0)
}
// Marks the start time and begins the per-frame sampling loop.
state.start = () => {
state.started = performance.now()
state.previous = state.started
state.running = true
requestAnimationFrame(sample)
}
},
// Module constants are passed in as arguments because the callback runs inside the page.
{ ...options, markerPattern: STREAM_MARKER_PATTERN, fragmentCount: STREAM_FRAGMENT_COUNT },
)
}
/**
 * Starts the streaming benchmark probe that is stored on the page's `window`.
 *
 * @param page Playwright page whose `window.__timelineStreamBenchmark` is read.
 * @returns The `page.evaluate` promise; it rejects with
 *   "missing streaming benchmark state" when no probe state is present.
 */
export function startTimelineStreamProbe(page: Page) {
  return page.evaluate(() => {
    // Executed in the browser context: only page globals are reachable from here.
    type ProbeWindow = Window & { __timelineStreamBenchmark?: TimelineProbeState }
    const probe = (window as ProbeWindow).__timelineStreamBenchmark
    if (!probe) throw new Error("missing streaming benchmark state")
    probe.start()
  })
}
/** Performance entry extended with the two layout-shift fields this module reads. */
type LayoutShiftEntry = PerformanceEntry & { value: number; hadRecentInput?: boolean }

/**
 * Returns the score of a layout-shift entry that counts towards the probe window.
 *
 * @param entry Entry fields needed for the decision.
 * @param start Lower bound for `entry.startTime`; earlier entries are ignored.
 * @returns `entry.value`, or `undefined` when the entry started before `start`
 *   or is flagged with `hadRecentInput`.
 */
export function layoutShiftValue(
  entry: Pick<LayoutShiftEntry, "startTime" | "value" | "hadRecentInput">,
  start: number,
) {
  const beforeWindow = entry.startTime < start
  const excluded = beforeWindow || entry.hadRecentInput
  return excluded ? undefined : entry.value
}
/**
 * Removes `row` from the `visible` set.
 *
 * @returns `true` when the row was in the set before removal, `false` otherwise.
 */
export function removeVisibleRow<T>(visible: Set<T>, row: T) {
  const wasVisible = visible.has(row)
  visible.delete(row)
  return wasVisible
}
/**
 * Derives the stream position and phase from rendered text.
 *
 * The index is the first run of digits inside the last `STREAM_MARKER_PATTERN`
 * match, or `-1` when the text contains no marker.
 *
 * @param content Text to inspect.
 * @returns `{ index, phase }` where phase is "complete" when the text contains
 *   "benchmark-complete", otherwise "boundary" when the index is a multiple of
 *   `STREAM_FRAGMENT_COUNT`, "stream" for any other non-negative index, and
 *   "unknown" when no marker was found.
 */
export function streamProgress(content: string) {
  const markers = content.match(new RegExp(STREAM_MARKER_PATTERN, "g"))
  const digits = markers?.at(-1)?.match(/\d+/)?.[0]
  const index = Number(digits ?? -1)
  const classify = (): "complete" | "boundary" | "stream" | "unknown" => {
    if (content.includes("benchmark-complete")) return "complete"
    if (!(index >= 0)) return "unknown"
    return index % STREAM_FRAGMENT_COUNT === 0 ? "boundary" : "stream"
  }
  return { index, phase: classify() }
}
/**
 * Stops the in-page streaming probe and reduces its recorded state to a plain metrics object.
 *
 * @param page Playwright page that carries `window.__timelineStreamBenchmark`.
 * @param options.textPartID Value of `data-timeline-part-id` for the streamed part.
 * @param options.finalIndex Progress index that marks completion of the stream.
 * @param options.navigations Only used in the error message when the probe state is missing.
 * @returns The object built inside the page; `visual` is null unless `state.profileVisual`
 *   is set and `geometry` is null when `state.minimal` is set.
 * @throws (inside the page) when `window.__timelineStreamBenchmark` is absent.
 */
export async function collectTimelineStreamMetrics(
page: Page,
options: { textPartID: string; finalIndex: number; navigations: string[] },
) {
// Everything below runs in the browser; only `options` crosses the boundary.
return page.evaluate(({ textPartID, finalIndex, navigations }) => {
const state = (window as Window & { __timelineStreamBenchmark?: TimelineProbeState }).__timelineStreamBenchmark
if (!state) throw new Error(`missing streaming benchmark state after navigation: ${JSON.stringify(navigations)}`)
// Freeze the probe: stamp the end time, run its cleanup hook and clear the running flag.
state.ended = performance.now()
state.cleanup()
state.running = false
// Re-query the current DOM nodes so they can be compared with the ones stored on the state.
const part = document.querySelector<HTMLElement>(`[data-timeline-part-id="${textPartID}"]`)
const row = part?.closest<HTMLElement>("[data-timeline-row]")
const markdown = part?.querySelector<HTMLElement>('[data-component="markdown"]')
// Sorted copy for percentile lookups; `state.frames` itself keeps recording order.
const sorted = state.frames.slice().sort((a, b) => a - b)
const duration = state.frames.reduce((sum, value) => sum + value, 0)
// Longest run of consecutive samples above 33.34 (presumably ms, about two 60 Hz frames; confirm).
const longestSlowStreak = state.frames.reduce(
(result, value) => {
const current = value > 33.34 ? result.current + 1 : 0
return { current, longest: Math.max(result.longest, current) }
},
{ current: 0, longest: 0 },
).longest
// The "busy" window spans from the first applied entry to the entry whose index equals finalIndex.
const busyStart = state.applied.at(0)?.at
const completion = state.applied.find((value) => value.index === finalIndex)
const busyEnd = completion?.at
// `frameAt[index]` is read as the timestamp paired with `frames[index]`.
const busyFrames =
busyStart === undefined || busyEnd === undefined
? []
: state.frames.filter((_, index) => state.frameAt[index]! >= busyStart && state.frameAt[index]! <= busyEnd)
const busySorted = busyFrames.slice().sort((a, b) => a - b)
const busyDuration = busyFrames.reduce((sum, value) => sum + value, 0)
// NaN when the final index was never observed; handled below via Number.isFinite.
const completionObservedMs = (completion?.at ?? NaN) - state.started
const visual = state.profileVisual
? {
layoutShiftValueSum: state.layoutShifts.reduce((sum, value) => sum + value, 0),
maxLayoutShiftValue: Math.max(0, ...state.layoutShifts),
visibleMounts: state.visibleMounts,
visibleUnmounts: state.visibleUnmounts,
visibleSubtreeMounts: state.visibleSubtreeMounts,
// Set round-trip removes duplicate entries while keeping first-seen order.
visibleSubtreeUnmounts: [...new Set(state.visibleSubtreeUnmounts)],
visibleSubtreeReplacements: state.visibleSubtreeReplacements,
visibleSubtreeDropouts: [...new Set(state.visibleSubtreeDropouts)],
maxOverlapPx: state.maxOverlap,
maxGapPx: state.maxGap,
maxPartTopMovementPx: state.maxPartTopMovement,
// NOTE: `sort` reorders `state.slowFrames` in place; only the 20 longest are reported.
slowestRafGaps: state.slowFrames
.sort((a, b) => b.duration - a.duration)
.slice(0, 20)
.map((frame) => ({
durationMs: frame.duration,
index: frame.index,
phase: frame.phase,
tokenSpans: frame.tokenSpans,
blocks: frame.blocks,
codeBlocks: frame.codeBlocks,
heightPx: frame.height,
distancePx: frame.distance,
})),
// Per-phase count, total and maximum duration over all recorded slow frames.
slowRafGapPhases: Object.fromEntries(
["stream", "boundary", "complete", "unknown"].map((phase) => {
const frames = state.slowFrames.filter((frame) => frame.phase === phase)
return [
phase,
{
count: frames.length,
totalMs: frames.reduce((sum, frame) => sum + frame.duration, 0),
maxMs: Math.max(0, ...frames.map((frame) => frame.duration)),
},
]
}),
),
scroll: state.scroll,
}
: null
const geometry = state.minimal
? null
: {
maxDistancePx: Math.max(0, ...state.geometry.map((sample) => sample.distance)),
finalDistancePx: state.geometry.at(-1)?.distance ?? 0,
final: state.geometry.at(-1),
// Rounded distances with consecutive repeats collapsed.
distanceTransitionsPx: state.geometry
.map((sample) => Math.round(sample.distance))
.filter((value, index, values) => index === 0 || value !== values[index - 1]),
// After slice(1), `index` addresses the sample preceding `value` in `state.geometry`;
// a transition is counted when distance goes from <= 1 to > 1.
bottomDriftTransitions: state.geometry.slice(1).filter((value, index) => {
const previous = state.geometry[index]?.distance ?? 0
return previous <= 1 && value.distance > 1
}).length,
blankSamples: state.blanks,
}
return {
capabilities: { visual: state.profileVisual, geometry: !state.minimal },
completionObservedMs,
deltasPerSecond: Number.isFinite(completionObservedMs) ? finalIndex / (completionObservedMs / 1_000) : null,
rafGapSamples: state.frames.length,
rafCallbackRate: duration ? (state.frames.length * 1000) / duration : 0,
observedProgressWindowRafCallbackRate: busyDuration ? (busyFrames.length * 1000) / busyDuration : null,
observedProgressWindowRafGapP95Ms: busySorted[Math.floor(busySorted.length * 0.95)] ?? null,
observedProgressWindowRafGaps: busyFrames.length,
maxObservedProgressIndex: Math.max(-1, ...state.applied.map((value) => value.index)),
observedProgressTransitions: state.applied.length,
// Percentiles index into the sorted copy; empty input falls back to 0.
rafGapP50Ms: sorted[Math.floor(sorted.length * 0.5)] ?? 0,
rafGapP95Ms: sorted[Math.floor(sorted.length * 0.95)] ?? 0,
rafGapP99Ms: sorted[Math.floor(sorted.length * 0.99)] ?? 0,
maxRafGapMs: sorted.at(-1) ?? 0,
rafGapsOver33Ms: state.frames.filter((value) => value > 33.34).length,
rafGapsOver50Ms: state.frames.filter((value) => value > 50).length,
// Each sample contributes round(value / 16.67) - 1, floored at 0 (16.67 presumably one 60 Hz frame; confirm).
missedFrameBudgetEquivalents: state.frames.reduce(
(sum, value) => sum + Math.max(0, Math.round(value / 16.67) - 1),
0,
),
longestRafGapOver33MsStreak: longestSlowStreak,
longTaskCount: state.longTasks.length,
longTaskTimeMs: state.longTasks.reduce((sum, value) => sum + value, 0),
visual,
geometry,
// True when the node found now differs from the one stored on the probe state.
rowReplaced: row !== state.row,
markdownReplaced: markdown !== state.markdown,
domTextCharacters: part?.textContent?.length ?? 0,
}
}, options)
}

View File

@ -0,0 +1,335 @@
// Fixed vocabulary that `lorem` cycles through to build deterministic filler text.
const words = [
"alpha",
"bravo",
"charlie",
"delta",
"echo",
"foxtrot",
"golf",
"hotel",
"india",
"juliet",
"kilo",
"lima",
"metro",
"nova",
"orbit",
"pixel",
"quartz",
"river",
"signal",
"vector",
]
// Identifiers shared by every generated session, message and part in this fixture.
const sourceID = "ses_smoke_source"
const targetID = "ses_smoke_target"
const directory = "C:/OpenCode/SmokeProject"
const projectID = "proj_smoke_timeline"
// Model descriptor attached to user messages; assistant messages copy its providerID/modelID.
const model = { providerID: "opencode", modelID: "claude-opus-4-6", variant: "max" }
// Loose message shapes: only the fields this file reads are typed, everything else is `unknown`.
type MessageInfo = Record<string, unknown> & { id: string; role: "user" | "assistant" }
type MessagePart = Record<string, unknown> & { id: string; type: string; text?: string; tool?: string }
type Message = { info: MessageInfo; parts: MessagePart[] }
/**
 * Builds deterministic filler text of exactly `length` characters (or fewer only
 * when `length` is not positive).
 *
 * Words are picked from `words` starting at `seed` and advancing by 7 each step;
 * whenever the running position is a multiple of 17 a sentence break (".\n\n")
 * is appended.
 */
function lorem(seed: number, length: number) {
  let text = ""
  for (let cursor = seed; text.length < length; cursor += 7) {
    const separator = text ? " " : ""
    text += separator + words[cursor % words.length]
    if (cursor % 17 === 0) text += ".\n\n"
  }
  return text.slice(0, length)
}
/**
 * Formats a fixture identifier as `<prefix>_smoke_<value>`, left-padding the
 * number with zeros to at least four characters.
 */
function id(prefix: string, value: number) {
  const padded = value.toString().padStart(4, "0")
  return [prefix, "smoke", padded].join("_")
}
/**
 * Creates a user message with a single generated text part.
 *
 * @param sessionID Session the message and its part belong to.
 * @param index Drives the message/part ids, the creation time and the text seed.
 * @param textLength Length of the generated text.
 * @param diffs Stored under `info.summary.diffs`; defaults to an empty list.
 */
function userMessage(sessionID: string, index: number, textLength: number, diffs: unknown[] = []): Message {
  const messageID = id("msg_user", index)
  const info: MessageInfo = {
    id: messageID,
    sessionID,
    role: "user",
    time: { created: 1700000000000 + index * 10_000 },
    summary: { diffs },
    agent: "build",
    model,
  }
  const textBody: MessagePart = {
    id: id("prt_user_text", index),
    sessionID,
    messageID,
    type: "text",
    text: lorem(index, textLength),
  }
  return { info, parts: [textBody] }
}
/**
 * Creates a completed assistant message that replies to `parentID`.
 *
 * Every supplied part is copied and stamped with the session and message ids;
 * the input parts are not modified.
 */
function assistantMessage(sessionID: string, index: number, parentID: string, parts: MessagePart[]): Message {
  const messageID = id("msg_assistant", index)
  const turnStart = 1700000000000 + index * 10_000
  const info: MessageInfo = {
    id: messageID,
    sessionID,
    role: "assistant",
    time: { created: turnStart + 1_000, completed: turnStart + 8_000 },
    parentID,
    modelID: model.modelID,
    providerID: model.providerID,
    mode: "build",
    agent: "build",
    path: { cwd: directory, root: directory },
    cost: 0.01,
    tokens: { input: 100, output: 200, reasoning: 0, cache: { read: 0, write: 0 } },
    variant: "max",
    finish: "stop",
  }
  const stamped: MessagePart[] = []
  for (const part of parts) stamped.push({ ...part, sessionID, messageID })
  return { info, parts: stamped }
}
/**
 * Creates an assistant text part.
 *
 * The prose is followed by extra markdown depending on `index`: a generated
 * multi-line code fence for multiples of 12, a one-line code fence with a long
 * string for multiples of 5, a long inline code span for multiples of 7, and
 * nothing otherwise (first matching rule wins).
 */
function textPart(index: number, partIndex: number, length: number): MessagePart {
  const prose = lorem(index * 13 + partIndex, length)
  let text = prose
  if (index % 12 === 0) {
    text = `${prose}\n\n\`\`\`ts\n${code(index, 80)}\n\`\`\``
  } else if (index % 5 === 0) {
    text = `${prose}\n\n\`\`\`ts\nexport const value = "${lorem(index, 220)}"\n\`\`\``
  } else if (index % 7 === 0) {
    text = `${prose}\n\nThe wrapped inline value is \`${lorem(index, 180)}\`.`
  }
  return { id: id(`prt_text_${partIndex}`, index), type: "text", text }
}
/** Creates a reasoning part with generated text and a 500-unit start/end time span. */
function reasoningPart(index: number, partIndex: number, length: number): MessagePart {
  const startedAt = 1700000000000 + index * 10_000
  const part: MessagePart = {
    id: id(`prt_reasoning_${partIndex}`, index),
    type: "reasoning",
    text: lorem(index * 19 + partIndex, length),
    time: { start: startedAt, end: startedAt + 500 },
  }
  return part
}
/**
 * Creates a completed tool part.
 *
 * @param index Turn index; seeds ids, output text and timestamps.
 * @param partIndex Position marker used in the part id, call id and output seed.
 * @param tool Tool name; selects which metadata shape is attached.
 * @param input Stored verbatim as the tool input; also supplies the title for non-bash tools.
 * @param outputLength Length of the generated output (and of the edit/write diff).
 */
function toolPart(
  index: number,
  partIndex: number,
  tool: string,
  input: Record<string, unknown>,
  outputLength = 160,
): MessagePart {
  // Metadata is only populated for the tools that carry one in this fixture.
  let metadata: Record<string, unknown> = {}
  if (tool === "apply_patch") {
    metadata = { files: [patchFile(index, "update"), patchFile(index + 1, index % 2 === 0 ? "add" : "delete")] }
  } else if (tool === "edit" || tool === "write") {
    metadata = {
      filediff: fileDiff(String(input.filePath ?? `src/generated/file-${index}.ts`), index),
      diff: patch(index, outputLength),
      preview: patch(index + 1, 420),
    }
  } else if (tool === "question") {
    metadata = { answers: [["Proceed"], ["Keep sample output"]] }
  }

  const startedAt = 1700000000000 + index * 10_000
  // Non-bash tools are titled by the first truthy of filePath, path, pattern.
  const title =
    tool === "bash" ? "Verify generated output" : input.filePath || input.path || input.pattern || "completed"

  return {
    id: id(`prt_tool_${tool}_${partIndex}`, index),
    type: "tool",
    callID: id("call", index * 10 + partIndex),
    tool,
    state: {
      status: "completed",
      input,
      output: lorem(index * 23 + partIndex, outputLength),
      title,
      metadata,
      time: { start: startedAt, end: startedAt + 400 },
    },
  }
}
/**
 * Creates one file entry for `apply_patch` metadata.
 *
 * Added files have no `before` content and zero deletions; deleted files have
 * no `after` content.
 */
function patchFile(seed: number, type: "add" | "update" | "delete") {
  const location = `src/generated/patch-${seed}.ts`
  const isAdd = type === "add"
  const isDelete = type === "delete"
  return {
    filePath: location,
    relativePath: location,
    type,
    additions: (seed % 7) + 1,
    deletions: isAdd ? 0 : seed % 4,
    patch: patch(seed, 520),
    before: isAdd ? undefined : code(seed, 18),
    after: isDelete ? undefined : code(seed + 1, 24),
  }
}
/**
 * Creates a before/after pair for `file` in one of three sizes chosen by `seed`:
 * 300 lines (multiples of 12, fully regenerated), 2 lines (other multiples of 8,
 * one identifier renamed) or 38 lines (everything else, two identifiers renamed).
 * Lines are 280 characters wide for multiples of 10 and 32 otherwise.
 */
function fileDiff(file: string, seed: number) {
  const width = seed % 10 === 0 ? 280 : 32
  let lines = 38
  if (seed % 12 === 0) lines = 300
  else if (seed % 8 === 0) lines = 2

  const before = code(seed, lines, width)
  let after: string
  if (lines === 2) {
    after = before.replace("value1", "updatedValue1")
  } else if (lines === 300) {
    after = code(seed + 1, lines, width)
  } else {
    after = before.replace("value4", "updatedValue4").replace("value20", "updatedValue20")
  }

  // Additions and deletions are always reported as the same count.
  const changed = lines === 300 ? 300 : lines === 2 ? 1 : 2
  return {
    file,
    additions: changed,
    deletions: changed,
    before,
    after,
  }
}
/**
 * Creates a diff-like string: a `diff --git` header for `file-<seed>.ts`
 * followed by generated text in which every line is prefixed with "+".
 */
function patch(seed: number, length: number) {
  const target = `src/generated/file-${seed}.ts`
  const added = lorem(seed, length).split("\n").join("\n+")
  return `diff --git a/${target} b/${target}\n+${added}`
}
/**
 * Generates `lines` newline-separated TypeScript statements of the form
 * `export const value<N> = "<text>"`, where the text is `width` characters
 * seeded by `seed + N`.
 */
function code(seed: number, lines: number, width = 32) {
  const statement = (_: unknown, row: number) => `export const value${row} = "${lorem(seed + row, width)}"`
  return Array.from({ length: lines }).map(statement).join("\n")
}
/**
 * Generates one user/assistant exchange for the target session.
 *
 * The assistant always has a text part; which reasoning and tool parts surround
 * it depends on the divisibility of `index`, so different turns exercise
 * different combinations of part types.
 *
 * @returns `[userMessage, assistantMessage]`.
 */
function turn(index: number): Message[] {
  const diff = index % 9 === 0 ? [fileDiff(`src/generated/summary-${index}.ts`, index)] : []
  const user = userMessage(targetID, index, 100 + (index % 4) * 80, diff)

  // Parts are appended in the exact order they should appear in the message.
  const parts: MessagePart[] = []
  if (index % 5 === 0) parts.push(reasoningPart(index, 0, 420))
  if (index % 3 === 0) {
    parts.push(
      toolPart(index, 0, "read", { filePath: `src/generated/file-${index}.ts`, offset: 0, limit: 80 }, 220),
      toolPart(index, 5, "glob", { path: directory, pattern: `**/*sample-${index}*.ts` }, 140),
      toolPart(index, 1, "grep", { path: directory, pattern: `sample-${index}`, include: "*.ts" }, 180),
      toolPart(index, 6, "list", { path: `src/generated/${index}` }, 120),
    )
  }
  parts.push(textPart(index, 2, 160 + (index % 6) * 90))
  if (index % 4 === 0) {
    parts.push(toolPart(index, 3, "edit", { filePath: `src/generated/file-${index}.ts` }, 700))
  }
  if (index % 6 === 0) {
    parts.push(
      toolPart(index, 7, "write", { filePath: `src/generated/write-${index}.ts`, content: code(index, 28) }, 560),
    )
  }
  if (index % 8 === 0) {
    parts.push(toolPart(index, 8, "apply_patch", { files: [`src/generated/patch-${index}.ts`] }, 620))
  }
  if (index % 7 === 0) {
    parts.push(toolPart(index, 4, "bash", { command: "bun typecheck", description: "Verify generated output" }, 620))
  }
  if (index % 10 === 0) {
    parts.push(toolPart(index, 9, "webfetch", { url: "https://example.com/docs/sample" }, 120))
  }
  if (index % 11 === 0) {
    parts.push(toolPart(index, 10, "websearch", { query: "sample movement notes" }, 240))
  }
  if (index % 13 === 0) {
    parts.push(
      toolPart(
        index,
        11,
        "question",
        { questions: [{ question: "Use generated fixture?" }, { question: "Keep same row shape?" }] },
        120,
      ),
    )
  }
  if (index % 17 === 0) {
    parts.push(toolPart(index, 12, "task", { description: "Inspect generated fixture", subagent_type: "explore" }, 160))
  }

  return [user, assistantMessage(targetID, index, user.info.id, parts)]
}
// Target session: 72 generated turns, flattened into one user/assistant message list.
const targetMessages = Array.from({ length: 72 }, (_, index) => index).flatMap((index) => turn(index))
// Source session: 12 short exchanges whose indices start at 1000 so ids differ from the target's.
const sourceMessages = Array.from({ length: 12 }, (_, offset) => {
  const index = offset + 1000
  return [
    userMessage(sourceID, index, 120),
    assistantMessage(sourceID, index, id("msg_user", index), [textPart(index, 0, 240)]),
  ]
}).flat()
/**
 * Decides whether a part counts towards the expected rendered part ids.
 *
 * - `todowrite` tool parts are never counted.
 * - Text and reasoning parts count only when they carry non-whitespace text;
 *   a part whose optional `text` is missing is treated as empty instead of
 *   throwing.
 * - `step-start`, `step-finish` and `patch` parts are never counted.
 * - Every other part type counts.
 */
function renderable(part: MessagePart) {
  if (part.type === "tool" && part.tool === "todowrite") return false
  // `text` is optional on MessagePart, so guard the access before trimming.
  if (part.type === "text" || part.type === "reasoning") return !!part.text?.trim()
  return part.type !== "step-start" && part.type !== "step-finish" && part.type !== "patch"
}
/** Returns a copy of the message's parts sorted by id via `localeCompare`; the message is not modified. */
function orderedParts(message: Message) {
  const copy = [...message.parts]
  copy.sort((left, right) => left.id.localeCompare(right.id))
  return copy
}
/**
 * Complete stress-timeline fixture: project, provider and session records, the
 * generated messages per session, and the ids derived from them under `expected`.
 */
export const fixture = {
directory,
project: {
id: projectID,
worktree: directory,
vcs: "git",
name: "smoke-project",
time: { created: 1700000000000, updated: 1700000000000 },
sandboxes: [],
},
provider: {
all: [
{
id: "opencode",
name: "OpenCode",
models: { "claude-opus-4-6": { id: "claude-opus-4-6", name: "Claude Opus 4.6", limit: { context: 200_000 } } },
},
],
connected: ["opencode"],
default: { providerID: "opencode", modelID: "claude-opus-4-6" },
},
// Two sessions in the same project: the short "source" and the heavy "target".
sessions: [
{
id: sourceID,
slug: "source",
projectID,
directory,
title: "Uncommitted changes inquiry",
version: "dev",
time: { created: 1700000000000, updated: 1700000000000 },
},
{
id: targetID,
slug: "target",
projectID,
directory,
title: "Example Game: sample jump movement & sample physics analysis",
version: "dev",
time: { created: 1700000001000, updated: 1700000001000 },
},
],
sourceID,
targetID,
// Messages keyed by session id.
messages: { [sourceID]: sourceMessages, [targetID]: targetMessages },
// Values derived from the generated data above; titles repeat the session titles verbatim.
expected: {
sourceTitle: "Uncommitted changes inquiry",
targetTitle: "Example Game: sample jump movement & sample physics analysis",
// Only user-message ids are listed for each session.
sourceMessageIDs: sourceMessages
.filter((message) => message.info.role === "user")
.map((message) => message.info.id),
targetMessageIDs: targetMessages
.filter((message) => message.info.role === "user")
.map((message) => message.info.id),
// Per message: parts sorted by id, filtered through `renderable`.
targetPartIDs: targetMessages.flatMap((message) =>
orderedParts(message)
.filter(renderable)
.map((part) => part.id),
),
},
}
/**
 * Returns one page of a session's fixture messages, paging backwards from the end.
 *
 * @param sessionID Session to read; unknown ids yield an empty page.
 * @param limit Maximum number of messages in the page.
 * @param before Id of the message the page must end before; when omitted the
 *   page ends at the newest message. An id that is not found yields an empty page.
 * @returns `items` plus `cursor`, the id of the first returned message when
 *   older messages remain, otherwise `undefined`.
 */
export function pageMessages(sessionID: string, limit: number, before?: string) {
  const messages = fixture.messages[sessionID as keyof typeof fixture.messages] ?? []
  let end = messages.length
  if (before) {
    const found = messages.findIndex((message) => message.info.id === before)
    end = Math.max(0, found)
  }
  const start = Math.max(0, end - limit)
  const cursor = start > 0 ? messages[start]!.info.id : undefined
  return { items: messages.slice(start, end), cursor }
}

View File

@ -0,0 +1,67 @@
import type { Page } from "@playwright/test"
import { base64Encode } from "@opencode-ai/core/util/encode"
import { mockOpenCodeServer } from "../../utils/mock-server"
import { fixture } from "./session-timeline-stress.fixture"
/**
 * Registers an init script that writes the `settings.v3` localStorage entry
 * with expanded edit/shell tool parts, reasoning summaries and the session
 * progress bar all enabled.
 */
export async function installTimelineSettings(page: Page) {
  await page.addInitScript(() => {
    // Runs in the page, so the settings object has to be built here.
    const general = {
      editToolPartsExpanded: true,
      shellToolPartsExpanded: true,
      showReasoningSummaries: true,
      showSessionProgressBar: true,
    }
    localStorage.setItem("settings.v3", JSON.stringify({ general }))
  })
}
/**
 * Installs the mock server on `page`, backed by the stress fixture.
 * Message requests always receive the session's full message list.
 */
export function mockStressTimeline(page: Page) {
  const { sessions, provider, directory, project } = fixture
  return mockOpenCodeServer(page, {
    sessions,
    provider,
    directory,
    project,
    pageMessages: (sessionID) => {
      const items = fixture.messages[sessionID as keyof typeof fixture.messages] ?? []
      return { items }
    },
  })
}
/**
 * Registers an init script that seeds localStorage with the fixture project
 * and one session tab each for the source and target sessions.
 *
 * The server URL comes from PLAYWRIGHT_SERVER_HOST / PLAYWRIGHT_SERVER_PORT,
 * defaulting to 127.0.0.1:4096.
 */
export async function installStressSessionTabs(page: Page) {
  const host = process.env.PLAYWRIGHT_SERVER_HOST ?? "127.0.0.1"
  const port = process.env.PLAYWRIGHT_SERVER_PORT ?? "4096"
  // Everything the in-page script needs must be passed in as a serializable argument.
  const seed = {
    directory: fixture.directory,
    sourceID: fixture.sourceID,
    targetID: fixture.targetID,
    dirBase64: base64Encode(fixture.directory),
    server: `http://${host}:${port}`,
  }
  await page.addInitScript((values) => {
    const projects = {
      projects: { local: [{ worktree: values.directory, expanded: true }] },
      lastProject: { local: values.directory },
    }
    const tabs = [values.sourceID, values.targetID].map((sessionId) => ({
      type: "session",
      server: values.server,
      dirBase64: values.dirBase64,
      sessionId,
    }))
    localStorage.setItem("opencode.global.dat:server", JSON.stringify(projects))
    localStorage.setItem("opencode.global.dat:tabs", JSON.stringify(tabs))
  }, seed)
}
/** Builds the app path `/<base64 fixture directory>/session/<sessionID>`. */
export function stressSessionHref(sessionID: string) {
  const encodedDirectory = base64Encode(fixture.directory)
  return ["", encodedDirectory, "session", sessionID].join("/")
}

View File

@ -0,0 +1,15 @@
import { expect, test } from "bun:test"
import { mkdtemp, rm } from "node:fs/promises"
import path from "node:path"
import os from "node:os"
import { prepareChromeTrace } from "../chrome-trace"
// Calls prepareChromeTrace with a nested path that does not exist yet and checks the returned file name.
test("creates the configured trace directory", async () => {
// mkdtemp yields a fresh root, so "nested/traces" below it is absent before the call.
const root = await mkdtemp(path.join(os.tmpdir(), "opencode-trace-"))
try {
const file = await prepareChromeTrace(path.join(root, "nested", "traces"), "session/tab", false, "test")
// Only the suffix is asserted: "session/tab" shows up as "session-tab", followed by
// "458ed9e3" (presumably a hash derived from the inputs; confirm in chrome-trace) and the "test" label.
expect(file).toEndWith("-session-tab-458ed9e3-test.json")
} finally {
// Remove the temporary root regardless of the outcome.
await rm(root, { recursive: true, force: true })
}
})

View File

@ -0,0 +1,42 @@
import { expect, test } from "bun:test"
import { compressCachedRepaintTrace, layoutShiftSample } from "../timeline/session-tab-repaint-probe"
// Round-trip check: expanding the compressed sample groups must reproduce the original samples exactly.
test("compresses repeated repaint states without losing frame samples", () => {
// Shared per-sample state; the first two samples below are identical apart from observedAtMs.
const state = {
root: 1,
scrollTop: 10,
scrollHeight: 20,
bottomErrorPx: 0,
last: true,
rows: [{ key: "row", node: 2, top: 0, bottom: 10 }],
mounted: 1,
center: "content",
}
const trace = {
timeOriginEpochMs: 1_000,
startedAtPerformanceMs: 100,
samples: [
{ observedAtMs: 16, ...state, destination: ["target"], source: [] },
{ observedAtMs: 32, ...state, destination: ["target"], source: [] },
// Third sample differs in scrollTop only.
{ observedAtMs: 48, ...state, scrollTop: 11, destination: ["target"], source: [] },
],
mutations: [{ observedAtMs: 20, changed: [{ type: "add", node: 2 }] }],
shifts: [{ occurredAtMs: 24, value: 0.1 }],
windowMs: 1_000,
running: false,
stop() {},
}
const compressed = compressCachedRepaintTrace(trace)
// Each group holds a list of observation times plus one shared state; expand back to flat samples.
const samples = compressed.samples.flatMap((group) =>
group.observedAtMs.map((observedAtMs) => ({ observedAtMs, ...group.state })),
)
expect(samples).toEqual(trace.samples)
// Mutations and shifts are expected to pass through unchanged.
expect(compressed.mutations).toEqual(trace.mutations)
expect(compressed.shifts).toEqual(trace.shifts)
})
// layoutShiftSample drops entries that start before the window start (100) and
// reports occurredAtMs relative to that start (124 - 100 = 24).
test("records layout shifts at occurrence time within the probe window", () => {
expect(layoutShiftSample({ startTime: 99, value: 0.1 }, 100)).toBeUndefined()
expect(layoutShiftSample({ startTime: 124, value: 0.2 }, 100)).toEqual({ occurredAtMs: 24, value: 0.2 })
})

View File

@ -0,0 +1,54 @@
import { expect, test } from "bun:test"
import { classifySessionSwitch } from "../timeline/session-tab-switch-metrics"
// Sequence: one source-only sample, one blank sample, then three destination-only samples.
test("counts source and blank samples before the destination is observed", () => {
const result = classifySessionSwitch([
{ observedAtMs: 16, destination: [], source: ["source"], hasVisibleRows: true, last: false },
{ observedAtMs: 32, destination: [], source: [], hasVisibleRows: false, last: false },
{ observedAtMs: 48, destination: ["destination"], source: [], hasVisibleRows: true, last: true, bottomErrorPx: 0 },
{ observedAtMs: 64, destination: ["destination"], source: [], hasVisibleRows: true, last: true, bottomErrorPx: 0 },
{ observedAtMs: 80, destination: ["destination"], source: [], hasVisibleRows: true, last: true, bottomErrorPx: 0 },
])
expect(result.blankSamples).toBe(1)
expect(result.sourceSamples).toBe(1)
expect(result.unknownSamples).toBe(0)
// Destination first appears at 48; stability is reported at the third consecutive correct sample (80).
expect(result.firstDestinationObservedMs).toBe(48)
expect(result.stableObservedMs).toBe(80)
})
// The first sample shows source and destination together, so it must not count as correct.
test("does not classify mixed source and destination content as correct", () => {
const result = classifySessionSwitch([
{
observedAtMs: 16,
destination: ["destination"],
source: ["source"],
hasVisibleRows: true,
last: true,
bottomErrorPx: 0,
},
{ observedAtMs: 32, destination: ["destination"], source: [], hasVisibleRows: true, last: true, bottomErrorPx: 0 },
{ observedAtMs: 48, destination: ["destination"], source: [], hasVisibleRows: true, last: true, bottomErrorPx: 0 },
{ observedAtMs: 64, destination: ["destination"], source: [], hasVisibleRows: true, last: true, bottomErrorPx: 0 },
])
// First destination-only sample is at 32; stable after the third such sample (64).
expect(result.firstCorrectObservedMs).toBe(32)
expect(result.stableObservedMs).toBe(64)
})
// With only a mixed sample, the destination is seen but no correct or stable time exists.
test("reports missing correctness without throwing", () => {
const result = classifySessionSwitch([
{
observedAtMs: 16,
destination: ["destination"],
source: ["source"],
hasVisibleRows: true,
last: true,
bottomErrorPx: 0,
},
])
expect(result.firstDestinationObservedMs).toBe(16)
// Missing values are reported as null rather than raising.
expect(result.firstCorrectObservedMs).toBeNull()
expect(result.stableObservedMs).toBeNull()
})

View File

@ -0,0 +1,14 @@
import { expect, test } from "bun:test"
import { streamChunk } from "../timeline/session-timeline-benchmark.fixture"
import { streamProgress } from "../timeline/session-timeline-stream-probe"
// streamProgress uses the last marker in the text; the expectations imply that 18 and 36
// are multiples of the fixture's fragment count while 19 is not.
test("classifies emitted stream markers using the fixture cycle", () => {
expect(streamProgress("before stream-17 after stream-18")).toEqual({ index: 18, phase: "boundary" })
expect(streamProgress("before stream-18 after stream-19")).toEqual({ index: 19, phase: "stream" })
// "benchmark-complete" takes precedence over the boundary/stream classification.
expect(streamProgress("benchmark-complete stream-36")).toEqual({ index: 36, phase: "complete" })
expect(streamProgress("no marker")).toEqual({ index: -1, phase: "unknown" })
})
// Ties the fixture to the probe: the chunk produced for index 18 must parse back as a boundary at 18.
test("emits progress markers at fixture boundaries", () => {
expect(streamProgress(streamChunk(18, 160))).toEqual({ index: 18, phase: "boundary" })
})

View File

@ -0,0 +1,16 @@
import { expect, test } from "bun:test"
import { layoutShiftValue, removeVisibleRow } from "../timeline/session-timeline-stream-probe"
// layoutShiftValue returns undefined for entries before the start (10) or flagged with
// hadRecentInput, and the raw value otherwise.
test("excludes layout shifts before the probe window and recent input", () => {
expect(layoutShiftValue({ startTime: 9, value: 0.1 }, 10)).toBeUndefined()
expect(layoutShiftValue({ startTime: 10, value: 0.2, hadRecentInput: true }, 10)).toBeUndefined()
expect(layoutShiftValue({ startTime: 11, value: 0.3 }, 10)).toBe(0.3)
})
// removeVisibleRow reports whether the row was still in the visible set: true on the
// first removal, false once it is already gone.
test("classifies removed rows from their last painted visibility", () => {
const row = {}
const visible = new Set([row])
expect(removeVisibleRow(visible, row)).toBe(true)
expect(removeVisibleRow(visible, row)).toBe(false)
})

View File

@ -44,7 +44,10 @@ export async function mockOpenCodeServer(page: Page, config: MockServerConfig) {
await page.route("**/*", async (route) => {
const url = new URL(route.request().url())
const targetPort = process.env.PLAYWRIGHT_SERVER_PORT ?? "4096"
if (url.port !== targetPort) return route.fallback()
const appPort = new URL(
process.env.PLAYWRIGHT_BASE_URL ?? `http://127.0.0.1:${process.env.PLAYWRIGHT_PORT ?? "3000"}`,
).port
if (url.port !== targetPort && url.port !== appPort) return route.fallback()
const path = url.pathname
if (path === "/global/event" || path === "/event") return sse(route, config.events?.(), config.eventRetry)
@ -72,7 +75,8 @@ export async function mockOpenCodeServer(page: Page, config: MockServerConfig) {
return json(route, pageData.items, pageData.cursor ? { "x-next-cursor": pageData.cursor } : undefined)
}
return json(route, {})
if (url.port === targetPort && targetPort !== appPort) return json(route, {})
return route.fallback()
})
}

View File

@ -24,7 +24,8 @@
"test:e2e": "playwright test",
"test:e2e:local": "playwright test",
"test:e2e:ui": "playwright test --ui",
"test:e2e:report": "playwright show-report e2e/playwright-report"
"test:e2e:report": "playwright show-report e2e/playwright-report",
"test:bench": "bun test ./e2e/performance/unit && playwright test --config e2e/performance/playwright.config.ts"
},
"license": "MIT",
"devDependencies": {

View File

@ -9,6 +9,7 @@ const reuse = !process.env.CI
const workers = Number(process.env.PLAYWRIGHT_WORKERS ?? (process.env.CI ? 5 : 0)) || undefined
export default defineConfig({
testDir: "./e2e",
testIgnore: process.env.OPENCODE_PERFORMANCE === "1" ? "performance/**/*.test.ts" : "performance/**",
outputDir: "./e2e/test-results",
timeout: 60_000,
expect: {