feat: add explicit gateway task case hints for openclaw-gateway-e2e-regression

This commit is contained in:
Haitao Pan 2026-06-17 16:00:54 +08:00
parent 96fbea6d63
commit ab0ecdd005
3 changed files with 216 additions and 34 deletions

View File

@ -16,11 +16,11 @@ legacy `sessionKey` compatibility field.
## 覆盖目标
- 连续出图：7 张连续风格 PNG
- 模板出图：参考附件模板生成 7 张连续 PNG
- PDF：拆章节、逐章生成图、汇总排版并输出 PDF
- 视频:围绕同一安全演进主线制作测试视频
- 视频流水线:拆章节、逐章调用 Codex/GPT Images、汇总排版并制作视频
- case1：采集最新 AI 资讯，输出 Markdown 文件
- case2：带图片附件制作视频输出 MP4，并保留素材 manifest
- case3：围绕安全演进主线制作 7 张连续风格 PNG
- case4：围绕安全演进主线输出 5 份平台 Markdown 文案
- case5：安全演进拆 7 章，每章调用 Codex/GPT Images 生成图，汇总排版输出 PDF
## 自动化落点
@ -35,6 +35,18 @@ legacy `sessionKey` compatibility field.
以下提示词按原始 E2E 输入记录,作为长期回归 case 的 canonical prompt。
### `OPENCLAW-E2E-000` AI 资讯 Markdown
```text
采集最新AI资讯保存在md文件
```
期望结果:
- 输出 `reports/ai-news-digest.md`、`reports/sources.md`。
- 资讯包含日期、来源链接和摘要。
- 不把浏览器缓存、临时截图或 scratch JSON 当最终 artifact。
### `OPENCLAW-E2E-001` 连续出图
```text
@ -65,15 +77,15 @@ legacy `sessionKey` compatibility field.
### `OPENCLAW-E2E-003` PDF
```text
围绕
从单机权限 → 网络边界 → Web安全 → 云身份 → Zero Trust → AI Agent 身份 → AI模型与知识保护 演进
拆章节 -> 每章调用 Codex -> 每章 GPT images2 生成图 -> 汇总排版 -> 输出 PDF
右侧 artifact栏 显示的陈旧文件
```
期望结果:
- 每章图片素材和最终 PDF 归属当前 task scope。
- PDF 或相关素材出现在当前任务 artifact 区
- 输出 `exports/final.pdf`，并且 `assets/images/` 下有 7 张真实 PNG。
- 回归缺陷点:右侧 artifact 栏不能显示其他 run 或历史 workspace 的陈旧文件。
- 如果 OpenClaw 没有实际导出文件，App 显示 no exported artifacts，而不是旧文件。
@ -91,19 +103,19 @@ legacy `sessionKey` compatibility field.
- 输出视频帧、配置或 MP4 时，artifact 只属于当前任务。
- 失败时释放 active slot 并继续 drain 后续任务。
### `OPENCLAW-E2E-005` 视频流水线
### `OPENCLAW-E2E-005` PDF 流水线
```text
围绕
从单机权限 → 网络边界 → Web安全 → 云身份 → Zero Trust → AI Agent 身份 → AI模型与知识保护 演进
拆章节 -> 每章调用 Codex -> 每章 GPT images2 生成图 -> 汇总排版 -> 制作视频
拆章节 -> 每章调用 Codex -> 每章 GPT images2 生成图 -> 汇总排版 -> 输出 PDF
```
期望结果:
- 图片、manifest、视频配置、MP4/ffprobe 等产物按当前 run 隔离。
- 图片、manifest、`article.md`、`workflow.plan.md`、`exports/final.pdf` 等产物按当前 run 隔离。
- Bridge 和 OpenClaw Gateway 只建立稳定连接,不重复并发握手。
- 不出现 `invalid handshake: first request must be connect`、`SOCKET_CLOSED`、`ACP_HTTP_CONNECTION_CLOSED`。

View File

@ -482,6 +482,7 @@ extension AppControllerDesktopThreadActions on AppController {
gatewayTaskMetadataWithArtifactContractInternal(
baseMetadata: dispatch.metadata,
sessionKey: normalizedSessionKey,
userPrompt: message,
localWorkingDirectory: workingDirectory,
executionWorkingDirectory: executionWorkingDirectory,
remoteWorkingDirectoryHint: remoteWorkingDirectoryHint,
@ -1015,6 +1016,7 @@ extension AppControllerDesktopThreadActions on AppController {
Map<String, dynamic> gatewayTaskMetadataWithArtifactContractInternal({
required Map<String, dynamic> baseMetadata,
required String sessionKey,
required String userPrompt,
required String localWorkingDirectory,
required String executionWorkingDirectory,
required String remoteWorkingDirectoryHint,
@ -1022,8 +1024,25 @@ extension AppControllerDesktopThreadActions on AppController {
final localWorkspace = localWorkingDirectory.trim();
final executionWorkspace = executionWorkingDirectory.trim();
final remoteHint = remoteWorkingDirectoryHint.trim();
return <String, dynamic>{
final caseHint = gatewayCaseHintForPromptInternal(userPrompt);
final metadata = <String, dynamic>{
...baseMetadata,
if (caseHint.caseId.isNotEmpty) 'xworkmateCaseId': caseHint.caseId,
if (caseHint.taskLoadClass.isNotEmpty)
'taskLoadClass': caseHint.taskLoadClass,
if (caseHint.requiredArtifactExtensions.isNotEmpty)
'requiredArtifactExtensions': caseHint.requiredArtifactExtensions,
if (caseHint.expectedArtifactExtensions.isNotEmpty)
'expectedArtifactExtensions': caseHint.expectedArtifactExtensions,
};
if (caseHint.expectedFileCountByExtension.isNotEmpty) {
metadata['xworkmateArtifactConstraints'] = <String, dynamic>{
'schemaVersion': 1,
'expectedFileCountByExtension': caseHint.expectedFileCountByExtension,
};
}
return <String, dynamic>{
...metadata,
'xworkmateTaskArtifactContract': <String, dynamic>{
'schemaVersion': 1,
'appThreadKey': sessionKey,
@ -1031,14 +1050,20 @@ extension AppControllerDesktopThreadActions on AppController {
'finalDeliverableDetection': 'remote-runtime',
'requiresExportBeforeFinalResponse': true,
'rejectTextOnlyFileClaims': true,
'expectedArtifactDirs': const <String>[
'artifacts/',
'reports/',
'exports/',
'assets/',
'assets/images/',
'dist/',
],
'expectedArtifactDirs': caseHint.expectedArtifactDirs.isNotEmpty
? caseHint.expectedArtifactDirs
: const <String>[
'artifacts/',
'reports/',
'exports/',
'assets/',
'assets/images/',
'dist/',
],
if (caseHint.requiredArtifactExtensions.isNotEmpty)
'requiredArtifactExtensions': caseHint.requiredArtifactExtensions,
if (caseHint.expectedFileCountByExtension.isNotEmpty)
'expectedFileCountByExtension': caseHint.expectedFileCountByExtension,
'currentTaskWorkspace': executionWorkspace.isNotEmpty
? executionWorkspace
: (remoteHint.isNotEmpty ? remoteHint : localWorkspace),
@ -1048,6 +1073,115 @@ extension AppControllerDesktopThreadActions on AppController {
};
}
/// Maps a raw user [prompt] to the regression-case hint it matches, if any.
///
/// Detection is plain keyword matching on the lower-cased prompt, so Latin
/// keywords such as `PDF`, `Zero Trust` or `Markdown` are recognised in any
/// casing (the Chinese keywords have no case and are unaffected). Checks run
/// in a fixed priority order and the first match wins:
///
/// 1. AI-news digest saved as Markdown -> `case1-ai-news-md`
/// 2. security evolution + PDF output + chaptered images ->
///    `case5-security-evolution-pdf`
/// 3. security evolution + video -> `case2-security-evolution-video`
/// 4. security evolution + image series ->
///    `case3-security-evolution-seven-images`
/// 5. security evolution + social copy ->
///    `case4-security-evolution-social-copy`
///
/// Returns a default-constructed [GatewayTaskCaseHintInternal] (every field
/// empty) when the prompt matches none of the cases.
GatewayTaskCaseHintInternal gatewayCaseHintForPromptInternal(String prompt) {
  // Match every keyword against one lower-cased copy so that casing of the
  // Latin parts ("PDF" vs "pdf", "Zero Trust" vs "zero trust") never decides
  // whether a hint is attached. All keywords below are therefore lower-case.
  final lower = prompt.toLowerCase();
  bool mentionsAny(List<String> keywords) => keywords.any(lower.contains);
  bool mentionsAll(List<String> keywords) => keywords.every(lower.contains);

  // case1 is checked first and does not require the security-evolution topic.
  final wantsAiNewsMd =
      mentionsAny(const ['ai资讯', 'ai 资讯', 'ai新闻', 'ai news']) &&
      mentionsAny(const ['md文件', '.md', 'markdown']);
  if (wantsAiNewsMd) {
    return const GatewayTaskCaseHintInternal(
      caseId: 'case1-ai-news-md',
      taskLoadClass: 'long_task',
      requiredArtifactExtensions: <String>['md'],
      expectedArtifactExtensions: <String>['md'],
      expectedArtifactDirs: <String>['reports/', 'artifacts/'],
    );
  }

  // Every remaining case requires all seven stages of the security-evolution
  // storyline to be named in the prompt.
  final hasSecurityEvolution = mentionsAll(const [
    '从单机权限',
    '网络边界',
    'web安全',
    '云身份',
    'zero trust',
    'ai agent',
    'ai模型',
  ]);
  if (!hasSecurityEvolution) {
    return const GatewayTaskCaseHintInternal();
  }

  // The chaptered PDF pipeline is tested before video and images, so a prompt
  // that asks for a PDF built from per-chapter images resolves to case5 even
  // if it also contains a video or image-series keyword.
  final wantsPdf = mentionsAny(const [
    '输出 pdf',
    '输出pdf',
    'pdf文件',
    'final.pdf',
  ]);
  final wantsChapteredImages = mentionsAny(const [
    '拆章节',
    '每章',
    'gpt images',
    'images2',
  ]);
  if (wantsPdf && wantsChapteredImages) {
    return const GatewayTaskCaseHintInternal(
      caseId: 'case5-security-evolution-pdf',
      taskLoadClass: 'complex_chain_task',
      requiredArtifactExtensions: <String>['pdf', 'png', 'md'],
      expectedArtifactExtensions: <String>['pdf', 'png', 'md'],
      expectedArtifactDirs: <String>[
        'exports/',
        'assets/',
        'assets/images/',
        'prompts/',
        'reports/',
      ],
      expectedFileCountByExtension: <String, int>{'pdf': 1, 'png': 7},
    );
  }

  // '制作视频' also covers the longer phrase '测试制作视频'.
  final wantsVideo = mentionsAny(const ['制作视频', 'make video', 'mp4']);
  if (wantsVideo) {
    return const GatewayTaskCaseHintInternal(
      caseId: 'case2-security-evolution-video',
      taskLoadClass: 'complex_chain_task',
      requiredArtifactExtensions: <String>['mp4'],
      expectedArtifactExtensions: <String>['mp4', 'png', 'md'],
      expectedArtifactDirs: <String>[
        'renders/',
        'assets/',
        'assets/images/',
        'exports/',
      ],
    );
  }

  // '系列图片' also covers '一系列图片'. '一些列图片' is kept as its own
  // keyword because it does not contain '系列图片'.
  // NOTE(review): '一些列图片' looks like a common mistyping of '一系列图片'
  // - confirm it is intentional.
  final wantsImages = mentionsAny(const [
    '7张',
    '七张',
    '连续制作',
    '系列图片',
    '一些列图片',
  ]);
  if (wantsImages) {
    return const GatewayTaskCaseHintInternal(
      caseId: 'case3-security-evolution-seven-images',
      taskLoadClass: 'complex_chain_task',
      requiredArtifactExtensions: <String>['png'],
      expectedArtifactExtensions: <String>['png', 'md'],
      expectedArtifactDirs: <String>['assets/', 'assets/images/', 'prompts/'],
      expectedFileCountByExtension: <String, int>{'png': 7},
    );
  }

  final wantsSocialCopy = mentionsAny(const [
    '微信公众号短图文',
    '小红书风格',
    'x文案串',
    '头条号长文',
  ]);
  if (wantsSocialCopy) {
    return const GatewayTaskCaseHintInternal(
      caseId: 'case4-security-evolution-social-copy',
      taskLoadClass: 'long_task',
      requiredArtifactExtensions: <String>['md'],
      expectedArtifactExtensions: <String>['md'],
      expectedArtifactDirs: <String>['reports/', 'artifacts/'],
      expectedFileCountByExtension: <String, int>{'md': 5},
    );
  }

  return const GatewayTaskCaseHintInternal();
}
bool usesOpenClawGatewayQueueInternal(
AssistantExecutionTarget target,
SingleAgentProvider provider,
@ -1725,3 +1859,21 @@ extension AppControllerDesktopThreadActions on AppController {
}
}
}
/// Explicit task-case hint attached to gateway task metadata.
///
/// A default-constructed instance has every field empty and stands for
/// "no case matched".
class GatewayTaskCaseHintInternal {
  /// Creates a hint; any field that is not supplied stays empty.
  const GatewayTaskCaseHintInternal({
    this.caseId = '',
    this.taskLoadClass = '',
    this.requiredArtifactExtensions = const [],
    this.expectedArtifactExtensions = const [],
    this.expectedArtifactDirs = const [],
    this.expectedFileCountByExtension = const {},
  });

  /// Identifier of the matched case, e.g. `case5-security-evolution-pdf`.
  final String caseId;

  /// Load class of the task, e.g. `long_task` or `complex_chain_task`.
  final String taskLoadClass;

  /// File extensions, written without the dot (e.g. `pdf`), that are
  /// required for the task.
  final List<String> requiredArtifactExtensions;

  /// File extensions, written without the dot, that the task is expected to
  /// produce.
  final List<String> expectedArtifactExtensions;

  /// Workspace-relative directories (e.g. `exports/`) where artifacts are
  /// expected.
  final List<String> expectedArtifactDirs;

  /// Expected number of files per extension, e.g. `{'png': 7}`.
  final Map<String, int> expectedFileCountByExtension;
}

View File

@ -1358,7 +1358,7 @@ void main() {
});
test(
'sendChatMessage leaves Gateway task classification to the remote runtime',
'sendChatMessage adds explicit case metadata for security video pipelines',
() async {
final fakeGoTaskService = _RecordingGoTaskServiceClient();
final controller = _connectedGatewayController(fakeGoTaskService);
@ -1376,8 +1376,17 @@ void main() {
expect(fakeGoTaskService.requests, hasLength(1));
final request = fakeGoTaskService.requests.single;
expect(request.metadata, isNot(contains('taskLoadClass')));
expect(request.metadata, isNot(contains('expectedArtifactExtensions')));
expect(
request.metadata['xworkmateCaseId'],
'case2-security-evolution-video',
);
expect(request.metadata['taskLoadClass'], 'complex_chain_task');
expect(request.metadata['requiredArtifactExtensions'], <String>['mp4']);
expect(request.metadata['expectedArtifactExtensions'], <String>[
'mp4',
'png',
'md',
]);
expect(request.metadata, contains('xworkmateTaskArtifactContract'));
final artifactContract =
(request.metadata['xworkmateTaskArtifactContract'] as Map)
@ -1388,14 +1397,12 @@ void main() {
expect(artifactContract['finalDeliverableDetection'], 'remote-runtime');
expect(artifactContract['requiresExportBeforeFinalResponse'], isTrue);
expect(artifactContract['expectedArtifactDirs'], const <String>[
'artifacts/',
'reports/',
'exports/',
'renders/',
'assets/',
'assets/images/',
'dist/',
'exports/',
]);
expect(artifactContract, isNot(contains('expectedArtifactExtensions')));
expect(artifactContract['requiredArtifactExtensions'], <String>['mp4']);
expect(request.prompt, isNot(contains('Task load classification:')));
expect(
request.prompt,
@ -1413,7 +1420,7 @@ void main() {
);
test(
'sendChatMessage leaves artifact expectations to the remote runtime',
'sendChatMessage adds explicit case metadata for chaptered PDF pipelines',
() async {
final fakeGoTaskService = _RecordingGoTaskServiceClient();
final controller = _connectedGatewayController(fakeGoTaskService);
@ -1432,8 +1439,16 @@ void main() {
expect(fakeGoTaskService.requests, hasLength(1));
final request = fakeGoTaskService.requests.single;
expect(request.metadata, isNot(contains('taskLoadClass')));
expect(request.metadata, isNot(contains('expectedArtifactExtensions')));
expect(
request.metadata['xworkmateCaseId'],
'case5-security-evolution-pdf',
);
expect(request.metadata['taskLoadClass'], 'complex_chain_task');
expect(request.metadata['requiredArtifactExtensions'], <String>[
'pdf',
'png',
'md',
]);
expect(request.metadata, contains('xworkmateTaskArtifactContract'));
final artifactContract =
(request.metadata['xworkmateTaskArtifactContract'] as Map)
@ -1444,13 +1459,16 @@ void main() {
expect(artifactContract['scopeKind'], 'task');
expect(artifactContract['rejectTextOnlyFileClaims'], isTrue);
expect(artifactContract['expectedArtifactDirs'], const <String>[
'artifacts/',
'reports/',
'exports/',
'assets/',
'assets/images/',
'dist/',
'prompts/',
'reports/',
]);
expect(artifactContract['expectedFileCountByExtension'], <String, int>{
'pdf': 1,
'png': 7,
});
expect(
artifactContract['currentTaskWorkspace'],
request.workingDirectory,