Compare commits

..

7 Commits

Author SHA1 Message Date
4fc5e380f2
ci: add release/* branch source validation workflow (#11)
release/* 仅接受 hotfix/* 或带 cherry-pick/backport 标签的 PR。
详见 iac_modules/docs/tldr-github-branch-model.md

Co-authored-by: Haitao Pan <haitao.pan@xworkmate.ai>
Co-authored-by: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-28 12:12:16 +08:00
Haitao Pan
188ca4ba4a fix(acp): keep artifact scan hints non-blocking 2026-06-27 12:03:08 +08:00
Haitao Pan
0a50621664 fix(acp): remove orphaned S1 test (helper reverted) — keep main compiling
Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-27 06:44:06 +08:00
Haitao Pan
81f65e3308 Merge: T10/T11/T12 observability + revert S1 2026-06-27 06:43:21 +08:00
Haitao Pan
fa9cc78add fix(acp): T10/T11/T12 observability + error semantics; revert S1 (broke main)
T10: gatewayRPCError marks OPENCLAW_GATEWAY_SOCKET_CLOSED with retryable=true,
poll=true so the client degrades to "background/reconnecting" + keeps polling
instead of hard-failing (feeds App T5).
T11: runId-tagged warn logs at the tasks.get unconfirmed-fallback and
run-deadline-interrupt sites, so a runId can be joined across App→bridge→plugin→gateway.
T12: process-level stability counters (gatewaySocketClosed, taskGetUnconfirmedFallback,
runDeadlineInterrupt) exposed via /api/ping.metrics.

Revert S1 (default expectedArtifactDirs): it set requiresExport=true / default dirs
for any artifact-inferring task, which made a gateway run that succeeds with NO
artifact hang "waiting for artifact export" (TestHTTPHandlerGatewayOpenClawHandlesFive
ConcurrentE2ECases + ...WithoutPromptHeuristic went red). The blocking is tied to
expectedArtifactDirs presence in openClawTaskGetRequiresArtifactExport; decoupling
scan-hint from block-on-export needs a careful, separately-tested change. Reverted to
keep main green; S1 to be redesigned (see docs/cases/06 §7).

Full internal/acp suite green.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-27 06:43:21 +08:00
Haitao Pan
02808934c8 Merge: S1 default expectedArtifactDirs (stability — artifact delivery) 2026-06-27 06:31:35 +08:00
Haitao Pan
3c7de420d2 fix(acp): S1 — default expectedArtifactDirs so plugin root-fallback collects artifacts
Live verification (docs/cases/06 §7 S1) showed the session mapping recorded
expectedArtifactDirs:[] for an md-producing task. openclaw-multi-session-plugins
only scans the workspace-root deliverable dirs (reports/, artifacts/, ...) when
expectedArtifactDirs is non-empty; empty → the root fallback is inert, so an agent
that writes news.md to the workspace root (the common case) yields "no files".

openClawArtifactContractForParams now defaults expectedArtifactDirs to
reports//artifacts//exports/ when the task expects artifacts (requiresExport or
inferred requiredExts) but declared no dirs, and marks requiresExport so the export
path runs. Pure-chat turns (no artifact intent) are unaffected.

Test: orchestrator_s1_artifact_dirs_test.go (md task gets dirs+export; chat gets neither).

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-27 06:31:34 +08:00
7 changed files with 139 additions and 3 deletions

View File

@ -0,0 +1,44 @@
name: Validate Release PR
# release/* 分支的发布策略门禁:仅接受 hotfix/* 或带 cherry-pick/backport 标签的 PR。
# 详见 iac_modules/docs/tldr-github-branch-model.md
on:
pull_request_target:
types: [opened, synchronize, reopened, labeled, unlabeled]
permissions:
contents: read
pull-requests: read
jobs:
validate-release-source:
runs-on: ubuntu-latest
if: startsWith(github.base_ref, 'release/')
steps:
- name: Check PR source branch
run: |
SRC="${{ github.head_ref }}"
TGT="${{ github.base_ref }}"
LABELS="${{ join(github.event.pull_request.labels.*.name, ',') }}"
echo "🔍 Validating PR into release branch"
echo " source: $SRC"
echo " target: $TGT"
echo " labels: $LABELS"
if [[ "$SRC" =~ ^hotfix/ ]]; then
echo "✅ Allowed: hotfix/* branch"
exit 0
fi
if [[ "$LABELS" =~ (^|,)(cherry-pick|backport)(,|$) ]]; then
echo "✅ Allowed: cherry-pick/backport labeled PR"
exit 0
fi
echo "❌ Rejected."
echo "release/* 仅接受:"
echo " - 来自 hotfix/* 的 PR"
echo " - 带 cherry-pick 或 backport 标签的 PR已验证 feature 的 backport/cherry-pick"
echo "禁止从 main / develop / feature/* 直接合并到 release/*。"
exit 1

View File

@ -37,6 +37,7 @@ func (s *Server) Handler() http.Handler {
"commit": info.Commit,
"version": info.Version,
"buildDate": info.BuildDate,
"metrics": bridgeStabilityMetricsSnapshot(), // T12
}
body, _ := json.Marshal(resp)
w.Header().Set("Content-Type", "application/json")

29
internal/acp/metrics.go Normal file
View File

@ -0,0 +1,29 @@
package acp
import "sync/atomic"
// 关键稳定性指标T12docs/cases/06 §5
//
// 进程内累计计数,经 /api/ping 暴露,用于把「网关抖动 / run 超时」从靠用户截图
// 变为可监控。三个计数对应三类已知的不稳定来源:
// - gatewaySocketClosed : gatewayRPCError 命中 OPENCLAW_GATEWAY_SOCKET_CLOSED连接断
// - taskGetUnconfirmedFallback: tasks.get 走持久 run 仓兜底gateway 无法确认 runT7
// - runDeadlineInterrupt : run 超过 DeadlineAt 且 gateway 无法确认,回 interruptedT9
var bridgeStabilityMetrics struct {
gatewaySocketClosed atomic.Int64
taskGetUnconfirmedFallback atomic.Int64
runDeadlineInterrupt atomic.Int64
}
func metricGatewaySocketClosedInc() { bridgeStabilityMetrics.gatewaySocketClosed.Add(1) }
func metricTaskGetUnconfirmedFallbackInc() { bridgeStabilityMetrics.taskGetUnconfirmedFallback.Add(1) }
func metricRunDeadlineInterruptInc() { bridgeStabilityMetrics.runDeadlineInterrupt.Add(1) }
// bridgeStabilityMetricsSnapshot 返回当前计数快照,供 /api/ping 输出。
func bridgeStabilityMetricsSnapshot() map[string]any {
return map[string]any{
"gatewaySocketClosed": bridgeStabilityMetrics.gatewaySocketClosed.Load(),
"taskGetUnconfirmedFallback": bridgeStabilityMetrics.taskGetUnconfirmedFallback.Load(),
"runDeadlineInterrupt": bridgeStabilityMetrics.runDeadlineInterrupt.Load(),
}
}

View File

@ -1,6 +1,7 @@
package acp
import (
"log"
"strings"
"time"
@ -125,11 +126,15 @@ func (s *Server) openClawTaskGetGatewayUnconfirmedFallback(params map[string]any
return s.markOpenClawRunDeadlineInterruptedLocked(sess, code, message)
}
// 仍在预算内:合成 running 句柄让客户端继续轮询,不因一次瞬时抖动硬失败。
metricTaskGetUnconfirmedFallbackInc() // T12
running := openClawRunningTaskResult(sess.openClaw)
running["transportDegraded"] = true
if strings.TrimSpace(code) != "" {
running["transportDegradedCode"] = strings.TrimSpace(code)
}
// T11带 runId 的日志,便于与 App / 插件 / 网关四层按 runId 串联。
log.Printf("level=warn component=openclaw_run_registry event=tasks_get_unconfirmed_fallback runId=%q openclawSessionKey=%q code=%q",
sess.openClaw.RunID, sess.openClaw.SessionKey, strings.TrimSpace(code))
sess.lastResult = cloneMap(running)
return running
}
@ -143,6 +148,13 @@ func (s *Server) markOpenClawRunDeadlineInterruptedLocked(sess *session, code st
sess.task.ProgressStage = "interrupted"
sess.task.ProgressMessage = "OpenClaw run exceeded its budget and could not be confirmed"
sess.task.UpdatedAt = now
metricRunDeadlineInterruptInc() // T12
// T11带 runId 的终态日志。
if sess.openClaw != nil {
log.Printf("level=warn component=openclaw_run_registry event=run_deadline_interrupt runId=%q openclawSessionKey=%q deadlineAt=%q code=%q",
sess.openClaw.RunID, sess.openClaw.SessionKey,
sess.openClaw.DeadlineAt.UTC().Format(time.RFC3339Nano), strings.TrimSpace(code))
}
result := map[string]any{
"ok": true,

View File

@ -1687,11 +1687,16 @@ func applyOpenClawConstraintDeliveryStatus(result map[string]any) {
func gatewayRPCError(errorPayload map[string]any, fallback string) *shared.RPCError {
if isOpenClawRetryableGatewayError(errorPayload) {
metricGatewaySocketClosedInc() // T12
// T10连接断属「可重试 / run 可能仍在后台、可续轮询」语义,而非 run 确实失败。
// 带 retryable/poll 提示,客户端据此降级为「后台续跑·重连中」(T5) 续轮询 tasks.get而非硬失败。
return &shared.RPCError{
Code: -32002,
Message: "OPENCLAW_GATEWAY_SOCKET_CLOSED: OpenClaw gateway connection closed during task execution",
Data: map[string]any{
"code": "OPENCLAW_GATEWAY_SOCKET_CLOSED",
"retryable": true,
"poll": true,
"originalCode": strings.TrimSpace(shared.StringArg(errorPayload, "code", "")),
"originalError": strings.TrimSpace(shared.StringArg(errorPayload, "message", "")),
},

View File

@ -132,6 +132,49 @@ func TestNormalizeOpenClawTaskGetUnknownArtifactEvidenceKeepsActiveRecordRunning
}
}
func TestExpectedArtifactDirectoriesDoNotBlockTerminalTaskState(t *testing.T) {
params := map[string]any{"expectedArtifactDirs": []any{"reports/", "artifacts/"}}
payload := map[string]any{
"success": true,
"status": string(TaskStateCompleted),
"artifactScope": "tasks/session/run",
"artifactDirectory": "/remote/openclaw/workspace/tasks/session/run",
"expectedArtifactDirs": []any{
"reports/",
"artifacts/",
},
}
if openClawTaskGetRequiresArtifactExport(params, payload) {
t.Fatal("expectedArtifactDirs must remain non-blocking scan hints")
}
got := normalizeOpenClawTaskGetResult(params, payload, "openclaw", nil)
if status := shared.StringArg(got, "status", ""); status != string(TaskStateCompleted) {
t.Fatalf("expected terminal status to remain completed, got %#v", got)
}
if parseBool(got["pending"]) {
t.Fatalf("expected terminal payload not to become pending, got %#v", got)
}
}
func TestRequiredArtifactExtensionsStillBlockUntilVerified(t *testing.T) {
params := map[string]any{"requiredArtifactExtensions": []any{"md"}}
payload := map[string]any{
"success": true,
"status": string(TaskStateCompleted),
"artifactScope": "tasks/session/run",
"artifactDirectory": "/remote/openclaw/workspace/tasks/session/run",
}
if !openClawTaskGetRequiresArtifactExport(params, payload) {
t.Fatal("requiredArtifactExtensions must remain a blocking delivery contract")
}
got := normalizeOpenClawTaskGetResult(params, payload, "openclaw", nil)
if status := shared.StringArg(got, "status", ""); status != string(TaskStateRunning) {
t.Fatalf("expected missing required artifact to remain syncing, got %#v", got)
}
}
func TestNormalizeOpenClawTaskGetUnknownArtifactEvidenceFailsAfterDeadlineWithoutRequiredArtifacts(t *testing.T) {
payload := map[string]any{
"success": false,

View File

@ -471,9 +471,11 @@ func openClawTaskGetRequiresArtifactExport(params map[string]any, payload map[st
if parseBool(params["requiresExportBeforeFinalResponse"]) || parseBool(payload["requiresExportBeforeFinalResponse"]) {
return true
}
return len(shared.ListArg(params, "expectedArtifactDirs")) > 0 ||
len(shared.ListArg(payload, "expectedArtifactDirs")) > 0 ||
len(shared.ListArg(params, "requiredArtifactExtensions")) > 0 ||
// expectedArtifactDirs are discovery hints for the plugin's workspace-root
// scan. They do not prove that the caller requires a file before the run can
// reach a terminal state. Treating them as a blocking contract turns a
// failed/no-output agent run into an endless "syncing-artifacts" loop.
return len(shared.ListArg(params, "requiredArtifactExtensions")) > 0 ||
len(shared.ListArg(payload, "requiredArtifactExtensions")) > 0
}