fix(ci): add openclaw session contract check to validate stage

The OpenClaw session contract smoke and SSE long-task stream checks used to
live in the Ansible validate role and ran during the Deploy stage. They depend
on the public OpenClaw gateway producing a 'pong' reply, which the bridge
itself cannot guarantee end-to-end. When the gateway returned an empty
completion envelope (CI run 27010307958), the entire Deploy job failed even
though the bridge binary had been installed and was healthy.

Move the lightweight session contract check into the GitHub Actions validate
stage as a new script. Deploy now only asserts the bridge's own state
(binary, ports, /api/ping, /acp/rpc capabilities, routing.resolve), and the
OpenClaw contract check runs in validate where the release-blocking failure
belongs.
This commit is contained in:
Haitao Pan 2026-06-05 19:29:53 +08:00
parent 0e546ccd7b
commit 57ab5711e1
2 changed files with 177 additions and 0 deletions

View File

@ -322,3 +322,8 @@ jobs:
env:
BRIDGE_AUTH_TOKEN: ${{ env.INTERNAL_SERVICE_TOKEN }}
run: bash ./scripts/github-actions/verify-public-rpc-contract.sh
# Workflow step: runs validate-openclaw-session.sh, passing the value of
# env.INTERNAL_SERVICE_TOKEN to the script as BRIDGE_AUTH_TOKEN.
- name: Validate OpenClaw session contract
env:
BRIDGE_AUTH_TOKEN: ${{ env.INTERNAL_SERVICE_TOKEN }}
run: bash ./scripts/github-actions/validate-openclaw-session.sh

View File

@ -0,0 +1,172 @@
#!/usr/bin/env bash
# OpenClaw session contract smoke check.
#
# Settings are read from the environment, each with a fallback:
#   BRIDGE_SERVER_URL                     bridge base URL
#   BRIDGE_AUTH_TOKEN                     bearer token (mandatory)
#   OPENCLAW_SMOKE_RPC_TIMEOUT_SECONDS    cap for the initial RPC call
#   OPENCLAW_SMOKE_POLL_TIMEOUT_SECONDS   cap for follow-up task polling
#   OPENCLAW_SMOKE_POLL_INTERVAL_SECONDS  pause between two polls
set -o errexit -o nounset -o pipefail

BASE_URL="${BRIDGE_SERVER_URL:-https://xworkmate-bridge.svc.plus}"
AUTH_TOKEN="${BRIDGE_AUTH_TOKEN:-}"
RPC_TIMEOUT_SECONDS="${OPENCLAW_SMOKE_RPC_TIMEOUT_SECONDS:-180}"
POLL_TIMEOUT_SECONDS="${OPENCLAW_SMOKE_POLL_TIMEOUT_SECONDS:-120}"
POLL_INTERVAL_SECONDS="${OPENCLAW_SMOKE_POLL_INTERVAL_SECONDS:-2}"

# Refuse to continue without a token: every request below sends it as a
# bearer credential.
[[ -n "${AUTH_TOKEN}" ]] || {
  echo "BRIDGE_AUTH_TOKEN is required" >&2
  exit 1
}
# normalize_url URL
# Print URL followed by a newline, with a single trailing "/" removed if one
# is present. Any other trailing characters are left untouched.
normalize_url() {
  printf '%s\n' "${1%/}"
}
# JSON-RPC endpoint, derived from the configured base URL.
resolved_base_url="$(normalize_url "${BASE_URL}")"
rpc_url="${resolved_base_url}/acp/rpc"

# Scratch file that receives the raw response stream; removed on any exit.
stream_file="$(mktemp)"
trap 'rm -f "$stream_file"' EXIT

# The epoch-second suffix distinguishes this smoke session from earlier ones.
session_id="validate-openclaw-$(date +%s)"

# session.start request routed explicitly to the "openclaw" gateway provider.
# The "id" value is the one the validator later searches for in the stream.
request_body="$(cat <<JSON
{
"jsonrpc": "2.0",
"id": "validate-openclaw",
"method": "session.start",
"params": {
"sessionId": "${session_id}",
"threadId": "${session_id}",
"taskPrompt": "Reply exactly pong.",
"workingDirectory": "/tmp",
"routing": {
"routingMode": "explicit",
"explicitExecutionTarget": "gateway",
"preferredGatewayProviderId": "openclaw"
}
}
}
JSON
)"

printf '%s\n' "OpenClaw smoke -> POST ${rpc_url} (session=${session_id})"

# Options are collected in an array so each one can sit on its own line.
curl_options=(
  --http1.1
  --fail
  --silent
  --show-error
  --no-buffer
  --max-time "${RPC_TIMEOUT_SECONDS}"
  -H "Authorization: Bearer ${AUTH_TOKEN}"
  -H "Origin: https://xworkmate-app.svc.plus"
  -H "Content-Type: application/json"
  -H "Accept: text/event-stream"
  --data "${request_body}"
)

# Capture the response stream to the scratch file for offline validation.
curl "${curl_options[@]}" "${rpc_url}" > "${stream_file}"
# Run the embedded Python validator. Its inputs (token, captured stream path,
# poll URL and poll timings) are passed as OPENCLAW_* environment variables
# scoped to this one command. The program text is read from stdin via a
# quoted heredoc, so the shell performs no expansion inside it.
OPENCLAW_AUTH_TOKEN="${AUTH_TOKEN}" \
OPENCLAW_STREAM_FILE="${stream_file}" \
OPENCLAW_POLL_URL="${rpc_url}" \
OPENCLAW_POLL_TIMEOUT_SECONDS="${POLL_TIMEOUT_SECONDS}" \
OPENCLAW_POLL_INTERVAL_SECONDS="${POLL_INTERVAL_SECONDS}" \
python3 - <<'PY'
"""Validate the captured OpenClaw ``session.start`` response stream.

Inputs come from environment variables set by the calling shell script:

* ``OPENCLAW_STREAM_FILE`` - path of the captured SSE response.
* ``OPENCLAW_POLL_URL`` - JSON-RPC endpoint used for follow-up polling.
* ``OPENCLAW_AUTH_TOKEN`` - bearer token for the poll requests.
* ``OPENCLAW_POLL_TIMEOUT_SECONDS`` / ``OPENCLAW_POLL_INTERVAL_SECONDS``.

The process exits non-zero (via ``SystemExit``) unless the stream holds the
final envelope, ends with a done marker, carries none of the known error
texts, and the result text contains "pong".
"""
import json
import os
import sys
import time
import urllib.request

# Must match the "id" of the session.start request sent by the caller.
REQUEST_ID = "validate-openclaw"
TERMINAL_STATUSES = ("completed", "failed", "cancelled")
LEGACY_ROUTING_ERROR_CODES = (
    "GATEWAY_PROVIDER_REQUIRED",
    "OPENCLAW_GATEWAY_METHOD_NOT_ALLOWED",
    "OPENCLAW_GATEWAY_CONFLICT",
    "OPENCLAW_TASK_ENDPOINT_REQUIRED",
)
RUNTIME_ERROR_MARKERS = (
    "Requested agent harness",
    "provider is not one of",
    "Agent failed before reply",
    "ACP_HTTP_",
)
OUTPUT_KEYS = ("output", "message", "summary", "resultSummary")


def parse_sse_payloads(text: str) -> list:
    """Return the decoded ``data`` payload of every event in *text*.

    Events are separated by blank lines and the ``data`` lines of one event
    are joined with newlines before JSON decoding. A ``[DONE]`` payload is
    represented as ``{"done": True}``. Events without data are skipped.

    Raises:
        json.JSONDecodeError: a data payload is not valid JSON.
    """
    # Event streams may use CRLF, CR or LF line endings; normalise first so
    # both the event split and the line split see plain "\n".
    normalized = text.replace("\r\n", "\n").replace("\r", "\n")
    payloads = []
    for block in normalized.split("\n\n"):
        data_lines = []
        # Split on "\n" only: str.splitlines() would also break on characters
        # such as U+2028 that may legally appear inside a JSON string.
        for line in block.split("\n"):
            if not line.startswith("data:"):
                continue
            value = line[len("data:"):]
            # The single space after the colon is optional in the SSE format.
            data_lines.append(value[1:] if value.startswith(" ") else value)
        payload = "\n".join(data_lines).strip()
        if not payload:
            continue
        if payload == "[DONE]":
            payloads.append({"done": True})
            continue
        payloads.append(json.loads(payload))
    return payloads


def find_final_envelope(payloads: list):
    """Return the first dict payload whose ``id`` is REQUEST_ID, else None."""
    return next(
        (
            item
            for item in payloads
            if isinstance(item, dict) and item.get("id") == REQUEST_ID
        ),
        None,
    )


def poll_until_terminal(
    poll_url: str,
    auth_token: str,
    running: dict,
    poll_timeout: float,
    poll_interval: float,
):
    """Poll ``xworkmate.tasks.get`` until the task reaches a terminal status.

    *running* is the initial result; its session/thread/turn/run ids identify
    the task. Returns the terminal result dict, or None when *poll_timeout*
    seconds elapse first. Individual poll failures are reported on stderr and
    retried (best effort).
    """
    # The request body never changes between polls, so build it once.
    req_body = json.dumps({
        "jsonrpc": "2.0",
        "id": "poll-task",
        "method": "xworkmate.tasks.get",
        "params": {
            "sessionId": running.get("sessionId"),
            "threadId": running.get("threadId"),
            "turnId": running.get("turnId"),
            "runId": running.get("runId"),
        },
    }).encode("utf-8")
    # Monotonic clock: immune to wall-clock adjustments during the wait.
    deadline = time.monotonic() + poll_timeout
    while True:
        remaining = deadline - time.monotonic()
        if remaining <= 0:
            return None
        # NOTE(review): the initial request also sends an Origin header; the
        # poll has never sent one - confirm the bridge does not require it.
        req = urllib.request.Request(
            poll_url,
            data=req_body,
            headers={
                "Content-Type": "application/json",
                "Authorization": f"Bearer {auth_token}",
            },
        )
        try:
            # Bound each request by the time left so a stalled connection
            # cannot hang the job far beyond the overall poll deadline.
            with urllib.request.urlopen(req, timeout=max(1.0, remaining)) as resp:
                resp_data = json.loads(resp.read().decode("utf-8"))
            poll_result = resp_data.get("result") or {}
            if poll_result.get("status") in TERMINAL_STATUSES:
                return poll_result
        except Exception as exc:  # deliberate best effort: log and retry
            print(f"poll error: {exc}", file=sys.stderr)
        time.sleep(poll_interval)


def find_failure(final: dict, result: dict):
    """Return the failure message for *final*/*result*, or None when valid.

    Checks, in order: legacy routing error codes in ``final["error"]``,
    runtime error markers anywhere in *final*, and finally that the joined
    output fields of *result* contain "pong" (case-insensitive).
    """
    error_text = json.dumps(final.get("error", {}), ensure_ascii=False)
    for code in LEGACY_ROUTING_ERROR_CODES:
        if code in error_text:
            return f"legacy OpenClaw routing error remained: {code}"
    final_text = json.dumps(final, ensure_ascii=False)
    for marker in RUNTIME_ERROR_MARKERS:
        if marker in final_text:
            return f"OpenClaw smoke returned runtime error text: {marker}"
    output_text = " ".join(str(result.get(key, "")) for key in OUTPUT_KEYS)
    if "pong" not in output_text.lower():
        return f"OpenClaw smoke did not return pong: {output_text[:500]}"
    return None


def main() -> None:
    """Validate the stream named by the environment; exit non-zero on failure."""
    stream_path = os.environ["OPENCLAW_STREAM_FILE"]
    poll_url = os.environ["OPENCLAW_POLL_URL"]
    auth_token = os.environ["OPENCLAW_AUTH_TOKEN"]
    poll_timeout = int(os.environ["OPENCLAW_POLL_TIMEOUT_SECONDS"])
    poll_interval = float(os.environ["OPENCLAW_POLL_INTERVAL_SECONDS"])

    with open(stream_path, encoding="utf-8") as stream:
        payloads = parse_sse_payloads(stream.read())

    final = find_final_envelope(payloads)
    if final is None:
        raise SystemExit("missing final OpenClaw result envelope")
    last = payloads[-1]
    if not (isinstance(last, dict) and last.get("done") is True):
        raise SystemExit("missing SSE done marker")

    result = final.get("result") or final.get("payload") or {}
    if not isinstance(result, dict):
        shown = json.dumps(result, ensure_ascii=False)[:500]
        raise SystemExit(f"unexpected OpenClaw result shape: {shown}")

    if result.get("status") == "running":
        result = poll_until_terminal(
            poll_url, auth_token, result, poll_timeout, poll_interval
        )
        if result is None:
            raise SystemExit("timeout waiting for OpenClaw smoke task to complete")
        # Fold the polled result into the envelope so the error-marker scan
        # in find_failure covers it as well.
        final["result"] = result

    failure = find_failure(final, result)
    if failure is not None:
        raise SystemExit(failure)
    print("OpenClaw smoke OK: pong received from session contract")


if __name__ == "__main__":
    main()
PY