ci: harden deploy validation retries
This commit is contained in:
parent
425a38f1e8
commit
e3bf2063a2
226
scripts/github-actions/test-validate-deploy.sh
Normal file
226
scripts/github-actions/test-validate-deploy.sh
Normal file
@ -0,0 +1,226 @@
|
||||
#!/usr/bin/env bash
#
# Regression tests for scripts/github-actions/validate-deploy.sh.
# The script under test is executed with fake `curl` and `sleep` binaries placed
# first on PATH, so no network access is needed.
set -euo pipefail

# Repository root, resolved relative to this file's own location.
ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"
readonly ROOT_DIR
# Script under test.
readonly SCRIPT_PATH="${ROOT_DIR}/scripts/github-actions/validate-deploy.sh"
# Image reference passed to the script; the fake ping responses report the
# same tag/commit/version.
readonly IMAGE_REF="ghcr.io/x-evor/xworkmate-bridge:425a38f1e8076899400d4a858d4678dffd876afb"

# Per-run state, written by run_validate_capture and reset by cleanup_run.
RUN_OUTPUT=""    # combined stdout+stderr of the last validate-deploy run
RUN_STATUS=0     # exit status of the last validate-deploy run
RUN_TMP_DIR=""   # scratch directory holding the fake tools
RUN_STATE_DIR="" # directory where the fake curl keeps its call counters
|
||||
|
||||
# Report a test failure and abort the whole test run.
# Arguments: $* - human-readable description of what went wrong.
# Outputs:   "FAIL: <message>" on stderr.
# Exits the shell with status 1; it never returns to the caller.
fail() {
  printf 'FAIL: %s\n' "$*" >&2
  exit 1
}
|
||||
|
||||
# Assert that a string contains a literal substring.
# Arguments: $1 - text to search (haystack)
#            $2 - literal substring that must be present (needle)
# On a miss, calls fail, which aborts the test run.
assert_contains() {
  local haystack="$1"
  local needle="$2"
  # The needle is quoted inside the pattern so it is matched literally,
  # not as a glob.
  if [[ "${haystack}" != *"${needle}"* ]]; then
    fail "expected output to contain: ${needle}"
  fi
}
|
||||
|
||||
# Assert that a string does NOT contain a literal substring.
# Arguments: $1 - text to search (haystack)
#            $2 - literal substring that must be absent (needle)
# On a hit, calls fail, which aborts the test run.
assert_not_contains() {
  local haystack="$1"
  local needle="$2"
  # The needle is quoted inside the pattern so it is matched literally,
  # not as a glob.
  if [[ "${haystack}" == *"${needle}"* ]]; then
    fail "expected output to not contain: ${needle}"
  fi
}
|
||||
|
||||
# Discard everything left behind by the previous run_validate_capture call.
# Globals: RUN_TMP_DIR (read, reset) - scratch directory, removed if present
#          RUN_OUTPUT, RUN_STATUS, RUN_STATE_DIR (reset)
# Safe to call repeatedly and before any run has happened.
cleanup_run() {
  if [[ -n "${RUN_TMP_DIR}" && -d "${RUN_TMP_DIR}" ]]; then
    # `--` stops option parsing and `:?` refuses an empty path, so this can
    # never degrade into removing an unintended location.
    rm -rf -- "${RUN_TMP_DIR:?}"
  fi
  RUN_OUTPUT=""
  RUN_STATUS=0
  RUN_TMP_DIR=""
  RUN_STATE_DIR=""
}
|
||||
|
||||
# Install fake `curl` and `sleep` executables for one test run.
# Arguments: $1 - scenario name; accepted for call-site symmetry only. The fake
#                 curl takes its scenario from FAKE_CURL_SCENARIO at run time.
#            $2 - scratch directory; receives bin/curl, bin/sleep and state/
# The fake curl reads two environment variables (both required):
#   FAKE_CURL_SCENARIO  - "bridge-timeout" or "retry-success"
#   FAKE_CURL_STATE_DIR - directory for per-endpoint call counters
# The fake sleep returns immediately so retry back-off costs no wall time.
create_fake_tools() {
  local tmp_dir="$2"

  mkdir -p "${tmp_dir}/bin" "${tmp_dir}/state"

  cat >"${tmp_dir}/bin/curl" <<'EOF'
#!/usr/bin/env bash
set -euo pipefail

state_dir="${FAKE_CURL_STATE_DIR:?}"
scenario="${FAKE_CURL_SCENARIO:?}"

# The URL is always the last argument in the calls made by validate-deploy.sh.
url="${@: -1}"
data=""
write_out=""

# Pick up the values of the flags that matter; every other flag is ignored.
# The `:-` default keeps a trailing flag with no value from tripping `set -u`.
for ((i = 1; i <= $#; i += 1)); do
  arg="${!i}"
  case "${arg}" in
    --data)
      next_index=$((i + 1))
      data="${!next_index:-}"
      ;;
    --write-out)
      next_index=$((i + 1))
      write_out="${!next_index:-}"
      ;;
  esac
done

# Path of the counter file for a named endpoint.
counter_file() {
  printf '%s/%s.count\n' "${state_dir}" "$1"
}

# Current call count for a named endpoint (0 when never called).
read_count() {
  local file
  file="$(counter_file "$1")"
  if [[ -f "${file}" ]]; then
    cat "${file}"
    return
  fi
  printf '0\n'
}

# Increment, persist and print the call count for a named endpoint.
bump_count() {
  local name="$1"
  local file value
  file="$(counter_file "${name}")"
  value=$(( $(read_count "${name}") + 1 ))
  printf '%s\n' "${value}" >"${file}"
  printf '%s\n' "${value}"
}

# Healthy /api/ping payload whose image/tag/commit/version match the release.
print_release_ping() {
  printf '{"status":"ok","image":"ghcr.io/x-evor/xworkmate-bridge:425a38f1e8076899400d4a858d4678dffd876afb","tag":"425a38f1e8076899400d4a858d4678dffd876afb","commit":"425a38f1e8076899400d4a858d4678dffd876afb","version":"425a38f1e8076899400d4a858d4678dffd876afb"}\n'
}

# Simulated curl transport timeout (stderr only).
print_timeout() {
  printf 'curl: (28) Operation timed out after 20001 milliseconds with 0 bytes received\n' >&2
}

# Status-code probes (--write-out) always see HTTP 200, whatever the scenario.
if [[ -n "${write_out}" ]]; then
  printf '200'
  exit 0
fi

case "${scenario}" in
  bridge-timeout)
    # Everything is healthy except the bridge RPC endpoint, which times out.
    case "${url}" in
      https://xworkmate-bridge.svc.plus/api/ping)
        print_release_ping
        ;;
      https://xworkmate-bridge.svc.plus/)
        printf 'xworkmate-bridge is running\n'
        ;;
      https://acp-server.svc.plus/*/acp/rpc)
        printf '{"jsonrpc":"2.0","result":{"providers":["ok"]}}\n'
        ;;
      https://xworkmate-bridge.svc.plus/acp/rpc)
        print_timeout
        exit 1
        ;;
      *)
        printf 'unexpected url in bridge-timeout scenario: %s\n' "${url}" >&2
        exit 1
        ;;
    esac
    ;;
  retry-success)
    # The ping endpoint times out twice, then succeeds on the third call.
    case "${url}" in
      https://xworkmate-bridge.svc.plus/api/ping)
        ping_attempt="$(bump_count ping)"
        if (( ping_attempt < 3 )); then
          print_timeout
          exit 1
        fi
        print_release_ping
        ;;
      https://xworkmate-bridge.svc.plus/)
        printf 'xworkmate-bridge is running\n'
        ;;
      https://acp-server.svc.plus/*/acp/rpc)
        printf '{"jsonrpc":"2.0","result":{"providers":["ok"]}}\n'
        ;;
      https://xworkmate-bridge.svc.plus/acp/rpc)
        printf '{"jsonrpc":"2.0","result":{"success":true,"output":"pong"}}\n'
        ;;
      *)
        printf 'unexpected url in retry-success scenario: %s\n' "${url}" >&2
        exit 1
        ;;
    esac
    ;;
  *)
    printf 'unsupported fake curl scenario: %s\n' "${scenario}" >&2
    exit 1
    ;;
esac
EOF

  cat >"${tmp_dir}/bin/sleep" <<'EOF'
#!/usr/bin/env bash
set -euo pipefail
exit 0
EOF

  chmod +x "${tmp_dir}/bin/curl" "${tmp_dir}/bin/sleep"
}
|
||||
|
||||
# Run validate-deploy.sh once against the fake tools and record the outcome.
# Arguments: $1 - fake curl scenario name
# Globals:   SCRIPT_PATH, IMAGE_REF (read)
#            RUN_TMP_DIR, RUN_STATE_DIR, RUN_OUTPUT, RUN_STATUS (written)
# The previous run's scratch directory is discarded first. A failing script is
# an expected outcome here, so its status is stored rather than propagated.
run_validate_capture() {
  local scenario="$1"
  cleanup_run

  RUN_TMP_DIR="$(mktemp -d)"
  RUN_STATE_DIR="${RUN_TMP_DIR}/state"
  create_fake_tools "${scenario}" "${RUN_TMP_DIR}"

  # Capture the status through `||` instead of toggling errexit: a non-zero
  # exit from the script must not abort this test runner.
  RUN_STATUS=0
  RUN_OUTPUT="$(
    PATH="${RUN_TMP_DIR}/bin:${PATH}" \
    FAKE_CURL_SCENARIO="${scenario}" \
    FAKE_CURL_STATE_DIR="${RUN_STATE_DIR}" \
    BRIDGE_SERVER_URL="https://xworkmate-bridge.svc.plus" \
    OPENCLAW_URL="wss://openclaw.svc.plus" \
    CODEX_RPC_URL="https://acp-server.svc.plus/codex/acp/rpc" \
    OPENCODE_RPC_URL="https://acp-server.svc.plus/opencode/acp/rpc" \
    GEMINI_RPC_URL="https://acp-server.svc.plus/gemini/acp/rpc" \
    INTERNAL_SERVICE_TOKEN="test-token" \
    bash "${SCRIPT_PATH}" "${IMAGE_REF}" 2>&1
  )" || RUN_STATUS=$?
}
|
||||
|
||||
# Scenario "bridge-timeout": the bridge RPC endpoint times out.
# Expects validate-deploy.sh to exit non-zero, to report the failed bridge RPC
# request, and not to print a Python JSONDecodeError for the missing response.
test_bridge_timeout_stops_without_json_decode_noise() {
  run_validate_capture "bridge-timeout"

  if [[ "${RUN_STATUS}" -eq 0 ]]; then
    fail "expected bridge-timeout scenario to fail"
  fi

  assert_contains "${RUN_OUTPUT}" "bridge rpc https://xworkmate-bridge.svc.plus/acp/rpc request failed"
  assert_not_contains "${RUN_OUTPUT}" "JSONDecodeError"
  cleanup_run
}
|
||||
|
||||
# Scenario "retry-success": /api/ping times out twice before succeeding.
# Expects validate-deploy.sh to exit 0 and the fake curl's ping counter to
# show exactly three calls. On an unexpected failure the captured output is
# echoed to stderr to help diagnosis.
test_ping_retry_reaches_successful_release_validation() {
  # Kept local so the counter value does not leak into the test runner's globals.
  local ping_attempts

  run_validate_capture "retry-success"

  if [[ "${RUN_STATUS}" -ne 0 ]]; then
    printf '%s\n' "${RUN_OUTPUT}" >&2
    fail "expected retry-success scenario to pass"
  fi

  if [[ ! -f "${RUN_STATE_DIR}/ping.count" ]]; then
    fail "expected ping retry counter to be recorded"
  fi

  ping_attempts="$(tr -d '\n' <"${RUN_STATE_DIR}/ping.count")"
  if [[ "${ping_attempts}" != "3" ]]; then
    fail "expected ping to succeed on third attempt, got ${ping_attempts}"
  fi

  cleanup_run
}
|
||||
|
||||
# fail() exits immediately, before the failing test reaches its own cleanup_run
# call; the EXIT trap removes the scratch directory on that path as well.
trap cleanup_run EXIT

test_bridge_timeout_stops_without_json_decode_noise
test_ping_retry_reaches_successful_release_validation

printf 'validate-deploy regression tests passed\n'
|
||||
@ -2,6 +2,10 @@
|
||||
set -euo pipefail
|
||||
|
||||
IMAGE_REF="${1:?image_ref is required}"
|
||||
RETRYABLE_TRANSPORT=10
|
||||
RETRYABLE_NOT_READY=11
|
||||
FAST_HTTP_TIMEOUT_SECONDS=20
|
||||
BRIDGE_RPC_TIMEOUT_SECONDS=130
|
||||
|
||||
normalize_url() {
|
||||
local value="$1"
|
||||
@ -55,12 +59,20 @@ OPENCODE_RPC_URL="$(normalize_url "${OPENCODE_RPC_URL:-${5:-https://acp-server.s
|
||||
GEMINI_RPC_URL="$(normalize_url "${GEMINI_RPC_URL:-${6:-https://acp-server.svc.plus/gemini/acp/rpc}}")"
|
||||
AUTH_TOKEN="${BRIDGE_AUTH_TOKEN:-${INTERNAL_SERVICE_TOKEN:-${7:-}}}"
|
||||
|
||||
curl_common=(
|
||||
fast_http_curl_common=(
|
||||
--silent
|
||||
--show-error
|
||||
--fail
|
||||
--location
|
||||
--max-time 20
|
||||
--max-time "${FAST_HTTP_TIMEOUT_SECONDS}"
|
||||
)
|
||||
|
||||
bridge_rpc_curl_common=(
|
||||
--silent
|
||||
--show-error
|
||||
--fail
|
||||
--location
|
||||
--max-time "${BRIDGE_RPC_TIMEOUT_SECONDS}"
|
||||
)
|
||||
|
||||
auth_headers=()
|
||||
@ -68,6 +80,8 @@ if [[ -n "${AUTH_TOKEN}" ]]; then
|
||||
auth_headers+=(-H "Authorization: Bearer ${AUTH_TOKEN}")
|
||||
fi
|
||||
|
||||
# Use explicit assignment guards so transport failures are not swallowed inside
|
||||
# nested command substitutions when bash runs without inherit_errexit.
|
||||
capture_http_response() {
|
||||
local label="$1"
|
||||
shift
|
||||
@ -75,18 +89,63 @@ capture_http_response() {
|
||||
local response
|
||||
if ! response="$(curl "$@" 2>&1)"; then
|
||||
printf '%s request failed: %s\n' "${label}" "${response}" >&2
|
||||
return 1
|
||||
return "${RETRYABLE_TRANSPORT}"
|
||||
fi
|
||||
|
||||
if [[ -z "${response}" ]]; then
|
||||
printf '%s request returned an empty response\n' "${label}" >&2
|
||||
return 1
|
||||
return "${RETRYABLE_TRANSPORT}"
|
||||
fi
|
||||
|
||||
printf '%s\n' "${response}"
|
||||
}
|
||||
|
||||
probe_jsonrpc_capabilities() {
|
||||
# Decide whether an exit status is on a retry allow-list.
# Arguments: $1 - exit status to test
#            $2 - comma-separated list of retryable statuses, e.g. "10,11"
# Returns:   0 if $1 equals one of the listed statuses, 1 otherwise
#            (including when the list is empty).
should_retry_exit_code() {
  local exit_code="$1"
  local allowed="$2"
  local candidate
  # Declared local so the scratch array does not leak into the caller's scope.
  local -a candidates=()

  # Nothing is retryable with an empty list; returning early also avoids
  # expanding an empty array under `set -u` on older bash releases.
  if [[ -z "${allowed}" ]]; then
    return 1
  fi

  IFS=',' read -r -a candidates <<<"${allowed}"
  for candidate in "${candidates[@]}"; do
    # Tolerate lists written with spaces after the commas ("10, 11").
    candidate="${candidate//[[:space:]]/}"
    if [[ "${exit_code}" == "${candidate}" ]]; then
      return 0
    fi
  done

  return 1
}
|
||||
|
||||
# Run a command, retrying it while it fails with a retryable exit status.
# Arguments: $1 - label used in the retry notice
#            $2 - maximum number of attempts (positive integer)
#            $3 - seconds to sleep between attempts
#            $4 - comma-separated retryable exit statuses (see
#                 should_retry_exit_code)
#            $5.. - command and arguments to run
# Outputs:   one "<label> attempt i/n failed; retrying in <s>s" line on stderr
#            before each retry.
# Returns:   0 as soon as the command succeeds; otherwise the command's exit
#            status from the final attempt or from the first non-retryable
#            failure. Returns 1 without running anything when $2 is not a
#            positive integer.
run_with_retry() {
  local label="$1"
  local attempts="$2"
  local sleep_seconds="$3"
  local retryable_codes="$4"
  shift 4

  # Without this guard a zero or malformed count skips the loop entirely and
  # reports a bare failure without the command ever having run.
  if [[ ! "${attempts}" =~ ^[1-9][0-9]*$ ]]; then
    printf '%s: attempts must be a positive integer, got %s\n' \
      "${label}" "${attempts}" >&2
    return 1
  fi

  local attempt exit_code
  for ((attempt = 1; attempt <= attempts; attempt += 1)); do
    # Running the command as an `if` condition keeps `set -e` from aborting
    # the script on failure, so its exit status can be inspected.
    if "$@"; then
      return 0
    else
      exit_code=$?
    fi

    # Give up on the last attempt or on a status that is not retryable. The
    # loop always leaves through one of the returns, so nothing follows it.
    if (( attempt == attempts )) \
      || ! should_retry_exit_code "${exit_code}" "${retryable_codes}"; then
      return "${exit_code}"
    fi

    printf '%s attempt %d/%d failed; retrying in %ss\n' \
      "${label}" \
      "${attempt}" \
      "${attempts}" \
      "${sleep_seconds}" >&2
    sleep "${sleep_seconds}"
  done
}
|
||||
|
||||
probe_jsonrpc_capabilities_once() {
|
||||
local endpoint="$1"
|
||||
local response
|
||||
local headers=(
|
||||
@ -96,19 +155,28 @@ probe_jsonrpc_capabilities() {
|
||||
|
||||
headers+=("${auth_headers[@]}")
|
||||
|
||||
response="$(
|
||||
if response="$(
|
||||
capture_http_response "capabilities ${endpoint}" \
|
||||
"${curl_common[@]}" \
|
||||
"${fast_http_curl_common[@]}" \
|
||||
"${headers[@]}" \
|
||||
--data '{"jsonrpc":"2.0","id":"cap-1","method":"acp.capabilities"}' \
|
||||
"${endpoint}"
|
||||
)"
|
||||
)"; then
|
||||
:
|
||||
else
|
||||
local exit_code=$?
|
||||
return "${exit_code}"
|
||||
fi
|
||||
|
||||
RESPONSE_JSON="${response}" python3 - <<'PY'
|
||||
import json
|
||||
import os
|
||||
|
||||
payload = json.loads(os.environ["RESPONSE_JSON"])
|
||||
try:
|
||||
payload = json.loads(os.environ["RESPONSE_JSON"])
|
||||
except json.JSONDecodeError as exc:
|
||||
raise SystemExit(f"capabilities response returned invalid JSON: {exc}") from None
|
||||
|
||||
if payload.get("jsonrpc") != "2.0":
|
||||
raise SystemExit("capabilities response missing jsonrpc envelope")
|
||||
|
||||
@ -131,18 +199,23 @@ jsonrpc_bridge_call() {
|
||||
|
||||
headers+=("${auth_headers[@]}")
|
||||
|
||||
response="$(
|
||||
if response="$(
|
||||
capture_http_response "bridge rpc ${BASE_URL}/acp/rpc" \
|
||||
"${curl_common[@]}" \
|
||||
"${bridge_rpc_curl_common[@]}" \
|
||||
"${headers[@]}" \
|
||||
--data "${payload}" \
|
||||
"${BASE_URL}/acp/rpc"
|
||||
)"
|
||||
)"; then
|
||||
:
|
||||
else
|
||||
local exit_code=$?
|
||||
return "${exit_code}"
|
||||
fi
|
||||
|
||||
printf '%s\n' "${response}"
|
||||
}
|
||||
|
||||
probe_bridge_single_agent_smoke() {
|
||||
probe_bridge_single_agent_smoke_once() {
|
||||
local provider_id="$1"
|
||||
local request_id="smoke-${provider_id}-$(date +%s)"
|
||||
local session_id="validate-${provider_id}-$(date +%s)"
|
||||
@ -154,14 +227,22 @@ probe_bridge_single_agent_smoke() {
|
||||
JSON
|
||||
)"
|
||||
|
||||
response="$(jsonrpc_bridge_call "${payload}")"
|
||||
if response="$(jsonrpc_bridge_call "${payload}")"; then
|
||||
:
|
||||
else
|
||||
local exit_code=$?
|
||||
return "${exit_code}"
|
||||
fi
|
||||
|
||||
PROVIDER_ID="${provider_id}" RESPONSE_JSON="${response}" python3 - <<'PY'
|
||||
import json
|
||||
import os
|
||||
|
||||
provider = os.environ["PROVIDER_ID"]
|
||||
payload = json.loads(os.environ["RESPONSE_JSON"])
|
||||
try:
|
||||
payload = json.loads(os.environ["RESPONSE_JSON"])
|
||||
except json.JSONDecodeError as exc:
|
||||
raise SystemExit(f"{provider}: bridge rpc returned invalid JSON: {exc}") from None
|
||||
|
||||
if payload.get("jsonrpc") != "2.0":
|
||||
raise SystemExit(f"{provider}: missing jsonrpc envelope")
|
||||
@ -205,12 +286,12 @@ probe_safe_http_endpoint() {
|
||||
--output /dev/null \
|
||||
--write-out '%{http_code}' \
|
||||
--location \
|
||||
--max-time 20 \
|
||||
--max-time "${FAST_HTTP_TIMEOUT_SECONDS}" \
|
||||
"${auth_headers[@]}" \
|
||||
"${endpoint}" 2>&1
|
||||
)"; then
|
||||
printf 'HTTP probe failed for %s: %s\n' "${endpoint}" "${status}" >&2
|
||||
return 1
|
||||
return "${RETRYABLE_TRANSPORT}"
|
||||
fi
|
||||
|
||||
case "${status}" in
|
||||
@ -224,20 +305,30 @@ probe_safe_http_endpoint() {
|
||||
esac
|
||||
}
|
||||
|
||||
ping_json="$(
|
||||
capture_http_response "bridge ping ${BASE_URL}/api/ping" \
|
||||
"${curl_common[@]}" \
|
||||
"${auth_headers[@]}" \
|
||||
"${BASE_URL}/api/ping"
|
||||
)"
|
||||
wait_for_release_ping_once() {
|
||||
local ping_json
|
||||
|
||||
PING_JSON="${ping_json}" python3 - "${image_ref}" "${tag}" "${commit}" "${version}" <<'PY'
|
||||
if ping_json="$(
|
||||
capture_http_response "bridge ping ${BASE_URL}/api/ping" \
|
||||
"${fast_http_curl_common[@]}" \
|
||||
"${BASE_URL}/api/ping"
|
||||
)"; then
|
||||
:
|
||||
else
|
||||
local exit_code=$?
|
||||
return "${exit_code}"
|
||||
fi
|
||||
|
||||
if PING_JSON="${ping_json}" python3 - "${image_ref}" "${tag}" "${commit}" "${version}" <<'PY'
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
|
||||
image_ref, tag, commit, version = sys.argv[1:5]
|
||||
payload = json.loads(os.environ["PING_JSON"])
|
||||
try:
|
||||
payload = json.loads(os.environ["PING_JSON"])
|
||||
except json.JSONDecodeError as exc:
|
||||
raise SystemExit(f"bridge ping returned invalid JSON: {exc}") from None
|
||||
|
||||
if payload.get("status") != "ok":
|
||||
raise SystemExit("ping status not ok")
|
||||
@ -254,19 +345,37 @@ if commit and payload.get("commit") != commit:
|
||||
if version and payload.get("version") != version:
|
||||
raise SystemExit(f"expected version {version!r}, got {payload.get('version')!r}")
|
||||
PY
|
||||
then
|
||||
return 0
|
||||
fi
|
||||
|
||||
bridge_root="$(
|
||||
capture_http_response "bridge root ${BASE_URL}/" \
|
||||
"${curl_common[@]}" \
|
||||
"${auth_headers[@]}" \
|
||||
"${BASE_URL}/"
|
||||
)"
|
||||
grep -qi 'xworkmate-bridge' <<<"${bridge_root}"
|
||||
return "${RETRYABLE_NOT_READY}"
|
||||
}
|
||||
|
||||
# Check that the bridge's root page answers and identifies itself.
# Globals: BASE_URL, fast_http_curl_common (read)
# Returns: 0 when the body of ${BASE_URL}/ contains "xworkmate-bridge"
#          (case-insensitive); the status of capture_http_response when the
#          request itself fails; 1 when the body lacks the marker.
probe_bridge_root() {
  local bridge_root

  # Assignment guarded with `||` so a request failure is returned with its
  # original status instead of being swallowed by the command substitution.
  bridge_root="$(
    capture_http_response "bridge root ${BASE_URL}/" \
      "${fast_http_curl_common[@]}" \
      "${BASE_URL}/"
  )" || return $?

  if ! grep -qi 'xworkmate-bridge' <<<"${bridge_root}"; then
    # Say why the probe failed instead of exiting silently on grep's status.
    printf 'bridge root %s/ response does not mention xworkmate-bridge\n' \
      "${BASE_URL}" >&2
    return 1
  fi
}
|
||||
|
||||
run_with_retry "bridge ping ${BASE_URL}/api/ping" 6 5 "${RETRYABLE_TRANSPORT},${RETRYABLE_NOT_READY}" wait_for_release_ping_once
|
||||
probe_bridge_root
|
||||
|
||||
probe_safe_http_endpoint "${OPENCLAW_HTTP_PROBE_URL}"
|
||||
probe_jsonrpc_capabilities "${CODEX_RPC_URL}"
|
||||
probe_jsonrpc_capabilities "${OPENCODE_RPC_URL}"
|
||||
probe_jsonrpc_capabilities "${GEMINI_RPC_URL}"
|
||||
probe_bridge_single_agent_smoke "codex"
|
||||
probe_bridge_single_agent_smoke "opencode"
|
||||
probe_bridge_single_agent_smoke "gemini"
|
||||
run_with_retry "capabilities ${CODEX_RPC_URL}" 3 5 "${RETRYABLE_TRANSPORT}" probe_jsonrpc_capabilities_once "${CODEX_RPC_URL}"
|
||||
run_with_retry "capabilities ${OPENCODE_RPC_URL}" 3 5 "${RETRYABLE_TRANSPORT}" probe_jsonrpc_capabilities_once "${OPENCODE_RPC_URL}"
|
||||
run_with_retry "capabilities ${GEMINI_RPC_URL}" 3 5 "${RETRYABLE_TRANSPORT}" probe_jsonrpc_capabilities_once "${GEMINI_RPC_URL}"
|
||||
run_with_retry "bridge single-agent smoke codex" 3 10 "${RETRYABLE_TRANSPORT}" probe_bridge_single_agent_smoke_once "codex"
|
||||
run_with_retry "bridge single-agent smoke opencode" 3 10 "${RETRYABLE_TRANSPORT}" probe_bridge_single_agent_smoke_once "opencode"
|
||||
run_with_retry "bridge single-agent smoke gemini" 3 10 "${RETRYABLE_TRANSPORT}" probe_bridge_single_agent_smoke_once "gemini"
|
||||
|
||||
Loading…
Reference in New Issue
Block a user