fix: deploy bridge native service as ubuntu user

This commit is contained in:
Haitao Pan 2026-06-06 14:30:26 +08:00
parent 772f47a5fe
commit c19631fd9c
7 changed files with 269 additions and 32 deletions

View File

@ -62,7 +62,16 @@ The deploy stage checks out:
- this service repository into `xworkmate-bridge/`
- the `x-evor/playbooks` repository into `playbooks/`
Then it runs `playbooks/deploy_xworkmate_bridge_vhosts.yml`, which builds the service for `linux/amd64` and deploys it to the target host with Ansible.
Then it installs the native `linux/amd64` bridge binary with
`scripts/github-actions/deploy-native-binary.sh`. The native bridge runs as the
`ubuntu` user's systemd user service:
- binary: `/home/ubuntu/.local/bin/xworkmate-go-core`
- unit: `/home/ubuntu/.config/systemd/user/xworkmate-bridge.service`
- restart: `systemctl --user restart xworkmate-bridge.service`
During migration the script performs a one-time stop/disable of the old system
unit, then deploys and restarts through `ubuntu@<target>`.
### Validate stage

View File

@ -272,7 +272,7 @@ OpenClaw 的 `session.message` 复用同一 `sessionId` / `threadId`,继续提
## 8. 线上环境事实
以下为 2026-05-03 通过 `ssh root@xworkmate-bridge.svc.plus` 核对的部署事实。它们用于 bridge 运维和验证,不属于 APP contract。
以下为 2026-06-06 通过 `ssh ubuntu@xworkmate-bridge.svc.plus` 核对的部署事实。它们用于 bridge 运维和验证,不属于 APP contract。
### Caddy
@ -296,7 +296,7 @@ Authorization: Bearer $BRIDGE_AUTH_TOKEN
| Unit / Runtime | Listener | 说明 |
| --- | --- | --- |
| `xworkmate-bridge.service` | `127.0.0.1:8787` | Public bridge origin |
| `~/.config/systemd/user/xworkmate-bridge.service` | `127.0.0.1:8787` | Public bridge origin, runs as `ubuntu`, binary at `/home/ubuntu/.local/bin/xworkmate-go-core` |
| `acp-codex.service` | `127.0.0.1:9001` | Codex ACP backend |
| `acp-gemini.service` | `127.0.0.1:8791` | Gemini adapter |
| `acp-hermes.service` | `127.0.0.1:3920` | Hermes adapter |
@ -305,7 +305,7 @@ Authorization: Bearer $BRIDGE_AUTH_TOKEN
这些地址只允许 bridge 内部使用。APP 不保存、不展示、不请求这些地址。
验证时 `xworkmate-bridge`、`acp-codex`、`acp-gemini`、`acp-hermes`、`acp-opencode` 均为 `active`;`openclaw-gateway.service` 返回 `inactive`,但 `ss` 显示 `openclaw` 进程仍监听 `127.0.0.1:18789` 和 `[::1]:18789`。因此 APP contract 只记录 `openclaw` 作为 `gatewayProviders` 能力,不把 systemd unit 状态作为 APP 可见状态。
验证时 `systemctl --user status xworkmate-bridge.service` 为 `active`,系统级 `/etc/systemd/system/xworkmate-bridge.service` 为 `disabled/inactive`。`acp-codex`、`acp-gemini`、`acp-hermes`、`acp-opencode` 仍由现有本机 service 提供。APP contract 只记录 provider/gateway 能力,不把 systemd unit 类型作为 APP 可见状态。
## 9. 线上验证结果

View File

@ -79,6 +79,19 @@ func (s *Server) executeSessionTask(t task) (map[string]any, *shared.RPCError) {
}, t.notify)
}
// TestOpenClawTaskLookupParamsIncludesWorkspaceDir checks that
// openClawTaskLookupParams fills in "workspaceDir" when the incoming params
// do not carry one. The value expected here is "~/.openclaw/workspace".
func TestOpenClawTaskLookupParamsIncludesWorkspaceDir(t *testing.T) {
	// The request deliberately has no "workspaceDir" key.
	request := map[string]any{
		"appThreadKey":       "draft:sample-task",
		"openclawSessionKey": "agent:main:draft:sample-task",
		"runId":              "turn-sample",
		"includeArtifacts":   true,
	}
	params := openClawTaskLookupParams(request)

	const wantWorkspaceDir = "~/.openclaw/workspace"
	workspaceDir := shared.StringArg(params, "workspaceDir", "")
	if workspaceDir == wantWorkspaceDir {
		return
	}
	// Dump the whole params map so a failure shows every key that was copied.
	t.Fatalf("expected default OpenClaw workspaceDir, got %#v", params)
}
func newExternalSingleAgentProvider(
t *testing.T,
providerID string,

View File

@ -203,11 +203,17 @@ func openClawTaskLookupParams(params map[string]any) map[string]any {
"includeArtifacts",
"includeContent",
"expectedArtifactDirs",
"workspaceDir",
} {
if value, ok := params[key]; ok {
result[key] = value
}
}
if strings.TrimSpace(shared.StringArg(result, "workspaceDir", "")) == "" {
if workspaceDir := openClawArtifactWorkspaceDir(params); workspaceDir != "" {
result["workspaceDir"] = workspaceDir
}
}
return result
}

View File

@ -4,16 +4,31 @@ set -euo pipefail
TARGET_HOST="${1:?target host is required}"
BINARY_PATH="${2:?binary path is required}"
EXPECTED_COMMIT="${3:?expected short commit is required}"
DEPLOY_USER="${DEPLOY_USER:-ubuntu}"
REMOTE_TMP="/tmp/xworkmate-bridge-${EXPECTED_COMMIT}"
REMOTE_BINARY="${REMOTE_BINARY:-/usr/local/bin/xworkmate-go-core}"
STALE_DROPIN="/etc/systemd/system/xworkmate-bridge.service.d/10-hotfix-openclaw-artifacts.conf"
SERVICE_NAME="xworkmate-bridge.service"
REMOTE_BINARY="${REMOTE_BINARY:-/home/${DEPLOY_USER}/.local/bin/xworkmate-go-core}"
REMOTE_WORKING_DIR="${REMOTE_WORKING_DIR:-/opt/cloud-neutral/xworkmate-bridge}"
BRIDGE_CONFIG_PATH="${BRIDGE_CONFIG_PATH:-/opt/cloud-neutral/xworkmate-bridge/config.yaml}"
SERVICE_NAME="${SERVICE_NAME:-xworkmate-bridge.service}"
SERVICE_LISTEN_ADDR="${SERVICE_LISTEN_ADDR:-127.0.0.1:8787}"
USER_SYSTEMD_DIR="${USER_SYSTEMD_DIR:-/home/${DEPLOY_USER}/.config/systemd/user}"
SYSTEM_SERVICE_NAME="${SYSTEM_SERVICE_NAME:-xworkmate-bridge.service}"
MIGRATE_SYSTEM_SERVICE="${MIGRATE_SYSTEM_SERVICE:-true}"
SYSTEM_MIGRATION_USER="${SYSTEM_MIGRATION_USER:-root}"
STALE_DROPIN="${STALE_DROPIN:-/etc/systemd/system/xworkmate-bridge.service.d/10-hotfix-openclaw-artifacts.conf}"
DEPLOY_NATIVE_SKIP_PROC_CHECK="${DEPLOY_NATIVE_SKIP_PROC_CHECK:-false}"
if [[ ! "${TARGET_HOST}" =~ ^[A-Za-z0-9._-]+$ ]]; then
echo "invalid target host: ${TARGET_HOST}" >&2
exit 1
fi
if [[ ! "${DEPLOY_USER}" =~ ^[A-Za-z0-9._-]+$ ]]; then
echo "invalid deploy user: ${DEPLOY_USER}" >&2
exit 1
fi
if [[ ! "${EXPECTED_COMMIT}" =~ ^[0-9a-f]{7,40}$ ]]; then
echo "invalid expected commit: ${EXPECTED_COMMIT}" >&2
exit 1
@ -24,34 +39,51 @@ if [[ ! -f "${BINARY_PATH}" ]]; then
exit 1
fi
chmod +x "${BINARY_PATH}"
scp -q "${BINARY_PATH}" "root@${TARGET_HOST}:${REMOTE_TMP}"
ssh "root@${TARGET_HOST}" "EXPECTED_COMMIT='${EXPECTED_COMMIT}' REMOTE_TMP='${REMOTE_TMP}' REMOTE_BINARY='${REMOTE_BINARY}' STALE_DROPIN='${STALE_DROPIN}' SERVICE_NAME='${SERVICE_NAME}' bash -s" <<'REMOTE'
set -euo pipefail
had_immutable=0
restore_immutable() {
if [[ "${had_immutable}" == "1" ]] && command -v chattr >/dev/null 2>&1 && [[ -e "${REMOTE_BINARY}" ]]; then
chattr +i "${REMOTE_BINARY}" 2>/dev/null || true
fi
# escape_systemd_env VALUE
#
# Prints VALUE escaped so it can be placed inside a double-quoted systemd
# assignment of the form Environment="KEY=VALUE":
#   - backslashes and double quotes are backslash-escaped so the quoted string
#     parses back to the original value;
#   - "%" is doubled, because systemd performs specifier expansion on
#     Environment= values and a bare "%x" sequence would otherwise be rewritten
#     (or rejected) when the unit is loaded.
# Requires python3 on the machine that runs this function.
escape_systemd_env() {
  python3 - "$1" <<'PY'
import sys
value = sys.argv[1]
print(value.replace("\\", "\\\\").replace('"', '\\"').replace("%", "%%"))
PY
}
trap restore_immutable EXIT
if command -v lsattr >/dev/null 2>&1 && [[ -e "${REMOTE_BINARY}" ]]; then
attrs="$(lsattr "${REMOTE_BINARY}" 2>/dev/null || true)"
if [[ "${attrs}" == *i* ]]; then
had_immutable=1
chattr -i "${REMOTE_BINARY}"
fi
AUTH_TOKEN_LINE=""
if [[ -n "${BRIDGE_AUTH_TOKEN:-}" ]]; then
AUTH_TOKEN_LINE="Environment=\"BRIDGE_AUTH_TOKEN=$(escape_systemd_env "${BRIDGE_AUTH_TOKEN}")\""
fi
install -o root -g root -m 0755 "${REMOTE_TMP}" "${REMOTE_BINARY}"
REVIEW_TOKEN_LINE=""
if [[ -n "${BRIDGE_REVIEW_AUTH_TOKEN:-}" ]]; then
REVIEW_TOKEN_LINE="Environment=\"BRIDGE_REVIEW_AUTH_TOKEN=$(escape_systemd_env "${BRIDGE_REVIEW_AUTH_TOKEN}")\""
fi
UNIT_ENV_LINES_B64="$(printf '%s\n%s\n' "${AUTH_TOKEN_LINE}" "${REVIEW_TOKEN_LINE}" | base64 | tr -d '\n')"
restore_immutable
chmod +x "${BINARY_PATH}"
# One-time migration away from the old system-level unit. Skipped unless
# MIGRATE_SYSTEM_SERVICE is exactly "true". Runs over SSH as
# SYSTEM_MIGRATION_USER and, on the target host:
#   - when `systemctl list-unit-files` succeeds for SYSTEM_SERVICE_NAME, tries
#     `disable --now`, falls back to a plain `stop`, and ignores failure of both;
#   - removes the STALE_DROPIN file and its parent directory if that directory
#     is then empty;
#   - reloads the system manager so the removed drop-in no longer applies.
# NOTE(review): a `systemctl --user` service normally only keeps running
# without an open login session when lingering is enabled for the deploy user;
# nothing here runs `loginctl enable-linger` -- confirm the host has it set.
if [[ "${MIGRATE_SYSTEM_SERVICE}" == "true" ]]; then
ssh "${SYSTEM_MIGRATION_USER}@${TARGET_HOST}" \
"SYSTEM_SERVICE_NAME='${SYSTEM_SERVICE_NAME}' STALE_DROPIN='${STALE_DROPIN}' bash -s" <<'REMOTE_ROOT'
set -euo pipefail
if systemctl list-unit-files "${SYSTEM_SERVICE_NAME}" >/dev/null 2>&1; then
systemctl disable --now "${SYSTEM_SERVICE_NAME}" >/dev/null 2>&1 || systemctl stop "${SYSTEM_SERVICE_NAME}" >/dev/null 2>&1 || true
fi
rm -f "${STALE_DROPIN}"
rmdir --ignore-fail-on-non-empty "$(dirname "${STALE_DROPIN}")" 2>/dev/null || true
systemctl daemon-reload
REMOTE_ROOT
fi
scp -q "${BINARY_PATH}" "${DEPLOY_USER}@${TARGET_HOST}:${REMOTE_TMP}"
ssh "${DEPLOY_USER}@${TARGET_HOST}" \
"EXPECTED_COMMIT='${EXPECTED_COMMIT}' REMOTE_TMP='${REMOTE_TMP}' REMOTE_BINARY='${REMOTE_BINARY}' REMOTE_WORKING_DIR='${REMOTE_WORKING_DIR}' BRIDGE_CONFIG_PATH='${BRIDGE_CONFIG_PATH}' SERVICE_NAME='${SERVICE_NAME}' SERVICE_LISTEN_ADDR='${SERVICE_LISTEN_ADDR}' USER_SYSTEMD_DIR='${USER_SYSTEMD_DIR}' SYSTEM_SERVICE_NAME='${SYSTEM_SERVICE_NAME}' UNIT_ENV_LINES_B64='${UNIT_ENV_LINES_B64}' DEPLOY_NATIVE_SKIP_PROC_CHECK='${DEPLOY_NATIVE_SKIP_PROC_CHECK}' bash -s" <<'REMOTE'
set -euo pipefail
mkdir -p "$(dirname "${REMOTE_BINARY}")" "${USER_SYSTEMD_DIR}"
install -m 0755 "${REMOTE_TMP}" "${REMOTE_BINARY}"
rm -f "${REMOTE_TMP}"
version_json="$("${REMOTE_BINARY}" version)"
actual_commit="$(VERSION_JSON="${version_json}" python3 - <<'PY'
@ -66,14 +98,73 @@ if [[ "${actual_commit}" != "${EXPECTED_COMMIT}" ]]; then
exit 1
fi
systemctl daemon-reload
systemctl restart "${SERVICE_NAME}"
# Environment= lines prepared by the caller arrive base64-encoded in
# UNIT_ENV_LINES_B64; decode them and drop empty lines.
unit_env_lines="$(printf '%s' "${UNIT_ENV_LINES_B64}" | base64 -d | sed '/^$/d')"

# Environment of an already-installed unit, used to carry tokens forward when
# the caller did not supply them. The user unit is queried first, then the
# system unit; only the first non-empty result is kept.
existing_env="$(
  {
    systemctl --user show -p Environment --value "${SERVICE_NAME}" 2>/dev/null || true
    systemctl show -p Environment --value "${SYSTEM_SERVICE_NAME}" 2>/dev/null || true
  } | sed '/^$/d' | head -n 1
)"

# Merge: caller-supplied lines win; BRIDGE_AUTH_TOKEN / BRIDGE_REVIEW_AUTH_TOKEN
# found in the existing environment are appended only when not already present.
# Migrated values are re-escaped for a double-quoted Environment= assignment,
# including doubling "%" so systemd's specifier expansion does not alter them
# when the unit is loaded again.
unit_env_lines="$(
  UNIT_ENV_LINES="${unit_env_lines}" EXISTING_ENV="${existing_env}" python3 - <<'PY'
import os
import shlex

TOKEN_KEYS = {"BRIDGE_AUTH_TOKEN", "BRIDGE_REVIEW_AUTH_TOKEN"}

lines = [line for line in os.environ.get("UNIT_ENV_LINES", "").splitlines() if line.strip()]
present = set()
for line in lines:
    prefix = 'Environment="'
    if line.startswith(prefix):
        key = line[len(prefix):].split("=", 1)[0]
        present.add(key)
for item in shlex.split(os.environ.get("EXISTING_ENV", "")):
    key, sep, value = item.partition("=")
    if sep and key in TOKEN_KEYS and key not in present:
        escaped = value.replace("\\", "\\\\").replace('"', '\\"').replace("%", "%%")
        lines.append(f'Environment="{key}={escaped}"')
        present.add(key)
print("\n".join(lines))
PY
)"
# Write the systemd user unit. unit_env_lines may contain Environment= lines
# carrying auth tokens, so the file is created under a restrictive umask and
# forced to 0600 afterwards (an existing file keeps its old mode on truncation).
unit_path="${USER_SYSTEMD_DIR}/${SERVICE_NAME}"
(
  umask 077
  cat >"${unit_path}" <<UNIT
[Unit]
Description=XWorkmate bridge control plane
After=network-online.target
Wants=network-online.target
[Service]
Type=simple
WorkingDirectory=${REMOTE_WORKING_DIR}
Environment="HOME=/home/${USER}"
Environment="TERM=xterm-256color"
Environment="BRIDGE_CONFIG_PATH=${BRIDGE_CONFIG_PATH}"
${unit_env_lines}
ExecStart=${REMOTE_BINARY} serve --listen ${SERVICE_LISTEN_ADDR}
Restart=always
RestartSec=2
[Install]
WantedBy=default.target
UNIT
)
chmod 0600 "${unit_path}"

# Make the user manager pick up the new unit and enable it.
systemctl --user daemon-reload
systemctl --user enable "${SERVICE_NAME}" >/dev/null

# Refuse to start the user service while the system-level unit is still
# active; the message asks the operator to disable it first.
if systemctl is-active --quiet "${SYSTEM_SERVICE_NAME}" 2>/dev/null; then
  echo "${SYSTEM_SERVICE_NAME} is still active as a system service; disable it before starting the user service" >&2
  exit 1
fi
systemctl --user restart "${SERVICE_NAME}"
deadline=$((SECONDS + 20))
actual_exe=""
pid=""
while (( SECONDS < deadline )); do
pid="$(systemctl show -p MainPID --value "${SERVICE_NAME}")"
if [[ -n "${pid}" && "${pid}" != "0" && -e "/proc/${pid}/exe" ]]; then
pid="$(systemctl --user show -p MainPID --value "${SERVICE_NAME}")"
if [[ -n "${pid}" && "${pid}" != "0" ]] && [[ "${DEPLOY_NATIVE_SKIP_PROC_CHECK}" == "true" || -e "/proc/${pid}/exe" ]]; then
actual_exe="$(readlink -f "/proc/${pid}/exe" 2>/dev/null || true)"
if [[ "${actual_exe}" == "${REMOTE_BINARY}" ]]; then
exit 0
@ -82,7 +173,7 @@ while (( SECONDS < deadline )); do
sleep 1
done
if [[ -z "${pid}" || "${pid}" == "0" ]]; then
echo "${SERVICE_NAME} did not start" >&2
echo "${SERVICE_NAME} did not start as a user service" >&2
exit 1
fi
echo "${SERVICE_NAME} is not running ${REMOTE_BINARY}; pid=${pid}; actual=${actual_exe:-unknown}" >&2

View File

@ -5,3 +5,4 @@ go mod download
go mod verify
golangci-lint run ./...
go test ./...
bash scripts/github-actions/test-deploy-native-binary.sh

View File

@ -0,0 +1,117 @@
#!/usr/bin/env bash
set -euo pipefail
ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"
SCRIPT_PATH="${ROOT_DIR}/scripts/github-actions/deploy-native-binary.sh"
EXPECTED_COMMIT="425a38f"
# fail MESSAGE...
# Writes "FAIL: <all arguments joined>" to stderr and exits the script with
# status 1.
fail() {
  local message="$*"
  printf 'FAIL: %s\n' "${message}" 1>&2
  exit 1
}
# assert_contains HAYSTACK NEEDLE
# Succeeds when NEEDLE occurs literally somewhere in HAYSTACK; otherwise
# reports the missing text through fail.
assert_contains() {
  local text="$1"
  local expected="$2"
  case "${text}" in
    *"${expected}"*)
      return 0
      ;;
  esac
  fail "expected output to contain: ${expected}"
}
# Scratch directory holding the fake commands and the fake "remote" filesystem;
# it is removed when the script exits.
tmp_dir="$(mktemp -d)"
trap 'rm -rf "${tmp_dir}"' EXIT
mkdir -p "${tmp_dir}/bin" "${tmp_dir}/remote/tmp" "${tmp_dir}/remote/opt/cloud-neutral/xworkmate-bridge"
# Stand-in for the bridge binary: `version` prints JSON whose commit is
# 425a38f; any other invocation just sleeps.
fake_binary="${tmp_dir}/xworkmate-bridge"
cat >"${fake_binary}" <<'EOF'
#!/usr/bin/env bash
set -euo pipefail
if [[ "${1:-}" == "version" ]]; then
printf '{"commit":"425a38f","version":"test"}\n'
exit 0
fi
sleep 3600
EOF
chmod +x "${fake_binary}"
# Fake scp: takes source and destination from $2 and $3 (so it expects one
# leading flag in $1), records the destination in FAKE_DEPLOY_LOG, and copies
# the source to the local path that follows the first ':' in the destination.
cat >"${tmp_dir}/bin/scp" <<'EOF'
#!/usr/bin/env bash
set -euo pipefail
src="$2"
dest="$3"
printf 'scp %s\n' "${dest}" >>"${FAKE_DEPLOY_LOG}"
dest_path="${dest#*:}"
cp "${src}" "${dest_path}"
EOF
# Fake ssh: records the target in FAKE_DEPLOY_LOG and runs the remaining
# arguments as a local `bash -c` command, so "remote" commands execute on this
# machine with this script's PATH and environment.
cat >"${tmp_dir}/bin/ssh" <<'EOF'
#!/usr/bin/env bash
set -euo pipefail
target="$1"
shift
printf 'ssh %s\n' "${target}" >>"${FAKE_DEPLOY_LOG}"
bash -c "$*"
EOF
# Fake systemctl: records every invocation; `is-active` exits 1, `--user show`
# prints "1" (for any property), everything else succeeds silently.
cat >"${tmp_dir}/bin/systemctl" <<'EOF'
#!/usr/bin/env bash
set -euo pipefail
printf 'systemctl %s\n' "$*" >>"${FAKE_DEPLOY_LOG}"
if [[ "${1:-}" == "is-active" ]]; then
exit 1
fi
if [[ "${1:-}" == "--user" && "${2:-}" == "show" ]]; then
printf '1\n'
exit 0
fi
exit 0
EOF
# Fake readlink: `-f /proc/<pid>/exe` prints REMOTE_BINARY from the
# environment; every other call is delegated to /usr/bin/readlink.
cat >"${tmp_dir}/bin/readlink" <<'EOF'
#!/usr/bin/env bash
set -euo pipefail
if [[ "${1:-}" == "-f" && "${2:-}" == /proc/*/exe ]]; then
printf '%s\n' "${REMOTE_BINARY}"
exit 0
fi
/usr/bin/readlink "$@"
EOF
# Fake sleep: returns immediately so wait loops do not slow the test down.
cat >"${tmp_dir}/bin/sleep" <<'EOF'
#!/usr/bin/env bash
set -euo pipefail
exit 0
EOF
chmod +x "${tmp_dir}/bin/"*
# Run the deploy script against the fake commands. Everything the script can
# read from the environment and that the assertions below depend on is pinned
# here so an ambient CI/developer environment cannot change the outcome:
#   - DEPLOY_USER / SYSTEM_MIGRATION_USER / MIGRATE_SYSTEM_SERVICE match the
#     "ubuntu@" and "root@" expectations below;
#   - STALE_DROPIN points inside the scratch directory, because the fake ssh
#     executes the migration step locally and would otherwise `rm -f` the real
#     /etc/systemd/system drop-in path on the machine running the test;
#   - USER gets a fallback, because the generated unit expands ${USER} under
#     `set -u` and USER is not always set (for example in minimal containers).
log_file="${tmp_dir}/deploy.log"
PATH="${tmp_dir}/bin:${PATH}" \
FAKE_DEPLOY_LOG="${log_file}" \
USER="${USER:-ubuntu}" \
DEPLOY_USER="ubuntu" \
SYSTEM_MIGRATION_USER="root" \
MIGRATE_SYSTEM_SERVICE="true" \
STALE_DROPIN="${tmp_dir}/remote/etc/systemd/system/xworkmate-bridge.service.d/10-hotfix-openclaw-artifacts.conf" \
REMOTE_TMP="${tmp_dir}/remote/tmp/xworkmate-bridge-${EXPECTED_COMMIT}" \
REMOTE_BINARY="${tmp_dir}/remote/home/ubuntu/.local/bin/xworkmate-go-core" \
REMOTE_WORKING_DIR="${tmp_dir}/remote/opt/cloud-neutral/xworkmate-bridge" \
BRIDGE_CONFIG_PATH="${tmp_dir}/remote/opt/cloud-neutral/xworkmate-bridge/config.yaml" \
USER_SYSTEMD_DIR="${tmp_dir}/remote/home/ubuntu/.config/systemd/user" \
DEPLOY_NATIVE_SKIP_PROC_CHECK=true \
BRIDGE_AUTH_TOKEN="test-token" \
bash "${SCRIPT_PATH}" "example.test" "${fake_binary}" "${EXPECTED_COMMIT}"

# The command log must show the root migration hop, the upload and deploy hop
# as the ubuntu user, and a restart through the user manager.
log_output="$(cat "${log_file}")"
assert_contains "${log_output}" "ssh root@example.test"
assert_contains "${log_output}" "scp ubuntu@example.test:"
assert_contains "${log_output}" "ssh ubuntu@example.test"
assert_contains "${log_output}" "systemctl --user restart xworkmate-bridge.service"

# The user unit must exist and contain the expected ExecStart, token, and
# install target.
unit_file="${tmp_dir}/remote/home/ubuntu/.config/systemd/user/xworkmate-bridge.service"
if [[ ! -f "${unit_file}" ]]; then
  fail "expected user service unit to be written"
fi
unit_output="$(cat "${unit_file}")"
assert_contains "${unit_output}" "ExecStart=${tmp_dir}/remote/home/ubuntu/.local/bin/xworkmate-go-core serve --listen 127.0.0.1:8787"
assert_contains "${unit_output}" 'Environment="BRIDGE_AUTH_TOKEN=test-token"'
assert_contains "${unit_output}" "WantedBy=default.target"
printf 'deploy-native-binary regression tests passed\n'