xworkspace-console/scripts/run-on-host-bootstrap.sh
Haitao Pan 5a76c5ed06 fix(deploy): on-host bootstrap defaults to online mode (pull fixed main playbooks)
The deploy job ran curl|bash with no AI_WORKSPACE_OFFLINE_MODE -> auto -> stale
offline package, which still ships the pinned-Chrome / root-PGDATA playbooks that
were already fixed in playbooks main. Pipeline kept failing at the Chrome task.

- run-on-host-bootstrap.sh: thread AI_WORKSPACE_OFFLINE_MODE (default off) into the
  remote env so the bootstrap git-clones latest main instead of the stale package.
- workflow: add offline_mode input (off|auto|force, default off); flip back to auto
  once the offline package is republished with the fixes.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-06-25 22:47:14 +08:00

108 lines
3.7 KiB
Bash
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env bash
set -euo pipefail
cmdb_path=${CMDB_PATH:-cmdb/cmdb.json}
host=${MATRIX_HOST:?MATRIX_HOST is required}
ssh_key=${SSH_KEY_PATH:-"$HOME/.ssh/id_deploy"}
ssh_key="${ssh_key/#\~/$HOME}"
run_id=${GITHUB_RUN_ID:-manual}
ip="$(jq -r --arg host "$host" '.[$host].ip' "$cmdb_path")"
user="$(jq -r --arg host "$host" '.[$host].ansible_user // "root"' "$cmdb_path")"
domain="${XWORKMATE_BRIDGE_DOMAIN:-}"
if [ -z "$domain" ]; then
domain="$(jq -r --arg host "$host" '.[$host].host_vars.service_domains // ""' "$cmdb_path" | cut -d, -f1 | tr -d ' ')"
fi
if [ -z "$ip" ] || [ "$ip" = "null" ]; then
echo "::error::No IP found in ${cmdb_path} for ${host}" >&2
exit 1
fi
ssh_opts=(
-i "$ssh_key"
-o StrictHostKeyChecking=accept-new
-o ServerAliveInterval=30
-o ServerAliveCountMax=60
-o ConnectTimeout=20
-o BatchMode=yes
)
remote_dir="/tmp/xworkspace-bootstrap-${run_id}-${host//[^A-Za-z0-9_.-]/_}"
remote_env="${remote_dir}/env"
remote_log="${remote_dir}/bootstrap.log"
remote_rc="${remote_dir}/bootstrap.rc"
remote_runner="${remote_dir}/run.sh"
echo "Bootstrapping ${host} (${user}@${ip}) on-host, domain=${domain:-<none>} ..."
remote_payload="$(mktemp)"
trap 'rm -f "$remote_payload"' EXIT
# 离线包是按 release 快照打包的;当其落后于 playbooks main如 Chrome 版本钉点、
# postgres PGDATA 属主等已在 main 修复但未重新发包)时,默认 offline=auto 会用到
# 过期 playbook 导致部署失败。默认 off让 on-host 引导在线 git clone 最新 main
# 待离线包重新发布后可改回 auto 以恢复离线加速。
{
printf 'AI_WORKSPACE_OFFLINE_MODE=%q\n' "${AI_WORKSPACE_OFFLINE_MODE:-off}"
printf 'XWORKMATE_BRIDGE_DOMAIN=%q\n' "$domain"
printf 'DEEPSEEK_API_KEY=%q\n' "${DEEPSEEK_API_KEY:-}"
printf 'NVIDIA_API_KEY=%q\n' "${NVIDIA_API_KEY:-}"
printf 'OLLAMA_API_KEY=%q\n' "${OLLAMA_API_KEY:-}"
} > "$remote_payload"
ssh "${ssh_opts[@]}" "${user}@${ip}" "mkdir -p '$remote_dir' && chmod 700 '$remote_dir'"
scp "${ssh_opts[@]}" "$remote_payload" "${user}@${ip}:${remote_env}" >/dev/null
ssh "${ssh_opts[@]}" "${user}@${ip}" "chmod 600 '$remote_env'"
ssh "${ssh_opts[@]}" "${user}@${ip}" "cat > '$remote_runner' && chmod 700 '$remote_runner'" <<'REMOTE_SCRIPT'
#!/usr/bin/env bash
set -euo pipefail
remote_env=$1
remote_log=$2
remote_rc=$3
if [ -f "$remote_rc" ]; then
exit 0
fi
(
set +e
source "$remote_env"
export AI_WORKSPACE_OFFLINE_MODE XWORKMATE_BRIDGE_DOMAIN DEEPSEEK_API_KEY NVIDIA_API_KEY OLLAMA_API_KEY
bash -lc 'curl -sfL https://install.svc.plus/ai-workspace | bash -'
rc=$?
printf '%s\n' "$rc" > "$remote_rc"
exit "$rc"
) > "$remote_log" 2>&1 &
REMOTE_SCRIPT
ssh "${ssh_opts[@]}" "${user}@${ip}" "nohup '$remote_runner' '$remote_env' '$remote_log' '$remote_rc' >/dev/null 2>&1 &"
last_lines=0
while true; do
poll_output="$(ssh "${ssh_opts[@]}" "${user}@${ip}" "if [ -f '$remote_log' ]; then wc -l < '$remote_log'; else echo 0; fi; if [ -f '$remote_rc' ]; then cat '$remote_rc'; else echo RUNNING; fi" 2>/dev/null || true)"
line_count="$(printf '%s\n' "$poll_output" | sed -n '1p')"
rc_value="$(printf '%s\n' "$poll_output" | sed -n '2p')"
case "$line_count" in
''|*[!0-9]*) line_count=0 ;;
esac
if [ "$line_count" -gt "$last_lines" ]; then
start=$((last_lines + 1))
ssh "${ssh_opts[@]}" "${user}@${ip}" "sed -n '${start},${line_count}p' '$remote_log'" || true
last_lines="$line_count"
else
echo "[INFO] Bootstrap still running on ${host}; no new log lines."
fi
if [ "$rc_value" != "RUNNING" ] && [ -n "$rc_value" ]; then
if [ "$rc_value" = "0" ]; then
echo "[SUCCESS] Bootstrap completed on ${host}."
exit 0
fi
echo "::error::Bootstrap failed on ${host} with exit code ${rc_value}."
exit "$rc_value"
fi
sleep 20
done