vault-action ignoreNotFound only suppresses path-level 404, not missing keys within an existing path. Removing the key from vault-action secrets list avoids the 'No match data was found' error when the key is absent. Token is now sourced exclusively from the ai_workspace_auth_token workflow_dispatch input. To use Vault as the default source, store the token there and pass it via the input at dispatch time, or wire a dedicated vault read step when the key is guaranteed to exist. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
487 lines
22 KiB
YAML
487 lines
22 KiB
YAML
name: Deploy AI Workspace (IaC + Ansible + Cloudflare)
|
||
|
||
# =============================================================================
|
||
# IaC ↔ Ansible 动态 inventory 联动的最终部署流水线(矩阵模式)
|
||
#
|
||
# ── TLDR 前置条件(首次运行必须全部就绪)────────────────────────────────────
|
||
#
|
||
# 1. Vault JWT auth(一次性,已存在)
|
||
# - auth/jwt 挂载,oidc_discovery_url = https://token.actions.githubusercontent.com
|
||
#
|
||
# 2. Vault role + policy(已创建)
|
||
# - policy: github-actions-xworkspace-console(读 kv/CICD + kv/openclaw)
|
||
# - role: github-actions-xworkspace-console(JWT,bound 本仓库 OIDC)
|
||
# → 创建/校验命令见 docs/operations/vault-github-actions.md §2
|
||
#
|
||
# 3. Vault KV 必填键(vault kv patch kv/CICD ... / vault kv patch kv/openclaw ...)
|
||
# kv/CICD:
|
||
# VULTR_API_KEY → Vultr 账号 API key(provision 创主机)
|
||
# SSH_PRIVATE_DEPLOY_KEY_B64 → 部署 SSH 私钥 base64(deploy 登录主机,优先)
|
||
# SSH_PRIVATE_DEPLOY_KEY → 同上原始多行格式(回退,二选一必填)
|
||
# CLOUDFLARE_DNS_API_TOKEN → CF Zone DNS Edit token(dns 同步)
|
||
# CLOUDFLARE_API_TOKEN → 兼容旧名;DNS job 优先使用 CLOUDFLARE_DNS_API_TOKEN
|
||
# kv/openclaw:
|
||
# DEEPSEEK_API_KEY → LLM provider key(deploy 注入主机)
|
||
# NVIDIA_API_KEY → 同上
|
||
# OLLAMA_API_KEY → 同上
|
||
#
|
||
# 4. Vault KV 必填键(远端 S3 兼容 state 后端强制启用,缺失即 fail-fast;
|
||
# 不再回退本地 state,确保 destroy 不丢 state)
|
||
# kv/CICD:
|
||
# TF_STATE_ENDPOINT → S3 兼容对象存储 API URL(如 https://<acct>.r2.cloudflarestorage.com)
|
||
# TF_STATE_BUCKET → bucket 名(如 ai-workspace-tfstate)
|
||
# TF_STATE_ACCESS_KEY / TF_STATE_SECRET_KEY → 对象存储凭据
|
||
# TF_STATE_REGION → 地域(Cloudflare R2 必须填 auto;Vultr 填 us-east-1)
|
||
# → 对象存储搭建指南见 docs/operations/iac-prerequisites.md §3
|
||
#
|
||
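# 5. ai-workspace-infra repositories (iac_modules + playbooks)
#    - Intended optional credential: kv/CICD.CODEX_GITHUB_PERSONAL_ACCESS_TOKEN,
#      meant for checking out iac_modules + playbooks when they are private.
#    - NOTE: the checkout steps in this workflow do not currently pass a `token`,
#      so that key is not read here; without it actions/checkout relies on
#      public access (the repositories must be public).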
|
||
#
|
||
# 6. SSH 公钥注入 infra hosts.yaml
|
||
# - SSH_PRIVATE_DEPLOY_KEY 对应的公钥须写入
|
||
# ai-workspace-infra/vultr-vps/config/resources/ai-workspace-hosts.yaml
|
||
# 的 ssh_keys[].public,否则 Terraform 创机后 runner 无法 SSH 登录。
|
||
#
|
||
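# 7. AI_WORKSPACE_AUTH_TOKEN (LiteLLM auth token)
#    - Used as LITELLM_MASTER_KEY by the OpenCode ACP adapter.
#    - Supplied ONLY through the `ai_workspace_auth_token` workflow_dispatch input.
#      This workflow no longer reads it from Vault: vault-action's ignoreNotFound
#      only suppresses path-level 404s, so a missing key inside an existing path
#      would fail the job.
#    - You may still keep the value in Vault
#      (vault kv patch kv/CICD AI_WORKSPACE_AUTH_TOKEN=<your-token>) and paste it
#      into the input at dispatch time.
#    - TLDR generate one: python3 -c 'import uuid; print(uuid.uuid4())'
#    - The deploy job forwards the input to the on-host bootstrap as
#      AI_WORKSPACE_AUTH_TOKEN, which is expected to feed the ansible role
#      variable acp_opencode_auth_token.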
|
||
#
|
||
# ── 流水线结构 ───────────────────────────────────────────────────────────────
|
||
#
|
||
# provision : 批量起机模式(开关:terraform_action=apply / run_deploy)。
|
||
# 用 vultr-vps/envs/ai-workspace 创建主机(Python+Jinja2 渲染显式
|
||
# HCL,无 for_each),导出 cmdb.json + inventory.ini,并据此动态
|
||
# 生成下游部署矩阵。
|
||
# deploy : 矩阵按主机并行,ssh 到主机本地跑官方引导(curl|bash → host 内部
|
||
# ansible -c local,自动离线包加速)。与用户 self-host 同一路径;
|
||
# 不在 runner 远程跑 all-in-one(会撞 agent_skills delegate_to localhost)。
|
||
# dns : 部署完成后,依据 inventory 的 service_domains/IP 同步 Cloudflare DNS。
|
||
#
|
||
# 数据契约 cmdb.json 由 ai-workspace-infra 的 generate.py 产出,贯穿三个 job。
|
||
#
|
||
# 密钥管理:不使用 GitHub Actions Secrets,统一从 HashiCorp Vault
|
||
# (https://vault.svc.plus) KV 安全获取,认证走 GitHub OIDC(JWT,无静态 token)。
|
||
# - Vault 角色: github-actions-xworkspace-console (jwt auth, audience=vault)
|
||
# - KV 路径: kv/data/CICD(共享 CICD 机密) + kv/data/openclaw(LLM keys)
|
||
# - 详细说明: docs/operations/vault-github-actions.md
|
||
# docs/operations/iac-prerequisites.md
|
||
# =============================================================================
|
||
|
||
# Manual trigger only. Every input is optional and falls back to its default.
on:
  workflow_dispatch:
    inputs:
      # Git ref used for both ai-workspace-infra checkouts.
      infra_ref:
        description: "ai-workspace-infra git ref (iac_modules + playbooks)"
        required: false
        default: "main"
        type: string
      # Optional override; forwarded to the bootstrap as XWORKMATE_BRIDGE_DOMAIN.
      bridge_domain:
        description: "XWORKMATE_BRIDGE_DOMAIN 覆盖(留空则取各主机 CMDB service_domains)"
        required: false
        default: ""
        type: string
      # Forwarded to the bootstrap as AI_WORKSPACE_OFFLINE_MODE.
      offline_mode:
        description: "on-host 离线包模式: off=在线拉最新 main(默认,离线包落后时用); auto=离线加速; force=强制离线"
        required: false
        default: "off"
        type: choice
        options: ["off", "auto", "force"]
      # Selects the terraform sub-command run by the provision job.
      terraform_action:
        description: "apply 创建/更新,destroy 销毁"
        required: false
        default: "apply"
        type: choice
        options: [apply, destroy]
      run_deploy:
        description: "provision 后是否执行 on-host 引导部署"
        required: false
        default: true
        type: boolean
      run_dns:
        description: "部署后是否同步 Cloudflare DNS"
        required: false
        default: true
        type: boolean
      # The three use_* toggles decide whether the matching provider key is
      # validated and forwarded to the host by the deploy job.
      use_deepseek:
        description: "是否接入 DeepSeek API key"
        required: false
        default: true
        type: boolean
      use_nvidia:
        description: "是否接入 NVIDIA API key"
        required: false
        default: true
        type: boolean
      use_ollama:
        description: "是否接入 Ollama API key"
        required: false
        default: true
        type: boolean
      # This input is the only source of the token: the deploy job passes it
      # straight through and no Vault read of AI_WORKSPACE_AUTH_TOKEN exists in
      # this workflow, so the description must not promise a Vault fallback.
      ai_workspace_auth_token:
        description: "AI Workspace auth token(仅取自此输入,本 workflow 不再从 Vault 读取;留空则向引导脚本传空值;生成: python3 -c 'import uuid; print(uuid.uuid4())')"
        required: false
        default: ""
        type: string
|
||
|
||
# `id-token: write` lets the job request a GitHub OIDC JWT for Vault login;
# `contents: read` is needed to check out code.
permissions:
  contents: read
  id-token: write

# One run at a time for this pipeline; a newer run queues instead of
# cancelling the one in flight.
concurrency:
  group: deploy-ai-workspace-iac
  cancel-in-progress: false

env:
  VAULT_ADDR: "https://vault.svc.plus"
  VAULT_ROLE: github-actions-xworkspace-console
  # Shared CICD secrets path (KV v2 read paths include `data/`).
  # Key names are listed in docs/operations/vault-github-actions.md
  VAULT_KV: kv/data/CICD
  # LLM provider keys live under the openclaw path.
  VAULT_KV_OPENCLAW: kv/data/openclaw
  # VPS_ROOT is the vultr-vps root (shared scripts/ templates/ config/);
  # ENV_DIR is the directory terraform runs in.
  VPS_ROOT: infra/iac_modules/terraform-hcl-standard/vultr-vps
  ENV_DIR: infra/iac_modules/terraform-hcl-standard/vultr-vps/envs/ai-workspace
  PLAYBOOKS_DIR: infra/playbooks
|
||
|
||
jobs:
|
||
# ---------------------------------------------------------------------------
|
||
# Provision job: loads secrets from Vault, renders the terraform inputs, runs
# terraform apply/destroy, and (apply only) exports cmdb.json + inventory.ini
# plus the host matrix consumed by the deploy and dns jobs.
provision:
  name: Provision (terraform + render CMDB)
  runs-on: ubuntu-latest
  outputs:
    # The `matrix` step is skipped on destroy, which would leave both outputs
    # as empty strings. Downstream jobs compare count against '0' and feed
    # hosts into fromJSON(), so fall back to an explicit empty matrix.
    hosts: ${{ steps.matrix.outputs.hosts || '[]' }}
    count: ${{ steps.matrix.outputs.count || '0' }}
  steps:
    - name: Load Vault secrets (OIDC)
      id: vault
      uses: hashicorp/vault-action@v4
      with:
        url: ${{ env.VAULT_ADDR }}
        method: jwt
        role: ${{ env.VAULT_ROLE }}
        jwtGithubAudience: vault
        ignoreNotFound: true
        # NOTE(review): the two CLOUDFLARE_* entries are not referenced by any
        # check or command visible in this job. They are kept because later
        # steps might read them from the environment -- confirm, then drop
        # them so a missing legacy key cannot fail provisioning.
        secrets: |
          ${{ env.VAULT_KV }} VULTR_API_KEY | VULTR_API_KEY ;
          ${{ env.VAULT_KV }} TF_STATE_ENDPOINT | TF_STATE_ENDPOINT ;
          ${{ env.VAULT_KV }} TF_STATE_BUCKET | TF_STATE_BUCKET ;
          ${{ env.VAULT_KV }} TF_STATE_ACCESS_KEY | TF_STATE_ACCESS_KEY ;
          ${{ env.VAULT_KV }} TF_STATE_SECRET_KEY | TF_STATE_SECRET_KEY ;
          ${{ env.VAULT_KV }} TF_STATE_REGION | TF_STATE_REGION ;
          ${{ env.VAULT_KV }} CLOUDFLARE_DNS_API_TOKEN | CLOUDFLARE_DNS_API_TOKEN ;
          ${{ env.VAULT_KV }} CLOUDFLARE_API_TOKEN | CLOUDFLARE_API_TOKEN

    - name: Validate required secrets
      env:
        VULTR_API_KEY: ${{ steps.vault.outputs.VULTR_API_KEY }}
        TF_STATE_ENDPOINT: ${{ steps.vault.outputs.TF_STATE_ENDPOINT }}
        TF_STATE_BUCKET: ${{ steps.vault.outputs.TF_STATE_BUCKET }}
        TF_STATE_ACCESS_KEY: ${{ steps.vault.outputs.TF_STATE_ACCESS_KEY }}
        TF_STATE_SECRET_KEY: ${{ steps.vault.outputs.TF_STATE_SECRET_KEY }}
        TF_STATE_REGION: ${{ steps.vault.outputs.TF_STATE_REGION }}
        CLOUDFLARE_DNS_API_TOKEN: ${{ steps.vault.outputs.CLOUDFLARE_DNS_API_TOKEN }}
        CLOUDFLARE_API_TOKEN: ${{ steps.vault.outputs.CLOUDFLARE_API_TOKEN }}
      run: |
        set -euo pipefail
        # Check that every REQUIRED secret is non-empty (values are never
        # printed). The remote S3-compatible state backend is mandatory;
        # there is no fallback to local state.
        missing=0
        if [ -z "${VULTR_API_KEY:-}" ]; then
          echo "::error::缺少必需机密 VULTR_API_KEY (Vault: ${VAULT_KV}/VULTR_API_KEY)"
          missing=1
        fi
        for k in TF_STATE_ENDPOINT TF_STATE_BUCKET TF_STATE_ACCESS_KEY TF_STATE_SECRET_KEY TF_STATE_REGION; do
          # Bash indirect expansion: look up the variable named by $k without
          # eval, so the secret value is never re-parsed by the shell or by echo.
          if [ -z "${!k:-}" ]; then
            echo "::error::缺少必需机密 $k (Vault: ${VAULT_KV}/$k) —— 远端 S3 state 后端为强制要求"
            missing=1
          fi
        done
        [ "$missing" -eq 0 ] || { echo "::error::必需机密缺失,终止 provision"; exit 1; }

    - name: Checkout iac_modules
      uses: actions/checkout@v7
      with:
        repository: ai-workspace-infra/iac_modules
        ref: ${{ github.event.inputs.infra_ref || 'main' }}
        path: infra/iac_modules

    - name: Checkout playbooks
      uses: actions/checkout@v7
      with:
        repository: ai-workspace-infra/playbooks
        ref: ${{ github.event.inputs.infra_ref || 'main' }}
        path: infra/playbooks

    - uses: hashicorp/setup-terraform@v3
      with:
        terraform_version: "1.9.8"

    - uses: actions/setup-python@v6
      with:
        python-version: "3.12"

    - name: Install render deps
      run: pip install --quiet pyyaml jinja2

    - name: Configure remote backend (S3-compatible, required)
      working-directory: ${{ env.ENV_DIR }}
      env:
        TF_STATE_ENDPOINT: ${{ steps.vault.outputs.TF_STATE_ENDPOINT }}
        TF_STATE_REGION: ${{ steps.vault.outputs.TF_STATE_REGION }}
      # Writes backend.tf into ENV_DIR; the path is quoted in case the
      # workspace path contains spaces.
      run: python3 "${GITHUB_WORKSPACE}/${{ env.VPS_ROOT }}/scripts/render_backend_tf.py" backend.tf

    - name: generate.py render (YAML -> 显式 HCL + tfvars)
      working-directory: ${{ env.VPS_ROOT }}
      run: python3 scripts/generate.py render

    - name: Terraform init
      working-directory: ${{ env.ENV_DIR }}
      env:
        AWS_ACCESS_KEY_ID: ${{ steps.vault.outputs.TF_STATE_ACCESS_KEY }}
        AWS_SECRET_ACCESS_KEY: ${{ steps.vault.outputs.TF_STATE_SECRET_KEY }}
        TF_STATE_ENDPOINT: ${{ steps.vault.outputs.TF_STATE_ENDPOINT }}
        TF_STATE_BUCKET: ${{ steps.vault.outputs.TF_STATE_BUCKET }}
        TF_STATE_REGION: ${{ steps.vault.outputs.TF_STATE_REGION }}
      run: |
        set -euo pipefail
        # The remote S3-compatible backend is mandatory (backend.tf was
        # rendered by the previous step); a missing bucket fails the job
        # instead of silently falling back to local state.
        if [ -z "${TF_STATE_BUCKET}" ]; then
          echo "::error::TF_STATE_BUCKET 为空 —— 远端 state 后端为强制要求,终止"
          exit 1
        fi
        terraform init -input=false \
          -backend-config="bucket=${TF_STATE_BUCKET}" \
          -backend-config="key=ai-workspace/terraform.tfstate" \
          -backend-config="region=${TF_STATE_REGION}"

    - name: Terraform ${{ github.event.inputs.terraform_action || 'apply' }}
      working-directory: ${{ env.ENV_DIR }}
      env:
        AWS_ACCESS_KEY_ID: ${{ steps.vault.outputs.TF_STATE_ACCESS_KEY }}
        AWS_SECRET_ACCESS_KEY: ${{ steps.vault.outputs.TF_STATE_SECRET_KEY }}
        TF_VAR_vultr_api_key: ${{ steps.vault.outputs.VULTR_API_KEY }}
        # Passed through env rather than interpolated into the script text,
        # so the input is never parsed as shell code.
        TF_ACTION: ${{ github.event.inputs.terraform_action || 'apply' }}
      run: |
        set -euo pipefail
        # Allow-list the sub-command; only the two documented choices run.
        case "${TF_ACTION}" in
          apply|destroy) ;;
          *) echo "::error::unsupported terraform_action: ${TF_ACTION}"; exit 1 ;;
        esac
        terraform "${TF_ACTION}" -auto-approve -input=false

    # The remaining steps only make sense after an apply; on destroy there is
    # no host inventory to export.
    - name: generate.py inventory (terraform output + YAML -> cmdb.json + inventory.ini)
      if: ${{ (github.event.inputs.terraform_action || 'apply') == 'apply' }}
      working-directory: ${{ env.VPS_ROOT }}
      env:
        AWS_ACCESS_KEY_ID: ${{ steps.vault.outputs.TF_STATE_ACCESS_KEY }}
        AWS_SECRET_ACCESS_KEY: ${{ steps.vault.outputs.TF_STATE_SECRET_KEY }}
      run: python3 scripts/generate.py inventory

    - name: Build deploy matrix from cmdb.json
      id: matrix
      if: ${{ (github.event.inputs.terraform_action || 'apply') == 'apply' }}
      working-directory: ${{ env.ENV_DIR }}
      run: |
        set -euo pipefail
        # hosts = JSON array of the top-level keys of cmdb.json;
        # count = number of top-level entries.
        hosts="$(jq -c 'keys' cmdb.json)"
        echo "hosts=${hosts}" >> "$GITHUB_OUTPUT"
        echo "count=$(jq 'length' cmdb.json)" >> "$GITHUB_OUTPUT"
        echo "matrix hosts: ${hosts}"

    - name: Upload CMDB + inventory artifact
      if: ${{ (github.event.inputs.terraform_action || 'apply') == 'apply' }}
      uses: actions/upload-artifact@v7
      with:
        name: ai-workspace-cmdb
        path: |
          ${{ env.ENV_DIR }}/cmdb.json
          ${{ env.ENV_DIR }}/inventory.ini
        if-no-files-found: error
|
||
|
||
# ---------------------------------------------------------------------------
|
||
# Deploy job: one matrix leg per host from the provision output. Each leg
# SSHes to its host and runs the bootstrap script there.
deploy:
  name: Deploy ${{ matrix.host }} (on-host bootstrap)
  needs: provision
  # An empty count (e.g. the matrix step was skipped on terraform destroy)
  # must skip this job too; otherwise fromJSON('') below fails the run.
  if: ${{ needs.provision.outputs.count != '' && needs.provision.outputs.count != '0' && (github.event.inputs.run_deploy == 'true' || github.event.inputs.run_deploy == null) }}
  runs-on: ubuntu-latest
  strategy:
    fail-fast: false
    matrix:
      host: ${{ fromJSON(needs.provision.outputs.hosts) }}
  steps:
    # all-in-one is designed to execute locally on the target host (the host
    # runs ansible-playbook -c local and picks up the offline bundle
    # automatically). Running it remotely from the runner collides with the
    # `delegate_to: localhost` in roles/agent_skills (it would write to the
    # runner's /root), so this job SSHes to the host and runs the official
    # bootstrap script there -- the same path as a self-hosted curl|bash.

    # workflow_dispatch inputs are not masked automatically. Register the
    # token as a secret before any later step can echo it. The value is read
    # from the event payload so it never appears in this step's script text.
    - name: Mask auth token input in logs
      run: |
        set -euo pipefail
        token="$(jq -r '.inputs.ai_workspace_auth_token // empty' "${GITHUB_EVENT_PATH}")"
        if [ -n "${token}" ]; then
          echo "::add-mask::${token}"
        fi

    - name: Load Vault secrets (OIDC)
      id: vault
      uses: hashicorp/vault-action@v4
      with:
        url: ${{ env.VAULT_ADDR }}
        method: jwt
        role: ${{ env.VAULT_ROLE }}
        jwtGithubAudience: vault
        ignoreNotFound: true
        # NOTE(review): ignoreNotFound reportedly suppresses only path-level
        # 404s. If just one of the two SSH key fields exists in kv/CICD this
        # step may fail before the either-or validation below -- confirm.
        secrets: |
          ${{ env.VAULT_KV }} SSH_PRIVATE_DEPLOY_KEY | ANSIBLE_SSH_KEY ;
          ${{ env.VAULT_KV }} SSH_PRIVATE_DEPLOY_KEY_B64 | ANSIBLE_SSH_KEY_B64 ;
          ${{ env.VAULT_KV_OPENCLAW }} DEEPSEEK_API_KEY | DEEPSEEK_API_KEY ;
          ${{ env.VAULT_KV_OPENCLAW }} NVIDIA_API_KEY | NVIDIA_API_KEY ;
          ${{ env.VAULT_KV_OPENCLAW }} OLLAMA_API_KEY | OLLAMA_API_KEY

    - name: Report provider key wiring
      run: |
        set -euo pipefail
        echo "DeepSeek: ${{ github.event.inputs.use_deepseek == 'false' && 'skipped' || 'enabled' }}"
        echo "NVIDIA: ${{ github.event.inputs.use_nvidia == 'false' && 'skipped' || 'enabled' }}"
        echo "Ollama: ${{ github.event.inputs.use_ollama == 'false' && 'skipped' || 'enabled' }}"

    - name: Validate required secrets
      env:
        ANSIBLE_SSH_KEY: ${{ steps.vault.outputs.ANSIBLE_SSH_KEY }}
        ANSIBLE_SSH_KEY_B64: ${{ steps.vault.outputs.ANSIBLE_SSH_KEY_B64 }}
        DEEPSEEK_API_KEY: ${{ github.event.inputs.use_deepseek == 'false' && '' || steps.vault.outputs.DEEPSEEK_API_KEY }}
        NVIDIA_API_KEY: ${{ github.event.inputs.use_nvidia == 'false' && '' || steps.vault.outputs.NVIDIA_API_KEY }}
        OLLAMA_API_KEY: ${{ github.event.inputs.use_ollama == 'false' && '' || steps.vault.outputs.OLLAMA_API_KEY }}
        # Provider toggles reach the script as data, not as inlined text.
        USE_DEEPSEEK: ${{ github.event.inputs.use_deepseek || 'true' }}
        USE_NVIDIA: ${{ github.event.inputs.use_nvidia || 'true' }}
        USE_OLLAMA: ${{ github.event.inputs.use_ollama || 'true' }}
      run: |
        set -euo pipefail
        # Only check that REQUIRED secrets are non-empty (values are never
        # printed).
        missing=0
        # SSH private key: at least one of the base64 or raw form must be set.
        if [ -z "${ANSIBLE_SSH_KEY_B64:-}" ] && [ -z "${ANSIBLE_SSH_KEY:-}" ]; then
          echo "::error::缺少必需机密 SSH 私钥 (Vault: ${VAULT_KV}/SSH_PRIVATE_DEPLOY_KEY_B64 或 ${VAULT_KV}/SSH_PRIVATE_DEPLOY_KEY,至少一个)"
          missing=1
        fi
        # A provider key is required only while its toggle is enabled.
        if [ "${USE_DEEPSEEK}" = "true" ] && [ -z "${DEEPSEEK_API_KEY:-}" ]; then
          echo "::error::缺少必需机密 DEEPSEEK_API_KEY (Vault: ${VAULT_KV_OPENCLAW}/DEEPSEEK_API_KEY)"
          missing=1
        fi
        if [ "${USE_NVIDIA}" = "true" ] && [ -z "${NVIDIA_API_KEY:-}" ]; then
          echo "::error::缺少必需机密 NVIDIA_API_KEY (Vault: ${VAULT_KV_OPENCLAW}/NVIDIA_API_KEY)"
          missing=1
        fi
        if [ "${USE_OLLAMA}" = "true" ] && [ -z "${OLLAMA_API_KEY:-}" ]; then
          echo "::error::缺少必需机密 OLLAMA_API_KEY (Vault: ${VAULT_KV_OPENCLAW}/OLLAMA_API_KEY)"
          missing=1
        fi
        [ "$missing" -eq 0 ] || { echo "::error::必需机密缺失,终止 deploy"; exit 1; }

    - name: Checkout xworkspace-console helpers
      uses: actions/checkout@v7

    - name: Download CMDB (host IP source)
      uses: actions/download-artifact@v8
      with:
        name: ai-workspace-cmdb
        path: cmdb

    - name: Configure SSH (prefer base64 key, fall back to raw)
      env:
        ANSIBLE_SSH_KEY: ${{ steps.vault.outputs.ANSIBLE_SSH_KEY }}
        ANSIBLE_SSH_KEY_B64: ${{ steps.vault.outputs.ANSIBLE_SSH_KEY_B64 }}
      run: |
        set -euo pipefail
        # Create the key file private from the start rather than tightening
        # its permissions after the secret has already been written.
        umask 077
        mkdir -p ~/.ssh
        # Historical convention: decode the single-line *_B64 form first and
        # fall back to the raw multi-line key, which avoids the
        # "Load key ... error in libcrypto" failure seen with multi-line
        # private keys on GitHub Actions.
        if [ -n "${ANSIBLE_SSH_KEY_B64:-}" ]; then
          printf '%s' "${ANSIBLE_SSH_KEY_B64}" | base64 -d > ~/.ssh/id_deploy
        elif [ -n "${ANSIBLE_SSH_KEY:-}" ]; then
          printf '%s\n' "${ANSIBLE_SSH_KEY}" > ~/.ssh/id_deploy
        else
          echo "::error::Vault 未提供 ANSIBLE_SSH_KEY[_B64]"; exit 1
        fi
        chmod 600 ~/.ssh/id_deploy
        # Fails the step if the file is not a parseable private key.
        ssh-keygen -y -f ~/.ssh/id_deploy >/dev/null

    - name: Wait for host SSH
      env:
        MATRIX_HOST: ${{ matrix.host }}
      run: |
        set -euo pipefail
        # The host name is handed to jq as a variable instead of being
        # spliced into the jq program / shell text.
        ip="$(jq -r --arg host "${MATRIX_HOST}" '.[$host].ip // empty' cmdb/cmdb.json)"
        if [ -z "${ip}" ]; then
          echo "::error::No IP found for ${MATRIX_HOST} in cmdb/cmdb.json"
          exit 1
        fi
        echo "Waiting for ${MATRIX_HOST} (${ip}:22) ..."
        # Up to 60 probes, 10 s apart, each with a 5 s connect timeout.
        for _ in $(seq 1 60); do
          if nc -z -w 5 "$ip" 22; then echo "SSH up"; exit 0; fi
          sleep 10
        done
        echo "::error::Timed out waiting for ${ip}:22"; exit 1

    - name: Run on-host bootstrap (curl | bash, local-mode install)
      env:
        MATRIX_HOST: ${{ matrix.host }}
        CMDB_PATH: cmdb/cmdb.json
        # When the offline bundle lags behind main, online mode pulls the
        # latest playbook (see run-on-host-bootstrap.sh). Set to `auto` to
        # restore offline acceleration once the bundle is republished.
        AI_WORKSPACE_OFFLINE_MODE: ${{ github.event.inputs.offline_mode || 'off' }}
        XWORKMATE_BRIDGE_DOMAIN: ${{ github.event.inputs.bridge_domain }}
        AI_WORKSPACE_AUTH_TOKEN: ${{ github.event.inputs.ai_workspace_auth_token }}
        DEEPSEEK_API_KEY: ${{ github.event.inputs.use_deepseek == 'false' && '' || steps.vault.outputs.DEEPSEEK_API_KEY }}
        NVIDIA_API_KEY: ${{ github.event.inputs.use_nvidia == 'false' && '' || steps.vault.outputs.NVIDIA_API_KEY }}
        OLLAMA_API_KEY: ${{ github.event.inputs.use_ollama == 'false' && '' || steps.vault.outputs.OLLAMA_API_KEY }}
      run: |
        set -euo pipefail
        # `env:` values are not tilde-expanded, so a literal "~/.ssh/..."
        # would only work if the script expanded it itself. Export an
        # absolute path to the key written by the Configure SSH step.
        export SSH_KEY_PATH="${HOME}/.ssh/id_deploy"
        bash scripts/run-on-host-bootstrap.sh
|
||
|
||
# ---------------------------------------------------------------------------
|
||
# DNS job: after deployment, reconciles Cloudflare A records for the hosts in
# the inventory produced by the provision job.
dns:
  name: Sync Cloudflare DNS
  # NOTE(review): because this job needs `deploy` and uses no status function,
  # it is skipped whenever deploy is skipped or fails -- including
  # run_deploy=false with run_dns=true. Confirm that is intended.
  needs: [provision, deploy]
  # An empty count (no matrix was built, e.g. on terraform destroy) is
  # treated the same as zero hosts.
  if: ${{ needs.provision.outputs.count != '' && needs.provision.outputs.count != '0' && (github.event.inputs.run_dns == 'true' || github.event.inputs.run_dns == null) }}
  runs-on: ubuntu-latest
  steps:
    - name: Load Vault secrets (OIDC)
      id: vault
      uses: hashicorp/vault-action@v4
      with:
        url: ${{ env.VAULT_ADDR }}
        method: jwt
        role: ${{ env.VAULT_ROLE }}
        jwtGithubAudience: vault
        ignoreNotFound: true
        secrets: |
          ${{ env.VAULT_KV }} CLOUDFLARE_DNS_API_TOKEN | CLOUDFLARE_DNS_API_TOKEN

    - name: Validate required secrets
      env:
        CLOUDFLARE_DNS_API_TOKEN: ${{ steps.vault.outputs.CLOUDFLARE_DNS_API_TOKEN }}
      run: |
        set -euo pipefail
        # Only check that the REQUIRED secret is non-empty (the value is
        # never printed).
        missing=0
        if [ -z "${CLOUDFLARE_DNS_API_TOKEN:-}" ]; then
          echo "::error::缺少必需机密 CLOUDFLARE_DNS_API_TOKEN (Vault: ${VAULT_KV}/CLOUDFLARE_DNS_API_TOKEN)"
          missing=1
        fi
        [ "$missing" -eq 0 ] || { echo "::error::必需机密缺失,终止 dns"; exit 1; }

    - name: Checkout playbooks
      uses: actions/checkout@v7
      with:
        repository: ai-workspace-infra/playbooks
        ref: ${{ github.event.inputs.infra_ref || 'main' }}
        path: infra/playbooks

    - name: Download CMDB + inventory
      uses: actions/download-artifact@v8
      with:
        name: ai-workspace-cmdb
        path: cmdb

    - uses: actions/setup-python@v6
      with:
        python-version: "3.12"

    - name: Install Ansible
      run: pip install --quiet ansible

    - name: Reconcile Cloudflare DNS from inventory
      working-directory: ${{ env.PLAYBOOKS_DIR }}
      env:
        CLOUDFLARE_DNS_API_TOKEN: ${{ steps.vault.outputs.CLOUDFLARE_DNS_API_TOKEN }}
      run: |
        set -euo pipefail
        # Sync A records only for hosts in the ai_workspace group created by
        # this run (names come from each host's service_domains hostvar,
        # content from its public IP); the static-record list is passed empty
        # so other records are left untouched.
        ansible-playbook \
          -i "${GITHUB_WORKSPACE}/cmdb/inventory.ini" \
          update_cloudflare_dns.yml \
          -e '{"cloudflare_dns_source_hosts":["ai_workspace"],"cloudflare_dns_static_records":[]}'
|