Replace GitHub Actions Secrets with HashiCorp Vault (https://vault.svc.plus):

- permissions: id-token: write; auth via hashicorp/vault-action@v2 (method=jwt, role=github-actions-xworkspace-console, audience=vault) — no static token.
- Each job loads only the keys it needs from kv/data/github-actions/xworkspace-console (VULTR_API_KEY, INFRA_REPO_TOKEN, ANSIBLE_SSH_KEY, CLOUDFLARE_API_TOKEN, DEEPSEEK/NVIDIA/OLLAMA_API_KEY, optional TF_STATE_*).
- Backend gating now keys off the Vault output (steps.vault.outputs.TF_STATE_BUCKET).
- Drop unused 'playbook' input (deploy is on-host bootstrap).

Pattern mirrors xworkmate-app/.github/workflows/build-and-release.yml.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
323 lines
13 KiB
YAML
323 lines
13 KiB
YAML
name: "Deploy AI Workspace (IaC + Ansible + Cloudflare)"

# =============================================================================
# Final deployment pipeline that links IaC to an Ansible dynamic inventory
# (matrix mode).
#
#   provision : Batch host-creation mode (switches: terraform_action=apply /
#               run_deploy). Creates the hosts from vultr-vps/envs/ai-workspace
#               (explicit HCL rendered with Python + Jinja2, no for_each),
#               exports cmdb.json + inventory.ini, and dynamically builds the
#               downstream deploy matrix from them.
#   deploy    : The matrix runs per host in parallel; each leg SSHes into its
#               host and runs the official bootstrap locally (curl|bash ->
#               ansible -c local inside the host, automatically accelerated by
#               the offline bundle). Same path as a user self-host; all-in-one
#               is NOT run remotely from the runner (it would collide with
#               agent_skills' delegate_to localhost).
#   dns       : After deployment, syncs Cloudflare DNS from the inventory's
#               service_domains / IPs.
#
# The data contract cmdb.json is produced by generate.py in ai-workspace-infra
# and is shared by all three jobs.
#
# Secret management: GitHub Actions Secrets are not used. Everything is fetched
# securely from the HashiCorp Vault (https://vault.svc.plus) KV store,
# authenticating with GitHub OIDC (JWT, no static token).
#   - Vault role: github-actions-xworkspace-console (jwt auth, audience=vault)
#   - KV path:    kv/data/github-actions/xworkspace-console
#   - Keys that must be written to that KV path:
#       VULTR_API_KEY         Vultr API key (-> TF_VAR_vultr_api_key)
#       INFRA_REPO_TOKEN      PAT able to read ai-workspace-infra
#                             (required when the repository is private)
#       ANSIBLE_SSH_KEY       SSH private key paired with the public key in
#                             hosts.yaml (used to connect to the hosts)
#       CLOUDFLARE_API_TOKEN  Cloudflare token with DNS edit permission
#       DEEPSEEK_API_KEY / NVIDIA_API_KEY / OLLAMA_API_KEY   LLM provider keys
#     Optional (remote TF state, S3-compatible / Vultr Object Storage):
#       TF_STATE_ENDPOINT TF_STATE_BUCKET TF_STATE_ACCESS_KEY
#       TF_STATE_SECRET_KEY TF_STATE_REGION
# =============================================================================

# Manual trigger only (workflow_dispatch). Every input is optional and falls
# back to the default declared next to it.
on:
  workflow_dispatch:
    inputs:
      # Git ref of the infra repository that is checked out by the jobs.
      infra_ref:
        type: string
        description: "ai-workspace-infra git ref (iac_modules + playbooks)"
        default: "main"
        required: false
      # Terraform sub-command to run: apply creates/updates, destroy tears down.
      terraform_action:
        type: choice
        options:
          - apply
          - destroy
        description: "apply 创建/更新,destroy 销毁"
        default: "apply"
        required: false
      # Whether the on-host bootstrap deployment runs after provisioning.
      run_deploy:
        type: boolean
        description: "provision 后是否执行 on-host 引导部署"
        default: true
        required: false
      # Whether Cloudflare DNS is synchronised after deployment.
      run_dns:
        type: boolean
        description: "部署后是否同步 Cloudflare DNS"
        default: true
        required: false

# id-token: write allows the GitHub OIDC (JWT) authentication against Vault;
# contents: read is for fetching code.
permissions:
  id-token: write
  contents: read

# All runs share one concurrency group; cancel-in-progress is off, so a run
# that is already in progress is never cancelled by a newer one.
concurrency:
  cancel-in-progress: false
  group: "deploy-ai-workspace-iac"

# Workflow-wide settings, read by the jobs through the env context.
env:
  # Vault endpoint, JWT role and KV path used by every "Load Vault secrets" step.
  VAULT_ADDR: "https://vault.svc.plus"
  VAULT_ROLE: "github-actions-xworkspace-console"
  VAULT_KV: "kv/data/github-actions/xworkspace-console"
  # Infra repository, resolved under the owner of the current repository.
  INFRA_REPO: "${{ github.repository_owner }}/ai-workspace-infra"
  # VPS_ROOT is the vultr-vps root (shared scripts/ templates/ config/);
  # ENV_DIR is the directory terraform runs in (workdir).
  VPS_ROOT: "infra/iac_modules/terraform-hcl-standard/vultr-vps"
  ENV_DIR: "infra/iac_modules/terraform-hcl-standard/vultr-vps/envs/ai-workspace"
  PLAYBOOKS_DIR: "infra/playbooks"

jobs:
  # ---------------------------------------------------------------------------
  # provision: runs terraform in ENV_DIR. For terraform_action=apply it then
  # renders cmdb.json + inventory.ini, publishes the host list / host count as
  # job outputs and uploads both files as the ai-workspace-cmdb artifact.
  # For destroy those three steps are skipped, so both outputs stay empty.
  provision:
    name: Provision (terraform + render CMDB)
    runs-on: ubuntu-latest
    outputs:
      hosts: ${{ steps.matrix.outputs.hosts }}
      count: ${{ steps.matrix.outputs.count }}
    steps:
      # ignoreNotFound keeps the step green when the optional TF_STATE_* keys
      # (or INFRA_REPO_TOKEN) are absent; the matching outputs are then empty.
      - name: Load Vault secrets (OIDC)
        id: vault
        uses: hashicorp/vault-action@v2
        with:
          url: ${{ env.VAULT_ADDR }}
          method: jwt
          role: ${{ env.VAULT_ROLE }}
          jwtGithubAudience: vault
          ignoreNotFound: true
          secrets: |
            ${{ env.VAULT_KV }} VULTR_API_KEY | VULTR_API_KEY ;
            ${{ env.VAULT_KV }} INFRA_REPO_TOKEN | INFRA_REPO_TOKEN ;
            ${{ env.VAULT_KV }} TF_STATE_ENDPOINT | TF_STATE_ENDPOINT ;
            ${{ env.VAULT_KV }} TF_STATE_BUCKET | TF_STATE_BUCKET ;
            ${{ env.VAULT_KV }} TF_STATE_ACCESS_KEY | TF_STATE_ACCESS_KEY ;
            ${{ env.VAULT_KV }} TF_STATE_SECRET_KEY | TF_STATE_SECRET_KEY ;
            ${{ env.VAULT_KV }} TF_STATE_REGION | TF_STATE_REGION

      # Falls back to the workflow token when Vault returned no INFRA_REPO_TOKEN.
      - name: Checkout infra (iac_modules + playbooks)
        uses: actions/checkout@v4
        with:
          repository: ${{ env.INFRA_REPO }}
          ref: ${{ github.event.inputs.infra_ref || 'main' }}
          token: ${{ steps.vault.outputs.INFRA_REPO_TOKEN || github.token }}
          path: infra

      - uses: hashicorp/setup-terraform@v3
        with:
          terraform_version: "1.9.8"

      - uses: actions/setup-python@v5
        with:
          python-version: "3.12"

      - name: Install render deps
        run: pip install --quiet pyyaml jinja2

      # Only written when Vault supplied a state bucket; bucket/endpoint/key/
      # region are injected later through -backend-config in "Terraform init".
      # NOTE(review): `force_path_style` (here) and `endpoint` (init step) look
      # like the pre-1.6 spellings of `use_path_style` / `endpoints.s3` --
      # confirm against the S3 backend docs before migrating.
      - name: Configure remote backend (optional)
        if: ${{ steps.vault.outputs.TF_STATE_BUCKET != '' }}
        working-directory: ${{ env.ENV_DIR }}
        run: |
          set -euo pipefail
          cat > backend.tf <<'EOF'
          terraform {
            backend "s3" {
              skip_credentials_validation = true
              skip_region_validation      = true
              skip_requesting_account_id  = true
              skip_metadata_api_check     = true
              force_path_style            = true
            }
          }
          EOF

      - name: generate.py render (YAML -> 显式 HCL + tfvars)
        working-directory: ${{ env.VPS_ROOT }}
        run: python3 scripts/generate.py render

      # The state credentials are exposed under the AWS_* variable names; the
      # other TF_STATE_* values are forwarded as -backend-config arguments.
      - name: Terraform init
        working-directory: ${{ env.ENV_DIR }}
        env:
          AWS_ACCESS_KEY_ID: ${{ steps.vault.outputs.TF_STATE_ACCESS_KEY }}
          AWS_SECRET_ACCESS_KEY: ${{ steps.vault.outputs.TF_STATE_SECRET_KEY }}
          TF_STATE_ENDPOINT: ${{ steps.vault.outputs.TF_STATE_ENDPOINT }}
          TF_STATE_BUCKET: ${{ steps.vault.outputs.TF_STATE_BUCKET }}
          TF_STATE_REGION: ${{ steps.vault.outputs.TF_STATE_REGION }}
        run: |
          set -euo pipefail
          if [ -n "${TF_STATE_BUCKET}" ]; then
            terraform init -input=false \
              -backend-config="endpoint=${TF_STATE_ENDPOINT}" \
              -backend-config="bucket=${TF_STATE_BUCKET}" \
              -backend-config="key=ai-workspace/terraform.tfstate" \
              -backend-config="region=${TF_STATE_REGION:-us-east-1}"
          else
            echo "::warning::未配置远端 state(Vault 无 TF_STATE_BUCKET),使用本地 state(仅适合一次性演示,destroy 需同一次运行)"
            terraform init -input=false
          fi

      # The action reaches the script through TF_ACTION instead of being
      # expanded into the script text, and is checked against the two
      # sub-commands this workflow supports before terraform is invoked.
      - name: Terraform ${{ github.event.inputs.terraform_action || 'apply' }}
        working-directory: ${{ env.ENV_DIR }}
        env:
          TF_VAR_vultr_api_key: ${{ steps.vault.outputs.VULTR_API_KEY }}
          TF_ACTION: ${{ github.event.inputs.terraform_action || 'apply' }}
        run: |
          set -euo pipefail
          case "${TF_ACTION}" in
            apply | destroy) ;;
            *)
              echo "::error::Unsupported terraform_action: ${TF_ACTION}"
              exit 1
              ;;
          esac
          terraform "${TF_ACTION}" -auto-approve -input=false

      - name: generate.py inventory (terraform output + YAML -> cmdb.json + inventory.ini)
        if: ${{ (github.event.inputs.terraform_action || 'apply') == 'apply' }}
        working-directory: ${{ env.VPS_ROOT }}
        run: python3 scripts/generate.py inventory

      # hosts = JSON array of the top-level keys of cmdb.json; count = number
      # of top-level entries. Both feed the deploy matrix and the job gates.
      - name: Build deploy matrix from cmdb.json
        id: matrix
        if: ${{ (github.event.inputs.terraform_action || 'apply') == 'apply' }}
        working-directory: ${{ env.ENV_DIR }}
        run: |
          set -euo pipefail
          hosts="$(jq -c 'keys' cmdb.json)"
          echo "hosts=${hosts}" >> "$GITHUB_OUTPUT"
          echo "count=$(jq 'length' cmdb.json)" >> "$GITHUB_OUTPUT"
          echo "matrix hosts: ${hosts}"

      - name: Upload CMDB + inventory artifact
        if: ${{ (github.event.inputs.terraform_action || 'apply') == 'apply' }}
        uses: actions/upload-artifact@v4
        with:
          name: ai-workspace-cmdb
          path: |
            ${{ env.ENV_DIR }}/cmdb.json
            ${{ env.ENV_DIR }}/inventory.ini
          if-no-files-found: error

  # ---------------------------------------------------------------------------
  # deploy: one matrix leg per host listed by provision.
  deploy:
    name: Deploy ${{ matrix.host }} (on-host bootstrap)
    needs: provision
    # count is the empty string when provision skipped the matrix step
    # (terraform_action=destroy). Without the explicit '' check the job would
    # start and then fail while evaluating fromJSON('') for the matrix.
    if: >-
      ${{ needs.provision.outputs.count != ''
      && needs.provision.outputs.count != '0'
      && (github.event.inputs.run_deploy == 'true' || github.event.inputs.run_deploy == null) }}
    runs-on: ubuntu-latest
    strategy:
      fail-fast: false
      matrix:
        host: ${{ fromJSON(needs.provision.outputs.hosts) }}
    steps:
      # all-in-one is an "execute locally on the target host" model (inside the
      # host: ansible-playbook -c local, automatically accelerated by the
      # offline bundle). Running all-in-one remotely from the runner would hit
      # roles/agent_skills' delegate_to: localhost (which writes to the runner's
      # local /root), so deploy instead SSHes into the host and runs the
      # official bootstrap script there -- exactly the same path as a user's
      # self-host curl|bash.
      - name: Load Vault secrets (OIDC)
        id: vault
        uses: hashicorp/vault-action@v2
        with:
          url: ${{ env.VAULT_ADDR }}
          method: jwt
          role: ${{ env.VAULT_ROLE }}
          jwtGithubAudience: vault
          secrets: |
            ${{ env.VAULT_KV }} ANSIBLE_SSH_KEY | ANSIBLE_SSH_KEY ;
            ${{ env.VAULT_KV }} DEEPSEEK_API_KEY | DEEPSEEK_API_KEY ;
            ${{ env.VAULT_KV }} NVIDIA_API_KEY | NVIDIA_API_KEY ;
            ${{ env.VAULT_KV }} OLLAMA_API_KEY | OLLAMA_API_KEY

      - name: Download CMDB (host IP source)
        uses: actions/download-artifact@v4
        with:
          name: ai-workspace-cmdb
          path: cmdb

      # The private key is handed over through the environment (never expanded
      # into the script text) and written under umask 077 so neither ~/.ssh nor
      # the key file is ever group/world readable.
      - name: Configure SSH
        env:
          ANSIBLE_SSH_KEY: ${{ steps.vault.outputs.ANSIBLE_SSH_KEY }}
        run: |
          set -euo pipefail
          umask 077
          mkdir -p ~/.ssh
          printf '%s\n' "${ANSIBLE_SSH_KEY}" > ~/.ssh/id_ed25519
          chmod 600 ~/.ssh/id_ed25519

      # Polls port 22 up to 60 times, 10 s apart. A host without an ip in
      # cmdb.json fails immediately instead of polling the literal "null".
      - name: Wait for host SSH
        env:
          TARGET_HOST: ${{ matrix.host }}
        run: |
          set -euo pipefail
          ip="$(jq -r --arg host "${TARGET_HOST}" '.[$host].ip // empty' cmdb/cmdb.json)"
          if [ -z "${ip}" ]; then
            echo "::error::No ip found for ${TARGET_HOST} in cmdb.json"
            exit 1
          fi
          echo "Waiting for ${TARGET_HOST} (${ip}:22) ..."
          for _ in $(seq 1 60); do
            if nc -z -w 5 "$ip" 22; then echo "SSH up"; exit 0; fi
            sleep 10
          done
          echo "::error::Timed out waiting for ${ip}:22"; exit 1

      # The API keys are shell-quoted with printf %q before being placed on the
      # remote command line, so a value containing quotes cannot break out of
      # the assignment. `set -o pipefail` on the remote side makes a failed
      # download fail the step; without it bash would read empty input from the
      # pipe and exit 0.
      - name: Run on-host bootstrap (curl | bash, local-mode install)
        env:
          TARGET_HOST: ${{ matrix.host }}
          DEEPSEEK_API_KEY: ${{ steps.vault.outputs.DEEPSEEK_API_KEY }}
          NVIDIA_API_KEY: ${{ steps.vault.outputs.NVIDIA_API_KEY }}
          OLLAMA_API_KEY: ${{ steps.vault.outputs.OLLAMA_API_KEY }}
        run: |
          set -euo pipefail
          ip="$(jq -r --arg host "${TARGET_HOST}" '.[$host].ip // empty' cmdb/cmdb.json)"
          user="$(jq -r --arg host "${TARGET_HOST}" '.[$host].ansible_user // "root"' cmdb/cmdb.json)"
          if [ -z "${ip}" ]; then
            echo "::error::No ip found for ${TARGET_HOST} in cmdb.json"
            exit 1
          fi
          echo "Bootstrapping ${TARGET_HOST} (${user}@${ip}) on-host ..."
          remote_env="$(printf 'DEEPSEEK_API_KEY=%q NVIDIA_API_KEY=%q OLLAMA_API_KEY=%q' \
            "${DEEPSEEK_API_KEY}" "${NVIDIA_API_KEY}" "${OLLAMA_API_KEY}")"
          ssh -i ~/.ssh/id_ed25519 \
            -o StrictHostKeyChecking=accept-new \
            -o ServerAliveInterval=20 -o ServerAliveCountMax=15 \
            -o ConnectTimeout=20 \
            "${user}@${ip}" \
            "${remote_env} bash -lc 'set -o pipefail; curl -sfL https://install.svc.plus/ai-workspace | bash -'"

  # ---------------------------------------------------------------------------
  # dns: reconciles Cloudflare DNS from the inventory produced by provision.
  dns:
    name: Sync Cloudflare DNS
    needs: [provision, deploy]
    # !cancelled() replaces the implicit success() check, which would skip this
    # job whenever deploy was skipped (run_deploy=false). DNS still requires a
    # successful provision, a deploy that succeeded or was skipped, and a
    # non-empty host count (empty count = destroy run, nothing to sync).
    if: >-
      ${{ !cancelled()
      && needs.provision.result == 'success'
      && (needs.deploy.result == 'success' || needs.deploy.result == 'skipped')
      && needs.provision.outputs.count != ''
      && needs.provision.outputs.count != '0'
      && (github.event.inputs.run_dns == 'true' || github.event.inputs.run_dns == null) }}
    runs-on: ubuntu-latest
    steps:
      - name: Load Vault secrets (OIDC)
        id: vault
        uses: hashicorp/vault-action@v2
        with:
          url: ${{ env.VAULT_ADDR }}
          method: jwt
          role: ${{ env.VAULT_ROLE }}
          jwtGithubAudience: vault
          ignoreNotFound: true
          secrets: |
            ${{ env.VAULT_KV }} INFRA_REPO_TOKEN | INFRA_REPO_TOKEN ;
            ${{ env.VAULT_KV }} CLOUDFLARE_API_TOKEN | CLOUDFLARE_API_TOKEN

      - name: Checkout infra (playbooks)
        uses: actions/checkout@v4
        with:
          repository: ${{ env.INFRA_REPO }}
          ref: ${{ github.event.inputs.infra_ref || 'main' }}
          token: ${{ steps.vault.outputs.INFRA_REPO_TOKEN || github.token }}
          path: infra

      - name: Download CMDB + inventory
        uses: actions/download-artifact@v4
        with:
          name: ai-workspace-cmdb
          path: cmdb

      - uses: actions/setup-python@v5
        with:
          python-version: "3.12"

      - name: Install Ansible
        run: pip install --quiet ansible

      - name: Reconcile Cloudflare DNS from inventory
        working-directory: ${{ env.PLAYBOOKS_DIR }}
        env:
          CLOUDFLARE_DNS_API_TOKEN: ${{ steps.vault.outputs.CLOUDFLARE_API_TOKEN }}
        run: |
          set -euo pipefail
          # Sync A records only for the hosts of the ai_workspace group created
          # by this run (domains come from each host's service_domains hostvar,
          # content from its public IP); other static records are left alone.
          ansible-playbook \
            -i "${GITHUB_WORKSPACE}/cmdb/inventory.ini" \
            update_cloudflare_dns.yml \
            -e '{"cloudflare_dns_source_hosts":["ai_workspace"],"cloudflare_dns_static_records":[]}'