From ea2bbc6acdadde10423466547a8d47bc0be0c301 Mon Sep 17 00:00:00 2001 From: Haitao Pan Date: Thu, 28 Aug 2025 11:46:14 +0800 Subject: [PATCH] refactor(deepflow): split image pull script into arm64/x86 - removed legacy `pull_save_scp_image.sh` - added separate scripts for arm64 and x86 - updated `images.txt` and `deploy_deepflow_agent.sh` - adjusted OpenResty config templates (`artifact.conf.j2`, `nginx.conf.j2`) --- .../OpenResty/templates/artifact.conf.j2 | 10 +- .../vhosts/OpenResty/templates/nginx.conf.j2 | 1 + scripts/deepflow/images.txt | 6 + scripts/deepflow/pull_save_scp_image.sh | 32 --- scripts/deepflow/pull_save_scp_image_arm64.sh | 198 ++++++++++++++++++ scripts/deepflow/pull_save_scp_image_x86.sh | 196 +++++++++++++++++ scripts/deploy_deepflow_agent.sh | 164 ++++++++++++--- 7 files changed, 538 insertions(+), 69 deletions(-) create mode 100644 scripts/deepflow/images.txt delete mode 100644 scripts/deepflow/pull_save_scp_image.sh create mode 100644 scripts/deepflow/pull_save_scp_image_arm64.sh create mode 100644 scripts/deepflow/pull_save_scp_image_x86.sh diff --git a/playbooks/roles/vhosts/OpenResty/templates/artifact.conf.j2 b/playbooks/roles/vhosts/OpenResty/templates/artifact.conf.j2 index b051572..02f5459 100644 --- a/playbooks/roles/vhosts/OpenResty/templates/artifact.conf.j2 +++ b/playbooks/roles/vhosts/OpenResty/templates/artifact.conf.j2 @@ -16,24 +16,22 @@ server { {% set autoindex_paths = item.autoindex_paths | default(vhost_defaults.autoindex_paths) %} # 目录浏览(打开 autoindex)—可列出整个 {{ item.root | default(vhost_defaults.root) }} + {% if '/' in autoindex_paths %} location / { - {% if '/' in autoindex_paths %} autoindex on; autoindex_exact_size off; autoindex_localtime on; - {% endif %} add_header Accept-Ranges bytes; try_files $uri $uri/ =404; # 保持原有 404 语义 } - - {% for path in autoindex_paths %} - {% if path != '/' %} + {% elif %} + {% for path in autoindex_paths %} location {{ path }} { autoindex on; autoindex_exact_size off; autoindex_localtime on; } - {% endif %} + {% endif %} {% endfor %} # 常见安装包直下读文件(大小写不敏感) diff --git a/playbooks/roles/vhosts/OpenResty/templates/nginx.conf.j2 b/playbooks/roles/vhosts/OpenResty/templates/nginx.conf.j2 index bb93d4e..8677a55 100644 --- a/playbooks/roles/vhosts/OpenResty/templates/nginx.conf.j2 +++ b/playbooks/roles/vhosts/OpenResty/templates/nginx.conf.j2 @@ -8,6 +8,7 @@ events { http { include mime.types; default_type application/octet-stream; + lua_package_path "/usr/local/openresty/lualib/?.lua;;"; sendfile on; keepalive_timeout 65; diff --git a/scripts/deepflow/images.txt b/scripts/deepflow/images.txt new file mode 100644 index 0000000..14d7646 --- /dev/null +++ b/scripts/deepflow/images.txt @@ -0,0 +1,6 @@ +dfcloud-image-registry-vpc.cn-beijing.cr.aliyuncs.com/dev/df-analyze:latest.515 +dfcloud-image-registry-vpc.cn-beijing.cr.aliyuncs.com/dev/df-web-agent:latest.2986036 +dfcloud-image-registry-vpc.cn-beijing.cr.aliyuncs.com/dev/df-web-composer:latest.2984869 +dfcloud-image-registry-vpc.cn-beijing.cr.aliyuncs.com/dev/apientry:latest.268 +hub.deepflow.yunshan.net/dev/alpine:latest +hub.deepflow.yunshan.net/dev/weaviate:1.30.0 diff --git a/scripts/deepflow/pull_save_scp_image.sh b/scripts/deepflow/pull_save_scp_image.sh deleted file mode 100644 index a5a53cc..0000000 --- a/scripts/deepflow/pull_save_scp_image.sh +++ /dev/null @@ -1,32 +0,0 @@ -#!/bin/bash -set -e - -REMOTE_HOST="root@10.1.3.179" - -if [ -z "$1" ]; then - echo "❌ 用法: $0 " - echo "示例: $0 dfcloud-image-registry-vpc.cn-beijing.cr.aliyuncs.com/dev/df-web-ai:v6.6.18839" - exit 1 -fi - -IMAGE="$1" - -# 提取镜像名和版本号 -NAME_TAG="${IMAGE##*/}" # df-web-ai:v6.6.18839 -NAME="${NAME_TAG%%:*}" # df-web-ai -TAG="${NAME_TAG##*:}" # v6.6.18839 -FILE_NAME="${NAME}-${TAG//v/}.tar" # df-web-ai-6.6.18839.tar - -echo "📦 镜像: $IMAGE" -echo "📁 导出文件名: $FILE_NAME" - -echo "🚀 在远程拉取镜像..." -ssh $REMOTE_HOST docker pull "$IMAGE" - -echo "💾 在远程保存镜像为 /tmp/$FILE_NAME..." -ssh $REMOTE_HOST "docker save $IMAGE > /tmp/$FILE_NAME" - -echo "📥 拷贝镜像回本地 ~/Desktop..." -scp $REMOTE_HOST:/tmp/$FILE_NAME ~/Desktop - -echo "✅ 完成!镜像保存于:~/Desktop/$FILE_NAME" diff --git a/scripts/deepflow/pull_save_scp_image_arm64.sh b/scripts/deepflow/pull_save_scp_image_arm64.sh new file mode 100644 index 0000000..8b32110 --- /dev/null +++ b/scripts/deepflow/pull_save_scp_image_arm64.sh @@ -0,0 +1,198 @@ +#!/bin/bash +# deepflow/pull_save_scp_image_arm64.sh +# 目标:即使远端是 x86,也只拉取/保存 arm64 变体,并强校验;支持批量与 --rm-remote 清理 +set -euo pipefail + +REMOTE_HOST="${REMOTE_HOST:-root@10.1.3.179}" +DEST_DIR="${DEST_DIR:-$HOME/Desktop}" +RM_REMOTE=0 + +usage() { + cat < [image2 ...] [--rm-remote] + $0 -f images.txt [--rm-remote] + +说明: + - 支持批量处理: + • 多个参数: ./pull_save_scp_image_arm64.sh image1 image2 ... + • 文件清单: ./pull_save_scp_image_arm64.sh -f images.txt + (清单支持 # 注释与空行) + + - 只拉 arm64 & save arm64: + • docker pull --platform=linux/arm64 + • docker image inspect --format '{{.Architecture}}' 二次确认 + • 以镜像ID保存,避免 tag→manifest list 在 x86 上回退到 amd64 + + - 保存后校验: + • 在远端解析 tar 的 manifest.json 和对应 config + • 逐个检查 "architecture":"arm64",确保 tar 内确实是 arm64 + + - 可配置环境变量: + • REMOTE_HOST (默认 root@10.1.3.179) + • DEST_DIR (默认 ~/Desktop) + + - 额外选项: + • --rm-remote 成功拷贝到本地后自动删除远端 /tmp/*.tar; + 任一步失败也会自动清理远端临时文件,避免残留。 + +示例: + $0 dfcloud-image-registry-vpc.cn-beijing.cr.aliyuncs.com/dev/df-web-ai:v6.6.18839 + $0 -f images.txt --rm-remote +EOF +} + +# -------- 参数解析 -------- +IMAGES=() +LIST_FILE="" +if [[ $# -eq 0 ]]; then usage; exit 1; fi + +ARGS=() +while [[ $# -gt 0 ]]; do + case "$1" in + -h|--help) usage; exit 0 ;; + --rm-remote) RM_REMOTE=1; shift ;; + -f) + [[ $# -ge 2 ]] || { echo "❌ 缺少镜像清单文件"; exit 1; } + LIST_FILE="$2"; shift 2 ;; + *) + ARGS+=("$1"); shift ;; + esac +done + +if [[ -n "$LIST_FILE" ]]; then + [[ -f "$LIST_FILE" ]] || { echo "❌ 文件不存在: $LIST_FILE"; exit 1; } + while IFS= read -r line; do + line="${line%%#*}" + line="$(echo -n "$line" | xargs || true)" + [[ -n "$line" ]] || continue + IMAGES+=("$line") + done < "$LIST_FILE" +fi + +if [[ ${#ARGS[@]} -gt 0 ]]; then + IMAGES+=("${ARGS[@]}") +fi + +[[ ${#IMAGES[@]} -gt 0 ]] || { echo "❌ 没有可处理的镜像"; exit 1; } + +echo "🖥️ 远端: $REMOTE_HOST" +echo "💾 本地保存目录: $DEST_DIR" +echo "🧹 rm-remote: $([[ $RM_REMOTE -eq 1 ]] && echo ON || echo OFF)" +mkdir -p "$DEST_DIR" + +# -------- 远端校验脚本内容(用 cat heredoc 赋值,兼容 macOS 老 bash) -------- +REMOTE_VERIFY_PY="$(cat <<'PYCODE' +import sys, tarfile, json +tar_path = sys.argv[1] +with tarfile.open(tar_path, "r") as tf: + manifest = json.load(tf.extractfile("manifest.json")) + for item in manifest: + cfg = item.get("Config") + if not cfg: + print("NO_CONFIG_IN_MANIFEST", file=sys.stderr); sys.exit(2) + f = tf.extractfile(cfg) + if f is None: + print("CONFIG_NOT_FOUND", file=sys.stderr); sys.exit(3) + cfg_json = json.load(f) + arch = cfg_json.get("architecture") + if arch != "arm64": + print(f"BAD_ARCH:{arch}", file=sys.stderr); sys.exit(4) +print("OK") +PYCODE +)" + +# 为了安全传输到远端,这里将 Python 内容做一次 printf %q 转义 +#(避免某些 shell/ssh 环境下的字符解释问题) +escape_for_ssh() { + printf "%s" "$1" | python3 - <<'P' +import sys, shlex +data=sys.stdin.read() +print(shlex.quote(data)) +P +} + +REMOTE_VERIFY_PY_Q=$(escape_for_ssh "$REMOTE_VERIFY_PY") + +# -------- 处理单个镜像 -------- +process_image() { + local IMAGE="$1" + + local NAME_TAG="${IMAGE##*/}" # e.g. weaviate:1.30.0 + local NAME="${NAME_TAG%%:*}" # weaviate + local TAG="${NAME_TAG##*:}" # 1.30.0 + if [[ "$NAME" == "$NAME_TAG" ]]; then TAG="latest"; fi + + local FILE_NAME="${NAME}-${TAG}.arm64.tar" + local REMOTE_TAR="/tmp/${FILE_NAME}" + local DEST_PATH="${DEST_DIR}/${FILE_NAME}" + + echo + echo "==============================" + echo "📦 镜像: $IMAGE" + echo "🎯 仅拉取平台: linux/arm64" + echo "📁 导出文件名: $FILE_NAME" + echo "==============================" + + # 失败即清理远端临时文件 + local CLEAN_ON_FAILURE=1 + trap 'if [[ "${CLEAN_ON_FAILURE:-0}" -eq 1 ]]; then ssh -o BatchMode=yes "'"$REMOTE_HOST"'" "rm -f \"'"$REMOTE_TAR"'\"" || true; fi' RETURN + + # 1) 强制拉 arm64 + echo "🚀 远端拉取镜像..." + ssh -o BatchMode=yes "$REMOTE_HOST" "docker pull --platform=linux/arm64 \"$IMAGE\"" + + # 2) 获取 arm64 变体镜像ID + echo "🔎 提取 arm64 镜像ID..." + local IMAGE_ID + IMAGE_ID="$(ssh "$REMOTE_HOST" " + docker image inspect \"$IMAGE\" \ + --format '{{.Id}} {{.Architecture}}' 2>/dev/null \ + | awk '\$2==\"arm64\"{print \$1; exit}' + ")" + if [[ -z "${IMAGE_ID:-}" ]]; then + echo "❌ 未找到 arm64 变体镜像ID,可能仓库不包含 arm64。"; return 12 + fi + echo "✅ arm64 镜像ID: $IMAGE_ID" + + # 3) 二次确认该 ID 的架构为 arm64 + echo "🧪 inspect 架构确认..." + ssh "$REMOTE_HOST" " + arch=\$(docker image inspect --format '{{.Architecture}}' $IMAGE_ID | head -n1); \ + if [[ \"\$arch\" != \"arm64\" ]]; then + echo '❌ 镜像ID架构校验失败: '\"\$arch\"; exit 13; fi; \ + echo '✅ 架构: '\"\$arch\" + " + + # 4) 以镜像ID保存 + echo "💾 保存为: $REMOTE_TAR ..." + ssh "$REMOTE_HOST" "docker save $IMAGE_ID > \"$REMOTE_TAR\"" + + # 5) 解包校验 tar 内架构 + echo "🧬 校验 tar 包内部 architecture..." + ssh "$REMOTE_HOST" "python3 -c $REMOTE_VERIFY_PY_Q \"$REMOTE_TAR\"" + + # 6) 拷回本地 + echo "📥 拷贝到本地: $DEST_PATH ..." + scp "$REMOTE_HOST:$REMOTE_TAR" "$DEST_PATH" + + # 7) 可选删除远端临时 tar;关闭失败清理 trap + if [[ $RM_REMOTE -eq 1 ]]; then + echo "🧹 删除远端临时文件: $REMOTE_TAR" + ssh "$REMOTE_HOST" "rm -f \"$REMOTE_TAR\"" + else + echo "ℹ️ 远端临时文件保留: $REMOTE_TAR" + fi + + CLEAN_ON_FAILURE=0 + trap - RETURN + echo "✅ 完成: $DEST_PATH (arm64 only)" +} + +# -------- 批量执行 -------- +for img in "${IMAGES[@]}"; do + process_image "$img" +done + +echo +echo "🎉 所有任务完成。" diff --git a/scripts/deepflow/pull_save_scp_image_x86.sh b/scripts/deepflow/pull_save_scp_image_x86.sh new file mode 100644 index 0000000..0164f5a --- /dev/null +++ b/scripts/deepflow/pull_save_scp_image_x86.sh @@ -0,0 +1,196 @@ +#!/bin/bash +# deepflow/pull_save_scp_image_amd64.sh +# 目标:仅拉取/保存 amd64 变体,并强校验;支持批量与 --rm-remote 清理 +set -euo pipefail + +REMOTE_HOST="${REMOTE_HOST:-root@10.1.3.179}" +DEST_DIR="${DEST_DIR:-$HOME/Desktop}" +RM_REMOTE=0 + +usage() { + cat < [image2 ...] [--rm-remote] + $0 -f images.txt [--rm-remote] + +说明: + - 支持批量处理: + • 多个参数: ./pull_save_scp_image_amd64.sh image1 image2 ... + • 文件清单: ./pull_save_scp_image_amd64.sh -f images.txt + (清单支持 # 注释与空行) + + - 只拉 amd64 & save amd64: + • docker pull --platform=linux/amd64 + • docker image inspect --format '{{.Architecture}}' 二次确认 + • 以镜像ID保存,避免 tag→manifest list 在异构主机上回退到其他架构 + + - 保存后校验: + • 在远端解析 tar 的 manifest.json 和对应 config + • 逐个检查 "architecture":"amd64",确保 tar 内确实是 amd64 + + - 可配置环境变量: + • REMOTE_HOST (默认 root@10.1.3.179) + • DEST_DIR (默认 ~/Desktop) + + - 额外选项: + • --rm-remote 成功拷贝到本地后自动删除远端 /tmp/*.tar; + 任一步失败也会自动清理远端临时文件,避免残留。 + +示例: + $0 dfcloud-image-registry-vpc.cn-beijing.cr.aliyuncs.com/dev/df-web-ai:v6.6.18839 + $0 -f images.txt --rm-remote +EOF +} + +# -------- 参数解析 -------- +IMAGES=() +LIST_FILE="" +if [[ $# -eq 0 ]]; then usage; exit 1; fi + +ARGS=() +while [[ $# -gt 0 ]]; do + case "$1" in + -h|--help) usage; exit 0 ;; + --rm-remote) RM_REMOTE=1; shift ;; + -f) + [[ $# -ge 2 ]] || { echo "❌ 缺少镜像清单文件"; exit 1; } + LIST_FILE="$2"; shift 2 ;; + *) + ARGS+=("$1"); shift ;; + esac +done + +if [[ -n "$LIST_FILE" ]]; then + [[ -f "$LIST_FILE" ]] || { echo "❌ 文件不存在: $LIST_FILE"; exit 1; } + while IFS= read -r line; do + line="${line%%#*}" + line="$(echo -n "$line" | xargs || true)" + [[ -n "$line" ]] || continue + IMAGES+=("$line") + done < "$LIST_FILE" +fi + +if [[ ${#ARGS[@]} -gt 0 ]]; then + IMAGES+=("${ARGS[@]}") +fi + +[[ ${#IMAGES[@]} -gt 0 ]] || { echo "❌ 没有可处理的镜像"; exit 1; } + +echo "🖥️ 远端: $REMOTE_HOST" +echo "💾 本地保存目录: $DEST_DIR" +echo "🧹 rm-remote: $([[ $RM_REMOTE -eq 1 ]] && echo ON || echo OFF)" +mkdir -p "$DEST_DIR" + +# -------- 远端校验脚本内容(用 cat heredoc 赋值,兼容 macOS 老 bash) -------- +REMOTE_VERIFY_PY="$(cat <<'PYCODE' +import sys, tarfile, json +tar_path = sys.argv[1] +with tarfile.open(tar_path, "r") as tf: + manifest = json.load(tf.extractfile("manifest.json")) + for item in manifest: + cfg = item.get("Config") + if not cfg: + print("NO_CONFIG_IN_MANIFEST", file=sys.stderr); sys.exit(2) + f = tf.extractfile(cfg) + if f is None: + print("CONFIG_NOT_FOUND", file=sys.stderr); sys.exit(3) + cfg_json = json.load(f) + arch = cfg_json.get("architecture") + if arch != "amd64": + print(f"BAD_ARCH:{arch}", file=sys.stderr); sys.exit(4) +print("OK") +PYCODE +)" + +escape_for_ssh() { + printf "%s" "$1" | python3 - <<'P' +import sys, shlex +data=sys.stdin.read() +print(shlex.quote(data)) +P +} +REMOTE_VERIFY_PY_Q=$(escape_for_ssh "$REMOTE_VERIFY_PY") + +# -------- 处理单个镜像 -------- +process_image() { + local IMAGE="$1" + + local NAME_TAG="${IMAGE##*/}" # e.g. weaviate:1.30.0 + local NAME="${NAME_TAG%%:*}" # weaviate + local TAG="${NAME_TAG##*:}" # 1.30.0 + if [[ "$NAME" == "$NAME_TAG" ]]; then TAG="latest"; fi + + local FILE_NAME="${NAME}-${TAG}.amd64.tar" + local REMOTE_TAR="/tmp/${FILE_NAME}" + local DEST_PATH="${DEST_DIR}/${FILE_NAME}" + + echo + echo "==============================" + echo "📦 镜像: $IMAGE" + echo "🎯 仅拉取平台: linux/amd64" + echo "📁 导出文件名: $FILE_NAME" + echo "==============================" + + # 失败即清理远端临时文件 + local CLEAN_ON_FAILURE=1 + trap 'if [[ "${CLEAN_ON_FAILURE:-0}" -eq 1 ]]; then ssh -o BatchMode=yes "'"$REMOTE_HOST"'" "rm -f \"'"$REMOTE_TAR"'\"" || true; fi' RETURN + + # 1) 强制拉 amd64 + echo "🚀 远端拉取镜像..." + ssh -o BatchMode=yes "$REMOTE_HOST" "docker pull --platform=linux/amd64 \"$IMAGE\"" + + # 2) 获取 amd64 变体镜像ID + echo "🔎 提取 amd64 镜像ID..." + local IMAGE_ID + IMAGE_ID="$(ssh "$REMOTE_HOST" " + docker image inspect \"$IMAGE\" \ + --format '{{.Id}} {{.Architecture}}' 2>/dev/null \ + | awk '\$2==\"amd64\"{print \$1; exit}' + ")" + if [[ -z "${IMAGE_ID:-}" ]]; then + echo "❌ 未找到 amd64 变体镜像ID,可能仓库不包含 amd64。"; return 12 + fi + echo "✅ amd64 镜像ID: $IMAGE_ID" + + # 3) 二次确认该 ID 的架构为 amd64 + echo "🧪 inspect 架构确认..." + ssh "$REMOTE_HOST" " + arch=\$(docker image inspect --format '{{.Architecture}}' $IMAGE_ID | head -n1); \ + if [[ \"\$arch\" != \"amd64\" ]]; then + echo '❌ 镜像ID架构校验失败: '\"\$arch\"; exit 13; fi; \ + echo '✅ 架构: '\"\$arch\" + " + + # 4) 以镜像ID保存 + echo "💾 保存为: $REMOTE_TAR ..." + ssh "$REMOTE_HOST" "docker save $IMAGE_ID > \"$REMOTE_TAR\"" + + # 5) 解包校验 tar 内架构 + echo "🧬 校验 tar 包内部 architecture..." + ssh "$REMOTE_HOST" "python3 -c $REMOTE_VERIFY_PY_Q \"$REMOTE_TAR\"" + + # 6) 拷回本地 + echo "📥 拷贝到本地: $DEST_PATH ..." + scp "$REMOTE_HOST:$REMOTE_TAR" "$DEST_PATH" + + # 7) 可选删除远端临时 tar;关闭失败清理 trap + if [[ $RM_REMOTE -eq 1 ]]; then + echo "🧹 删除远端临时文件: $REMOTE_TAR" + ssh "$REMOTE_HOST" "rm -f \"$REMOTE_TAR\"" + else + echo "ℹ️ 远端临时文件保留: $REMOTE_TAR" + fi + + CLEAN_ON_FAILURE=0 + trap - RETURN + echo "✅ 完成: $DEST_PATH (amd64 only)" +} + +# -------- 批量执行 -------- +for img in "${IMAGES[@]}"; do + process_image "$img" +done + +echo +echo "🎉 所有任务完成。" + diff --git a/scripts/deploy_deepflow_agent.sh b/scripts/deploy_deepflow_agent.sh index 8986d5b..4d16d41 100644 --- a/scripts/deploy_deepflow_agent.sh +++ b/scripts/deploy_deepflow_agent.sh @@ -3,7 +3,7 @@ set -e #################################### -# 🌐 配置区 +# ������ 配置区 #################################### IP_LIST="./ip.list" @@ -14,6 +14,7 @@ MAX_PARALLEL=5 CONTROLLER_IP="" VTAP_GROUP_ID="" LIMIT="" +SUDO_MODE="sudo" # 可选: sudo | sudo-i SSH_OPTS="-o StrictHostKeyChecking=no -o ConnectTimeout=15" @@ -23,11 +24,11 @@ SUCCESS_FILE="success_hosts.txt" > "$SUCCESS_FILE" #################################### -# 参数解析 +# 参数解析(新增 --sudo-mode) #################################### if [[ $# -eq 0 ]]; then - echo "用法: $0 {deploy|upgrade|verify} --controller --group [--limit ip1,ip2]" + echo "用法: $0 {deploy|upgrade|verify} --controller --group [--limit ip1,ip2] [--sudo-mode sudo|sudo-i]" exit 1 fi @@ -48,6 +49,18 @@ while [[ $# -gt 0 ]]; do LIMIT="$2" shift 2 ;; + --sudo-mode) + case "$2" in + sudo|sudo-i) + SUDO_MODE="$2" + shift 2 + ;; + *) + echo "❌ --sudo-mode 必须是 'sudo' 或 'sudo-i'" + exit 1 + ;; + esac + ;; *) echo "未知参数: $1" exit 1 @@ -56,7 +69,7 @@ while [[ $# -gt 0 ]]; do done if [[ "$ACTION" != "deploy" && "$ACTION" != "upgrade" && "$ACTION" != "verify" ]]; then - echo "用法: $0 {deploy|upgrade|verify} --controller --group [--limit ip1,ip2]" + echo "用法: $0 {deploy|upgrade|verify} --controller --group [--limit ip1,ip2] [--sudo-mode sudo|sudo-i]" exit 1 fi @@ -66,7 +79,7 @@ if [[ "$ACTION" != "verify" && ( -z "$CONTROLLER_IP" || -z "$VTAP_GROUP_ID" ) ]] fi #################################### -# 核心函数 +# 核心函数(重点修改:SUDO 处理 + 重启逻辑) #################################### worker() { @@ -74,7 +87,7 @@ worker() { local user="$2" local pass="$3" - echo "🔧 [$ACTION] 处理主机 $ip ($user)" + echo "������ [$ACTION] 处理主机 $ip ($user)" if [[ "$ACTION" == "verify" ]]; then verify_agent "$ip" "$user" "$pass" && { @@ -109,13 +122,20 @@ worker() { return fi - install_agent "$ip" "$user" "$pass" "$pkg_path" && update_config "$ip" "$user" "$pass" && { - echo "✅ $ip $ACTION 完成" - echo "$ip" >> "$SUCCESS_FILE" - } || { + # 安装 + 配置 + if install_agent "$ip" "$user" "$pass" "$pkg_path" && update_config "$ip" "$user" "$pass"; then + # ✅ 配置完成后,再次重启服务,确保新配置生效 + restart_agent_service "$ip" "$user" "$pass" && { + echo "✅ $ip $ACTION 完成" + echo "$ip" >> "$SUCCESS_FILE" + } || { + echo "❌ $ip 服务重启失败" + echo "$ip" >> "$FAILED_FILE" + } + else echo "❌ $ip 安装或配置失败" echo "$ip" >> "$FAILED_FILE" - } + fi echo "-------------------------------------------" } @@ -164,75 +184,155 @@ choose_agent_package() { if [[ ${#files[@]} -gt 0 ]]; then latest=$(printf "%s\n" "${files[@]}" | sort -V | tail -1) - echo "🎯 选择安装包: $latest" >&2 + echo "������ 选择安装包: $latest" >&2 echo "$latest" else echo "UNSUPPORTED" fi } +# ✅ 修改 install_agent:支持 sudo 和 sudo-i install_agent() { local ip="$1" user="$2" pass="$3" pkg_path="$4" local remote_pkg="/tmp/agent.${pkg_path##*.}" - sshpass -p "$pass" scp $SSH_OPTS "$pkg_path" "$user@$ip:$remote_pkg" + sshpass -p "$pass" scp $SSH_OPTS "$pkg_path" "$user@$ip:$remote_pkg" || { + echo "❌ $ip 上传安装包失败" + return 1 + } + + # 构建 SUDO 前缀 + local SUDO_CMD="" + case "$SUDO_MODE" in + sudo) + SUDO_CMD="sudo" + ;; + sudo-i) + SUDO_CMD="sudo -i" + ;; + *) + SUDO_CMD="sudo" + ;; + esac sshpass -p "$pass" ssh $SSH_OPTS "$user@$ip" bash </dev/null; then SUDO="sudo"; else SUDO=""; fi +# 使用指定模式 +SUDO_MODE_CMD='$SUDO_CMD' + +echo "������ 使用权限模式: \$SUDO_MODE_CMD" + if [[ "$remote_pkg" == *.rpm ]]; then - \$SUDO rpm -Uvh --replacepkgs "$remote_pkg" + \$SUDO_MODE_CMD rpm -Uvh --replacepkgs "$remote_pkg" elif [[ "$remote_pkg" == *.deb ]]; then - \$SUDO dpkg -i "$remote_pkg" || \$SUDO apt-get install -f -y + \$SUDO_MODE_CMD dpkg -i "$remote_pkg" || \$SUDO_MODE_CMD apt-get install -f -y else echo "❌ 不支持的安装包格式" exit 1 fi +# 服务管理(注意:sudo -i 下 systemctl 可能仍可用) if command -v systemctl &>/dev/null; then - \$SUDO systemctl enable $SERVICE_NAME - \$SUDO systemctl restart $SERVICE_NAME + \$SUDO_MODE_CMD systemctl enable $SERVICE_NAME + \$SUDO_MODE_CMD systemctl restart $SERVICE_NAME elif command -v service &>/dev/null; then - \$SUDO service $SERVICE_NAME restart - \$SUDO chkconfig $SERVICE_NAME on + \$SUDO_MODE_CMD service $SERVICE_NAME restart + \$SUDO_MODE_CMD chkconfig $SERVICE_NAME on elif command -v initctl &>/dev/null; then - \$SUDO initctl restart $SERVICE_NAME || \$SUDO initctl start $SERVICE_NAME + \$SUDO_MODE_CMD initctl restart $SERVICE_NAME || \$SUDO_MODE_CMD initctl start $SERVICE_NAME else echo "❌ 无法识别服务管理方式" + exit 1 fi EOF } +# ✅ 修改 update_config:确保配置写入 /etc/ update_config() { local ip="$1" user="$2" pass="$3" + local SUDO_CMD="" + case "$SUDO_MODE" in + sudo) + SUDO_CMD="sudo" + ;; + sudo-i) + SUDO_CMD="sudo -i" + ;; + *) + SUDO_CMD="sudo" + ;; + esac + sshpass -p "$pass" ssh $SSH_OPTS "$user@$ip" bash </dev/null; then SUDO="sudo"; else SUDO=""; fi +SUDO_MODE_CMD='$SUDO_CMD' CONFIG_FILE="/etc/deepflow-agent.yaml" -\$SUDO mkdir -p \$(dirname \$CONFIG_FILE) -cat </dev/null + +# 使用 sudo -i 创建目录(更可靠) +\$SUDO_MODE_CMD mkdir -p \$(dirname \$CONFIG_FILE) + +# 写入配置(使用 tee 避免重定向权限问题) +cat <<'CFG' | \$SUDO_MODE_CMD tee "\$CONFIG_FILE" >/dev/null controller-ips: - $CONTROLLER_IP vtap-group-id: "$VTAP_GROUP_ID" CFG -\$SUDO chmod 644 "\$CONFIG_FILE" -\$SUDO chown root:root "\$CONFIG_FILE" + +\$SUDO_MODE_CMD chmod 644 "\$CONFIG_FILE" +\$SUDO_MODE_CMD chown root:root "\$CONFIG_FILE" +EOF +} + +# ✅ 新增函数:服务重启 + 状态检查 +restart_agent_service() { + local ip="$1" user="$2" pass="$3" + local SUDO_CMD="" + case "$SUDO_MODE" in + sudo) + SUDO_CMD="sudo" + ;; + sudo-i) + SUDO_CMD="sudo -i" + ;; + *) + SUDO_CMD="sudo" + ;; + esac + + sshpass -p "$pass" ssh $SSH_OPTS "$user@$ip" bash < /dev/null 2>&1; then + echo "❌ deepflow-agent.service 重启后未运行" + exit 1 +fi + +echo "✅ deepflow-agent.service 已成功重启" EOF } verify_agent() { local ip="$1" user="$2" pass="$3" - echo "🔍 $ip 状态检查:" + echo "������ $ip 状态检查:" sshpass -p "$pass" ssh $SSH_OPTS "$user@$ip" " - systemctl is-active $SERVICE_NAME 2>/dev/null || \ - service $SERVICE_NAME status || \ - initctl status $SERVICE_NAME + sudo systemctl is-active $SERVICE_NAME 2>/dev/null || \ + sudo service $SERVICE_NAME status || \ + sudo initctl status $SERVICE_NAME || \ + echo '⚠️ 服务状态未知' " } #################################### -# 并发控制主逻辑 +# 并发控制主逻辑(不变) #################################### sem(){ @@ -242,6 +342,8 @@ sem(){ } while read -r ip user pass; do + [[ -z "$ip" || "$ip" =~ ^# ]] && continue + if [[ -n "$LIMIT" ]]; then IFS=',' read -ra LIMIT_IPS <<< "$LIMIT" skip=true @@ -260,7 +362,7 @@ wait TOTAL_SUCCESS=$(wc -l < "$SUCCESS_FILE") TOTAL_FAIL=$(wc -l < "$FAILED_FILE") -echo "🎯 全部任务执行完成: 成功 $TOTAL_SUCCESS 台,失败 $TOTAL_FAIL 台" +echo "������ 全部任务执行完成: 成功 $TOTAL_SUCCESS 台,失败 $TOTAL_FAIL 台" if [[ -s "$FAILED_FILE" ]]; then echo "❗ 失败主机列表已保存: $FAILED_FILE" fi