accounts/docs/install_tiny_monitor_server.sh

323 lines
10 KiB
Bash
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env bash
# Tiny Monitor Server: Prometheus + Grafana 12 (GitOps provisioning) + Nginx single-domain
# Debian/Ubuntu only. Run as root.
#
# Every setting written as VAR="${VAR:-default}" below can be overridden from
# the environment; the remaining paths and account names are fixed.
set -euo pipefail

# ===== Required =====
# Git repository cloned in step 3. ':=' (not ':-') is required here: ':-' only
# expands to the default without assigning it, which leaves GIT_URL unset and
# makes the later "${GIT_URL}" expansion abort under 'set -u'.
: "${GIT_URL:=https://github.com/svc-design/gitops.git}"

# ===== Config (env overrides allowed) =====
PROM_VERSION="${PROM_VERSION:-2.49.0}"

# Domains
GRAFANA_DOMAIN="${GRAFANA_DOMAIN:-grafana.svc.plus}"
METRICS_DOMAIN="${METRICS_DOMAIN:-metrics.svc.plus}"

# TLS certs
CERT_PEM="${CERT_PEM:-/etc/ssl/svc.plus.pem}"
CERT_KEY="${CERT_KEY:-/etc/ssl/svc.plus.rsa.key}"

# GitOps repo path
ROOT_DIR="/srv/grafana/grafana-as-code"

# Grafana datasource URL; exported to Grafana as PROM_URL so the datasource
# YAML kept in your repository can reference it as ${PROM_URL}.
PROM_URL_FOR_GRAFANA="${PROM_URL_FOR_GRAFANA:-https://${METRICS_DOMAIN}/prom/}"

# remote_write / remote_read backend
METRICS_BACKEND_KIND="${METRICS_BACKEND_KIND:-vm}" # vm | receiver
METRICS_BACKEND_ADDR="${METRICS_BACKEND_ADDR:-10.10.0.50:8428}" # VM defaults to 8428; for a Receiver set your own address
VM_WRITE_PATH="${VM_WRITE_PATH:-/api/v1/write}"
VM_READ_PATH="${VM_READ_PATH:-/api/v1/read}"
RECEIVER_PATH="${RECEIVER_PATH:-/api/v1/receive}"
ENABLE_REMOTE_WRITE="${ENABLE_REMOTE_WRITE:-1}"
ENABLE_REMOTE_READ="${ENABLE_REMOTE_READ:-1}" # a receiver usually does not support remote_read

# Prometheus
PROM_DIR="/opt/prometheus"
PROM_USER="prometheus"
PROM_GROUP="prometheus"
PROM_DATA="/var/lib/prometheus"
PROM_ETC="/etc/prometheus"
FILE_SD_DIR="${PROM_ETC}/file_sd"
# =========================================
echo "[1/9] Install base packages"
# Refresh the package index, then install the tools the later steps rely on.
base_packages=(
  curl tar git jq adduser ca-certificates gnupg apt-transport-https nginx
)
apt-get update -y
apt-get install -y "${base_packages[@]}"
echo "[2/9] Install Grafana 12 (OSS stable)"
# Nothing to do when the grafana package is already installed.
if ! dpkg -s grafana >/dev/null 2>&1; then
  echo "→ Setting up Grafana APT repo and keyring..."
  # 1. Make sure the keyring directory exists.
  install -d -m 0755 /etc/apt/keyrings
  # 2. Fetch the Grafana GPG public key and de-armor it into the keyring.
  #    Plain redirections are used instead of 'sudo tee': the script already
  #    runs as root, and sudo is not among the packages installed in step 1.
  curl -fsSL https://apt.grafana.com/gpg.key \
    | gpg --dearmor >/etc/apt/keyrings/grafana.gpg
  # Keep the keyring world-readable regardless of the caller's umask.
  chmod 0644 /etc/apt/keyrings/grafana.gpg
  # 3. Add the APT source; signed-by points at the keyring written above.
  echo "deb [signed-by=/etc/apt/keyrings/grafana.gpg] https://apt.grafana.com stable main" \
    >/etc/apt/sources.list.d/grafana.list
  chmod 0644 /etc/apt/sources.list.d/grafana.list
  # 4. Refresh the index and install.
  apt-get update -y
  apt-get install -y grafana
fi
echo "[3/9] Clone GitOps repo -> ${ROOT_DIR}"
mkdir -p "$(dirname "${ROOT_DIR}")"
if [[ -d "${ROOT_DIR}/.git" ]]; then
  # Existing checkout: updating it is best effort. A failed pull must not
  # abort the install, but it is reported instead of being silently ignored.
  if ! git -C "${ROOT_DIR}" pull --ff-only; then
    echo "WARN: 'git pull --ff-only' failed in ${ROOT_DIR}; keeping the existing checkout" >&2
  fi
else
  # First run: shallow clone of the configured repository.
  git clone --depth 1 "${GIT_URL}" "${ROOT_DIR}"
fi
echo "[4/9] Wire Grafana Provisioning to GitOps dashboards"
# Only the dashboards provider is declared here, pointing at the repository's
# dashboards/ directory; datasources and alerting are expected to be
# maintained in the repository itself.
mkdir -p /etc/grafana/provisioning/dashboards
# The nesting below is significant: type/options belong to the provider entry
# and path belongs to options. The path is derived from ROOT_DIR so it always
# matches the checkout location used by the clone step.
cat >/etc/grafana/provisioning/dashboards/dashboards.yaml <<YAML
apiVersion: 1
providers:
  - name: 'gitops-dashboards'
    type: file
    disableDeletion: false
    allowUiUpdates: false
    updateIntervalSeconds: 30
    options:
      path: ${ROOT_DIR}/dashboards
      foldersFromFilesStructure: true
YAML
# Inject basic Grafana environment variables (domain, root URL and the
# Prometheus datasource URL) through a systemd drop-in.
mkdir -p /etc/systemd/system/grafana-server.service.d
cat >/etc/systemd/system/grafana-server.service.d/env.conf <<EOF
[Service]
Environment=GF_SERVER_DOMAIN=${GRAFANA_DOMAIN}
Environment=GF_SERVER_ROOT_URL=https://${GRAFANA_DOMAIN}/
Environment=PROM_URL=${PROM_URL_FOR_GRAFANA}
EOF
echo "[5/9] Install Prometheus ${PROM_VERSION}"
# Create the install, config, data and file_sd directories.
install -d -m 0755 "${PROM_DIR}" "${PROM_ETC}" "${PROM_DATA}" "${FILE_SD_DIR}"
# Create the service account (and its same-named group) if it does not exist.
if ! id -u "${PROM_USER}" >/dev/null 2>&1; then
  adduser --system --no-create-home --group "${PROM_USER}"
fi
# Pick the release tarball matching the machine architecture.
ARCH="$(uname -m)"
case "${ARCH}" in
  x86_64|amd64) PTAR="prometheus-${PROM_VERSION}.linux-amd64.tar.gz" ;;
  aarch64|arm64) PTAR="prometheus-${PROM_VERSION}.linux-arm64.tar.gz" ;;
  *) echo "Unsupported arch: $ARCH" >&2; exit 1 ;;
esac
# Download only when no prometheus binary is installed yet.
if [[ ! -x "${PROM_DIR}/prometheus" ]]; then
  # Extract into a private temp dir rather than a predictable /tmp path, and
  # remove it afterwards (the trap covers an abort under 'set -e').
  PROM_TMP="$(mktemp -d)"
  trap 'rm -rf -- "${PROM_TMP}"' EXIT
  curl -fsSL "https://github.com/prometheus/prometheus/releases/download/v${PROM_VERSION}/${PTAR}" \
    | tar xz -C "${PROM_TMP}"
  SRC="${PROM_TMP}/$(basename "${PTAR}" .tar.gz)"
  install -m 0755 "${SRC}/prometheus" "${PROM_DIR}/prometheus"
  install -m 0755 "${SRC}/promtool" "${PROM_DIR}/promtool"
  ln -sf "${PROM_DIR}/prometheus" /usr/local/bin/prometheus
  ln -sf "${PROM_DIR}/promtool" /usr/local/bin/promtool
  rm -rf -- "${PROM_TMP:?}"
  trap - EXIT
fi
echo "[6/9] Prometheus config (file_sd + remote_write/remote_read)"
# Scrape the local node_exporter by default; edit nodes.json and trigger a
# hot reload to change the targets.
cat >"${FILE_SD_DIR}/nodes.json" <<'JSON'
[
{ "targets": ["127.0.0.1:9100"], "labels": { "instance": "localhost" } }
]
JSON
# Base configuration. The YAML nesting is significant: the intervals belong to
# 'global', and file_sd_configs belongs to the 'node' job.
cat >"${PROM_ETC}/prometheus.yml" <<EOF
global:
  scrape_interval: 30s
  evaluation_interval: 30s
scrape_configs:
  - job_name: 'node'
    file_sd_configs:
      - files: ['${FILE_SD_DIR}/nodes.json']
EOF
# Optional remote_write section; the URL path depends on the backend kind.
if [[ "${ENABLE_REMOTE_WRITE}" == "1" ]]; then
  if [[ "${METRICS_BACKEND_KIND}" == "receiver" ]]; then
    RW_URL="https://${METRICS_DOMAIN}${RECEIVER_PATH}"
  else
    RW_URL="https://${METRICS_DOMAIN}${VM_WRITE_PATH}"
  fi
  cat >>"${PROM_ETC}/prometheus.yml" <<EOF
remote_write:
  - url: ${RW_URL}
    queue_config:
      max_samples_per_send: 10000
      max_shards: 8
      capacity: 100000
EOF
fi
# Optional remote_read section; only emitted for the "vm" backend kind.
if [[ "${ENABLE_REMOTE_READ}" == "1" && "${METRICS_BACKEND_KIND}" == "vm" ]]; then
  RR_URL="https://${METRICS_DOMAIN}${VM_READ_PATH}"
  cat >>"${PROM_ETC}/prometheus.yml" <<EOF
remote_read:
  - url: ${RR_URL}
    read_recent: true
EOF
fi
# Hand the data and config trees to the service account.
chown -R "${PROM_USER}:${PROM_GROUP}" "${PROM_DATA}" "${PROM_ETC}"
echo "[7/9] Nginx vhost (single domain for Prom UI + ingest) & Grafana web"
# Routing on the metrics domain:
#   /prom/                        -> Prometheus at 127.0.0.1:9090
#   write / read / receive paths  -> metrics backend (METRICS_BACKEND_ADDR)
cat >/etc/nginx/sites-available/metrics.conf <<EOF
upstream metrics_backend { server ${METRICS_BACKEND_ADDR}; keepalive 32; }
server {
listen 443 ssl http2;
server_name ${METRICS_DOMAIN};
ssl_certificate ${CERT_PEM};
ssl_certificate_key ${CERT_KEY};
# Prometheus Web/API via subpath
location /prom/ {
proxy_set_header Host \$http_host;
proxy_set_header X-Real-IP \$remote_addr;
proxy_set_header X-Forwarded-For \$proxy_add_x_forwarded_for;
proxy_set_header X-Forwarded-Proto \$scheme;
proxy_read_timeout 300;
proxy_send_timeout 300;
proxy_pass http://127.0.0.1:9090/;
}
# Ingest (VM write/read; Receiver write)
location = ${VM_WRITE_PATH} {
client_max_body_size 0; proxy_request_buffering off; proxy_buffering off; proxy_http_version 1.1;
proxy_set_header Host \$http_host; proxy_set_header X-Real-IP \$remote_addr;
proxy_set_header X-Forwarded-For \$proxy_add_x_forwarded_for; proxy_set_header X-Forwarded-Proto \$scheme;
proxy_read_timeout 600; proxy_send_timeout 600;
proxy_pass http://metrics_backend\$request_uri;
}
location = ${VM_READ_PATH} {
client_max_body_size 0; proxy_request_buffering off; proxy_buffering off; proxy_http_version 1.1;
proxy_set_header Host \$http_host; proxy_set_header X-Real-IP \$remote_addr;
proxy_set_header X-Forwarded-For \$proxy_add_x_forwarded_for; proxy_set_header X-Forwarded-Proto \$scheme;
proxy_read_timeout 600; proxy_send_timeout 600;
proxy_pass http://metrics_backend\$request_uri;
}
location = ${RECEIVER_PATH} {
client_max_body_size 0; proxy_request_buffering off; proxy_buffering off; proxy_http_version 1.1;
proxy_set_header Host \$http_host; proxy_set_header X-Real-IP \$remote_addr;
proxy_set_header X-Forwarded-For \$proxy_add_x_forwarded_for; proxy_set_header X-Forwarded-Proto \$scheme;
proxy_read_timeout 600; proxy_send_timeout 600;
proxy_pass http://metrics_backend\$request_uri;
}
}
server { listen 80; server_name ${METRICS_DOMAIN}; return 301 https://\$host\$request_uri; }
EOF
# Grafana domain -> Grafana at 127.0.0.1:3000 (with WebSocket upgrade on /api/live/).
cat >/etc/nginx/sites-available/grafana.conf <<EOF
server {
listen 443 ssl http2;
server_name ${GRAFANA_DOMAIN};
ssl_certificate ${CERT_PEM};
ssl_certificate_key ${CERT_KEY};
location / {
proxy_set_header Host \$http_host;
proxy_set_header X-Real-IP \$remote_addr;
proxy_set_header X-Forwarded-For \$proxy_add_x_forwarded_for;
proxy_set_header X-Forwarded-Proto \$scheme;
proxy_read_timeout 300;
proxy_send_timeout 300;
proxy_pass http://127.0.0.1:3000;
}
location /api/live/ {
proxy_http_version 1.1;
proxy_set_header Upgrade \$http_upgrade;
proxy_set_header Connection "upgrade";
proxy_pass http://127.0.0.1:3000;
}
}
server { listen 80; server_name ${GRAFANA_DOMAIN}; return 301 https://\$host\$request_uri; }
EOF
# (Re)create the sites-enabled links. -f replaces whatever is already at the
# destination (stale link or regular file), so re-runs do not fail; -n avoids
# descending into an existing link that points at a directory.
for site in metrics.conf grafana.conf; do
  ln -sfn "/etc/nginx/sites-available/${site}" "/etc/nginx/sites-enabled/${site}"
done
# Validate, then reload. These are separate statements on purpose: in
# 'nginx -t && reload' a failing test is exempt from 'set -e' and the script
# would carry on as if nothing happened.
nginx -t
systemctl reload nginx
printf '%s\n' "[8/9] Systemd: Prometheus unit (with external-url subpath)"
# Write the Prometheus service unit. The process runs as the dedicated service
# account, advertises the /prom/ subpath as its external URL while still
# serving from "/", and has the lifecycle HTTP endpoints enabled.
prom_unit_file=/etc/systemd/system/prometheus.service
cat <<EOF >"${prom_unit_file}"
[Unit]
Description=Prometheus Server
After=network-online.target
Wants=network-online.target
[Service]
User=${PROM_USER}
Group=${PROM_GROUP}
ExecStart=${PROM_DIR}/prometheus \\
--config.file=${PROM_ETC}/prometheus.yml \\
--storage.tsdb.path=${PROM_DATA} \\
--web.enable-lifecycle \\
--web.external-url=https://${METRICS_DOMAIN}/prom/ \\
--web.route-prefix=/
Restart=always
LimitNOFILE=65536
[Install]
WantedBy=multi-user.target
EOF
echo "[9/9] Enable services + 5m Git pull timer"
# Timer: first run 30s after boot, then every 5 minutes after the last run.
cat >/etc/systemd/system/grafana-dash-pull.timer <<EOF
[Unit]
Description=git pull dashboards every 5m
[Timer]
OnBootSec=30s
OnUnitActiveSec=5m
AccuracySec=30s
[Install]
WantedBy=timers.target
EOF
# One-shot service triggered by the timer: fast-forward the GitOps checkout.
# WorkingDirectory follows ROOT_DIR so it always matches the clone location.
cat >/etc/systemd/system/grafana-dash-pull.service <<EOF
[Unit]
Description=git pull dashboards
[Service]
Type=oneshot
WorkingDirectory=${ROOT_DIR}
ExecStart=/usr/bin/git pull --ff-only
EOF
systemctl daemon-reload
# 'enable --now' leaves an already-running service untouched, so on a re-run
# the freshly written unit files, drop-ins and configs would not take effect.
# Enable first, then restart explicitly (restart also starts a stopped unit).
systemctl enable prometheus
systemctl restart prometheus
systemctl enable grafana-server
systemctl restart grafana-server
systemctl enable --now grafana-dash-pull.timer
# Final summary of the endpoints and files set up by this script.
echo
echo "=== DONE ==="
echo "Grafana : https://${GRAFANA_DOMAIN}/ (首次登录admin/admin)"
echo "Metrics : https://${METRICS_DOMAIN}/prom/ (Prometheus UI/API 子路径)"
# Print the configured ingest paths rather than their defaults, so the line
# stays correct when VM_WRITE_PATH / RECEIVER_PATH are overridden.
echo "Ingest : https://${METRICS_DOMAIN}${VM_WRITE_PATH} (VM) | ${RECEIVER_PATH} (Receiver)"
echo
echo "GitOps repo : ${ROOT_DIR} (每5分钟自动 git pull)"
echo "Prom targets: ${FILE_SD_DIR}/nodes.json (编辑后热加载curl -X POST http://127.0.0.1:9090/-/reload)"
echo
echo "Grafana will see PROM_URL=${PROM_URL_FOR_GRAFANA} (供你仓库的数据源 YAML 使用)"