feat(obs): harden agent/server ingest pipeline and switch to observability domain

This commit is contained in:
Haitao Pan 2026-02-04 00:12:00 +08:00
parent c3888ed257
commit 216680f6ab
5 changed files with 2493 additions and 742 deletions

View File

@ -125,7 +125,7 @@ To install observability agents (Node Exporter, Process Exporter, Vector) on a c
```bash
# bash -s -- --endpoint <YOUR_ENDPOINT>
curl -fsSL https://raw.githubusercontent.com/cloud-neutral-toolkit/observability.svc.plus/main/scripts/agent-install.sh \
| bash -s -- --endpoint https://infra.svc.plus/ingest/otlp
| bash -s -- --endpoint https://observability.svc.plus/ingest/otlp
```
> **Note**: The script automatically verifies the installation after setup.

File diff suppressed because it is too large Load Diff

View File

@ -1,24 +1,27 @@
#!/bin/bash
set -e
set -euo pipefail
# Default Configuration
DEFAULT_ENDPOINT="https://infra.svc.plus/ingest/otlp"
DEFAULT_ENDPOINT="https://observability.svc.plus/ingest/otlp"
INSTALL_DIR="/opt/observability"
BIN_DIR="$INSTALL_DIR/bin"
CONFIG_DIR="$INSTALL_DIR/config"
DATA_DIR="$INSTALL_DIR/data"
BIN_DIR="${INSTALL_DIR}/bin"
CONFIG_DIR="${INSTALL_DIR}/config"
DATA_DIR="${INSTALL_DIR}/data"
# Versions
NODE_EXPORTER_VERSION="1.7.0"
PROCESS_EXPORTER_VERSION="0.7.10"
VECTOR_VERSION="0.36.0"
# Colors
ACTION="deploy"
ENDPOINT="${DEFAULT_ENDPOINT}"
METRICS_ENDPOINT=""
LOGS_ENDPOINT=""
AUTO_YES=false
GREEN='\033[0;32m'
BLUE='\033[0;34m'
RED='\033[0;31m'
YELLOW='\033[0;33m'
NC='\033[0m' # No Color
NC='\033[0m'
# Print an informational message ($1) to stdout, prefixed with a blue [INFO] tag.
log_info() { echo -e "${BLUE}[INFO]${NC} $1"; }
# Print a success message ($1) to stdout, prefixed with a green [SUCCESS] tag.
log_success() { echo -e "${GREEN}[SUCCESS]${NC} $1"; }
@ -26,9 +29,106 @@ log_error() { echo -e "${RED}[ERROR]${NC} $1"; }
# Print a failed-check message ($1) to stdout, prefixed with a red [FAIL] tag.
log_fail() { echo -e "${RED}[FAIL]${NC} $1"; }
# Print a warning message ($1) to stdout, prefixed with a yellow [WARN] tag.
log_warn() { echo -e "${YELLOW}[WARN]${NC} $1"; }
# Detect Architecture
ARCH=$(uname -m)
case $ARCH in
# Print the command-line help text to stdout.
# The heredoc is unquoted, so ${DEFAULT_ENDPOINT} is expanded when the text is printed.
usage() {
cat <<EOF
Usage:
bash agent-install.sh [options]
Actions (default: deploy):
--action deploy Deploy or upgrade components
--action upgrade Alias of deploy
--action reset Uninstall then reinstall components
--action uninstall Remove agent components
Options:
--endpoint URL Base ingest endpoint (default: ${DEFAULT_ENDPOINT})
--metrics-endpoint URL Prometheus remote_write endpoint (optional override)
--logs-endpoint URL Loki push endpoint (optional override)
-y, --yes Non-interactive mode
-h, --help Show help
Example:
curl -fsSL .../agent-install.sh | bash -s -- --endpoint https://observability.svc.plus/ingest/otlp
EOF
}
# Ask a yes/no question and succeed only on an explicit "y" or "Y".
#
# Arguments:
#   $1 - prompt text; " [y/N] " is appended.
# Globals:
#   AUTO_YES - when "true" the question is skipped and the answer is yes.
# Returns:
#   0 when confirmed, non-zero otherwise (including when no answer can be read).
confirm() {
  local prompt="$1"
  local reply=""
  if [[ "${AUTO_YES}" == "true" ]]; then
    return 0
  fi
  # The documented invocation is `curl ... | bash -s -- ...`, where stdin is the
  # script text itself. Reading the answer from stdin would then swallow the
  # next line of the script, so prefer the controlling terminal when stdin is
  # not one. Fall back to stdin only when no terminal can be opened.
  if [[ ! -t 0 ]] && ( : </dev/tty ) 2>/dev/null; then
    read -r -p "${prompt} [y/N] " reply </dev/tty || return 1
  else
    read -r -p "${prompt} [y/N] " reply || return 1
  fi
  [[ "${reply}" =~ ^[Yy]$ ]]
}
# Abort with a readable message when a value-taking option is the last argument.
# Without this check `set -u` kills the script with "unbound variable" on "$2".
# Call as: require_value "$@"  ($1 is the option, $2 its value).
require_value() {
  if [[ $# -lt 2 ]]; then
    log_error "Option $1 requires a value"
    usage
    exit 1
  fi
}

# Parse command-line options. Both "--opt value" and "--opt=value" forms are
# accepted for the value-taking options; any unrecognised argument is an error.
while [[ $# -gt 0 ]]; do
  case "$1" in
    --action)
      require_value "$@"
      ACTION="$2"
      shift 2
      ;;
    --action=*)
      ACTION="${1#*=}"
      shift
      ;;
    --endpoint)
      require_value "$@"
      ENDPOINT="$2"
      shift 2
      ;;
    --endpoint=*)
      ENDPOINT="${1#*=}"
      shift
      ;;
    --metrics-endpoint)
      require_value "$@"
      METRICS_ENDPOINT="$2"
      shift 2
      ;;
    --metrics-endpoint=*)
      METRICS_ENDPOINT="${1#*=}"
      shift
      ;;
    --logs-endpoint)
      require_value "$@"
      LOGS_ENDPOINT="$2"
      shift 2
      ;;
    --logs-endpoint=*)
      LOGS_ENDPOINT="${1#*=}"
      shift
      ;;
    -y|--yes)
      AUTO_YES=true
      shift
      ;;
    -h|--help)
      usage
      exit 0
      ;;
    *)
      log_error "Unknown parameter: $1"
      usage
      exit 1
      ;;
  esac
done
# Derive the ingest base URL: drop one trailing slash, then cut everything from
# the first "/ingest/otlp" onwards (a no-op when that segment is absent).
base_endpoint="${ENDPOINT%/}"
base_endpoint="${base_endpoint%%/ingest/otlp*}"

# Fill in the per-signal endpoints unless they were given on the command line.
: "${METRICS_ENDPOINT:=${base_endpoint}/ingest/metrics/api/v1/write}"
: "${LOGS_ENDPOINT:=${base_endpoint}/ingest/logs/insert}"

# Everything below writes under /opt and /etc/systemd, so require root.
if (( EUID != 0 )); then
  log_error "This script must be run as root"
  exit 1
fi
ARCH="$(uname -m)"
case "${ARCH}" in
x86_64)
ARCH_NODE="amd64"
ARCH_PROCESS="amd64"
@ -40,156 +140,207 @@ case $ARCH in
ARCH_VECTOR="aarch64"
;;
*)
log_error "Unsupported architecture: $ARCH"
log_error "Unsupported architecture: ${ARCH}"
exit 1
;;
esac
# Parse Arguments
ENDPOINT="$DEFAULT_ENDPOINT"
while [[ "$#" -gt 0 ]]; do
case $1 in
--endpoint) ENDPOINT="$2"; shift ;;
*) log_error "Unknown parameter passed: $1"; exit 1 ;;
esac
shift
done
mkdir -p "${BIN_DIR}" "${CONFIG_DIR}" "${DATA_DIR}" "${DATA_DIR}/vector"
log_info "Starting Observability Agent Installation"
log_info "Target Endpoint: $ENDPOINT"
log_info "Installation Directory: $INSTALL_DIR"
# Prepare Directories
mkdir -p "$BIN_DIR" "$CONFIG_DIR" "$DATA_DIR"
# --- 1. Install Node Exporter ---
install_node_exporter() {
if systemctl is-active --quiet node_exporter; then
log_info "Node Exporter is already running. Skipping installation."
return
# Print the first version string reported by a binary's --version output.
#
# Arguments:
#   $1 - path to the binary
#   $2 - extended regex (grep -E) selecting the version text
# Output:
#   The first match on stdout, or nothing when the binary is missing,
#   not executable, or prints no match.
# Returns:
#   Always 0. Callers capture the output with current_version="$(...)" while
#   `set -e` is active; a non-zero status for "not installed" would abort the
#   whole script on a fresh host before anything is downloaded.
version_from_bin() {
  local bin="$1"
  local regex="$2"
  if [[ ! -x "${bin}" ]]; then
    return 0
  fi
log_info "Installing Node Exporter v${NODE_EXPORTER_VERSION}..."
local URL="https://github.com/prometheus/node_exporter/releases/download/v${NODE_EXPORTER_VERSION}/node_exporter-${NODE_EXPORTER_VERSION}.linux-${ARCH_NODE}.tar.gz"
local TMP_DIR=$(mktemp -d)
curl -L --progress-bar "$URL" -o "$TMP_DIR/node_exporter.tar.gz"
tar -xzf "$TMP_DIR/node_exporter.tar.gz" -C "$TMP_DIR"
mv "$TMP_DIR/node_exporter-${NODE_EXPORTER_VERSION}.linux-${ARCH_NODE}/node_exporter" "$BIN_DIR/"
rm -rf "$TMP_DIR"
# Create Systemd Service
cat <<EOF > /etc/systemd/system/node_exporter.service
[Unit]
"${bin}" --version 2>/dev/null | grep -Eo "${regex}" | head -n1 || true
}
# Install a systemd unit file only when its content differs from what is on disk.
#
# Arguments:
#   $1 - unit name without the ".service" suffix
#   $2 - full unit file content (a trailing newline is appended)
# Side effects:
#   Writes /etc/systemd/system/<name>.service with mode 0644 and runs
#   `systemctl daemon-reload` when the file was created or changed.
write_unit_if_changed() {
  local unit_name="$1"
  local content="$2"
  local destination="/etc/systemd/system/${unit_name}.service"
  local staged
  staged="$(mktemp)"
  printf "%s\n" "${content}" > "${staged}"

  # Nothing to do when the unit exists and is byte-identical to the staged copy.
  if [[ -f "${destination}" ]] && cmp -s "${staged}" "${destination}"; then
    rm -f "${staged}"
    return 0
  fi

  install -m 0644 "${staged}" "${destination}"
  systemctl daemon-reload
  rm -f "${staged}"
}
# Download a .tar.gz archive, extract it, and install one binary from it.
#
# Arguments:
#   $1 - archive URL
#   $2 - file name to store the archive under inside the temp directory
#   $3 - path of the binary relative to the extracted archive root
#   $4 - destination path for the binary (installed with mode 0755)
# Returns:
#   0 on success; otherwise the status of the step that failed
#   (curl, tar or install). The temp directory is removed in both cases.
download_tar_binary() {
  local url="$1"
  local archive_name="$2"
  local source_binary_relpath="$3"
  local target_binary="$4"
  local tmp_dir
  local status=0
  tmp_dir="$(mktemp -d)"

  # errexit is suspended inside an && / || list, so a failing step lands in
  # `status` instead of terminating the script before the temp dir is removed.
  curl -fL --progress-bar "${url}" -o "${tmp_dir}/${archive_name}" \
    && tar -xzf "${tmp_dir}/${archive_name}" -C "${tmp_dir}" \
    && install -m 0755 "${tmp_dir}/${source_binary_relpath}" "${target_binary}" \
    || status=$?

  rm -rf "${tmp_dir}"
  if [[ "${status}" -ne 0 ]]; then
    log_error "Failed to download or install ${target_binary} from ${url}"
  fi
  return "${status}"
}
install_node_exporter() {
local current_version
current_version="$(version_from_bin "${BIN_DIR}/node_exporter" '[0-9]+\.[0-9]+\.[0-9]+')"
if [[ "${current_version}" != "${NODE_EXPORTER_VERSION}" ]]; then
log_info "Installing Node Exporter v${NODE_EXPORTER_VERSION} (current: ${current_version:-none})"
download_tar_binary \
"https://github.com/prometheus/node_exporter/releases/download/v${NODE_EXPORTER_VERSION}/node_exporter-${NODE_EXPORTER_VERSION}.linux-${ARCH_NODE}.tar.gz" \
"node_exporter.tar.gz" \
"node_exporter-${NODE_EXPORTER_VERSION}.linux-${ARCH_NODE}/node_exporter" \
"${BIN_DIR}/node_exporter"
else
log_info "Node Exporter already at desired version ${NODE_EXPORTER_VERSION}"
fi
write_unit_if_changed "node_exporter" "[Unit]
Description=Node Exporter
After=network.target
[Service]
User=root
ExecStart=$BIN_DIR/node_exporter
ExecStart=${BIN_DIR}/node_exporter
Restart=always
[Install]
WantedBy=multi-user.target
EOF
WantedBy=multi-user.target"
systemctl daemon-reload
systemctl enable --now node_exporter
log_success "Node Exporter installed and started."
systemctl restart node_exporter
}
# --- 2. Install Process Exporter ---
install_process_exporter() {
if systemctl is-active --quiet process_exporter; then
log_info "Process Exporter is already running. Skipping installation."
return
local current_version
current_version="$(version_from_bin "${BIN_DIR}/process-exporter" '[0-9]+\.[0-9]+\.[0-9]+')"
if [[ "${current_version}" != "${PROCESS_EXPORTER_VERSION}" ]]; then
log_info "Installing Process Exporter v${PROCESS_EXPORTER_VERSION} (current: ${current_version:-none})"
download_tar_binary \
"https://github.com/ncabatoff/process-exporter/releases/download/v${PROCESS_EXPORTER_VERSION}/process-exporter-${PROCESS_EXPORTER_VERSION}.linux-${ARCH_PROCESS}.tar.gz" \
"process_exporter.tar.gz" \
"process-exporter-${PROCESS_EXPORTER_VERSION}.linux-${ARCH_PROCESS}/process-exporter" \
"${BIN_DIR}/process-exporter"
else
log_info "Process Exporter already at desired version ${PROCESS_EXPORTER_VERSION}"
fi
log_info "Installing Process Exporter v${PROCESS_EXPORTER_VERSION}..."
local URL="https://github.com/ncabatoff/process-exporter/releases/download/v${PROCESS_EXPORTER_VERSION}/process-exporter-${PROCESS_EXPORTER_VERSION}.linux-${ARCH_PROCESS}.tar.gz"
local TMP_DIR=$(mktemp -d)
curl -L --progress-bar "$URL" -o "$TMP_DIR/process_exporter.tar.gz"
tar -xzf "$TMP_DIR/process_exporter.tar.gz" -C "$TMP_DIR"
mv "$TMP_DIR/process-exporter-${PROCESS_EXPORTER_VERSION}.linux-${ARCH_PROCESS}/process-exporter" "$BIN_DIR/"
rm -rf "$TMP_DIR"
# Configure Process Exporter
cat <<EOF > "$CONFIG_DIR/process-config.yaml"
cat <<EOF > "${CONFIG_DIR}/process-config.yaml"
process_names:
- name: "{{.Comm}}"
cmdline:
- '.+'
- '.+'
EOF
# Create Systemd Service
cat <<EOF > /etc/systemd/system/process_exporter.service
[Unit]
write_unit_if_changed "process_exporter" "[Unit]
Description=Process Exporter
After=network.target
[Service]
User=root
ExecStart=$BIN_DIR/process-exporter -config.path $CONFIG_DIR/process-config.yaml
ExecStart=${BIN_DIR}/process-exporter -config.path ${CONFIG_DIR}/process-config.yaml
Restart=always
[Install]
WantedBy=multi-user.target
EOF
WantedBy=multi-user.target"
systemctl daemon-reload
systemctl enable --now process_exporter
log_success "Process Exporter installed and started."
systemctl restart process_exporter
}
# --- 3. Install Vector ---
install_vector() {
log_info "Installing Vector v${VECTOR_VERSION}..."
# Vector installation via script is robust but let's use direct binary download to be consistent
local URL="https://packages.timber.io/vector/${VECTOR_VERSION}/vector-${VECTOR_VERSION}-${ARCH_VECTOR}-unknown-linux-gnu.tar.gz"
local TMP_DIR=$(mktemp -d)
curl -L --progress-bar "$URL" -o "$TMP_DIR/vector.tar.gz"
tar -xzf "$TMP_DIR/vector.tar.gz" -C "$TMP_DIR"
mv "$TMP_DIR/vector-${ARCH_VECTOR}-unknown-linux-gnu/bin/vector" "$BIN_DIR/"
rm -rf "$TMP_DIR"
# Configure Vector
log_info "Configuring Vector to push to $ENDPOINT..."
cat <<EOF > "$CONFIG_DIR/vector.yaml"
data_dir: "$DATA_DIR/vector"
write_vector_config() {
cat <<EOF > "${CONFIG_DIR}/vector.yaml"
data_dir: "${DATA_DIR}/vector"
sources:
node_exporter:
type: prometheus_scrape
endpoints:
- http://localhost:9100/metrics
- http://127.0.0.1:9100/metrics
scrape_interval_secs: 15
process_exporter:
type: prometheus_scrape
endpoints:
- http://localhost:9256/metrics
- http://127.0.0.1:9256/metrics
scrape_interval_secs: 15
sinks:
otlp_out:
type: opentelemetry
journald:
type: journald
current_boot_only: true
syslog_files:
type: file
include:
- /var/log/syslog
- /var/log/messages
- /var/log/auth.log
read_from: end
transforms:
add_metric_labels:
type: remap
inputs: ["node_exporter", "process_exporter"]
endpoint: "$ENDPOINT"
protocol: http
source: |
.tags.host = get_hostname!()
.tags.job = "node"
.tags.origin = "vector-agent"
add_log_labels:
type: remap
inputs: ["journald", "syslog_files"]
source: |
.host = get_hostname!()
.job = "node"
.origin = "vector-agent"
.timestamp = now()
sinks:
metrics_out:
type: prometheus_remote_write
inputs: ["add_metric_labels"]
endpoint: "${METRICS_ENDPOINT}"
compression: snappy
healthcheck: false
logs_out:
type: loki
inputs: ["add_log_labels"]
endpoint: "${LOGS_ENDPOINT}"
compression: gzip
encoding:
codec: protobuf
codec: json
labels:
host: "{{ host }}"
job: "{{ job }}"
origin: "{{ origin }}"
EOF
}
# Create Systemd Service
cat <<EOF > /etc/systemd/system/vector.service
[Unit]
install_vector() {
local current_version
current_version="$(version_from_bin "${BIN_DIR}/vector" '[0-9]+\.[0-9]+\.[0-9]+')"
if [[ "${current_version}" != "${VECTOR_VERSION}" ]]; then
log_info "Installing Vector v${VECTOR_VERSION} (current: ${current_version:-none})"
download_tar_binary \
"https://packages.timber.io/vector/${VECTOR_VERSION}/vector-${VECTOR_VERSION}-${ARCH_VECTOR}-unknown-linux-gnu.tar.gz" \
"vector.tar.gz" \
"vector-${ARCH_VECTOR}-unknown-linux-gnu/bin/vector" \
"${BIN_DIR}/vector"
else
log_info "Vector already at desired version ${VECTOR_VERSION}"
fi
write_vector_config
if ! "${BIN_DIR}/vector" validate --no-environment --config-yaml "${CONFIG_DIR}/vector.yaml" >/dev/null 2>&1; then
log_error "Vector config validation failed."
"${BIN_DIR}/vector" validate --no-environment --config-yaml "${CONFIG_DIR}/vector.yaml" || true
exit 1
fi
write_unit_if_changed "vector" "[Unit]
Description=Vector
Documentation=https://vector.dev
After=network-online.target
@ -197,87 +348,85 @@ Requires=network-online.target
[Service]
User=root
ExecStart=$BIN_DIR/vector --config $CONFIG_DIR/vector.yaml
ExecStart=${BIN_DIR}/vector --config ${CONFIG_DIR}/vector.yaml
Restart=always
RestartSec=5
AmbientCapabilities=CAP_NET_BIND_SERVICE
Environment="VECTOR_LOG=info"
Environment=VECTOR_LOG=info
[Install]
WantedBy=multi-user.target
EOF
WantedBy=multi-user.target"
systemctl daemon-reload
systemctl enable --now vector
# Restart to apply new config if it was already running
systemctl restart vector
log_success "Vector installed and started."
}
# --- 4. Verify Installation ---
# Stop, disable and delete the three agent services, then remove INSTALL_DIR.
# Asks for confirmation first; declining logs "Cancelled." and returns 0
# without touching anything.
uninstall_agent() {
  if ! confirm "This will uninstall observability agent components. Continue?"; then
    log_info "Cancelled."
    return 0
  fi

  local svc
  for svc in vector process_exporter node_exporter; do
    # A service that is missing or already stopped is not an error here.
    systemctl disable --now "${svc}" >/dev/null 2>&1 || true
    rm -f "/etc/systemd/system/${svc}.service"
  done

  systemctl daemon-reload
  rm -rf "${INSTALL_DIR}"
  log_success "Agent components uninstalled."
}
verify_installation() {
echo ""
log_info "---------------------------------------------------"
log_info " Verifying Installation..."
log_info "---------------------------------------------------"
# Check Services
check_service() {
local service=$1
if systemctl is-active --quiet "$service"; then
log_success "Service '$service' is running"
sleep 2
log_info "Verifying services..."
for service in node_exporter process_exporter vector; do
if systemctl is-active --quiet "${service}"; then
log_success "Service '${service}' is running"
else
log_fail "Service '$service' is NOT running"
systemctl status "$service" --no-pager | head -n 10
log_fail "Service '${service}' is NOT running"
systemctl status "${service}" --no-pager | head -n 20 || true
fi
}
done
check_service "node_exporter"
check_service "process_exporter"
check_service "vector"
# Check Ports
check_port() {
local port=$1
local name=$2
if ss -tulnA | grep -q ":$port "; then
log_success "Port $port ($name) is listening"
log_info "Checking ports..."
for item in "9100 Node Exporter" "9256 Process Exporter"; do
local port name
port="${item%% *}"
name="${item#* }"
if ss -tuln | grep -q ":${port} "; then
log_success "Port ${port} (${name}) is listening"
else
log_fail "Port $port ($name) is NOT listening"
log_fail "Port ${port} (${name}) is NOT listening"
fi
}
echo ""
log_info "Checking Ports..."
check_port 9100 "Node Exporter"
check_port 9256 "Process Exporter"
# Check Logs
echo ""
log_info "Checking Vector Logs (Last 10 lines)..."
if journalctl -u vector -n 20 --no-pager | grep -iE "error|fail"; then
log_warn "Possible errors found in Vector logs:"
journalctl -u vector -n 20 --no-pager | grep -iE "error|fail"
else
log_success "No recent errors found in Vector logs."
fi
done
}
# --- Check Permissions ---
if [[ $EUID -ne 0 ]]; then
log_error "This script must be run as root"
exit 1
fi
# Install or upgrade node_exporter, process-exporter and Vector, then verify.
# Logs the resolved action and endpoints before doing any work.
deploy_agent() {
  log_info "Action=${ACTION}"
  log_info "Base endpoint=${ENDPOINT}"
  log_info "Metrics endpoint=${METRICS_ENDPOINT}"
  log_info "Logs endpoint=${LOGS_ENDPOINT}"
  # `--action reset` runs uninstall_agent first, which deletes INSTALL_DIR.
  # Recreate the directory tree here so the installers can write binaries and
  # configs regardless of what ran before.
  mkdir -p "${BIN_DIR}" "${CONFIG_DIR}" "${DATA_DIR}" "${DATA_DIR}/vector"
  install_node_exporter
  install_process_exporter
  install_vector
  verify_installation
  log_success "Agent deploy/upgrade complete."
}
# --- Execution ---
install_node_exporter
install_process_exporter
install_vector
verify_installation
echo ""
log_success "---------------------------------------------------"
log_success " Agent installation & verification complete!"
log_success " Data is being pushed to: $ENDPOINT"
log_success "---------------------------------------------------"
# Run the requested lifecycle action; anything unrecognised prints usage and exits 1.
if [[ "${ACTION}" == "deploy" || "${ACTION}" == "upgrade" ]]; then
  deploy_agent
elif [[ "${ACTION}" == "reset" ]]; then
  # reset = uninstall followed by a fresh deploy
  uninstall_agent
  deploy_agent
elif [[ "${ACTION}" == "uninstall" ]]; then
  uninstall_agent
else
  log_error "Unsupported action: ${ACTION}"
  usage
  exit 1
fi

View File

@ -1,153 +1,335 @@
#!/bin/bash
#==============================================================#
# File : install.sh
# Mtime : 2026-02-01
# Desc : Install observability.svc.plus
# Usage : curl ... | bash -s <VERSION> <DOMAIN>
# File : server-install.sh
# Mtime : 2026-02-03
# Desc : observability.svc.plus lifecycle installer
# Usage : curl ... | bash -s -- [options] [VERSION] [DOMAIN]
#==============================================================#
# Default parameters
set -euo pipefail
VERSION="main"
DOMAIN="$(hostname)"
# Handle flags
ACTION="deploy"
AUTO_YES=false
FORCE_RECLONE=false
SKIP_DEPLOY=false
REPO_URL="https://github.com/cloud-neutral-toolkit/observability.svc.plus.git"
REPO_NAME="$(basename "${REPO_URL}" .git)"
INSTALL_DIR="${HOME}/${REPO_NAME}"
RED='\033[0;31m'
GREEN='\033[0;32m'
BLUE='\033[0;34m'
YELLOW='\033[0;33m'
NC='\033[0m'
# Colour-tagged logging helpers. Each prints its first argument to stdout
# after a fixed tag: [INFO] blue, [WARN] yellow, [ERROR] red, [OK] green.
log_info() { echo -e "${BLUE}[INFO]${NC} $1"; }
log_warn() { echo -e "${YELLOW}[WARN]${NC} $1"; }
log_error() { echo -e "${RED}[ERROR]${NC} $1"; }
log_ok() { echo -e "${GREEN}[OK]${NC} $1"; }
# Print the command-line help text (actions, options, examples) to stdout.
usage() {
cat <<EOF
Usage:
bash server-install.sh [options] [VERSION] [DOMAIN]
bash server-install.sh [options] [DOMAIN]
Actions (default: deploy):
--action deploy Deploy or upgrade in place (idempotent)
--action upgrade Same as deploy
--action reset Rebuild from scratch (destructive)
--action uninstall Remove install dir and local systemd units
Options:
-y, --yes Non-interactive mode
--force-reclone Re-clone repo before deploy/upgrade
--skip-deploy Only sync repo/bootstrap/configure, skip deploy.yml
-h, --help Show help
Examples:
curl -fsSL ".../server-install.sh" | bash -s -- observability.svc.plus
curl -fsSL ".../server-install.sh" | bash -s -- --action upgrade observability.svc.plus
curl -fsSL ".../server-install.sh" | bash -s -- --action reset -y observability.svc.plus
EOF
}
# Ask a yes/no question and succeed only on an explicit "y" or "Y".
#
# Arguments:
#   $1 - prompt text; " [y/N] " is appended.
# Globals:
#   AUTO_YES - when "true" the question is skipped and the answer is yes.
# Returns:
#   0 when confirmed, non-zero otherwise (including when no answer can be read).
confirm() {
  local prompt="$1"
  local reply=""
  if [[ "${AUTO_YES}" == "true" ]]; then
    return 0
  fi
  # The documented invocation is `curl ... | bash -s -- ...`, where stdin is the
  # script text itself. Reading the answer from stdin would then swallow the
  # next line of the script, so prefer the controlling terminal when stdin is
  # not one. Fall back to stdin only when no terminal can be opened.
  if [[ ! -t 0 ]] && ( : </dev/tty ) 2>/dev/null; then
    read -r -p "${prompt} [y/N] " reply </dev/tty || return 1
  else
    read -r -p "${prompt} [y/N] " reply || return 1
  fi
  [[ "${reply}" =~ ^[Yy]$ ]]
}
# Make sure INSTALL_DIR holds a checkout of REPO_URL at VERSION.
# An existing git checkout is fetched, switched to VERSION and fast-forwarded.
# Otherwise, or when FORCE_RECLONE is "true", any existing directory is deleted
# and the repository is cloned afresh. Exits 1 when git is unavailable.
ensure_repo() {
  command -v git >/dev/null 2>&1 || {
    log_error "git is not installed."
    exit 1
  }

  # In-place update path: a checkout exists and no re-clone was requested.
  if [[ -d "${INSTALL_DIR}/.git" && "${FORCE_RECLONE}" != "true" ]]; then
    log_info "Updating existing repo at ${INSTALL_DIR}"
    git -C "${INSTALL_DIR}" fetch --prune origin
    git -C "${INSTALL_DIR}" checkout "${VERSION}"
    git -C "${INSTALL_DIR}" pull --ff-only origin "${VERSION}"
    return "$?"
  fi

  # Fresh clone path: clear whatever occupies the target directory first.
  if [[ -d "${INSTALL_DIR}" ]]; then
    log_warn "Removing existing directory before clone: ${INSTALL_DIR}"
    rm -rf "${INSTALL_DIR}"
  fi
  log_info "Cloning ${REPO_URL} (${VERSION}) ..."
  git clone -b "${VERSION}" "${REPO_URL}" "${INSTALL_DIR}"
}
# When running as root, make sure root can SSH to this host with its own key.
# Does nothing (returns 0) for any other user.
ensure_root_ssh_access() {
if [[ "$(id -u)" -ne 0 ]]; then
return 0
fi
log_info "Ensuring root SSH key-based access..."
mkdir -p ~/.ssh
chmod 700 ~/.ssh
# Generate a passphrase-less 2048-bit RSA key pair only if none exists yet.
if [[ ! -f ~/.ssh/id_rsa ]]; then
ssh-keygen -t rsa -b 2048 -f ~/.ssh/id_rsa -N "" -q
fi
local public_key
public_key="$(cat ~/.ssh/id_rsa.pub)"
touch ~/.ssh/authorized_keys
# -F: match the key as a fixed string so it is appended at most once.
if ! grep -qF "${public_key}" ~/.ssh/authorized_keys; then
echo "${public_key}" >> ~/.ssh/authorized_keys
fi
chmod 600 ~/.ssh/authorized_keys
if [[ -f /etc/ssh/sshd_config ]]; then
# Rewrite an active (uncommented) PermitRootLogin directive in place;
# otherwise append one, unless the exact text already appears in the file.
# NOTE(review): the second grep is unanchored, so a commented-out
# "#PermitRootLogin prohibit-password" line also suppresses the append.
if grep -q "^PermitRootLogin" /etc/ssh/sshd_config; then
sed -i 's/^PermitRootLogin.*/PermitRootLogin prohibit-password/' /etc/ssh/sshd_config
elif ! grep -q "PermitRootLogin prohibit-password" /etc/ssh/sshd_config; then
echo "PermitRootLogin prohibit-password" >> /etc/ssh/sshd_config
fi
# The service is named "ssh" or "sshd" depending on the distro; try both.
# A failed reload is deliberately ignored.
systemctl reload ssh >/dev/null 2>&1 || systemctl reload sshd >/dev/null 2>&1 || true
fi
}
# Run the repo's ./bootstrap script when it is executable.
# A repo with only ./configure is accepted (bootstrap is skipped); a repo with
# neither is treated as broken and the script exits 1.
run_bootstrap() {
  cd "${INSTALL_DIR}"

  if [[ -x "./bootstrap" ]]; then
    log_info "Running bootstrap..."
    ./bootstrap
    return "$?"
  fi

  if [[ ! -f "./configure" ]]; then
    log_error "Neither bootstrap nor configure exists in ${INSTALL_DIR}"
    exit 1
  fi

  log_info "No bootstrap found, proceeding with configure."
}
# Run ./configure non-interactively against 127.0.0.1 (when it is executable),
# then rewrite any remaining 10.146.0.6 address in pigsty.yml to 127.0.0.1.
run_configure() {
  cd "${INSTALL_DIR}"

  if [[ -x "./configure" ]]; then
    log_info "Running configure..."
    ./configure -n -i 127.0.0.1
  fi

  # No inventory file means nothing to patch.
  if [[ ! -f "pigsty.yml" ]]; then
    return 0
  fi
  sed -i 's/10\.146\.0\.6/127.0.0.1/g' pigsty.yml
}
# Execute ./deploy.yml from the install directory.
# Skipped with a warning when --skip-deploy was given or when the file is
# missing or not executable.
run_deploy() {
  cd "${INSTALL_DIR}"

  if [[ "${SKIP_DEPLOY}" == "true" ]]; then
    log_warn "Skipping deploy.yml as requested."
    return 0
  fi

  if [[ ! -x "./deploy.yml" ]]; then
    log_warn "deploy.yml not found, skipping."
    return 0
  fi

  log_info "Running deploy.yml ..."
  ./deploy.yml
}
# Expose the local metrics/logs/traces backends through the front-end proxy.
#
# nginx: writes /etc/nginx/conf.d/ingest-observability.inc with exact-match
#        locations that proxy to the backends on 127.0.0.1 (8428, 9428, 10428),
#        includes it from home.conf, validates the config and reloads nginx
#        when it is running. Skipped with a warning when home.conf is absent.
# caddy: rewrites reverse_proxy targets 127.0.0.1:12345 -> :8428 and
#        127.0.0.1:12346 -> :9428 in /etc/caddy/Caddyfile, validates, reloads.
configure_ingest_gateway() {
  local home_conf="/etc/nginx/conf.d/home.conf"
  local ingest_inc="/etc/nginx/conf.d/ingest-observability.inc"
  local caddyfile="/etc/caddy/Caddyfile"

  if [[ -f "${home_conf}" ]]; then
    log_info "Configuring HTTPS ingest routes in nginx..."
    # Quoted heredoc: the nginx $variables below must reach the file unexpanded.
    cat > "${ingest_inc}" <<'EOF'
# managed by scripts/server-install.sh
location = /ingest/metrics/api/v1/write {
proxy_pass http://127.0.0.1:8428/api/v1/write;
proxy_set_header Host $http_host;
proxy_set_header X-Real-IP $remote_addr;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
proxy_set_header X-Forwarded-Proto $scheme;
}
location = /ingest/logs/loki/api/v1/push {
proxy_pass http://127.0.0.1:9428/insert/loki/api/v1/push;
proxy_set_header Host $http_host;
proxy_set_header X-Real-IP $remote_addr;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
proxy_set_header X-Forwarded-Proto $scheme;
}
# Agents are configured with the base URL .../ingest/logs/insert and Vector's
# loki sink appends /loki/api/v1/push to it, so that full path must route too.
location = /ingest/logs/insert/loki/api/v1/push {
proxy_pass http://127.0.0.1:9428/insert/loki/api/v1/push;
proxy_set_header Host $http_host;
proxy_set_header X-Real-IP $remote_addr;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
proxy_set_header X-Forwarded-Proto $scheme;
}
location = /ingest/otlp/v1/traces {
proxy_pass http://127.0.0.1:10428/insert/opentelemetry/v1/traces;
proxy_set_header Host $http_host;
proxy_set_header X-Real-IP $remote_addr;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
proxy_set_header X-Forwarded-Proto $scheme;
}
EOF
    if ! grep -q "include /etc/nginx/conf.d/ingest-observability.inc;" "${home_conf}"; then
      # Keep it near top-level server directives so location blocks are active.
      sed -i '/proxy_request_buffering off;/a\ include /etc/nginx/conf.d/ingest-observability.inc;' "${home_conf}"
      # sed changes nothing when the anchor line is missing; say so instead of
      # reporting a configured gateway that has no routes.
      if ! grep -q "include /etc/nginx/conf.d/ingest-observability.inc;" "${home_conf}"; then
        log_warn "Anchor 'proxy_request_buffering off;' not found in ${home_conf}; ingest include was not added."
      fi
    fi
    nginx -t
    if systemctl is-active --quiet nginx; then
      systemctl reload nginx
    else
      log_warn "nginx is inactive, skip reload."
    fi
    log_ok "Nginx ingest gateway configured."
  else
    log_warn "Nginx home.conf not found, skipping nginx ingest config."
  fi

  if [[ -f "${caddyfile}" ]]; then
    log_info "Configuring ingest reverse proxy in caddy..."
    # Inside single quotes a backslash is literal, so one backslash is the
    # correct escape: '\.' is a literal dot and '\1' the captured prefix.
    # sed back-references are a single digit, so '\1127.0.0.1' is group 1
    # followed by the text "127.0.0.1".
    sed -i -E 's|(reverse_proxy[[:space:]]+)127\.0\.0\.1:12345|\1127.0.0.1:8428|g' "${caddyfile}"
    sed -i -E 's|(reverse_proxy[[:space:]]+)127\.0\.0\.1:12346|\1127.0.0.1:9428|g' "${caddyfile}"
    if command -v caddy >/dev/null 2>&1; then
      caddy validate --config "${caddyfile}"
    fi
    if systemctl is-active --quiet caddy; then
      systemctl reload caddy
    fi
    log_ok "Caddy ingest gateway configured."
  fi
}
# Remove the install directory and the locally installed systemd units
# (pigsty, vmetrics, vlogs, vtraces, grafana-server) after confirmation.
# Declining the prompt logs "Cancelled." and returns 0 without changes.
uninstall_stack() {
  local unit
  local unit_files

  log_warn "Uninstall action will remove local install assets."
  confirm "Continue uninstall?" || { log_info "Cancelled."; return 0; }

  if [[ -d "${INSTALL_DIR}" ]]; then
    rm -rf "${INSTALL_DIR}"
    log_ok "Removed ${INSTALL_DIR}"
  else
    log_info "Install directory does not exist: ${INSTALL_DIR}"
  fi

  # Capture the unit list once instead of piping it into `grep -q` per unit.
  # With `set -o pipefail`, grep -q exiting on its first match can kill
  # systemctl with SIGPIPE and make the pipeline report failure even though
  # the unit exists, which would silently skip its removal.
  unit_files="$(systemctl list-unit-files 2>/dev/null || true)"

  for unit in pigsty vmetrics vlogs vtraces grafana-server; do
    if grep -q "^${unit}\.service" <<<"${unit_files}"; then
      systemctl disable --now "${unit}" >/dev/null 2>&1 || true
      rm -f "/etc/systemd/system/${unit}.service"
    fi
  done

  systemctl daemon-reload
  log_ok "Uninstall cleanup finished."
}
# Full deploy/upgrade pipeline: sync the repo, prepare root SSH access,
# bootstrap, configure, deploy, wire up the ingest gateway, then print the
# access summary for DOMAIN. Any failing step aborts the script (set -e).
deploy_or_upgrade() {
  local step
  local rule="----------------------------------------------------------------"

  for step in \
    ensure_repo \
    ensure_root_ssh_access \
    run_bootstrap \
    run_configure \
    run_deploy \
    configure_ingest_gateway; do
    "${step}"
  done

  log_ok "Deploy/upgrade completed."
  echo -e "${rule}"
  echo -e "Dashboard : https://${DOMAIN}"
  echo -e "user : admin"
  echo -e "Pass : pigsty"
  echo -e "${rule}"
  echo -e "Metrics Ingest : https://${DOMAIN}/ingest/metrics/api/v1/write"
  echo -e "Logs Ingest : https://${DOMAIN}/ingest/logs/insert"
  echo -e "Traces Ingest : https://${DOMAIN}/ingest/otlp/v1/traces"
  echo -e "PromQL Query : http://${DOMAIN}:8428/api/v1/query"
  echo -e "Remote Write : http://${DOMAIN}:8428/api/v1/write"
  echo -e "Grafana : https://${DOMAIN}/grafana"
  echo -e "${rule}"
}
while [[ $# -gt 0 ]]; do
case "$1" in
-y|--yes) AUTO_YES=true; shift ;;
-*) shift ;; # ignore other flags
*) break ;;
--action)
ACTION="$2"
shift 2
;;
--action=*)
ACTION="${1#*=}"
shift
;;
-y|--yes)
AUTO_YES=true
shift
;;
--force-reclone)
FORCE_RECLONE=true
shift
;;
--skip-deploy)
SKIP_DEPLOY=true
shift
;;
-h|--help)
usage
exit 0
;;
-*)
log_error "Unknown option: $1"
usage
exit 1
;;
*)
break
;;
esac
done
if [[ -n "$1" ]]; then
# if $1 looks like a version/branch (main, master, v1.0, etc.)
if [[ $# -ge 1 ]]; then
if [[ "$1" == "main" || "$1" == "master" || "$1" == v[0-9]* ]]; then
VERSION="$1"
DOMAIN="${2:-$(hostname)}"
else
# assume $1 is the DOMAIN
DOMAIN="$1"
fi
fi
REPO_URL="https://github.com/cloud-neutral-toolkit/observability.svc.plus.git"
REPO_NAME=$(basename "${REPO_URL}" .git)
INSTALL_DIR="${HOME}/${REPO_NAME}"
log_info "Repo : ${REPO_URL}"
log_info "Dir : ${INSTALL_DIR}"
log_info "Version : ${VERSION}"
log_info "Domain : ${DOMAIN}"
log_info "Action : ${ACTION}"
# Colors
RED='\033[0;31m'
GREEN='\033[0;32m'
BLUE='\033[0;34m'
NC='\033[0m'
echo -e "${BLUE}Installing ${REPO_NAME}...${NC}"
echo -e "${BLUE}Version : ${VERSION}${NC}"
echo -e "${BLUE}Domain : ${DOMAIN}${NC}"
echo -e "${BLUE}Repo : ${REPO_URL}${NC}"
echo -e "${BLUE}Dir : ${INSTALL_DIR}${NC}"
# Check for git
if ! command -v git &> /dev/null; then
echo -e "${RED}Error: git is not installed.${NC}"
echo "Please install git first (yum install git / apt install git)"
exit 1
fi
# Clone or Update
if [ -d "${INSTALL_DIR}" ]; then
echo -e "${BLUE}Directory ${INSTALL_DIR} already exists.${NC}"
if [ "$AUTO_YES" = true ]; then
REPLY="y"
else
read -p "Overwrite? (y/N) " -n 1 -r
echo
fi
if [[ $REPLY =~ ^[Yy]$ ]]; then
rm -rf "${INSTALL_DIR}"
if ! git clone -b "${VERSION}" "${REPO_URL}" "${INSTALL_DIR}"; then
echo -e "${RED}Error: Failed to clone repository.${NC}"
exit 1
fi
else
echo -e "${BLUE}Updating existing repo...${NC}"
cd "${INSTALL_DIR}"
git fetch origin
if ! git checkout "${VERSION}"; then
echo -e "${RED}Error: Version ${VERSION} not found${NC}"
exit 1
fi
git pull origin "${VERSION}"
fi
else
if ! git clone -b "${VERSION}" "${REPO_URL}" "${INSTALL_DIR}"; then
echo -e "${RED}Error: Failed to clone repository.${NC}"
case "${ACTION}" in
deploy|upgrade)
deploy_or_upgrade
;;
reset)
confirm "Reset will remove and reinstall ${INSTALL_DIR}. Continue?" || {
log_info "Cancelled."
exit 0
}
FORCE_RECLONE=true
deploy_or_upgrade
;;
uninstall)
uninstall_stack
;;
*)
log_error "Unsupported action: ${ACTION}"
usage
exit 1
fi
fi
cd "${INSTALL_DIR}"
# Fix root SSH access if running as root
if [ "$(id -u)" -eq 0 ]; then
echo -e "${BLUE}Ensuring root SSH access...${NC}"
mkdir -p ~/.ssh && chmod 700 ~/.ssh
if [ ! -f ~/.ssh/id_rsa ]; then
ssh-keygen -t rsa -b 2048 -f ~/.ssh/id_rsa -N "" -q
fi
PUBLIC_KEY=$(cat ~/.ssh/id_rsa.pub)
if ! grep -q "$PUBLIC_KEY" ~/.ssh/authorized_keys 2>/dev/null; then
echo "$PUBLIC_KEY" >> ~/.ssh/authorized_keys
chmod 600 ~/.ssh/authorized_keys
fi
# Also ensure SSH daemon allows root login via key
if grep -q "PermitRootLogin" /etc/ssh/sshd_config; then
sed -i 's/^.*PermitRootLogin.*/PermitRootLogin prohibit-password/' /etc/ssh/sshd_config
else
echo "PermitRootLogin prohibit-password" >> /etc/ssh/sshd_config
fi
systemctl reload ssh &>/dev/null || systemctl reload sshd &>/dev/null
fi
# Run Bootstrap
if [ -f "./bootstrap" ]; then
echo -e "${BLUE}Running bootstrap...${NC}"
./bootstrap || { echo -e "${RED}Error: Bootstrap failed${NC}"; exit 1; }
elif [ -f "./configure" ]; then
echo -e "${BLUE}Found configure script, but no bootstrap. Proceeding...${NC}"
else
echo -e "${RED}Warning: Primary setup scripts not found! Check repo content.${NC}"
fi
# Run Configure automatically
if [ -f "./configure" ]; then
echo -e "${BLUE}Running configure...${NC}"
./configure -n -i 127.0.0.1 || { echo -e "${RED}Error: Configure failed${NC}"; exit 1; }
# Reinforced IP safety check for inventory
if [ -f "pigsty.yml" ]; then
echo -e "${BLUE}Reinforcing 127.0.0.1 in pigsty.yml...${NC}"
sed -i 's/10.146.0.6/127.0.0.1/g' pigsty.yml
fi
fi
# Run Deployment automatically
if [ -f "./deploy.yml" ]; then
echo -e "${BLUE}Starting deployment...${NC}"
./deploy.yml || { echo -e "${RED}Error: Deployment failed${NC}"; exit 1; }
fi
echo -e "\n${GREEN}Successfully deployed observability.svc.plus!${NC}"
echo -e "----------------------------------------------------------------"
echo -e "Dashboard : https://${DOMAIN}"
echo -e "user : admin"
echo -e "Pass : pigsty"
echo -e "----------------------------------------------------------------"
echo -e "Otel_endpoint : http://${DOMAIN}:4317"
echo -e "Otel_endpoint : http://${DOMAIN}:4318"
echo -e "----------------------------------------------------------------"
echo -e "查询 (PromQL) : http://${DOMAIN}:8428/api/v1/query"
echo -e "写入 (Remote) : http://${DOMAIN}:8428/api/v1/write"
echo -e "----------------------------------------------------------------"
echo -e "Insight : https://${DOMAIN}/insight"
echo -e "Grafana : https://${DOMAIN}/grafana"
echo -e "----------------------------------------------------------------"
;;
esac

View File

@ -4,7 +4,7 @@ export interface ClientOptions {
}
export function createOpenObserveClient(options: ClientOptions = {}) {
const { baseUrl = 'https://infra.svc.plus/api', token } = options
const { baseUrl = 'https://observability.svc.plus/api', token } = options
async function request<T>(path: string, init?: RequestInit): Promise<T> {
const headers = new Headers(init?.headers)