Add mixed-host observability deploy and ingest auth

This commit is contained in:
Haitao Pan 2026-03-14 17:15:54 +08:00
parent f937afe1fd
commit c3fe0324ea
16 changed files with 718 additions and 11 deletions

View File

@ -31,6 +31,70 @@ flowchart LR
## 3) Start
当前推荐按“混合部署到已有主机”的方式执行。
1. 先更新 DNS,把 `observability.svc.plus` 指到 `us-xhttp.svc.plus`
2. 在 `us-xhttp.svc.plus` 上执行下面的 Server side 示例,部署中心端
3. 再到其他已有主机执行下面的 Client side 示例,把采集数据回传到 `observability.svc.plus`
当前接入主机:
- `us-xhttp.svc.plus`:继续承载现有服务,同时承载 `observability.svc.plus`
- `clawdbot.svc.plus`:部署 agent,采集后上报到中心端
- `jp-xhttp.svc.plus`:部署 agent,采集后上报到中心端
### Ansible (Recommended)
#### Server side
先导出 Cloudflare Token,然后在 `us-xhttp.svc.plus` 上执行服务端部署。`deploy_observability_service.yml` 会先把 Cloudflare 上的 `observability.svc.plus` 更新成指向 `us-xhttp.svc.plus` 的非代理记录,再等待公共 DNS 生效后继续部署,这样更容易保证 Caddy 首次自动签发证书成功。
```bash
export CLOUDFLARE_API_TOKEN=...
ansible-playbook -i <your-inventory> deploy_observability_service.yml -l us-xhttp.svc.plus
```
如果希望给 `/ingest/*` 增加一层基础认证,可以在服务端部署时一起打开:
```bash
export CLOUDFLARE_API_TOKEN=...
ansible-playbook -i <your-inventory> deploy_observability_service.yml -l us-xhttp.svc.plus \
-e observability_ingest_basic_auth_enabled=true \
-e observability_ingest_basic_auth_user=ingest \
-e observability_ingest_basic_auth_password='<strong-password>'
```
#### Client side (agent)
再到采集端主机执行 `node.yml` 的 push mode:
```bash
ansible-playbook -i <your-inventory> node.yml \
-l clawdbot.svc.plus,jp-xhttp.svc.plus \
-e node_monitor_mode=push \
-e observability_endpoint=https://observability.svc.plus/ingest/otlp \
-e haproxy_enabled=false
```
如果服务端已开启 ingest 基本认证,采集端也要带上同一组凭据:
```bash
ansible-playbook -i <your-inventory> node.yml \
-l clawdbot.svc.plus,jp-xhttp.svc.plus \
-e node_monitor_mode=push \
-e observability_endpoint=https://observability.svc.plus/ingest/otlp \
-e observability_ingest_basic_auth_enabled=true \
-e observability_ingest_basic_auth_user=ingest \
-e observability_ingest_basic_auth_password='<strong-password>' \
-e haproxy_enabled=false
```
> `node_monitor_mode=push` 会在远端主机上部署 `node_exporter + process_exporter + vector`,并把 metrics / logs 主动汇总到 `observability.svc.plus`。
>
> `observability_ingest_basic_auth_*` 只保护 `/ingest/*` 写入入口,不影响 Caddy 暴露的其他站点页面;服务端和采集端必须使用同一组认证信息。
### Script Installers
### Server side
```bash
@ -99,6 +163,14 @@ ssh root@clawdbot.svc.plus \
| bash -s -- --endpoint https://observability.svc.plus/ingest/otlp'
```
### Remote client example (jp-xhttp.svc.plus)
```bash
ssh root@jp-xhttp.svc.plus \
'curl -fsSL https://raw.githubusercontent.com/cloud-neutral-toolkit/observability.svc.plus/main/scripts/agent-install.sh \
| bash -s -- --endpoint https://observability.svc.plus/ingest/otlp'
```
### Optional SSH manager env example
```bash

View File

@ -0,0 +1,139 @@
---
# Play 1: runs only on the control node (localhost, no fact gathering) and
# points the observability domain at the target host through the Cloudflare
# API. The infra playbook is imported after this play.
- name: Update Cloudflare DNS for observability.svc.plus
hosts: localhost
connection: local
gather_facts: false
vars:
# Zone that is looked up by name to obtain the Cloudflare zone id.
cloudflare_zone_name: svc.plus
cloudflare_api_base: https://api.cloudflare.com/client/v4
# Desired record: <observability_domain> <type> <observability_dns_target>.
observability_domain: observability.svc.plus
observability_dns_target: us-xhttp.svc.plus
observability_dns_type: CNAME
# NOTE(review): ttl 1 presumably means "automatic" in the Cloudflare API — confirm.
observability_dns_ttl: 1
# Sent as the record's "proxied" flag; the README describes this as a non-proxied record.
observability_dns_proxied: false
# Polling budget for the public-DNS wait task: retries x delay (seconds).
dns_wait_retries: 30
dns_wait_delay: 10
tasks:
# Fail fast when the API token is not exported; every Cloudflare request in
# this play reads it from the controller's environment.
- name: Validate Cloudflare token is present in environment
  ansible.builtin.assert:
    fail_msg: "CLOUDFLARE_API_TOKEN must be exported before running this playbook."
    that:
      - (lookup('ansible.builtin.env', 'CLOUDFLARE_API_TOKEN') | length) > 0
# Look the zone up by name; the response is registered for the assertions
# and the set_fact that follow.
- name: Resolve Cloudflare zone id
  register: cloudflare_zone_lookup
  ansible.builtin.uri:
    method: GET
    url: "{{ cloudflare_api_base }}/zones?name={{ cloudflare_zone_name }}"
    return_content: true
    headers:
      Content-Type: application/json
      Authorization: "Bearer {{ lookup('ansible.builtin.env', 'CLOUDFLARE_API_TOKEN') }}"
# The lookup must report success and return at least one zone.
- name: Validate zone lookup result
  ansible.builtin.assert:
    fail_msg: "Unable to resolve Cloudflare zone id for {{ cloudflare_zone_name }}."
    that:
      - cloudflare_zone_lookup.json.success
      - (cloudflare_zone_lookup.json.result | length) > 0
# Keep the id of the first matching zone for the DNS record calls.
- name: Set Cloudflare zone id
  ansible.builtin.set_fact:
    cloudflare_zone_id: "{{ (cloudflare_zone_lookup.json.result | first).id }}"
# Fetch every record currently published under the observability name; the
# delete / create / update tasks all decide from this snapshot.
- name: Query existing observability DNS records
  register: observability_dns_records
  ansible.builtin.uri:
    method: GET
    url: "{{ cloudflare_api_base }}/zones/{{ cloudflare_zone_id }}/dns_records?name={{ observability_domain }}"
    return_content: true
    headers:
      Content-Type: application/json
      Authorization: "Bearer {{ lookup('ansible.builtin.env', 'CLOUDFLARE_API_TOKEN') }}"
# Delete every record under the name whose type differs from the desired one.
# NOTE(review): this also removes unrelated types (e.g. TXT) on that name —
# confirm that is intended.
- name: Remove conflicting observability DNS records with different type
  when: item.type != observability_dns_type
  loop: "{{ observability_dns_records.json.result | default([]) }}"
  loop_control:
    label: "{{ item.type }} {{ item.name }}"
  ansible.builtin.uri:
    method: DELETE
    url: "{{ cloudflare_api_base }}/zones/{{ cloudflare_zone_id }}/dns_records/{{ item.id }}"
    headers:
      Content-Type: application/json
      Authorization: "Bearer {{ lookup('ansible.builtin.env', 'CLOUDFLARE_API_TOKEN') }}"
# Create the record only when the earlier query returned no record of the
# desired type.
- name: Create observability DNS record when missing
  when: >-
    (observability_dns_records.json.result
     | selectattr('type', 'equalto', observability_dns_type)
     | list | length) == 0
  ansible.builtin.uri:
    method: POST
    url: "{{ cloudflare_api_base }}/zones/{{ cloudflare_zone_id }}/dns_records"
    headers:
      Content-Type: application/json
      Authorization: "Bearer {{ lookup('ansible.builtin.env', 'CLOUDFLARE_API_TOKEN') }}"
    # The payload is serialized by hand, so it is sent as a raw body.
    body_format: raw
    body: >-
      {{
        {
          'type': observability_dns_type,
          'name': observability_domain,
          'content': observability_dns_target,
          'ttl': (observability_dns_ttl | int),
          'proxied': (observability_dns_proxied | bool)
        } | to_json
      }}
# Reconcile the first existing record of the desired type when its target or
# its proxied flag differs from the desired state.
#
# Both sides of the proxied comparison are cast with `| bool`: a value passed
# as `-e observability_dns_proxied=false` arrives as the string "false", which
# never equals the API's boolean and would otherwise force a PUT on every run.
# The request body already applies the same cast.
- name: Update observability DNS record when target changes
  ansible.builtin.uri:
    url: "{{ cloudflare_api_base }}/zones/{{ cloudflare_zone_id }}/dns_records/{{ (observability_dns_records.json.result | selectattr('type', 'equalto', observability_dns_type) | list | first).id }}"
    method: PUT
    headers:
      Authorization: "Bearer {{ lookup('ansible.builtin.env', 'CLOUDFLARE_API_TOKEN') }}"
      Content-Type: application/json
    body_format: raw
    body: >-
      {{
        {
          'type': observability_dns_type,
          'name': observability_domain,
          'content': observability_dns_target,
          'ttl': (observability_dns_ttl | int),
          'proxied': (observability_dns_proxied | bool)
        } | to_json
      }}
  when:
    # Listed first so the `| first` lookups below are never reached on an empty list.
    - (observability_dns_records.json.result | selectattr('type', 'equalto', observability_dns_type) | list | length) > 0
    - >
      ((observability_dns_records.json.result | selectattr('type', 'equalto', observability_dns_type) | list | first).content != observability_dns_target)
      or
      (((observability_dns_records.json.result | selectattr('type', 'equalto', observability_dns_type) | list | first).proxied | default(false) | bool) != (observability_dns_proxied | bool))
# Poll Cloudflare's DNS-over-HTTPS JSON endpoint until the CNAME answer equals
# the desired target (compared with a trailing dot appended).
- name: Wait for public DNS to expose observability CNAME
  register: observability_dns_public
  retries: "{{ dns_wait_retries }}"
  delay: "{{ dns_wait_delay }}"
  until:
    - observability_dns_public.status == 200
    - observability_dns_public.json.Status == 0
    - >-
      (observability_dns_public.json.Answer | default([])
       | selectattr('data', 'equalto', observability_dns_target ~ '.')
       | list | length) > 0
  ansible.builtin.uri:
    method: GET
    url: "https://cloudflare-dns.com/dns-query?name={{ observability_domain }}&type=CNAME"
    return_content: true
    headers:
      Accept: application/dns-json
# Print the record that is now expected to be live.
- name: Show effective observability DNS target
  ansible.builtin.debug:
    msg: >-
      {{ observability_domain }} -> {{ observability_dns_target }} proxied={{ observability_dns_proxied }}
# Run the infra playbook with the observability domain as the portal home,
# served by Caddy with nginx disabled.
- import_playbook: infra.yml
  vars:
    caddy_enabled: true
    nginx_enabled: false
    infra_domain: observability.svc.plus
    infra_portal:
      home:
        domain: observability.svc.plus

View File

@ -103,4 +103,13 @@
# - add_logs : register infra as vector logging source
# - add_ds : register infra victoria stack as grafana datasource
#--------------------------------------------------------------#
# Mixed Existing-Host Deployment
#--------------------------------------------------------------#
# Center service example:
# ./infra.yml -l us-xhttp.svc.plus \
# -e infra_domain=observability.svc.plus \
# -e '{"infra_portal":{"home":{"domain":"observability.svc.plus"}}}' \
# -e caddy_enabled=true \
# -e nginx_enabled=false
#--------------------------------------------------------------#
...

View File

@ -32,6 +32,12 @@
# node.yml -l <cls> # add groups
# node.yml -l <ip> # add single node
#
# Observability push-agent mode:
# ./node.yml -l clawdbot.svc.plus,jp-xhttp.svc.plus \
# -e node_monitor_mode=push \
# -e observability_endpoint=https://observability.svc.plus/ingest/otlp \
# -e haproxy_enabled=false
#
# Bootstrap with another admin user: (Create admin with another admin)
# node.yml -t node_admin # create admin user for nodes
# node.yml -t node_admin -k -K -e ansible_user=<another admin>

View File

@ -15,6 +15,10 @@ proxy_env: { no_proxy: "localhost,127.0.0.1,10.0.0.0/8,192.168.0.0/16,*.aliyun.c
infra_portal: # infra services exposed via portal
home : { domain: i.observability } # default home server definition
# Site address used by the Caddy template for the observability portal.
infra_domain: observability.svc.plus
# Optional HTTP basic auth for the /ingest/* paths in the Caddy site block.
observability_ingest_basic_auth_enabled: false
observability_ingest_basic_auth_user: ingest
# Either the plain password or a ready-made hash must be set when auth is
# enabled; a non-empty hash wins, otherwise the role derives a bcrypt hash
# from the plain password.
observability_ingest_basic_auth_password: ''
observability_ingest_basic_auth_password_hash: ''
infra_data: /data/infra # default data path for infrastructure data
infra_services: # home page navigation entries
- { name: Metrics ,url: '/vmetrics/vmui/' ,desc: 'VictoriaMetrics Query UI' ,icon: 'metrics' ,name_cn: '指标查询' ,desc_cn: 'VictoriaMetrics 指标查询界面' }

View File

@ -1,4 +1,27 @@
---
#--------------------------------------------------------------#
# 0. Validate optional ingest auth [auth]
#--------------------------------------------------------------#
# Only checked when ingest auth is enabled: a user name is required, plus
# either a plain password or a precomputed hash.
- name: validate observability ingest basic auth inputs
  assert:
    fail_msg: "When observability_ingest_basic_auth_enabled=true, set observability_ingest_basic_auth_user and either observability_ingest_basic_auth_password or observability_ingest_basic_auth_password_hash."
    that:
      - (observability_ingest_basic_auth_user | default('', true) | length) > 0
      - >-
        (observability_ingest_basic_auth_password | default('', true) | length) > 0
        or (observability_ingest_basic_auth_password_hash | default('', true) | length) > 0
  when: observability_ingest_basic_auth_enabled | default(false) | bool
  tags: auth
# Choose the hash that the Caddy template renders: a supplied hash is used
# as-is, otherwise one is derived from the plain password with bcrypt.
#
# no_log keeps the derived credential hash out of task output and logs.
# NOTE(review): no salt is passed to password_hash, so the derived hash
# presumably changes on every run and re-renders the Caddy config — confirm
# whether a fixed salt or a precomputed hash should be preferred.
- name: build effective observability ingest password hash
  tags: auth
  when: observability_ingest_basic_auth_enabled | default(false) | bool
  no_log: true
  set_fact:
    observability_ingest_basic_auth_password_hash_effective: >-
      {{
        observability_ingest_basic_auth_password_hash
        if (observability_ingest_basic_auth_password_hash | default('', true) | length > 0)
        else (observability_ingest_basic_auth_password | password_hash('bcrypt'))
      }}
#--------------------------------------------------------------#
# 1. Infra User [infra_user]
#--------------------------------------------------------------#

View File

@ -5,6 +5,13 @@
{{ infra_domain | default('observability.svc.plus') }} {
encode gzip zstd
{% if observability_ingest_basic_auth_enabled | default(false) | bool %}
    # Optional basic auth on the ingest write paths only; other routes of this
    # site are not matched. The flag is cast to a boolean because a value
    # passed as key=value extra-vars arrives as a string, and the string
    # "false" would otherwise be truthy and render this block with an empty hash.
    @observability_ingest path /ingest/*
    basic_auth @observability_ingest {
        {{ observability_ingest_basic_auth_user }} {{ observability_ingest_basic_auth_password_hash_effective | default(observability_ingest_basic_auth_password_hash) }}
    }
{% endif %}
# ---- Alloy unified ingest endpoints ----
# Prometheus remote_write

View File

@ -8,6 +8,13 @@ node_exporter_options: '--no-collector.softnet --no-collector.nvme --collector.t
#--------------------------------------------------------------#
# VECTOR
#--------------------------------------------------------------#
node_monitor_mode: pull # pull: central scrape/register, push: remote agent pushes to observability endpoint
observability_endpoint: '' # base endpoint, e.g. https://observability.svc.plus/ingest/otlp
observability_metrics_endpoint: '' # optional override for remote_write endpoint
observability_logs_endpoint: '' # optional override for logs endpoint
observability_ingest_basic_auth_enabled: false # send basic auth from the push-mode vector sinks?
observability_ingest_basic_auth_user: ingest # basic auth user sent by the vector sinks
observability_ingest_basic_auth_password: '' # plain password sent by the vector sinks; required when auth is enabled in push mode
vector_enabled: true # enable vector log collector?
vector_clean: false # purge vector data dir during init?
vector_data: /data/vector # vector data dir, /data/vector by default
@ -15,6 +22,16 @@ vector_port: 9598 # vector metrics port, 9598 by default
vector_read_from: beginning # vector read from beginning or end
vector_log_endpoint: [ infra ] # if defined, sending vector log to this endpoint.
#--------------------------------------------------------------#
# PROCESS EXPORTER
#--------------------------------------------------------------#
process_exporter_enabled: false # enable process_exporter; its tasks are also imported when node_monitor_mode=push
process_exporter_version: 0.7.10 # release version used to build the GitHub download URL
process_exporter_port: 9256 # listen port passed via --web.listen-address
process_exporter_binary: /usr/local/bin/process-exporter # install path, also used as ExecStart in the unit
process_exporter_config_dir: /etc/process-exporter # directory created for the config file
process_exporter_config_file: /etc/process-exporter/process-exporter.yml # rendered config passed via -config.path
#-----------------------------------------------------------------
# NODE_VIP (Reference)
#-----------------------------------------------------------------

View File

@ -45,6 +45,24 @@
loop: '{{ groups["infra"]|default([]) }}'
systemd: name=nginx state=reloaded enabled=yes daemon_reload=yes
# Push mode needs a destination: either the base endpoint, or both explicit
# per-signal endpoints.
- name: validate observability push mode inputs
  assert:
    fail_msg: "Set observability_endpoint, or set both observability_metrics_endpoint and observability_logs_endpoint, when node_monitor_mode=push."
    that:
      - >-
        (observability_endpoint | default('', true) | length) > 0
        or (
          (observability_metrics_endpoint | default('', true) | length) > 0
          and (observability_logs_endpoint | default('', true) | length) > 0
        )
  when: node_monitor_mode | default('pull') == 'push'
  tags: [monitor, vector, process_exporter]
# The agent sends the plain password, so in push mode with auth enabled both
# the user and the password must be non-empty.
- name: validate observability push mode basic auth inputs
  assert:
    fail_msg: "When observability_ingest_basic_auth_enabled=true in push mode, set observability_ingest_basic_auth_user and observability_ingest_basic_auth_password."
    that:
      - (observability_ingest_basic_auth_user | default('', true) | length) > 0
      - (observability_ingest_basic_auth_password | default('', true) | length) > 0
  when:
    - node_monitor_mode | default('pull') == 'push'
    - observability_ingest_basic_auth_enabled | default(false) | bool
  tags: [monitor, vector, process_exporter, auth]
#--------------------------------------------------------------#
# Register Instance DNS Name [vip_dns]
@ -140,6 +158,7 @@
# /infra/targets/node/{{ ip }}.yml
- name: register node as victoria target
tags: [ node_vip, node_register, register, add_metrics ]
when: node_monitor_mode | default('pull') != 'push'
ignore_errors: true
delegate_to: '{{ item }}'
loop: '{{ groups["infra"]|default([]) }}'
@ -168,6 +187,7 @@
- name: register node as ping target
tags: [ node_register, register, add_metrics ]
when: node_monitor_mode | default('pull') != 'push'
ignore_errors: true
delegate_to: '{{ item }}'
loop: '{{ groups["infra"]|default([]) }}'
@ -183,7 +203,7 @@
- name: register node vip as ping target
tags: [ node_vip, node_register, register, add_metrics ]
when: vip_enabled|bool and vip_address is defined and vip_address != ''
when: node_monitor_mode | default('pull') != 'push' and vip_enabled|bool and vip_address is defined and vip_address != ''
ignore_errors: true
delegate_to: '{{ item }}'
loop: '{{ groups["infra"]|default([]) }}'
@ -197,6 +217,9 @@
- labels: { ip: {{ inventory_hostname }} , ins: {{ nodename }} , cls: {{ node_cluster|default('nodes') }}, vip: {{ vip_address }} , job: node-vip }
targets: [ {{ vip_address }} ]
# Install and start process_exporter when it is explicitly enabled or when the
# node runs in push mode (the push-mode vector config scrapes it locally).
- import_tasks: process_exporter.yml
tags: process_exporter
when: process_exporter_enabled | default(false) | bool or node_monitor_mode | default('pull') == 'push'
#--------------------------------------------------------------#
# Vector [vector]

View File

@ -0,0 +1,90 @@
---
#--------------------------------------------------------------#
# Install process_exporter [process_exporter_install]
#--------------------------------------------------------------#
# Ask the target for its machine type; read-only, so never reported as changed.
- name: detect process_exporter architecture
  command: uname -m
  register: process_exporter_uname
  changed_when: false
  tags: [process_exporter, process_exporter_install]

# Translate the kernel machine name into the suffix used by the release
# archives; unknown machines map to an empty string.
- name: map process_exporter architecture
  set_fact:
    process_exporter_arch: "{{ {'x86_64': 'amd64', 'aarch64': 'arm64', 'arm64': 'arm64'}.get(process_exporter_uname.stdout, '') }}"
  tags: [process_exporter, process_exporter_install]

# Stop before any download when no archive suffix could be determined.
- name: validate process_exporter architecture
  assert:
    fail_msg: "Unsupported process_exporter architecture: {{ process_exporter_uname.stdout }}"
    that:
      - (process_exporter_arch | length) > 0
  tags: [process_exporter, process_exporter_install]
# Root-owned, world-readable directory that holds the rendered config file.
- name: ensure process_exporter config directory exists
  file:
    state: directory
    path: "{{ process_exporter_config_dir }}"
    mode: '0755'
    owner: root
    group: root
  tags: [process_exporter, process_exporter_config]
# Fetch the upstream release archive into /tmp on the target.
- name: download process_exporter release archive
  vars:
    process_exporter_archive: "process-exporter-{{ process_exporter_version }}.linux-{{ process_exporter_arch }}.tar.gz"
  get_url:
    dest: "/tmp/{{ process_exporter_archive }}"
    url: "https://github.com/ncabatoff/process-exporter/releases/download/v{{ process_exporter_version }}/{{ process_exporter_archive }}"
    mode: '0644'
  tags: [process_exporter, process_exporter_install]

# Unpack on the target; skipped once the extracted binary already exists.
- name: extract process_exporter release archive
  vars:
    process_exporter_release: "process-exporter-{{ process_exporter_version }}.linux-{{ process_exporter_arch }}"
  unarchive:
    remote_src: true
    src: "/tmp/{{ process_exporter_release }}.tar.gz"
    dest: /tmp
    creates: "/tmp/{{ process_exporter_release }}/process-exporter"
  tags: [process_exporter, process_exporter_install]

# Copy the extracted binary (target-side copy) to its install path.
- name: install process_exporter binary
  vars:
    process_exporter_release: "process-exporter-{{ process_exporter_version }}.linux-{{ process_exporter_arch }}"
  copy:
    remote_src: true
    src: "/tmp/{{ process_exporter_release }}/process-exporter"
    dest: "{{ process_exporter_binary }}"
    mode: '0755'
    owner: root
    group: root
  tags: [process_exporter, process_exporter_install]
# Render the process-exporter configuration from its template.
- name: render process_exporter config
  template:
    src: process_exporter.yml
    dest: "{{ process_exporter_config_file }}"
    mode: '0644'
    owner: root
    group: root
  tags: [process_exporter, process_exporter_config]

# Render the systemd unit into the systemd unit directory.
- name: render process_exporter systemd unit
  template:
    src: process_exporter.svc
    dest: "{{ systemd_dir }}/process_exporter.service"
    mode: '0644'
    owner: root
    group: root
  tags: [process_exporter, process_exporter_config]
# Reload unit files, enable the service at boot, and restart it on every run.
- name: launch process_exporter
  systemd:
    name: process_exporter
    daemon_reload: true
    enabled: true
    state: restarted
  tags: [process_exporter, process_exporter_launch]

# Give the service up to 15 seconds to open its port on loopback.
- name: wait for process_exporter service online
  wait_for:
    host: 127.0.0.1
    port: "{{ process_exporter_port }}"
    timeout: 15
    state: started
  tags: [process_exporter, process_exporter_launch]

View File

@ -46,10 +46,11 @@
with_items:
- { src: vector.svc ,dest: "{{ systemd_dir }}/vector.service" }
- { src: vector.env ,dest: /etc/default/vector }
- { src: vector.yaml ,dest: /etc/vector/vector.yaml }
- { src: "{% if node_monitor_mode | default('pull') == 'push' %}vector-push.yaml{% else %}vector.yaml{% endif %}" ,dest: /etc/vector/vector.yaml }
# Pull mode only: drop the node syslog source definition next to the main
# vector config. Not rendered in push mode.
- name: register node syslog to vector
  when: node_monitor_mode | default('pull') != 'push'
  template:
    src: node.yaml
    dest: /etc/vector/node.yaml
    mode: '0600'
  tags: [node_register, register, add_logs]

View File

@ -0,0 +1,13 @@
# systemd unit template for process-exporter; binary path, port and config
# path are filled in from the process_exporter_* role variables.
[Unit]
Description=Process Exporter
Documentation=https://github.com/ncabatoff/process-exporter
After=network.target
[Service]
User=root
# The listen address has no host part, so it is not restricted to loopback
# (presumably all interfaces — confirm if exposure matters).
ExecStart={{ process_exporter_binary }} --web.listen-address=:{{ process_exporter_port }} -config.path {{ process_exporter_config_file }}
# Restart only after a failure, 5 seconds later.
Restart=on-failure
RestartSec=5
[Install]
WantedBy=multi-user.target

View File

@ -0,0 +1,4 @@
# process-exporter config template. The name value is wrapped in a quoted
# Jinja string so the literal .Comm placeholder survives template rendering
# and reaches process-exporter unchanged.
process_names:
- name: "{{ '{{.Comm}}' }}"
# The '.+' pattern matches any non-empty command line.
cmdline:
- '.+'

View File

@ -0,0 +1,99 @@
---
{# base_endpoint: observability_endpoint without trailing slashes and without the /ingest/otlp suffix (and anything after it). #}
{% set base_endpoint = (observability_endpoint | default('', true) | regex_replace('/+$', '') | regex_replace('/ingest/otlp.*$', '')) %}
{# Per-signal endpoints: an explicit non-empty override wins, otherwise they are derived from base_endpoint. #}
{% set metrics_endpoint = observability_metrics_endpoint | default(base_endpoint ~ '/ingest/metrics/api/v1/write', true) %}
{% set logs_endpoint = observability_logs_endpoint | default(base_endpoint ~ '/ingest/logs/insert', true) %}
data_dir: {{ vector_data }}
api:
enabled: true
sources:
  # Vector's own telemetry.
  internal_metrics:
    scrape_interval_secs: 15
    type: internal_metrics

  # Local exporters, scraped over loopback every 15 seconds.
  node_exporter:
    scrape_interval_secs: 15
    type: prometheus_scrape
    endpoints:
      - http://127.0.0.1:{{ node_exporter_port | default(9100) }}{{ exporter_metrics_path | default('/metrics') }}
  process_exporter:
    scrape_interval_secs: 15
    type: prometheus_scrape
    endpoints:
      - http://127.0.0.1:{{ process_exporter_port | default(9256) }}/metrics

  # Logs: the journal of the current boot plus classic syslog files, tailed
  # from the end.
  journald:
    current_boot_only: true
    type: journald
  syslog_files:
    read_from: end
    type: file
    include:
      - /var/log/syslog
      - /var/log/messages
      - /var/log/auth.log
transforms:
  # Stamp every scraped metric with the identity tags of this node.
  agent_metrics:
    inputs:
      - "node_exporter"
      - "process_exporter"
    type: remap
    source: |
      .tags.host = "{{ ansible_hostname | default(nodename | default(inventory_hostname)) }}"
      .tags.ip = "{{ inventory_hostname }}"
      .tags.ins = "{{ nodename | default(inventory_hostname) }}"
      .tags.cls = "{{ node_cluster | default('nodes') }}"
      .tags.job = "node"
      .tags.origin = "vector-agent"

  # Same identity fields for log events.
  # NOTE(review): .timestamp is overwritten with the processing time, which
  # replaces any timestamp the source set — confirm this is intended.
  agent_logs:
    inputs:
      - "journald"
      - "syslog_files"
    type: remap
    source: |
      .host = "{{ ansible_hostname | default(nodename | default(inventory_hostname)) }}"
      .ip = "{{ inventory_hostname }}"
      .ins = "{{ nodename | default(inventory_hostname) }}"
      .cls = "{{ node_cluster | default('nodes') }}"
      .job = "node"
      .origin = "vector-agent"
      .timestamp = now()
sinks:
  # Expose Vector's own metrics locally on vector_port.
  vector_metrics:
    type: prometheus_exporter
    inputs: ["internal_metrics"]
    address: 0.0.0.0:{{ vector_port }}
    default_namespace: vector

  # Push tagged node/process metrics to the central remote_write endpoint.
  # The auth flag is cast with `| bool` because a key=value extra-var arrives
  # as a string, and the string "false" would otherwise be truthy.
  # Credentials are emitted through to_json so quotes or backslashes in the
  # password cannot break the rendered YAML.
  observability_metrics:
    type: prometheus_remote_write
    inputs: ["agent_metrics"]
    endpoint: "{{ metrics_endpoint }}"
{% if observability_ingest_basic_auth_enabled | default(false) | bool %}
    auth:
      strategy: basic
      user: {{ observability_ingest_basic_auth_user | string | to_json }}
      password: {{ observability_ingest_basic_auth_password | string | to_json }}
{% endif %}
    compression: snappy
    healthcheck: false

  # Push tagged logs through the loki sink to the central logs endpoint.
  observability_logs:
    type: loki
    inputs: ["agent_logs"]
    endpoint: "{{ logs_endpoint }}"
{% if observability_ingest_basic_auth_enabled | default(false) | bool %}
    auth:
      strategy: basic
      user: {{ observability_ingest_basic_auth_user | string | to_json }}
      password: {{ observability_ingest_basic_auth_password | string | to_json }}
{% endif %}
    compression: gzip
    encoding:
      codec: json
    # Quoted Jinja strings keep the sink's own field templates intact.
    labels:
      host: "{{ '{{ host }}' }}"
      job: "{{ '{{ job }}' }}"
      origin: "{{ '{{ origin }}' }}"

View File

@ -36,6 +36,102 @@ log_error() { echo -e "${RED}[ERROR]${NC} $1"; }
log_fail() { echo -e "${RED}[FAIL]${NC} $1"; }
log_warn() { echo -e "${YELLOW}[WARN]${NC} $1"; }
# append_unique VALUE ARRAY_NAME
# Append VALUE to the array named ARRAY_NAME unless VALUE is empty or already
# present. Always returns 0.
append_unique() {
  local value="$1"
  local -n target_ref="$2"   # nameref to the caller's array
  local existing

  if [[ -n "${value}" ]]; then
    # ":-" keeps the expansion safe for an empty array under `set -u`.
    for existing in "${target_ref[@]:-}"; do
      [[ "${existing}" != "${value}" ]] || return 0
    done
    target_ref+=("${value}")
  fi
}
# collect_local_ipv4s
# Print the distinct IPv4 addresses of this host, one per line, gathered from
# `hostname -I` and `ip -o -4 addr show scope global` (whichever is available).
# Prints nothing when no address is found.
collect_local_ipv4s() {
  local ips=()
  local ip
  # `hostname -I` also lists IPv6 addresses; keep dotted-quad values only.
  local ipv4_re='^[0-9]{1,3}(\.[0-9]{1,3}){3}$'

  if command -v hostname >/dev/null 2>&1; then
    for ip in $(hostname -I 2>/dev/null || true); do
      [[ "${ip}" =~ ${ipv4_re} ]] || continue
      append_unique "${ip}" ips
    done
  fi

  if command -v ip >/dev/null 2>&1; then
    while read -r ip; do
      [[ "${ip}" =~ ${ipv4_re} ]] || continue
      append_unique "${ip}" ips
    done < <(ip -o -4 addr show scope global 2>/dev/null | awk '{print $4}' | cut -d/ -f1)
  fi

  # Guarded: expanding an empty array would print a blank line, and fails
  # under `set -u` on bash older than 4.4.
  if [[ ${#ips[@]} -gt 0 ]]; then
    printf '%s\n' "${ips[@]}"
  fi
}
# resolve_ipv4s HOST
# Print the distinct IPv4 addresses HOST resolves to, one per line. Uses
# `getent ahostsv4` first and falls back to `host` only when that produced
# nothing. Prints nothing when resolution fails.
resolve_ipv4s() {
  local host="$1"
  local ips=()
  local ip

  if command -v getent >/dev/null 2>&1; then
    # The address is the first column; the remaining columns are ignored.
    while read -r ip _; do
      append_unique "${ip}" ips
    done < <(getent ahostsv4 "${host}" 2>/dev/null || true)
  fi

  if [[ ${#ips[@]} -eq 0 ]] && command -v host >/dev/null 2>&1; then
    while read -r ip; do
      append_unique "${ip}" ips
    done < <(host "${host}" 2>/dev/null | awk '/has address/ {print $4}')
  fi

  # Guarded: expanding an empty array would print a blank line, and fails
  # under `set -u` on bash older than 4.4.
  if [[ ${#ips[@]} -gt 0 ]]; then
    printf '%s\n' "${ips[@]}"
  fi
}
# extract_host_from_url URL
# Print the host part of URL. Strips the scheme, any path/query/fragment,
# userinfo ("user:pass@") and the port. A bracketed IPv6 literal is returned
# without its brackets. A value without a scheme is treated as host[:port].
extract_host_from_url() {
  local url="$1"
  url="${url#*://}"        # drop scheme
  url="${url%%[/?#]*}"     # drop path, query and fragment
  url="${url##*@}"         # drop userinfo so "user:pass@host" is not cut at "user"
  if [[ "${url}" == \[*\]* ]]; then
    # [v6-literal]:port — keep what is inside the brackets.
    url="${url#\[}"
    url="${url%%\]*}"
  else
    url="${url%%:*}"       # drop port
  fi
  printf '%s\n' "${url}"
}
# endpoint_targets_local_host HOST
# Return 0 when HOST names this machine: it equals the local FQDN or short
# hostname, or one of its resolved IPv4 addresses is a local IPv4 address.
# Return 1 otherwise, including when either address list is empty.
endpoint_targets_local_host() {
  local host="$1"
  local local_host local_short addr
  local local_ips=()
  local resolved_ips=()
  local -A local_ip_set=()

  local_host="$(hostname -f 2>/dev/null || hostname)"
  local_short="${local_host%%.*}"
  case "${host}" in
    "${local_host}" | "${local_short}") return 0 ;;
  esac

  while read -r addr; do
    append_unique "${addr}" local_ips
  done < <(collect_local_ipv4s)
  while read -r addr; do
    append_unique "${addr}" resolved_ips
  done < <(resolve_ipv4s "${host}")

  if (( ${#local_ips[@]} == 0 || ${#resolved_ips[@]} == 0 )); then
    return 1
  fi

  # Index the local addresses, then probe each resolved address against them.
  for addr in "${local_ips[@]}"; do
    local_ip_set["${addr}"]=1
  done
  for addr in "${resolved_ips[@]}"; do
    if [[ -n "${local_ip_set["${addr}"]:-}" ]]; then
      return 0
    fi
  done
  return 1
}
usage() {
cat <<EOF
Usage:
@ -177,10 +273,9 @@ if [[ "${DEEPFLOW_AGENT_ENABLED}" == "true" && -z "${DEEPFLOW_GRPC_ENDPOINT}" ]]
DEEPFLOW_GRPC_ENDPOINT="deepflow-agent.${base_endpoint#*://}:443"
fi
# observability server should bypass external HTTPS ingress for local self-monitoring
local_host="$(hostname -f 2>/dev/null || hostname)"
local_short="${local_host%%.*}"
if [[ "${local_host}" == "observability.svc.plus" || "${local_short}" == "observability" ]]; then
collector_host="$(extract_host_from_url "${base_endpoint}")"
if endpoint_targets_local_host "${collector_host}"; then
log_info "Collector endpoint resolves to this host; using local ingest ports for self-monitoring."
if [[ "${METRICS_ENDPOINT_SET}" == "false" ]]; then
METRICS_ENDPOINT="http://127.0.0.1:8428/api/v1/write"
fi

View File

@ -30,6 +30,86 @@ log_warn() { echo -e "${YELLOW}[WARN]${NC} $1"; }
log_error() { echo -e "${RED}[ERROR]${NC} $1"; }
log_ok() { echo -e "${GREEN}[OK]${NC} $1"; }
# append_unique VALUE ARRAY_NAME
# Append VALUE to the array named ARRAY_NAME unless VALUE is empty or already
# present. Always returns 0.
append_unique() {
  local value="$1"
  local -n target_ref="$2"   # nameref to the caller's array
  local existing

  if [[ -n "${value}" ]]; then
    # ":-" keeps the expansion safe for an empty array under `set -u`.
    for existing in "${target_ref[@]:-}"; do
      [[ "${existing}" != "${value}" ]] || return 0
    done
    target_ref+=("${value}")
  fi
}
# collect_local_ipv4s
# Print the distinct IPv4 addresses of this host, one per line, gathered from
# `hostname -I` and `ip -o -4 addr show scope global` (whichever is available).
# Prints nothing when no address is found.
collect_local_ipv4s() {
  local ips=()
  local ip
  # `hostname -I` also lists IPv6 addresses; keep dotted-quad values only.
  local ipv4_re='^[0-9]{1,3}(\.[0-9]{1,3}){3}$'

  if command -v hostname >/dev/null 2>&1; then
    for ip in $(hostname -I 2>/dev/null || true); do
      [[ "${ip}" =~ ${ipv4_re} ]] || continue
      append_unique "${ip}" ips
    done
  fi

  if command -v ip >/dev/null 2>&1; then
    while read -r ip; do
      [[ "${ip}" =~ ${ipv4_re} ]] || continue
      append_unique "${ip}" ips
    done < <(ip -o -4 addr show scope global 2>/dev/null | awk '{print $4}' | cut -d/ -f1)
  fi

  # Guarded: expanding an empty array would print a blank line, and fails
  # under `set -u` on bash older than 4.4.
  if [[ ${#ips[@]} -gt 0 ]]; then
    printf '%s\n' "${ips[@]}"
  fi
}
# resolve_ipv4s HOST
# Print the distinct IPv4 addresses HOST resolves to, one per line. Uses
# `getent ahostsv4` first and falls back to `host` only when that produced
# nothing. Prints nothing when resolution fails.
resolve_ipv4s() {
  local host="$1"
  local ips=()
  local ip

  if command -v getent >/dev/null 2>&1; then
    # The address is the first column; the remaining columns are ignored.
    while read -r ip _; do
      append_unique "${ip}" ips
    done < <(getent ahostsv4 "${host}" 2>/dev/null || true)
  fi

  if [[ ${#ips[@]} -eq 0 ]] && command -v host >/dev/null 2>&1; then
    while read -r ip; do
      append_unique "${ip}" ips
    done < <(host "${host}" 2>/dev/null | awk '/has address/ {print $4}')
  fi

  # Guarded: expanding an empty array would print a blank line, and fails
  # under `set -u` on bash older than 4.4.
  if [[ ${#ips[@]} -gt 0 ]]; then
    printf '%s\n' "${ips[@]}"
  fi
}
# domain_points_to_local_host HOST
# Return 0 when at least one IPv4 address HOST resolves to is also a local
# IPv4 address of this machine; return 1 otherwise, including when either
# address list is empty.
domain_points_to_local_host() {
  local host="$1"
  local addr
  local local_ips=()
  local resolved_ips=()
  local -A local_ip_set=()

  while read -r addr; do
    append_unique "${addr}" local_ips
  done < <(collect_local_ipv4s)
  while read -r addr; do
    append_unique "${addr}" resolved_ips
  done < <(resolve_ipv4s "${host}")

  if (( ${#local_ips[@]} == 0 || ${#resolved_ips[@]} == 0 )); then
    return 1
  fi

  # Index the local addresses, then probe each resolved address against them.
  for addr in "${local_ips[@]}"; do
    local_ip_set["${addr}"]=1
  done
  for addr in "${resolved_ips[@]}"; do
    if [[ -n "${local_ip_set["${addr}"]:-}" ]]; then
      return 0
    fi
  done
  return 1
}
usage() {
cat <<EOF
Usage:
@ -52,6 +132,10 @@ Examples:
curl -fsSL ".../server-install.sh" | bash -s -- observability.svc.plus
curl -fsSL ".../server-install.sh" | bash -s -- --action upgrade observability.svc.plus
curl -fsSL ".../server-install.sh" | bash -s -- --action reset -y observability.svc.plus
Notes:
DOMAIN is the public ingress domain. The current machine may still be named
us-xhttp.svc.plus while serving traffic for observability.svc.plus.
EOF
}
@ -151,7 +235,27 @@ run_configure() {
else
sed -i '/vars:/a \ caddy_enabled: true' pigsty.yml
fi
if grep -q "infra_domain:" pigsty.yml; then
sed -i -E "s#^([[:space:]]*infra_domain:).*#\\1 ${DOMAIN}#" pigsty.yml
else
sed -i "/caddy_enabled:/a\\ infra_domain: ${DOMAIN}" pigsty.yml
fi
if grep -qE '^([[:space:]]*)home[[:space:]]*:[[:space:]]*\{[[:space:]]*domain:' pigsty.yml; then
sed -i -E "s#^([[:space:]]*home[[:space:]]*:[[:space:]]*\\{[[:space:]]*domain:[[:space:]]*)[^,}]+(.*)#\\1${DOMAIN}\\2#" pigsty.yml
fi
fi
}
# check_dns_preflight
# Advisory check only: report whether DOMAIN currently resolves to this
# machine. A mismatch produces warnings but does not stop the deployment.
check_dns_preflight() {
  if ! domain_points_to_local_host "${DOMAIN}"; then
    log_warn "DNS preflight: ${DOMAIN} does not currently resolve to this host."
    log_warn "Recommended order: update DNS first, then deploy the server on this machine."
  else
    log_ok "DNS preflight passed: ${DOMAIN} resolves to this host."
    return 0
  fi
}
run_deploy() {
@ -184,7 +288,7 @@ location = /ingest/metrics/api/v1/write {
proxy_set_header X-Forwarded-Proto $scheme;
}
location = /ingest/logs/loki/api/v1/push {
location = /ingest/logs/insert/loki/api/v1/push {
proxy_pass http://127.0.0.1:9428/insert/loki/api/v1/push;
proxy_set_header Host $http_host;
proxy_set_header X-Real-IP $remote_addr;
@ -261,6 +365,7 @@ deploy_or_upgrade() {
ensure_repo
ensure_root_ssh_access
check_dns_preflight
run_bootstrap
run_configure
run_deploy