Refine observability deploy boundaries

This commit is contained in:
Haitao Pan 2026-03-14 20:17:50 +08:00
parent 7382073910
commit e533dcb147
9 changed files with 139 additions and 21 deletions

View File

@ -40,7 +40,7 @@ flowchart LR
当前接入主机:
- `us-xhttp.svc.plus`:继续承载现有服务,同时承载 `observability.svc.plus`
- `clawdbot.svc.plus`:部署 agent,采集后上报到中心端
- `openclaw.svc.plus`:部署 agent,采集后上报到中心端
- `jp-xhttp.svc.plus`:部署 agent,采集后上报到中心端
### Ansible (Recommended)
@ -70,9 +70,9 @@ ansible-playbook -i <your-inventory> deploy_observability_service.yml -l us-xhtt
```bash
ansible-playbook -i <your-inventory> node.yml \
-l clawdbot.svc.plus,jp-xhttp.svc.plus \
-l openclaw.svc.plus,jp-xhttp.svc.plus \
-e node_monitor_mode=push \
-e observability_endpoint=https://observability.svc.plus/ingest/otlp \
-e observability_endpoint=https://observability.svc.plus/ \
-e haproxy_enabled=false
```
@ -80,16 +80,18 @@ ansible-playbook -i <your-inventory> node.yml \
```bash
ansible-playbook -i <your-inventory> node.yml \
-l clawdbot.svc.plus,jp-xhttp.svc.plus \
-l openclaw.svc.plus,jp-xhttp.svc.plus \
-e node_monitor_mode=push \
-e observability_endpoint=https://observability.svc.plus/ingest/otlp \
-e observability_endpoint=https://observability.svc.plus/ \
-e observability_ingest_basic_auth_enabled=true \
-e observability_ingest_basic_auth_user=ingest \
-e observability_ingest_basic_auth_password='<strong-password>' \
-e haproxy_enabled=false
```
> `node_monitor_mode=push` 会在远端主机上部署 `node_exporter + process_exporter + vector`,并把 metrics / logs 主动汇总到 `observability.svc.plus`
> `node_monitor_mode=push` 会在远端主机上部署 `node_exporter + process_exporter + vector`,并把 metrics / logs 主动汇总到 `observability.svc.plus`。`vector` 固定归到采集端任务,服务端 `infra.yml` 不再默认部署它。
>
> 如果采集端与 Victoria 服务端同机,playbook 会自动把 metrics / logs 改走本机 `127.0.0.1` ingest;跨主机时默认走 `https://observability.svc.plus/`,并自动补全 `/ingest/metrics/api/v1/write` 和 `/ingest/logs/insert`。
>
> `observability_ingest_basic_auth_*` 只保护 `/ingest/*` 写入入口,不影响 Caddy 暴露的其他站点页面;服务端和采集端必须使用同一组认证信息。
@ -155,10 +157,10 @@ vi pigsty.yml # adjust domain/password/ports
Default inventory template: `conf/app/deepflow.yml`
### Remote client example (clawdbot.svc.plus)
### Remote client example (openclaw.svc.plus)
```bash
ssh root@clawdbot.svc.plus \
ssh root@openclaw.svc.plus \
'curl -fsSL https://raw.githubusercontent.com/cloud-neutral-toolkit/observability.svc.plus/main/scripts/agent-install.sh \
| bash -s -- --endpoint https://observability.svc.plus/ingest/otlp'
```
@ -174,11 +176,11 @@ ssh root@jp-xhttp.svc.plus \
### Optional SSH manager env example
```bash
SSH_SERVER_CLAWBOT_HOST=clawdbot.svc.plus
SSH_SERVER_CLAWBOT_HOST=openclaw.svc.plus
SSH_SERVER_CLAWBOT_USER=root
SSH_SERVER_CLAWBOT_KEYPATH=~/.ssh/id_rsa
SSH_SERVER_CLAWBOT_PORT=22
SSH_SERVER_CLAWBOT_DESCRIPTION=clawdbot_server
SSH_SERVER_CLAWBOT_DESCRIPTION=openclaw_server
```
## 4) Features

View File

@ -33,9 +33,9 @@
# node.yml -l <ip> # add single node
#
# Observability push-agent mode:
# ./node.yml -l clawdbot.svc.plus,jp-xhttp.svc.plus \
# ./node.yml -l openclaw.svc.plus,jp-xhttp.svc.plus \
# -e node_monitor_mode=push \
# -e observability_endpoint=https://observability.svc.plus/ingest/otlp \
# -e observability_endpoint=https://observability.svc.plus/ \
# -e haproxy_enabled=false
#
# Bootstrap with another admin user: (Create admin with another admin)

View File

@ -64,7 +64,7 @@ certbot_options: '' # certbot extra options
#-----------------------------------------------------------------
# DNS
#-----------------------------------------------------------------
dns_enabled: true # setup dnsmasq on this infra node?
dns_enabled: false # setup dnsmasq on this infra node?
dns_port: 53 # dns server listen port, 53 by default
dns_records: # dynamic dns records resolved by dnsmasq
- "${admin_ip} i.pigsty"

View File

@ -21,10 +21,41 @@
tags: caddy_config
template:
src: caddy/Caddyfile
dest: /etc/caddy/conf.d/observability.caddy
owner: root
group: root
mode: '0644'
notify: reload caddy
# Probe for the main Caddyfile and keep the stat result in
# `caddy_main_config` so that other tasks can branch on
# `caddy_main_config.stat.exists`.
- name: check existing caddy main config
  stat:
    path: /etc/caddy/Caddyfile
  register: caddy_main_config
  tags:
    - caddy_config
# Create a minimal main Caddyfile only when none exists yet: an empty global
# options block followed by an import of every snippet under conf.d.
- name: bootstrap caddy main config when missing
  when: not caddy_main_config.stat.exists
  copy:
    content: |
      {
      }
      import /etc/caddy/conf.d/*.caddy
    dest: /etc/caddy/Caddyfile
    mode: '0644'
    group: root
    owner: root
  notify: reload caddy
  tags:
    - caddy_config
# For a Caddyfile that already existed, make sure it pulls in the conf.d
# snippets; the import line is appended at the end of the file if absent.
- name: ensure caddy main config imports conf.d snippets
  when: caddy_main_config.stat.exists
  lineinfile:
    state: present
    path: /etc/caddy/Caddyfile
    insertafter: EOF
    line: "import /etc/caddy/conf.d/*.caddy"
  notify: reload caddy
  tags:
    - caddy_config
#--------------------------------------------------------------#

View File

@ -63,6 +63,7 @@
#--------------------------------------------------------------#
# dns_config, dns_record, dns_launch
# The DNS tasks are imported statically; the condition below is therefore
# applied to each imported task, skipping them all when dns_enabled is false.
- import_tasks: dns.yml
  tags:
    - dns
  when: dns_enabled|bool
#--------------------------------------------------------------#

View File

@ -1,7 +1,3 @@
{
# debug
}
{{ infra_domain | default('observability.svc.plus') }} {
encode gzip zstd

View File

@ -15,7 +15,7 @@ observability_logs_endpoint: '' # optional override for logs endpoint
observability_ingest_basic_auth_enabled: false
observability_ingest_basic_auth_user: ingest
observability_ingest_basic_auth_password: ''
vector_enabled: true # enable vector log collector?
vector_enabled: false # enable vector log collector? push mode enables it automatically
vector_clean: false # purge vector data dir during init?
vector_data: /data/vector # vector data dir, /data/vector by default
vector_port: 9598 # vector metrics port, 9598 by default

View File

@ -64,6 +64,91 @@
- observability_ingest_basic_auth_password | default('', true) | length > 0
fail_msg: "When observability_ingest_basic_auth_enabled=true in push mode, set observability_ingest_basic_auth_user and observability_ingest_basic_auth_password."
# Reduce the configured collector URL to a bare host name (push mode only).
# The URL is the first non-empty value among observability_endpoint,
# observability_metrics_endpoint and observability_logs_endpoint.
- name: derive observability collector host for push mode
  when: node_monitor_mode | default('pull') == 'push'
  tags:
    - monitor
    - vector
    - process_exporter
  vars:
    # Task-local helper: the endpoint URL the host name is extracted from.
    _collector_url: >-
      {{
        observability_endpoint
        if (observability_endpoint | default('', true) | length > 0)
        else (
          observability_metrics_endpoint
          if (observability_metrics_endpoint | default('', true) | length > 0)
          else observability_logs_endpoint
        )
      }}
  set_fact:
    # Strip, in order: the "scheme://" prefix, everything from the first "/",
    # and everything from the first ":" (the port).
    observability_collector_host: >-
      {{
        _collector_url
        | regex_replace('^[A-Za-z][A-Za-z0-9+.-]*://', '')
        | regex_replace('/.*$', '')
        | regex_replace(':.*$', '')
      }}
# Decide whether the collector host refers to this very machine (push mode
# only). The script exits 0 for "local" and non-zero for "remote or unknown";
# the task itself never fails and never reports a change, it only registers
# the result in `observability_collector_local_check`.
- name: detect whether observability collector is local
  tags: [monitor, vector, process_exporter]
  when: node_monitor_mode | default('pull') == 'push'
  # Read-only probe: also run it under --check so the registered result
  # carries a real return code instead of a skipped placeholder.
  check_mode: false
  shell: |
    set -eu

    # Templated values go through the `quote` filter so that characters such
    # as $, backticks or quotes in operator-supplied names are never
    # interpreted by the shell.
    collector_host={{ observability_collector_host | quote }}
    if [ -z "${collector_host}" ]; then
      exit 1
    fi

    # True when the candidate is non-empty and equals the collector host.
    matches_local_name() {
      local candidate="$1"
      [ -n "${candidate}" ] && [ "${collector_host}" = "${candidate}" ]
    }

    # 1) Name comparison: inventory name, configured nodename, FQDN, short name.
    if matches_local_name {{ inventory_hostname | quote }}; then
      exit 0
    fi

    if matches_local_name {{ nodename | default('', true) | quote }}; then
      exit 0
    fi

    if matches_local_name "$(hostname -f 2>/dev/null || hostname 2>/dev/null || true)"; then
      exit 0
    fi

    if matches_local_name "$(hostname -s 2>/dev/null || true)"; then
      exit 0
    fi

    # 2) Address comparison: IPv4 addresses of this host versus the addresses
    #    the collector host resolves to. Both lists are sorted and
    #    de-duplicated because `comm` requires sorted input.
    local_ips="$(
      {
        hostname -I 2>/dev/null || true
        ip -o -4 addr show scope global 2>/dev/null | awk '{print $4}' | cut -d/ -f1
      } | tr ' ' '\n' | sed '/^$/d' | sort -u
    )"

    resolved_ips="$(
      {
        getent ahostsv4 "${collector_host}" 2>/dev/null | awk '{print $1}' || true
        host "${collector_host}" 2>/dev/null | awk '/has address/ {print $4}' || true
      } | sed '/^$/d' | sort -u
    )"

    # Without data on either side, locality cannot be proven: report non-local.
    [ -n "${local_ips}" ] || exit 1
    [ -n "${resolved_ips}" ] || exit 1

    # Any address present in both lists means the collector is this host.
    if comm -12 <(printf '%s\n' "${local_ips}") <(printf '%s\n' "${resolved_ips}") | grep -q .; then
      exit 0
    fi

    exit 1
  args:
    # Process substitution `<( ... )` and `local` require bash.
    executable: /bin/bash
  register: observability_collector_local_check
  changed_when: false
  failed_when: false
# Publish the probe result as a boolean fact (push mode only): true only when
# the registered locality check finished with return code 0.
- name: expose observability collector locality
  tags: [monitor, vector, process_exporter]
  when: node_monitor_mode | default('pull') == 'push'
  set_fact:
    # A missing result or a result without `rc` (e.g. the probe was skipped)
    # is treated as "not local" instead of raising an undefined-variable error.
    observability_collector_is_local: "{{ ((observability_collector_local_check | default({})).rc | default(1)) == 0 }}"
#--------------------------------------------------------------#
# Register Instance DNS Name [vip_dns]
#--------------------------------------------------------------#
@ -226,5 +311,5 @@
#--------------------------------------------------------------#
- import_tasks: vector.yml
tags: vector
when: vector_enabled|bool
when: vector_enabled|bool or node_monitor_mode | default('pull') == 'push'
...

View File

@ -1,7 +1,10 @@
---
{% set base_endpoint = (observability_endpoint | default('', true) | regex_replace('/+$', '') | regex_replace('/ingest/otlp.*$', '')) %}
{% set metrics_endpoint = observability_metrics_endpoint | default(base_endpoint ~ '/ingest/metrics/api/v1/write', true) %}
{% set logs_endpoint = observability_logs_endpoint | default(base_endpoint ~ '/ingest/logs/insert', true) %}
{% set collector_is_local = observability_collector_is_local | default(false) | bool %}
{% set default_metrics_endpoint = 'http://127.0.0.1:' ~ (vmetrics_port | default(8428) | string) ~ '/api/v1/write' if collector_is_local else base_endpoint ~ '/ingest/metrics/api/v1/write' %}
{% set default_logs_endpoint = 'http://127.0.0.1:' ~ (vlogs_port | default(9428) | string) ~ '/insert' if collector_is_local else base_endpoint ~ '/ingest/logs/insert' %}
{% set metrics_endpoint = observability_metrics_endpoint | default(default_metrics_endpoint, true) %}
{% set logs_endpoint = observability_logs_endpoint | default(default_logs_endpoint, true) %}
data_dir: {{ vector_data }}
api: