diff --git a/README.md b/README.md index 3c80820..ef84c82 100644 --- a/README.md +++ b/README.md @@ -40,7 +40,7 @@ flowchart LR 当前接入主机: - `us-xhttp.svc.plus`:继续承载现有服务,同时承载 `observability.svc.plus` -- `clawdbot.svc.plus`:部署 agent,采集后上报到中心端 +- `openclaw.svc.plus`:部署 agent,采集后上报到中心端 - `jp-xhttp.svc.plus`:部署 agent,采集后上报到中心端 ### Ansible (Recommended) @@ -70,9 +70,9 @@ ansible-playbook -i deploy_observability_service.yml -l us-xhtt ```bash ansible-playbook -i node.yml \ - -l clawdbot.svc.plus,jp-xhttp.svc.plus \ + -l openclaw.svc.plus,jp-xhttp.svc.plus \ -e node_monitor_mode=push \ - -e observability_endpoint=https://observability.svc.plus/ingest/otlp \ + -e observability_endpoint=https://observability.svc.plus/ \ -e haproxy_enabled=false ``` @@ -80,16 +80,18 @@ ansible-playbook -i node.yml \ ```bash ansible-playbook -i node.yml \ - -l clawdbot.svc.plus,jp-xhttp.svc.plus \ + -l openclaw.svc.plus,jp-xhttp.svc.plus \ -e node_monitor_mode=push \ - -e observability_endpoint=https://observability.svc.plus/ingest/otlp \ + -e observability_endpoint=https://observability.svc.plus/ \ -e observability_ingest_basic_auth_enabled=true \ -e observability_ingest_basic_auth_user=ingest \ -e observability_ingest_basic_auth_password='' \ -e haproxy_enabled=false ``` -> `node_monitor_mode=push` 会在远端主机上部署 `node_exporter + process_exporter + vector`,并把 metrics / logs 主动汇总到 `observability.svc.plus`。 +> `node_monitor_mode=push` 会在远端主机上部署 `node_exporter + process_exporter + vector`,并把 metrics / logs 主动汇总到 `observability.svc.plus`。`vector` 固定归到采集端任务,服务端 `infra.yml` 不再默认部署它。 +> +> 如果采集端与 Victoria 服务端同机,playbook 会自动把 metrics / logs 改走本机 `127.0.0.1` ingest;跨主机时默认走 `https://observability.svc.plus/` 并自动补全 `/ingest/metrics/api/v1/write` 和 `/ingest/logs/insert`。 > > `observability_ingest_basic_auth_*` 只保护 `/ingest/*` 写入入口,不影响 Caddy 暴露的其他站点页面;服务端和采集端必须使用同一组认证信息。 @@ -155,10 +157,10 @@ vi pigsty.yml # adjust domain/password/ports Default inventory template: `conf/app/deepflow.yml` -### Remote client example (clawdbot.svc.plus) +### Remote client example (openclaw.svc.plus) ```bash -ssh root@clawdbot.svc.plus \ +ssh root@openclaw.svc.plus \ 'curl -fsSL https://raw.githubusercontent.com/cloud-neutral-toolkit/observability.svc.plus/main/scripts/agent-install.sh \ | bash -s -- --endpoint https://observability.svc.plus/ingest/otlp' ``` @@ -174,11 +176,11 @@ ssh root@jp-xhttp.svc.plus \ ### Optional SSH manager env example ```bash -SSH_SERVER_CLAWBOT_HOST=clawdbot.svc.plus +SSH_SERVER_CLAWBOT_HOST=openclaw.svc.plus SSH_SERVER_CLAWBOT_USER=root SSH_SERVER_CLAWBOT_KEYPATH=~/.ssh/id_rsa SSH_SERVER_CLAWBOT_PORT=22 -SSH_SERVER_CLAWBOT_DESCRIPTION=clawdbot_server +SSH_SERVER_CLAWBOT_DESCRIPTION=openclaw_server ``` ## 4) Features diff --git a/node.yml b/node.yml index 5c58fed..67ec72c 100755 --- a/node.yml +++ b/node.yml @@ -33,9 +33,9 @@ # node.yml -l # add single node # # Observability push-agent mode: -# ./node.yml -l clawdbot.svc.plus,jp-xhttp.svc.plus \ +# ./node.yml -l openclaw.svc.plus,jp-xhttp.svc.plus \ # -e node_monitor_mode=push \ -# -e observability_endpoint=https://observability.svc.plus/ingest/otlp \ +# -e observability_endpoint=https://observability.svc.plus/ \ # -e haproxy_enabled=false # # Bootstrap with another admin user: (Create admin with another admin) diff --git a/roles/infra/defaults/main.yml b/roles/infra/defaults/main.yml index c8c5450..2dae1b8 100644 --- a/roles/infra/defaults/main.yml +++ b/roles/infra/defaults/main.yml @@ -64,7 +64,7 @@ certbot_options: '' # certbot extra options #----------------------------------------------------------------- # DNS #----------------------------------------------------------------- -dns_enabled: true # setup dnsmasq on this infra node? +dns_enabled: false # setup dnsmasq on this infra node? dns_port: 53 # dns server listen port, 53 by default dns_records: # dynamic dns records resolved by dnsmasq - "${admin_ip} i.pigsty" diff --git a/roles/infra/tasks/caddy.yml b/roles/infra/tasks/caddy.yml index de44aeb..28a04e3 100644 --- a/roles/infra/tasks/caddy.yml +++ b/roles/infra/tasks/caddy.yml @@ -21,10 +21,41 @@ tags: caddy_config template: src: caddy/Caddyfile + dest: /etc/caddy/conf.d/observability.caddy + owner: root + group: root + mode: '0644' + notify: reload caddy + +- name: check existing caddy main config + tags: caddy_config + stat: + path: /etc/caddy/Caddyfile + register: caddy_main_config + +- name: bootstrap caddy main config when missing + tags: caddy_config + copy: dest: /etc/caddy/Caddyfile owner: root group: root mode: '0644' + content: | + { + } + + import /etc/caddy/conf.d/*.caddy + when: not caddy_main_config.stat.exists + notify: reload caddy + +- name: ensure caddy main config imports conf.d snippets + tags: caddy_config + lineinfile: + path: /etc/caddy/Caddyfile + line: "import /etc/caddy/conf.d/*.caddy" + insertafter: EOF + state: present + when: caddy_main_config.stat.exists notify: reload caddy #--------------------------------------------------------------# diff --git a/roles/infra/tasks/main.yml b/roles/infra/tasks/main.yml index 3b5ec31..b2dff64 100644 --- a/roles/infra/tasks/main.yml +++ b/roles/infra/tasks/main.yml @@ -63,6 +63,7 @@ #--------------------------------------------------------------# # dns_config, dns_record, dns_launch - import_tasks: dns.yml + when: dns_enabled|bool tags: dns #--------------------------------------------------------------# diff --git a/roles/infra/templates/caddy/Caddyfile b/roles/infra/templates/caddy/Caddyfile index e8c0423..4c7d537 100644 --- a/roles/infra/templates/caddy/Caddyfile +++ b/roles/infra/templates/caddy/Caddyfile @@ -1,7 +1,3 @@ -{ - # debug -} - {{ infra_domain | default('observability.svc.plus') }} { encode gzip zstd diff --git a/roles/node_monitor/defaults/main.yml b/roles/node_monitor/defaults/main.yml index 8eea7df..9245e06 100644 --- a/roles/node_monitor/defaults/main.yml +++ b/roles/node_monitor/defaults/main.yml @@ -15,7 +15,7 @@ observability_logs_endpoint: '' # optional override for logs endpoint observability_ingest_basic_auth_enabled: false observability_ingest_basic_auth_user: ingest observability_ingest_basic_auth_password: '' -vector_enabled: true # enable vector log collector? +vector_enabled: false # enable vector log collector? push mode enables it automatically vector_clean: false # purge vector data dir during init? vector_data: /data/vector # vector data dir, /data/vector by default vector_port: 9598 # vector metrics port, 9598 by default diff --git a/roles/node_monitor/tasks/main.yml b/roles/node_monitor/tasks/main.yml index 8fcc7b4..53e838e 100644 --- a/roles/node_monitor/tasks/main.yml +++ b/roles/node_monitor/tasks/main.yml @@ -64,6 +64,91 @@ - observability_ingest_basic_auth_password | default('', true) | length > 0 fail_msg: "When observability_ingest_basic_auth_enabled=true in push mode, set observability_ingest_basic_auth_user and observability_ingest_basic_auth_password." +- name: derive observability collector host for push mode + tags: [monitor, vector, process_exporter] + when: node_monitor_mode | default('pull') == 'push' + set_fact: + observability_collector_host: >- + {{ + ( + observability_endpoint + if (observability_endpoint | default('', true) | length > 0) + else ( + observability_metrics_endpoint + if (observability_metrics_endpoint | default('', true) | length > 0) + else observability_logs_endpoint + ) + ) + | regex_replace('^[A-Za-z][A-Za-z0-9+.-]*://', '') + | regex_replace('/.*$', '') + | regex_replace(':.*$', '') + }} + +- name: detect whether observability collector is local + tags: [monitor, vector, process_exporter] + when: node_monitor_mode | default('pull') == 'push' + shell: | + set -eu + collector_host="{{ observability_collector_host }}" + + if [ -z "${collector_host}" ]; then + exit 1 + fi + + matches_local_name() { + local candidate="$1" + [ -n "${candidate}" ] && [ "${collector_host}" = "${candidate}" ] + } + + if matches_local_name "{{ inventory_hostname }}"; then + exit 0 + fi + + if matches_local_name "{{ nodename | default('', true) }}"; then + exit 0 + fi + + if matches_local_name "$(hostname -f 2>/dev/null || hostname 2>/dev/null || true)"; then + exit 0 + fi + + if matches_local_name "$(hostname -s 2>/dev/null || true)"; then + exit 0 + fi + + local_ips="$( + { + hostname -I 2>/dev/null || true + ip -o -4 addr show scope global 2>/dev/null | awk '{print $4}' | cut -d/ -f1 + } | tr ' ' '\n' | sed '/^$/d' | sort -u + )" + + resolved_ips="$( + { + getent ahostsv4 "${collector_host}" 2>/dev/null | awk '{print $1}' || true + host "${collector_host}" 2>/dev/null | awk '/has address/ {print $4}' || true + } | sed '/^$/d' | sort -u + )" + + [ -n "${local_ips}" ] || exit 1 + [ -n "${resolved_ips}" ] || exit 1 + + if comm -12 <(printf '%s\n' "${local_ips}") <(printf '%s\n' "${resolved_ips}") | grep -q .; then + exit 0 + fi + + exit 1 + args: { executable: /bin/bash } + register: observability_collector_local_check + changed_when: false + failed_when: false + +- name: expose observability collector locality + tags: [monitor, vector, process_exporter] + when: node_monitor_mode | default('pull') == 'push' + set_fact: + observability_collector_is_local: "{{ observability_collector_local_check.rc == 0 }}" + #--------------------------------------------------------------# # Register Instance DNS Name [vip_dns] #--------------------------------------------------------------# @@ -226,5 +311,5 @@ #--------------------------------------------------------------# - import_tasks: vector.yml tags: vector - when: vector_enabled|bool + when: vector_enabled|bool or node_monitor_mode | default('pull') == 'push' ... diff --git a/roles/node_monitor/templates/vector-push.yaml b/roles/node_monitor/templates/vector-push.yaml index c3a101b..dc5a0cb 100644 --- a/roles/node_monitor/templates/vector-push.yaml +++ b/roles/node_monitor/templates/vector-push.yaml @@ -1,7 +1,10 @@ --- {% set base_endpoint = (observability_endpoint | default('', true) | regex_replace('/+$', '') | regex_replace('/ingest/otlp.*$', '')) %} -{% set metrics_endpoint = observability_metrics_endpoint | default(base_endpoint ~ '/ingest/metrics/api/v1/write', true) %} -{% set logs_endpoint = observability_logs_endpoint | default(base_endpoint ~ '/ingest/logs/insert', true) %} +{% set collector_is_local = observability_collector_is_local | default(false) | bool %} +{% set default_metrics_endpoint = 'http://127.0.0.1:' ~ (vmetrics_port | default(8428) | string) ~ '/api/v1/write' if collector_is_local else base_endpoint ~ '/ingest/metrics/api/v1/write' %} +{% set default_logs_endpoint = 'http://127.0.0.1:' ~ (vlogs_port | default(9428) | string) ~ '/insert' if collector_is_local else base_endpoint ~ '/ingest/logs/insert' %} +{% set metrics_endpoint = observability_metrics_endpoint | default(default_metrics_endpoint, true) %} +{% set logs_endpoint = observability_logs_endpoint | default(default_logs_endpoint, true) %} data_dir: {{ vector_data }} api: