observability.svc.plus/roles/node_monitor/tasks/main.yml
2026-03-14 20:17:50 +08:00

316 lines
12 KiB
YAML

---
#--------------------------------------------------------------#
# Register HAProxy to Nginx [haproxy_register]
#--------------------------------------------------------------#
# nginx registration is idempotent across multiple meta nodes
# Publish this node's HAProxy through nginx on every host of the "infra"
# group: an upstream snippet pointing at inventory_hostname:haproxy_exporter_port
# and a /haproxy/<nodename>/ location that proxies to it, followed by an
# nginx reload. The whole block is skipped unless haproxy_enabled is true.
- name: register haproxy instance to nginx
  tags:
    - haproxy_register
    - register_nginx
    - register
    - add_proxy
  when: haproxy_enabled|bool
  become: true
  block:

    # directory that holds the per-node snippets written by the tasks below
    - name: create nginx config dir for haproxy
      run_once: true
      delegate_to: '{{ item }}'
      loop: '{{ groups["infra"]|default([]) }}'
      file:
        path: /etc/nginx/conf.d/haproxy
        state: directory
        owner: root

    # /etc/nginx/conf.d/haproxy/upstream-{{ nodename }}.conf
    - name: register haproxy upstream to nginx
      delegate_to: '{{ item }}'
      loop: '{{ groups["infra"]|default([]) }}'
      copy:
        dest: /etc/nginx/conf.d/haproxy/upstream-{{ nodename }}.conf
        content: |
          upstream haproxy-{{ nodename }} {
              server {{ inventory_hostname }}:{{ haproxy_exporter_port }} max_fails=0;
          }

    # /etc/nginx/conf.d/haproxy/location-{{ nodename }}.conf
    - name: register haproxy url location to nginx
      delegate_to: '{{ item }}'
      loop: '{{ groups["infra"]|default([]) }}'
      copy:
        dest: /etc/nginx/conf.d/haproxy/location-{{ nodename }}.conf
        content: |
          location ^~/haproxy/{{ nodename }}/ {
              proxy_pass http://haproxy-{{ nodename }};
              proxy_connect_timeout 1;
          }

    # reload meta node nginx (maybe not appropriate here)
    - name: reload nginx to finish haproxy register
      delegate_to: '{{ item }}'
      run_once: true
      loop: '{{ groups["infra"]|default([]) }}'
      systemd:
        name: nginx
        state: reloaded
        enabled: true
        daemon_reload: true
# Push mode needs a destination: either the combined observability_endpoint,
# or both the metrics endpoint and the logs endpoint. Abort early otherwise.
- name: validate observability push mode inputs
  tags:
    - monitor
    - vector
    - process_exporter
  when: node_monitor_mode | default('pull') == 'push'
  assert:
    that:
      # default('', true) also maps an empty/none value to '' before length
      - >-
        observability_endpoint | default('', true) | length > 0
        or (observability_metrics_endpoint | default('', true) | length > 0
        and observability_logs_endpoint | default('', true) | length > 0)
    fail_msg: >-
      Set observability_endpoint, or set both observability_metrics_endpoint
      and observability_logs_endpoint, when node_monitor_mode=push.
# When basic auth for ingestion is switched on in push mode, both the user
# and the password must be non-empty.
- name: validate observability push mode basic auth inputs
  tags:
    - monitor
    - vector
    - process_exporter
    - auth
  when: >-
    node_monitor_mode | default('pull') == 'push'
    and observability_ingest_basic_auth_enabled | default(false) | bool
  assert:
    that:
      - observability_ingest_basic_auth_user | default('', true) | length > 0
      - observability_ingest_basic_auth_password | default('', true) | length > 0
    fail_msg: >-
      When observability_ingest_basic_auth_enabled=true in push mode,
      set observability_ingest_basic_auth_user and
      observability_ingest_basic_auth_password.
# Reduce the configured push endpoint to a bare host name and store it in
# observability_collector_host.
# Source precedence: observability_endpoint, then
# observability_metrics_endpoint, then observability_logs_endpoint.
# The filters strip, in order: URL scheme, path, userinfo ("user:pass@"),
# and finally everything from the first colon on (the port).
# Userinfo has to go before the port filter; otherwise
# "https://user:pass@host:8428/x" would be cut down to "user".
# NOTE(review): a bracketed IPv6 literal such as "[::1]:8428" is still cut at
# its first colon by the last filter - confirm IPv6 endpoints are not used.
- name: derive observability collector host for push mode
  tags: [monitor, vector, process_exporter]
  when: node_monitor_mode | default('pull') == 'push'
  set_fact:
    observability_collector_host: >-
      {{
        (
          observability_endpoint
          if (observability_endpoint | default('', true) | length > 0)
          else (
            observability_metrics_endpoint
            if (observability_metrics_endpoint | default('', true) | length > 0)
            else observability_logs_endpoint
          )
        )
        | regex_replace('^[A-Za-z][A-Za-z0-9+.-]*://', '')
        | regex_replace('/.*$', '')
        | regex_replace('^[^@]*@', '')
        | regex_replace(':.*$', '')
      }}
# Probe whether observability_collector_host refers to this very node.
# The script exits 0 ("local") when the collector host equals the inventory
# hostname, nodename, the FQDN/short hostname, or when one of its resolved
# IPv4 addresses is also a local address; it exits 1 otherwise, including
# when the host is empty or nothing could be resolved.
# The result is only registered: failed_when/changed_when are false, so a
# non-zero exit never fails or "changes" the play.
# check_mode is disabled because the probe only reads state and the
# registered rc is consumed by the following set_fact task, so it must also
# run under --check.
- name: detect whether observability collector is local
  tags: [monitor, vector, process_exporter]
  when: node_monitor_mode | default('pull') == 'push'
  check_mode: false
  shell: |
    set -eu
    collector_host="{{ observability_collector_host }}"
    if [ -z "${collector_host}" ]; then
      exit 1
    fi
    matches_local_name() {
      local candidate="$1"
      [ -n "${candidate}" ] && [ "${collector_host}" = "${candidate}" ]
    }
    if matches_local_name "{{ inventory_hostname }}"; then
      exit 0
    fi
    if matches_local_name "{{ nodename | default('', true) }}"; then
      exit 0
    fi
    if matches_local_name "$(hostname -f 2>/dev/null || hostname 2>/dev/null || true)"; then
      exit 0
    fi
    if matches_local_name "$(hostname -s 2>/dev/null || true)"; then
      exit 0
    fi
    local_ips="$(
      {
        hostname -I 2>/dev/null || true
        ip -o -4 addr show scope global 2>/dev/null | awk '{print $4}' | cut -d/ -f1
      } | tr ' ' '\n' | sed '/^$/d' | sort -u
    )"
    resolved_ips="$(
      {
        getent ahostsv4 "${collector_host}" 2>/dev/null | awk '{print $1}' || true
        host "${collector_host}" 2>/dev/null | awk '/has address/ {print $4}' || true
      } | sed '/^$/d' | sort -u
    )"
    [ -n "${local_ips}" ] || exit 1
    [ -n "${resolved_ips}" ] || exit 1
    if comm -12 <(printf '%s\n' "${local_ips}") <(printf '%s\n' "${resolved_ips}") | grep -q .; then
      exit 0
    fi
    exit 1
  # process substitution (<(...)) and "local" require bash rather than sh
  args: { executable: /bin/bash }
  register: observability_collector_local_check
  changed_when: false
  failed_when: false
# Turn the probe's exit status into the observability_collector_is_local
# fact: true only when the registered rc is 0.
# A result without an rc (the probe did not actually execute) falls back to
# 1, i.e. "not local", instead of raising an undefined-attribute error.
- name: expose observability collector locality
  tags: [monitor, vector, process_exporter]
  when: node_monitor_mode | default('pull') == 'push'
  set_fact:
    observability_collector_is_local: "{{ (observability_collector_local_check.rc | default(1)) == 0 }}"
#--------------------------------------------------------------#
# Register Instance DNS Name [vip_dns]
#--------------------------------------------------------------#
# render to temp file first, then atomic mv to avoid dnsmasq inotify race
# use {{ node_cluster }}.vip as filename to avoid conflict with pg_cluster dns
# Write the "<vip_address> <node_cluster><suffix>" record to a hidden temp
# file under /infra/hosts on every "infra" host. The file is only staged
# here; errors are ignored, so a failing infra host does not stop the play.
- name: render node vip dns name
  tags: [ node_vip, vip_dns, add_dns ]
  when: vip_enabled|bool
  delegate_to: '{{ item }}'
  loop: "{{ groups['infra'] | default([]) }}"
  ignore_errors: true
  copy:
    dest: /infra/hosts/.{{ node_cluster }}.vip.tmp
    # quoted so the mode never depends on YAML octal typing; matches the
    # quoted modes used by the other copy tasks in this file
    mode: '0644'
    owner: root
    group: root
    content: "{{ vip_address }} {{ node_cluster }}{{ vip_dns_suffix|default('') }}"
# Relabel the staged temp file (chcon errors are silenced) and move it to
# its final name /infra/hosts/<node_cluster>.vip on every "infra" host.
# Best effort: "|| true" plus ignore_errors keep failures from stopping the play.
- name: activate node vip dns name
  tags:
    - node_vip
    - vip_dns
    - add_dns
  when: vip_enabled|bool
  ignore_errors: true
  delegate_to: '{{ item }}'
  loop: "{{ groups['infra'] | default([]) }}"
  shell: >-
    chcon -t dnsmasq_etc_t /infra/hosts/.{{ node_cluster }}.vip.tmp 2>/dev/null;
    mv /infra/hosts/.{{ node_cluster }}.vip.tmp /infra/hosts/{{ node_cluster }}.vip || true
  args:
    executable: /bin/bash
#--------------------------------------------------------------#
# Config node_exporter [node_exporter_config]
#--------------------------------------------------------------#
# Install the node_exporter systemd unit and its /etc/default options file.
- name: config node_exporter
  tags:
    - node_exporter
    - node_exporter_config
  block:

    # unit file shipped with the role, placed under systemd_dir
    - name: config node_exporter systemd unit
      copy:
        src: node_exporter.svc
        dest: '{{ systemd_dir }}/node_exporter.service'

    # listen address, metrics path and extra options as one quoted variable
    - name: config default node_exporter options
      copy:
        dest: /etc/default/node_exporter
        content: |
          NODE_EXPORTER_OPTS="--web.listen-address=':{{ node_exporter_port }}' --web.telemetry-path='{{ exporter_metrics_path }}' {{ node_exporter_options }}"
#--------------------------------------------------------------#
# Launch node_exporter [node_exporter_launch]
#--------------------------------------------------------------#
# Restart and enable node_exporter, then wait up to 10 seconds for its port
# to accept connections on localhost. Skipped unless node_exporter_enabled.
- name: launch node_exporter
  tags:
    - node_exporter
    - node_exporter_launch
  when: node_exporter_enabled|bool
  block:

    - name: launch node_exporter systemd service
      systemd:
        name: node_exporter
        state: restarted
        enabled: true
        daemon_reload: true

    - name: wait for node_exporter service online
      wait_for:
        host: 127.0.0.1
        port: '{{ node_exporter_port }}'
        state: started
        timeout: 10
#--------------------------------------------------------------#
# Config keepalived_exporter [vip_exporter_config]
#--------------------------------------------------------------#
# Install the keepalived_exporter systemd unit and its /etc/default options
# file. Skipped unless vip_enabled is true.
- name: config keepalived_exporter
  tags:
    - node_vip
    - vip_exporter
    - vip_exporter_config
  when: vip_enabled|bool
  block:

    # unit file shipped with the role, placed under systemd_dir
    - name: config keepalived_exporter systemd unit
      copy:
        src: keepalived_exporter.svc
        dest: '{{ systemd_dir }}/keepalived_exporter.service'

    # listen address and metrics path as one quoted variable
    - name: config default keepalived_exporter options
      copy:
        dest: /etc/default/keepalived_exporter
        content: |
          KEEPALIVED_EXPORTER_OPTS="--web.listen-address=':{{ vip_exporter_port }}' --web.telemetry-path='{{ exporter_metrics_path }}'"
#--------------------------------------------------------------#
# Launch keepalived_exporter [vip_exporter_launch]
#--------------------------------------------------------------#
# Restart and enable keepalived_exporter, then wait up to 10 seconds for its
# port to accept connections on localhost. Skipped unless vip_enabled.
- name: launch keepalived_exporter
  tags:
    - node_vip
    - vip_exporter
    - vip_exporter_launch
  when: vip_enabled|bool
  block:

    - name: launch keepalived_exporter systemd service
      systemd:
        name: keepalived_exporter
        state: restarted
        enabled: true
        daemon_reload: true

    - name: wait for keepalived_exporter service online
      wait_for:
        host: 127.0.0.1
        port: '{{ vip_exporter_port }}'
        state: started
        timeout: 10
#--------------------------------------------------------------#
# Register Node [node_register]
#--------------------------------------------------------------#
# /infra/targets/node/{{ ip }}.yml
# Write this node's scrape-target file to /infra/targets/node/ on every
# "infra" host (pull mode only; errors on a single infra host are ignored).
# The first entry lists one "host:port" target per enabled exporter
# (node_exporter, haproxy, vector); a second entry for the keepalived
# exporter is added when a VIP is enabled and vip_address is non-empty.
# The target list is built as a real list and emitted as a YAML flow
# sequence, so the rendered file no longer depends on how Jinja trims
# newlines around {% if %}...{% endif %} lines, and an empty list renders
# as "[ ]" just like the other target files written by this role.
- name: register node as victoria target
  tags: [ node_vip, node_register, register, add_metrics ]
  when: node_monitor_mode | default('pull') != 'push'
  ignore_errors: true
  delegate_to: '{{ item }}'
  loop: '{{ groups["infra"]|default([]) }}'
  vars:
    # the inline-if keeps a disabled exporter's port variable unevaluated
    node_scrape_targets: >-
      {{
        ([inventory_hostname ~ ':' ~ node_exporter_port] if node_exporter_enabled|bool else [])
        + ([inventory_hostname ~ ':' ~ haproxy_exporter_port] if haproxy_enabled|bool else [])
        + ([inventory_hostname ~ ':' ~ vector_port] if vector_enabled|bool else [])
      }}
  copy:
    dest: "/infra/targets/node/{{ inventory_hostname }}.yml"
    owner: victoria
    group: infra
    mode: '0640'
    content: |
      # {{ inventory_hostname }}
      # node, haproxy, vector
      - labels: { ip: {{ inventory_hostname }} , ins: {{ nodename }} , host: {{ ansible_hostname|default(nodename|default(inventory_hostname)) }} , cls: {{ node_cluster|default('nodes') }} }
        targets: [ {{ node_scrape_targets | join(', ') }} ]
      {% if vip_enabled|bool and vip_address is defined and vip_address != '' %}
      # keepalived
      - labels: { ip: {{ inventory_hostname }} , ins: {{ nodename }} , host: {{ ansible_hostname|default(nodename|default(inventory_hostname)) }} , cls: {{ node_cluster|default('nodes') }}, vip: {{ vip_address }} }
        targets: [ {{ inventory_hostname }}:{{ vip_exporter_port }} ]
      {% endif %}
# Write a ping-target file for this node to /infra/targets/ping/ on every
# "infra" host (pull mode only; errors on a single infra host are ignored).
- name: register node as ping target
  tags:
    - node_register
    - register
    - add_metrics
  when: node_monitor_mode | default('pull') != 'push'
  delegate_to: "{{ item }}"
  loop: "{{ groups['infra'] | default([]) }}"
  ignore_errors: true
  copy:
    dest: "/infra/targets/ping/{{ inventory_hostname }}.yml"
    mode: '0640'
    owner: victoria
    group: infra
    content: |
      # {{ inventory_hostname }}
      - labels: { ip: {{ inventory_hostname }} , ins: {{ nodename }} , cls: {{ node_cluster|default('nodes') }} }
        targets: [ {{ inventory_hostname }} ]
# Write a ping-target file for this node's VIP to /infra/targets/ping/ on
# every "infra" host. Requires pull mode, vip_enabled and a non-empty
# vip_address; errors on a single infra host are ignored.
- name: register node vip as ping target
  tags:
    - node_vip
    - node_register
    - register
    - add_metrics
  # list entries are AND-ed, same as the single "and" expression
  when:
    - node_monitor_mode | default('pull') != 'push'
    - vip_enabled|bool
    - vip_address is defined
    - vip_address != ''
  delegate_to: "{{ item }}"
  loop: "{{ groups['infra'] | default([]) }}"
  ignore_errors: true
  copy:
    dest: "/infra/targets/ping/{{ vip_address }}---{{ inventory_hostname }}.yml"
    mode: '0640'
    owner: victoria
    group: infra
    content: |
      # {{ vip_address }}@{{ inventory_hostname }}
      - labels: { ip: {{ inventory_hostname }} , ins: {{ nodename }} , cls: {{ node_cluster|default('nodes') }}, vip: {{ vip_address }} , job: node-vip }
        targets: [ {{ vip_address }} ]
# Pull in the process_exporter tasks when the exporter is explicitly enabled
# or whenever the node runs in push mode.
- import_tasks: process_exporter.yml
  tags: [process_exporter]
  when: >-
    process_exporter_enabled | default(false) | bool
    or node_monitor_mode | default('pull') == 'push'
#--------------------------------------------------------------#
# Vector [vector]
#--------------------------------------------------------------#
# Pull in the vector tasks when vector is enabled or whenever the node runs
# in push mode.
- import_tasks: vector.yml
  tags: [vector]
  when: >-
    vector_enabled|bool
    or node_monitor_mode | default('pull') == 'push'
...