From 99a7b96ce7cf27077cd1067a2a7745acc3cdfdec Mon Sep 17 00:00:00 2001
From: shenlan
Date: Mon, 18 Aug 2025 14:16:16 +0800
Subject: [PATCH] Add Grafana and Prometheus vhost roles with playbook

---
Reviewer notes (this section is ignored by `git am`):

* The patch had lost its line breaks and indentation; both are restored here
  so that the hunks apply and the YAML nesting is unambiguous.
* grafana: the APT key is ASCII-armored and is now stored as grafana.asc,
  /etc/apt/keyrings is created before the download, and grafana-server is
  restarted when the systemd environment drop-in changes.
* nginx: /api/live/ forwards the Host header for Grafana Live, and the ingest
  locations clear the Connection header so upstream keepalive is used.
* prometheus: the service group is created explicitly, and the service is
  restarted only when binaries, configuration or the unit file changed.
* Hunk line counts and the diffstat reflect the revised content; the blob ids
  on the `index` lines were not recomputed.

 .../deploy_tiny_monitor_server_vhost.yml      |  21 +++
 .../roles/vhosts/grafana/defaults/main.yml    |   5 +
 playbooks/roles/vhosts/grafana/tasks/main.yml | 105 ++++++++++++++
 .../grafana/templates/dashboards.yaml.j2      |  10 ++
 .../vhosts/grafana/templates/env.conf.j2      |   4 +
 .../templates/grafana-dash-pull.service.j2    |   7 +
 .../templates/grafana-dash-pull.timer.j2      |  10 ++
 .../roles/vhosts/nginx/defaults/main.yml      |   6 +
 playbooks/roles/vhosts/nginx/tasks/main.yml   |  28 ++++
 .../vhosts/nginx/templates/grafana.conf.j2    |  25 ++++
 .../vhosts/nginx/templates/metrics.conf.j2    |  44 ++++++
 .../roles/vhosts/prometheus/defaults/main.yml |  15 ++
 .../roles/vhosts/prometheus/tasks/main.yml    | 142 ++++++++++++++++++
 .../vhosts/prometheus/templates/nodes.json.j2 |   3 +
 .../templates/prometheus.service.j2           |  19 +++
 .../prometheus/templates/prometheus.yml.j2    |  23 +++
 16 files changed, 467 insertions(+)
 create mode 100644 playbooks/deploy_tiny_monitor_server_vhost.yml
 create mode 100644 playbooks/roles/vhosts/grafana/defaults/main.yml
 create mode 100644 playbooks/roles/vhosts/grafana/tasks/main.yml
 create mode 100644 playbooks/roles/vhosts/grafana/templates/dashboards.yaml.j2
 create mode 100644 playbooks/roles/vhosts/grafana/templates/env.conf.j2
 create mode 100644 playbooks/roles/vhosts/grafana/templates/grafana-dash-pull.service.j2
 create mode 100644 playbooks/roles/vhosts/grafana/templates/grafana-dash-pull.timer.j2
 create mode 100644 playbooks/roles/vhosts/nginx/templates/grafana.conf.j2
 create mode 100644 playbooks/roles/vhosts/nginx/templates/metrics.conf.j2
 create mode 100644 playbooks/roles/vhosts/prometheus/defaults/main.yml
 create mode 100644 playbooks/roles/vhosts/prometheus/tasks/main.yml
 create mode 100644 playbooks/roles/vhosts/prometheus/templates/nodes.json.j2
 create mode 100644 playbooks/roles/vhosts/prometheus/templates/prometheus.service.j2
 create mode 100644 playbooks/roles/vhosts/prometheus/templates/prometheus.yml.j2

diff --git a/playbooks/deploy_tiny_monitor_server_vhost.yml b/playbooks/deploy_tiny_monitor_server_vhost.yml
new file mode 100644
index 0000000..07bee11
--- /dev/null
+++ b/playbooks/deploy_tiny_monitor_server_vhost.yml
@@ -0,0 +1,21 @@
+- name: setup tiny monitor server
+  hosts: cn-homepage.svc.plus
+  become: true
+  vars:
+    group: cn-homepage.svc.plus
+  roles:
+    - roles/vhosts/common/
+    - roles/vhosts/prometheus/
+    - roles/vhosts/grafana/
+    - roles/vhosts/nginx/
+
+- name: setup tiny monitor server
+  hosts: global-homepage.svc.plus
+  become: true
+  vars:
+    group: global-homepage.svc.plus
+  roles:
+    - roles/vhosts/common/
+    - roles/vhosts/prometheus/
+    - roles/vhosts/grafana/
+    - roles/vhosts/nginx/
diff --git a/playbooks/roles/vhosts/grafana/defaults/main.yml b/playbooks/roles/vhosts/grafana/defaults/main.yml
new file mode 100644
index 0000000..a2cf29d
--- /dev/null
+++ b/playbooks/roles/vhosts/grafana/defaults/main.yml
@@ -0,0 +1,5 @@
+grafana_git_url: https://github.com/svc-design/gitops.git
+grafana_root_dir: /srv/grafana/grafana-as-code
+grafana_domain: grafana.svc.plus
+metrics_domain: metrics.svc.plus
+prom_url_for_grafana: "https://{{ metrics_domain }}/prom/"
diff --git a/playbooks/roles/vhosts/grafana/tasks/main.yml b/playbooks/roles/vhosts/grafana/tasks/main.yml
new file mode 100644
index 0000000..9826d4c
--- /dev/null
+++ b/playbooks/roles/vhosts/grafana/tasks/main.yml
@@ -0,0 +1,105 @@
+- name: Ensure APT keyrings directory exists
+  ansible.builtin.file:
+    path: /etc/apt/keyrings
+    state: directory
+    mode: '0755'
+  when: inventory_hostname in groups[group]
+
+# The upstream key is ASCII-armored, so it must be stored with an .asc suffix;
+# apt ignores armored keys that are saved under a .gpg file name.
+- name: Ensure Grafana APT key is present
+  ansible.builtin.get_url:
+    url: https://apt.grafana.com/gpg.key
+    dest: /etc/apt/keyrings/grafana.asc
+    mode: '0644'
+  when: inventory_hostname in groups[group]
+
+- name: Add Grafana repository
+  ansible.builtin.apt_repository:
+    repo: "deb [signed-by=/etc/apt/keyrings/grafana.asc] https://apt.grafana.com stable main"
+    filename: grafana
+  when: inventory_hostname in groups[group]
+
+- name: Install Grafana
+  ansible.builtin.apt:
+    name: grafana
+    state: present
+    update_cache: true
+  when: inventory_hostname in groups[group]
+
+- name: Clone GitOps dashboards repo
+  ansible.builtin.git:
+    repo: "{{ grafana_git_url }}"
+    dest: "{{ grafana_root_dir }}"
+    version: HEAD
+    depth: 1
+    update: true
+  when: inventory_hostname in groups[group]
+
+- name: Ensure Grafana provisioning directory exists
+  ansible.builtin.file:
+    path: /etc/grafana/provisioning/dashboards
+    state: directory
+    mode: '0755'
+  when: inventory_hostname in groups[group]
+
+- name: Configure Grafana dashboards provisioning
+  ansible.builtin.template:
+    src: dashboards.yaml.j2
+    dest: /etc/grafana/provisioning/dashboards/dashboards.yaml
+    owner: root
+    group: root
+    mode: '0644'
+  when: inventory_hostname in groups[group]
+
+- name: Ensure Grafana systemd override directory exists
+  ansible.builtin.file:
+    path: /etc/systemd/system/grafana-server.service.d
+    state: directory
+    mode: '0755'
+  when: inventory_hostname in groups[group]
+
+- name: Inject Grafana environment overrides
+  ansible.builtin.template:
+    src: env.conf.j2
+    dest: /etc/systemd/system/grafana-server.service.d/env.conf
+    owner: root
+    group: root
+    mode: '0644'
+  register: grafana_env_override
+  when: inventory_hostname in groups[group]
+
+- name: Install grafana dashboard pull timer
+  ansible.builtin.template:
+    src: grafana-dash-pull.timer.j2
+    dest: /etc/systemd/system/grafana-dash-pull.timer
+    mode: '0644'
+  when: inventory_hostname in groups[group]
+
+- name: Install grafana dashboard pull service
+  ansible.builtin.template:
+    src: grafana-dash-pull.service.j2
+    dest: /etc/systemd/system/grafana-dash-pull.service
+    mode: '0644'
+  when: inventory_hostname in groups[group]
+
+# A drop-in change only takes effect after the unit is restarted.
+- name: Restart Grafana when environment overrides changed
+  ansible.builtin.systemd:
+    name: grafana-server
+    state: restarted
+    daemon_reload: true
+  when:
+    - inventory_hostname in groups[group]
+    - grafana_env_override is changed
+
+- name: Enable and start Grafana services
+  ansible.builtin.systemd:
+    name: "{{ item }}"
+    enabled: true
+    state: started
+    daemon_reload: true
+  loop:
+    - grafana-server
+    - grafana-dash-pull.timer
+  when: inventory_hostname in groups[group]
diff --git a/playbooks/roles/vhosts/grafana/templates/dashboards.yaml.j2 b/playbooks/roles/vhosts/grafana/templates/dashboards.yaml.j2
new file mode 100644
index 0000000..7809060
--- /dev/null
+++ b/playbooks/roles/vhosts/grafana/templates/dashboards.yaml.j2
@@ -0,0 +1,10 @@
+apiVersion: 1
+providers:
+  - name: 'gitops-dashboards'
+    type: file
+    disableDeletion: false
+    allowUiUpdates: false
+    updateIntervalSeconds: 30
+    options:
+      path: {{ grafana_root_dir }}/dashboards
+      foldersFromFilesStructure: true
diff --git a/playbooks/roles/vhosts/grafana/templates/env.conf.j2 b/playbooks/roles/vhosts/grafana/templates/env.conf.j2
new file mode 100644
index 0000000..78ac6df
--- /dev/null
+++ b/playbooks/roles/vhosts/grafana/templates/env.conf.j2
@@ -0,0 +1,4 @@
+[Service]
+Environment=GF_SERVER_DOMAIN={{ grafana_domain }}
+Environment=GF_SERVER_ROOT_URL=https://{{ grafana_domain }}/
+Environment=PROM_URL={{ prom_url_for_grafana }}
diff --git a/playbooks/roles/vhosts/grafana/templates/grafana-dash-pull.service.j2 b/playbooks/roles/vhosts/grafana/templates/grafana-dash-pull.service.j2
new file mode 100644
index 0000000..fadef99
--- /dev/null
+++ b/playbooks/roles/vhosts/grafana/templates/grafana-dash-pull.service.j2
@@ -0,0 +1,7 @@
+[Unit]
+Description=git pull dashboards
+
+[Service]
+Type=oneshot
+WorkingDirectory={{ grafana_root_dir }}
+ExecStart=/usr/bin/git pull --ff-only
diff --git a/playbooks/roles/vhosts/grafana/templates/grafana-dash-pull.timer.j2 b/playbooks/roles/vhosts/grafana/templates/grafana-dash-pull.timer.j2
new file mode 100644
index 0000000..7e3a5a8
--- /dev/null
+++ b/playbooks/roles/vhosts/grafana/templates/grafana-dash-pull.timer.j2
@@ -0,0 +1,10 @@
+[Unit]
+Description=git pull dashboards every 5m
+
+[Timer]
+OnBootSec=30s
+OnUnitActiveSec=5m
+AccuracySec=30s
+
+[Install]
+WantedBy=timers.target
diff --git a/playbooks/roles/vhosts/nginx/defaults/main.yml b/playbooks/roles/vhosts/nginx/defaults/main.yml
index 3285c6b..c1029ce 100644
--- a/playbooks/roles/vhosts/nginx/defaults/main.yml
+++ b/playbooks/roles/vhosts/nginx/defaults/main.yml
@@ -4,3 +4,9 @@ vhosts_nginx_ssl_certificate: /etc/ssl/svc.plus.pem
 vhosts_nginx_ssl_certificate_key: /etc/ssl/svc.plus.rsa.key
 vhosts_nginx_cn_homepage_root: /var/www/XControl/ui/homepage/out
 vhosts_nginx_artifact_root: /data/update-server
+vhosts_nginx_grafana_domain: grafana.svc.plus
+vhosts_nginx_metrics_domain: metrics.svc.plus
+vhosts_nginx_metrics_backend_addr: 10.10.0.50:8428
+vhosts_nginx_vm_write_path: /api/v1/write
+vhosts_nginx_vm_read_path: /api/v1/read
+vhosts_nginx_receiver_path: /api/v1/receive
diff --git a/playbooks/roles/vhosts/nginx/tasks/main.yml b/playbooks/roles/vhosts/nginx/tasks/main.yml
index 983ce53..06ddc56 100644
--- a/playbooks/roles/vhosts/nginx/tasks/main.yml
+++ b/playbooks/roles/vhosts/nginx/tasks/main.yml
@@ -49,6 +49,34 @@
     state: link
   notify: Reload nginx
 
+- name: Deploy metrics site configuration
+  ansible.builtin.template:
+    src: metrics.conf.j2
+    dest: /etc/nginx/sites-available/metrics.conf
+    mode: '0644'
+  notify: Reload nginx
+
+- name: Enable metrics site
+  ansible.builtin.file:
+    src: /etc/nginx/sites-available/metrics.conf
+    dest: /etc/nginx/sites-enabled/metrics.conf
+    state: link
+  notify: Reload nginx
+
+- name: Deploy grafana site configuration
+  ansible.builtin.template:
+    src: grafana.conf.j2
+    dest: /etc/nginx/sites-available/grafana.conf
+    mode: '0644'
+  notify: Reload nginx
+
+- name: Enable grafana site
+  ansible.builtin.file:
+    src: /etc/nginx/sites-available/grafana.conf
+    dest: /etc/nginx/sites-enabled/grafana.conf
+    state: link
+  notify: Reload nginx
+
 - name: Ensure nginx is running
   ansible.builtin.service:
     name: nginx
diff --git a/playbooks/roles/vhosts/nginx/templates/grafana.conf.j2 b/playbooks/roles/vhosts/nginx/templates/grafana.conf.j2
new file mode 100644
index 0000000..14ec0e1
--- /dev/null
+++ b/playbooks/roles/vhosts/nginx/templates/grafana.conf.j2
@@ -0,0 +1,25 @@
+server {
+  listen 443 ssl http2;
+  server_name {{ vhosts_nginx_grafana_domain }};
+  ssl_certificate {{ vhosts_nginx_ssl_certificate }};
+  ssl_certificate_key {{ vhosts_nginx_ssl_certificate_key }};
+
+  location / {
+    proxy_set_header Host $http_host;
+    proxy_set_header X-Real-IP $remote_addr;
+    proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
+    proxy_set_header X-Forwarded-Proto $scheme;
+    proxy_read_timeout 300;
+    proxy_send_timeout 300;
+    proxy_pass http://127.0.0.1:3000;
+  }
+  location /api/live/ {
+    proxy_http_version 1.1;
+    proxy_set_header Upgrade $http_upgrade;
+    proxy_set_header Connection "upgrade";
+    # Grafana Live checks the request origin against the forwarded Host.
+    proxy_set_header Host $http_host;
+    proxy_pass http://127.0.0.1:3000;
+  }
+}
+server { listen 80; server_name {{ vhosts_nginx_grafana_domain }}; return 301 https://$host$request_uri; }
diff --git a/playbooks/roles/vhosts/nginx/templates/metrics.conf.j2 b/playbooks/roles/vhosts/nginx/templates/metrics.conf.j2
new file mode 100644
index 0000000..db694cc
--- /dev/null
+++ b/playbooks/roles/vhosts/nginx/templates/metrics.conf.j2
@@ -0,0 +1,44 @@
+upstream metrics_backend { server {{ vhosts_nginx_metrics_backend_addr }}; keepalive 32; }
+
+server {
+  listen 443 ssl http2;
+  server_name {{ vhosts_nginx_metrics_domain }};
+  ssl_certificate {{ vhosts_nginx_ssl_certificate }};
+  ssl_certificate_key {{ vhosts_nginx_ssl_certificate_key }};
+
+  # Prometheus Web/API via subpath
+  location /prom/ {
+    proxy_set_header Host $http_host;
+    proxy_set_header X-Real-IP $remote_addr;
+    proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
+    proxy_set_header X-Forwarded-Proto $scheme;
+    proxy_read_timeout 300;
+    proxy_send_timeout 300;
+    proxy_pass http://127.0.0.1:9090/;
+  }
+
+  # Ingest (VM write/read; Receiver write)
+  # Connection is cleared so the upstream keepalive pool above is reused.
+  location = {{ vhosts_nginx_vm_write_path }} {
+    client_max_body_size 0; proxy_request_buffering off; proxy_buffering off; proxy_http_version 1.1; proxy_set_header Connection "";
+    proxy_set_header Host $http_host; proxy_set_header X-Real-IP $remote_addr;
+    proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; proxy_set_header X-Forwarded-Proto $scheme;
+    proxy_read_timeout 600; proxy_send_timeout 600;
+    proxy_pass http://metrics_backend$request_uri;
+  }
+  location = {{ vhosts_nginx_vm_read_path }} {
+    client_max_body_size 0; proxy_request_buffering off; proxy_buffering off; proxy_http_version 1.1; proxy_set_header Connection "";
+    proxy_set_header Host $http_host; proxy_set_header X-Real-IP $remote_addr;
+    proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; proxy_set_header X-Forwarded-Proto $scheme;
+    proxy_read_timeout 600; proxy_send_timeout 600;
+    proxy_pass http://metrics_backend$request_uri;
+  }
+  location = {{ vhosts_nginx_receiver_path }} {
+    client_max_body_size 0; proxy_request_buffering off; proxy_buffering off; proxy_http_version 1.1; proxy_set_header Connection "";
+    proxy_set_header Host $http_host; proxy_set_header X-Real-IP $remote_addr;
+    proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; proxy_set_header X-Forwarded-Proto $scheme;
+    proxy_read_timeout 600; proxy_send_timeout 600;
+    proxy_pass http://metrics_backend$request_uri;
+  }
+}
+server { listen 80; server_name {{ vhosts_nginx_metrics_domain }}; return 301 https://$host$request_uri; }
diff --git a/playbooks/roles/vhosts/prometheus/defaults/main.yml b/playbooks/roles/vhosts/prometheus/defaults/main.yml
new file mode 100644
index 0000000..e1cc6c7
--- /dev/null
+++ b/playbooks/roles/vhosts/prometheus/defaults/main.yml
@@ -0,0 +1,15 @@
+prometheus_version: 2.49.0
+prometheus_dir: /opt/prometheus
+prometheus_user: prometheus
+prometheus_group: prometheus
+prometheus_data: /var/lib/prometheus
+prometheus_etc: /etc/prometheus
+prometheus_file_sd_dir: "{{ prometheus_etc }}/file_sd"
+metrics_domain: metrics.svc.plus
+metrics_backend_kind: vm
+metrics_backend_addr: 10.10.0.50:8428
+vm_write_path: /api/v1/write
+vm_read_path: /api/v1/read
+receiver_path: /api/v1/receive
+enable_remote_write: true
+enable_remote_read: true
diff --git a/playbooks/roles/vhosts/prometheus/tasks/main.yml b/playbooks/roles/vhosts/prometheus/tasks/main.yml
new file mode 100644
index 0000000..49927cc
--- /dev/null
+++ b/playbooks/roles/vhosts/prometheus/tasks/main.yml
@@ -0,0 +1,142 @@
+- name: Create Prometheus directories
+  ansible.builtin.file:
+    path: "{{ item }}"
+    state: directory
+    mode: '0755'
+  loop:
+    - "{{ prometheus_dir }}"
+    - "{{ prometheus_etc }}"
+    - "{{ prometheus_data }}"
+    - "{{ prometheus_file_sd_dir }}"
+  when: inventory_hostname in groups[group]
+
+# The service unit and file ownership reference prometheus_group, so create it
+# explicitly instead of relying on useradd's per-user group default.
+- name: Ensure prometheus group exists
+  ansible.builtin.group:
+    name: "{{ prometheus_group }}"
+    system: true
+  when: inventory_hostname in groups[group]
+
+- name: Ensure prometheus user exists
+  ansible.builtin.user:
+    name: "{{ prometheus_user }}"
+    group: "{{ prometheus_group }}"
+    system: true
+    shell: /usr/sbin/nologin
+    create_home: false
+  when: inventory_hostname in groups[group]
+
+- name: Set Prometheus archive for amd64
+  ansible.builtin.set_fact:
+    prometheus_tar: "prometheus-{{ prometheus_version }}.linux-amd64.tar.gz"
+    prometheus_src_dir: "prometheus-{{ prometheus_version }}.linux-amd64"
+  when:
+    - inventory_hostname in groups[group]
+    - ansible_architecture in ['x86_64', 'amd64']
+
+- name: Set Prometheus archive for arm64
+  ansible.builtin.set_fact:
+    prometheus_tar: "prometheus-{{ prometheus_version }}.linux-arm64.tar.gz"
+    prometheus_src_dir: "prometheus-{{ prometheus_version }}.linux-arm64"
+  when:
+    - inventory_hostname in groups[group]
+    - ansible_architecture in ['aarch64', 'arm64']
+
+- name: Download Prometheus archive
+  ansible.builtin.get_url:
+    url: "https://github.com/prometheus/prometheus/releases/download/v{{ prometheus_version }}/{{ prometheus_tar }}"
+    dest: "/tmp/{{ prometheus_tar }}"
+    mode: '0644'
+  when: inventory_hostname in groups[group]
+
+- name: Extract Prometheus archive
+  ansible.builtin.unarchive:
+    src: "/tmp/{{ prometheus_tar }}"
+    dest: /tmp
+    remote_src: true
+    creates: "/tmp/{{ prometheus_src_dir }}"
+  when: inventory_hostname in groups[group]
+
+- name: Install Prometheus binaries
+  ansible.builtin.copy:
+    src: "/tmp/{{ prometheus_src_dir }}/{{ item }}"
+    dest: "{{ prometheus_dir }}/{{ item }}"
+    mode: '0755'
+    remote_src: true
+  register: prometheus_binaries_result
+  loop:
+    - prometheus
+    - promtool
+  when: inventory_hostname in groups[group]
+
+- name: Symlink Prometheus binaries
+  ansible.builtin.file:
+    src: "{{ prometheus_dir }}/{{ item }}"
+    dest: "/usr/local/bin/{{ item }}"
+    state: link
+  loop:
+    - prometheus
+    - promtool
+  when: inventory_hostname in groups[group]
+
+- name: Create default file_sd config
+  ansible.builtin.template:
+    src: nodes.json.j2
+    dest: "{{ prometheus_file_sd_dir }}/nodes.json"
+    owner: "{{ prometheus_user }}"
+    group: "{{ prometheus_group }}"
+    mode: '0644'
+  when: inventory_hostname in groups[group]
+
+- name: Deploy Prometheus configuration
+  ansible.builtin.template:
+    src: prometheus.yml.j2
+    dest: "{{ prometheus_etc }}/prometheus.yml"
+    owner: "{{ prometheus_user }}"
+    group: "{{ prometheus_group }}"
+    mode: '0644'
+  register: prometheus_config_result
+  when: inventory_hostname in groups[group]
+
+- name: Ensure Prometheus ownership
+  ansible.builtin.file:
+    path: "{{ item }}"
+    state: directory
+    owner: "{{ prometheus_user }}"
+    group: "{{ prometheus_group }}"
+    recurse: true
+  loop:
+    - "{{ prometheus_data }}"
+    - "{{ prometheus_etc }}"
+  when: inventory_hostname in groups[group]
+
+- name: Install Prometheus service
+  ansible.builtin.template:
+    src: prometheus.service.j2
+    dest: /etc/systemd/system/prometheus.service
+    mode: '0644'
+  register: prometheus_unit_result
+  when: inventory_hostname in groups[group]
+
+# Restart only when something Prometheus loads at startup has changed, so
+# repeated playbook runs stay idempotent and do not interrupt scraping.
+- name: Restart Prometheus when binaries, config or unit changed
+  ansible.builtin.systemd:
+    name: prometheus
+    state: restarted
+    daemon_reload: true
+  when:
+    - inventory_hostname in groups[group]
+    - >-
+      prometheus_binaries_result is changed
+      or prometheus_config_result is changed
+      or prometheus_unit_result is changed
+
+- name: Enable and start Prometheus
+  ansible.builtin.systemd:
+    name: prometheus
+    enabled: true
+    state: started
+    daemon_reload: true
+  when: inventory_hostname in groups[group]
diff --git a/playbooks/roles/vhosts/prometheus/templates/nodes.json.j2 b/playbooks/roles/vhosts/prometheus/templates/nodes.json.j2
new file mode 100644
index 0000000..b761aba
--- /dev/null
+++ b/playbooks/roles/vhosts/prometheus/templates/nodes.json.j2
@@ -0,0 +1,3 @@
+[
+  { "targets": ["127.0.0.1:9100"], "labels": { "instance": "localhost" } }
+]
diff --git a/playbooks/roles/vhosts/prometheus/templates/prometheus.service.j2 b/playbooks/roles/vhosts/prometheus/templates/prometheus.service.j2
new file mode 100644
index 0000000..96e9499
--- /dev/null
+++ b/playbooks/roles/vhosts/prometheus/templates/prometheus.service.j2
@@ -0,0 +1,19 @@
+[Unit]
+Description=Prometheus Server
+After=network-online.target
+Wants=network-online.target
+
+[Service]
+User={{ prometheus_user }}
+Group={{ prometheus_group }}
+ExecStart={{ prometheus_dir }}/prometheus \
+  --config.file={{ prometheus_etc }}/prometheus.yml \
+  --storage.tsdb.path={{ prometheus_data }} \
+  --web.enable-lifecycle \
+  --web.external-url=https://{{ metrics_domain }}/prom/ \
+  --web.route-prefix=/
+Restart=always
+LimitNOFILE=65536
+
+[Install]
+WantedBy=multi-user.target
diff --git a/playbooks/roles/vhosts/prometheus/templates/prometheus.yml.j2 b/playbooks/roles/vhosts/prometheus/templates/prometheus.yml.j2
new file mode 100644
index 0000000..4134631
--- /dev/null
+++ b/playbooks/roles/vhosts/prometheus/templates/prometheus.yml.j2
@@ -0,0 +1,23 @@
+global:
+  scrape_interval: 30s
+  evaluation_interval: 30s
+
+scrape_configs:
+  - job_name: 'node'
+    file_sd_configs:
+      - files: ['{{ prometheus_file_sd_dir }}/nodes.json']
+{% if enable_remote_write %}
+
+remote_write:
+  - url: https://{{ metrics_domain }}{% if metrics_backend_kind == 'receiver' %}{{ receiver_path }}{% else %}{{ vm_write_path }}{% endif %}
+    queue_config:
+      max_samples_per_send: 10000
+      max_shards: 8
+      capacity: 100000
+{% endif %}
+{% if enable_remote_read and metrics_backend_kind == 'vm' %}
+
+remote_read:
+  - url: https://{{ metrics_domain }}{{ vm_read_path }}
+    read_recent: true
+{% endif %}