gitops/clusters/monitor/alert-rules-patch.yaml
2023-12-19 18:55:06 +08:00

52 lines
2.0 KiB
YAML

---
# ConfigMap named observability-server-prometheus-server in the "monitoring"
# namespace, labelled and annotated as part of the "observability-server"
# Helm release. Its only data key, alerting_rules.yml, holds the
# "host-monitoring" Prometheus alert group defined below.
apiVersion: v1
kind: ConfigMap
metadata:
  annotations:
    meta.helm.sh/release-name: observability-server
    meta.helm.sh/release-namespace: monitoring
  labels:
    app.kubernetes.io/component: server
    app.kubernetes.io/instance: observability-server
    app.kubernetes.io/managed-by: Helm
    app.kubernetes.io/name: prometheus
    app.kubernetes.io/part-of: prometheus
    app.kubernetes.io/version: v2.48.1
    helm.sh/chart: prometheus-25.8.2
  name: observability-server-prometheus-server
  namespace: monitoring
data:
  # Rules in the host-monitoring group (each must hold for 5m before firing):
  #   HighLoad        (warning)  - node_load1 above 2.0.
  #   HighCpuUsage    (critical) - 100 minus the per-instance average idle
  #                                rate (as a percentage) above 90.
  #   HighMemoryUsage (warning)  - (MemTotal - MemFree - Buffers - Cached)
  #                                as a percentage of MemTotal above 90.
  #   HighDiskUsage   (critical) - used percentage of ext4 filesystems above
  #                                90. Written as "100 - available%" so that
  #                                {{ $value }} in the description is the
  #                                used percentage the text claims it is.
  # `expr` and `for` are separate keys of each rule; keeping `for` on the
  # `expr` line would make it part of the PromQL expression text.
  # These notes are kept outside the literal block so they do not become part
  # of the rule file content.
  alerting_rules.yml: |
    groups:
      - name: host-monitoring
        rules:
          - alert: HighLoad
            expr: node_load1 > 2.0
            for: 5m
            labels:
              severity: warning
            annotations:
              summary: High load on {{ $labels.instance }}
              description: "Load is {{ $value }} (threshold: 2.0)"
          - alert: HighCpuUsage
            expr: 100 - (avg by (instance) (irate(node_cpu_seconds_total{mode="idle"}[5m])) * 100) > 90
            for: 5m
            labels:
              severity: critical
            annotations:
              summary: High CPU usage on {{ $labels.instance }}
              description: "CPU usage is {{ $value }}%"
          - alert: HighMemoryUsage
            expr: (node_memory_MemTotal_bytes - node_memory_MemFree_bytes - node_memory_Buffers_bytes - node_memory_Cached_bytes) / node_memory_MemTotal_bytes * 100 > 90
            for: 5m
            labels:
              severity: warning
            annotations:
              summary: High memory usage on {{ $labels.instance }}
              description: "Memory usage is {{ $value }}%"
          - alert: HighDiskUsage
            expr: 100 - (node_filesystem_avail_bytes{fstype="ext4"} / node_filesystem_size_bytes{fstype="ext4"} * 100) > 90
            for: 5m
            labels:
              severity: critical
            annotations:
              summary: High disk usage on {{ $labels.instance }}
              description: "Disk usage is {{ $value }}%"