Prometheus、Alertmanager 報警配置詳解
###
1、prometheus配置文件
vim prometheus.yml
### global: scrape_interval: 15s external_labels: monitor: 'codelab-monitor' scrape_configs: - job_name: test static_configs: - targets: ['10.13.82.244:8000'] labels: instance: proxy - job_name: node static_configs: - targets: ['10.13.82.244:9100','10.13.82.196:9100'] alerting: alertmanagers: - static_configs: - targets: ["localhost:9093"] rule_files: - rule.yml
2、告警規則配置文件
vim rule.yml ### groups: - name: test-rule rules: - alert: "內存報警" expr: 100 - ((node_memory_MemAvailable * 100) / node_memory_MemTotal) > 10 for: 1s labels: severity: warning annotations: summary: "服務名:{{$labels.alertname}}" description: "業務500報警: {{ $value }}" value: "{{ $value }}" - name: test-rule2 rules: - alert: "內存報警" expr: 100 - ((node_memory_MemAvailable * 100) / node_memory_MemTotal) > 40 for: 1s labels: severity: test annotations: summary: "服務名:{{$labels.alertname}}" description: "業務500報警: {{ $value }}" value: "{{ $value }}"
3、alertmanager配置文件
vim alertmanager.yml ### global: smtp_smarthost: 'xxx' smtp_from: 'xxx' smtp_auth_username: 'xxx' smtp_auth_password: 'xxx' smtp_require_tls: false templates: - '/alertmanager/template/*.tmpl' route: receiver: 'default-receiver' # 下面routes:規則中沒有匹配的的信息,會發送到此默認的'webhook'接收端(接收端地址隨意寫但是需與receivers:中-name: 對應上 group_wait: 1s # 收到告警時 等待0s確認時間內是否有新告警 如果有則一并發送 group_interval: 1s # 在發送新告警前的等待時間。通常5m或以上、第二組發送郵件間隔時間 repeat_interval: 1s # 發送重復告警的周期。如果已經發送了通知,再次發送之前需要等待多長時間。通常3小時或以上 group_by: [cluster, alertname] routes: - receiver: test group_wait: 1s match_re: severity: test receivers: - name: 'default-receiver' email_configs: - to: 'xx@xx.xx' html: '{{ template "xx.html" . }}' headers: { Subject: " {{ .CommonAnnotations.summary }}" } # 報警郵件主題 - name: 'test' email_configs: - to: 'xx@xx.xx' html: '{{ template "xx.html" . }}' headers: { Subject: " {{ 第二路由匹配測試}}" } # 報警郵件主題
4、告警模板配置
vim test.tmpl ### {{ define "xx.html" }} <table border="5"> <tr><td>報警項</td> <td>磁盤</td> <td>報警閥值</td> <td>開始時間</td> </tr> {{ range $i, $alert := .Alerts }} <tr><td>{{ index $alert.Labels "alertname" }}</td> <td>{{ index $alert.Labels "instance" }}</td> <td>{{ index $alert.Annotations "value" }}</td> <td>{{ $alert.StartsAt }}</td> </tr> {{ end }} </table> {{ end }}
###
浙公網安備 33010602011771號