---
# Prometheus alerting rules.
#
# Two rule groups are defined:
#   targets - availability of scraped services, probed servers and
#             load balancers.
#   host    - host load averages, Apache worker counts and Java swap usage.
#
# NOTE(review): this file was recovered from a paste that had lost all
# indentation; the nesting below follows the Prometheus rule-file schema
# (groups -> rules -> alert/expr/for/labels/annotations).
groups:
  - name: targets
    rules:
      # A scrape target has been unreachable for 40s.
      - alert: monitor_service_down
        expr: up == 0
        for: 40s
        labels:
          severity: critical
        annotations:
          summary: "Monitor service non-operational"
          description: "Service {{ $labels.instance }} is down."

      # A probe has reported failure for 30s.
      - alert: server_down
        expr: probe_success == 0
        for: 30s
        labels:
          severity: critical
        annotations:
          summary: "Server is down (no probes are up)"
          description: "Server {{ $labels.instance }} is down."

      # loadbalancer_stats has been below 1 for 30s.
      - alert: loadbalancer_down
        expr: loadbalancer_stats < 1
        for: 30s
        labels:
          severity: critical
        annotations:
          summary: "A loadbalancer is down"
          description: "Loadbalancer for {{ $labels.instance }} is down."

  - name: host
    rules:
      # NOTE(review): the load thresholds below are absolute values, not
      # normalised by CPU count - confirm they suit every monitored host.
      - alert: high_cpu_load1
        expr: node_load1 > 8.0
        for: 300s
        labels:
          severity: warning
        annotations:
          summary: "Server under high load (load 1m) for 5 minutes"
          description: >-
            Host is under high load, the avg load 1m is at {{ $value}}.
            Reported by instance {{ $labels.instance }} of job {{ $labels.job }}.

      - alert: high_cpu_load5
        expr: node_load5 > 5.0
        for: 600s
        labels:
          severity: warning
        annotations:
          summary: "Server under high load (load 5m) for 10 minutes."
          description: >-
            Host is under high load, the avg load 5m is at {{ $value}}.
            Reported by instance {{ $labels.instance }} of job {{ $labels.job }}.

      - alert: high_cpu_load15
        expr: node_load15 > 4.5
        for: 900s
        labels:
          severity: critical
        annotations:
          summary: "Server under high load (load 15m) for 15 minutes."
          description: >-
            Host is under high load, the avg load 15m is at {{ $value}}.
            Reported by instance {{ $labels.instance }} of job {{ $labels.job }}.

      # The aggregation keeps `job` as well as `instance`: the description
      # renders {{ $labels.job }}, which would be empty if the label were
      # aggregated away. The selector pins `job` to a single value, so the
      # grouping of series is unchanged.
      # NOTE(review): this rule and medium_volume_workers_prod both carry
      # severity "warning", and both fire once the count exceeds 325.
      - alert: high_volume_workers_prod
        expr: 'sum by (instance, job) (apache_workers{job="Apache PROD"}) > 325'
        for: 30s
        labels:
          severity: warning
        annotations:
          summary: "Number of workers above 325 for 30s"
          description: >-
            The Apache workers are over 325 for 30s. Current value is {{ $value}}.
            Reported by instance {{ $labels.instance }} of job {{ $labels.job }}.

      - alert: medium_volume_workers_prod
        expr: 'sum by (instance, job) (apache_workers{job="Apache PROD"}) > 300'
        for: 30s
        labels:
          severity: warning
        annotations:
          summary: "Number of workers above 300 for 30s"
          description: >-
            The Apache workers are over 300 for 30s. Current value is {{ $value}}.
            Reported by instance {{ $labels.instance }} of job {{ $labels.job }}.

      # NOTE(review): the unit of swapusage_stats is not visible here;
      # confirm what the 500000 threshold represents.
      - alert: swap_usage_java_high
        expr: 'swapusage_stats{application="java"} > 500000'
        for: 300s
        labels:
          severity: warning
        annotations:
          summary: "Swap usage for Java is high for the last 5 minutes"
          description: >-
            The swap usage for the java process is high. Current value is {{ $value}}.
            Reported by instance {{ $labels.instance }} of job {{ $labels.job }}.
---
# Alertmanager configuration.
#
# NOTE(review): this is a different configuration from any Prometheus
# rule content that precedes it in this file (the two were pasted
# together); presumably it belongs in its own file such as
# alertmanager.yml - confirm and split before deploying.

# Defaults inherited by every receiver unless overridden there.
global:
  resolve_timeout: 5m
  http_config: {}
  smtp_from: alertmanager@example.com
  smtp_hello: localhost
  smtp_smarthost: 'localhost:25'
  # NOTE(review): TLS is required here, but every email_config visible in
  # this configuration sets `require_tls: false` - confirm which is intended.
  smtp_require_tls: true
  pagerduty_url: https://events.pagerduty.com/v2/enqueue
  # NOTE(review): HipChat support has been dropped from newer Alertmanager
  # releases - verify this key is still accepted by the deployed version.
  hipchat_api_url: https://api.hipchat.com/
  opsgenie_api_url: https://api.opsgenie.com/
  wechat_api_url: https://qyapi.weixin.qq.com/cgi-bin/
  victorops_api_url: https://alert.victorops.com/integrations/generic/20131114/alert/
# Routing tree: alerts are grouped by `instance`; `warning` alerts go to
# the `mail` receiver, `critical` alerts to `all`, and anything else falls
# through to `default`.
route:
  receiver: default
  group_by:
    - instance
  routes:
    - receiver: mail
      match:
        severity: warning
    - receiver: all
      match:
        severity: critical
  # NOTE(review): the original indentation was lost, so it is ambiguous
  # whether these two timers belonged to the root route (as written here,
  # inherited by both child routes) or only to the `critical` child route.
  # Confirm against the running configuration. 1s is also very aggressive
  # for both timers and will produce a notification per evaluation burst.
  group_wait: 1s
  group_interval: 1s
# Notification targets referenced by name from the routing tree.
receivers:
  # No integrations configured: alerts routed here produce no notification.
  - name: default

  # Single e-mail recipient.
  - name: mail
    email_configs:
      - send_resolved: true
        to: somemail@mail.nl
        from: alertmanager@example.com
        hello: localhost
        smarthost: 'localhost:25'
        headers:
          From: alertmanager@example.com
          Subject: '{{ template "email.default.subject" . }}'
          To: somemail@mail.nl
        html: '{{ template "email.default.html" . }}'
        require_tls: false

  # Two e-mail recipients: a mailbox and an e-mail-to-SMS address.
  - name: all
    email_configs:
      - send_resolved: true
        # NOTE(review): the envelope recipient below differs from the `To`
        # header further down (mymail@mail.nl) - looks like a partial
        # anonymisation; confirm which address is intended and align them.
        to: fm.nl.itn.dis.cdi.dld.superheroes@rabobank.nl
        from: alertmanager@example.com
        hello: localhost
        smarthost: 'localhost:25'
        headers:
          From: alertmanager@example.com
          Subject: '{{ template "email.default.subject" . }}'
          To: mymail@mail.nl
        html: '{{ template "email.default.html" . }}'
        require_tls: false
      - send_resolved: true
        to: mynumber@mysms.nl
        from: alertmanager@example.com
        hello: localhost
        smarthost: 'localhost:25'
        headers:
          From: alertmanager@example.com
          Subject: '{{ template "email.default.subject" . }}'
          To: mynumber@mysms.nl
        html: '{{ template "email.default.html" . }}'
        require_tls: false

  # NOTE(review): no route visible in this configuration references this
  # receiver - confirm whether it is still needed.
  - name: webhook
    webhook_configs:
      - send_resolved: true
        http_config: {}
        url: http://127.0.0.1:9000
# No custom notification template files are loaded.
templates: []

# The lines below are the mailing-list footer that was pasted together with
# the configuration; they are kept as comments so the file parses as YAML.
# --
# You received this message because you are subscribed to the Google Groups "Prometheus Users" group.
# To unsubscribe from this group and stop receiving emails from it, send an email to prometheus-use...@googlegroups.com.
# To view this discussion on the web visit https://groups.google.com/d/msgid/prometheus-users/7cbb3a17-bf66-4530-9d2c-344549c5cbb3%40googlegroups.com.
# To unsubscribe from this group and stop receiving emails from it, send an email to promethe...@googlegroups.com.