2015-11-12 14:15:12 +00:00
|
|
|
global:
  # The smarthost and SMTP sender used for mail notifications.
  smtp_smarthost: 'localhost:25'
  smtp_from: 'alertmanager@example.org'
  # Credentials used to authenticate against the smarthost. The password
  # below is a placeholder; do not commit a real secret to version control.
  smtp_auth_username: 'alertmanager'
  smtp_auth_password: 'password'
|
2015-11-12 14:15:12 +00:00
|
|
|
|
|
|
|
# Glob patterns matching the files from which notification templates are read.
templates:
  - '/etc/alertmanager/template/*.tmpl'
|
2015-11-12 14:15:12 +00:00
|
|
|
|
|
|
|
# The root route on which each incoming alert enters.
route:
  # The labels by which incoming alerts are grouped together. For example,
  # multiple alerts coming in for cluster=A and alertname=LatencyHigh would
  # be batched into a single group.
  #
  # To aggregate by all possible labels use '...' as the sole label name.
  # This effectively disables aggregation entirely, passing through all
  # alerts as-is. This is unlikely to be what you want, unless you have
  # a very low alert volume or your upstream notification system performs
  # its own grouping. Example: group_by: [...]
  group_by: ['alertname', 'cluster', 'service']

  # When a new group of alerts is created by an incoming alert, wait at
  # least 'group_wait' to send the initial notification.
  # This ensures that multiple alerts for the same group that start firing
  # shortly after one another are batched together in the first
  # notification.
  group_wait: 30s

  # After the first notification has been sent, wait 'group_interval' before
  # sending a batch of new alerts that started firing for that group.
  group_interval: 5m

  # If a notification has been sent successfully, wait 'repeat_interval'
  # before resending it.
  repeat_interval: 3h

  # A default receiver
  receiver: team-X-mails

  # All the above attributes are inherited by all child routes and can be
  # overridden on each.

  # The child route trees.
  routes:
    # This route performs a regular expression match on alert labels to
    # catch alerts that are related to a list of services.
    - matchers:
        - service=~"foo1|foo2|baz"
      receiver: team-X-mails
      # The service has a sub-route for critical alerts. Any alerts
      # that do not match, i.e. severity != critical, fall back to the
      # parent node and are sent to 'team-X-mails'.
      routes:
        - matchers:
            - severity="critical"
          receiver: team-X-pager
    - matchers:
        - service="files"
      receiver: team-Y-mails

      routes:
        - matchers:
            - severity="critical"
          receiver: team-Y-pager

    # This route handles all alerts coming from a database service. If no
    # owner-specific sub-route below matches, the alert goes to the DB team.
    - matchers:
        - service="database"
      receiver: team-DB-pager
      # Also group alerts by affected database.
      group_by: [alertname, cluster, database]
      routes:
        - matchers:
            - owner="team-X"
          receiver: team-X-pager
          # Keep evaluating the sibling routes below after this one matches.
          continue: true
        - matchers:
            - owner="team-Y"
          receiver: team-Y-pager
|
2015-11-12 14:15:12 +00:00
|
|
|
|
|
|
|
|
|
|
|
# Inhibition rules allow muting a set of alerts while another alert is
# firing.
# We use this to mute any warning-level notifications if the same alert is
# already critical.
inhibit_rules:
  - source_matchers: [severity="critical"]
    target_matchers: [severity="warning"]
    # Apply inhibition only if the alertname, cluster and service labels
    # are the same on the source and target alerts.
    # CAUTION:
    #   If all label names listed in `equal` are missing
    #   from both the source and target alerts,
    #   the inhibition rule will apply!
    equal: [alertname, cluster, service]
|
2015-11-12 14:15:12 +00:00
|
|
|
|
|
|
|
|
|
|
|
# Notification targets referenced by name from the `receiver` keys of the
# routing tree.
receivers:
  - name: 'team-X-mails'
    email_configs:
      - to: 'team-X+alerts@example.org'

  - name: 'team-X-pager'
    email_configs:
      - to: 'team-X+alerts-critical@example.org'
    pagerduty_configs:
      - service_key: <team-X-key>

  - name: 'team-Y-mails'
    email_configs:
      - to: 'team-Y+alerts@example.org'

  - name: 'team-Y-pager'
    pagerduty_configs:
      - service_key: <team-Y-key>

  - name: 'team-DB-pager'
    pagerduty_configs:
      - service_key: <team-DB-key>
|