Allow limiting maximum number of alerts in webhook (#2274)

* Allow limiting maximum number of alerts in webhook

The webhook notifier is the only notifier that does not allow templating
on the Alertmanager side. Users who encounter occasional alert storms
(tens of thousands of alerts firing at once for the same group) have
reported webhook receiver systems not being able to cope with the load
caused by the resulting large webhook notifier messages (the alerting
rules also contained large annotations that can't be stripped away due to
the lack of templating). Reducing group size wasn't an option either, so
this change proposes to allow truncating the list of alerts sent in the
webhook body to a provided maximum length. This assumes that if, e.g., a
group receives 20k alerts, you really are fine with only receiving 10k
because you wouldn't be able to check them all anyway.

Signed-off-by: Julius Volz <julius.volz@gmail.com>

* Change max_alerts to uint32

Signed-off-by: Julius Volz <julius.volz@gmail.com>

* Add truncatedAlerts field to webhook message

Signed-off-by: Julius Volz <julius.volz@gmail.com>

* Fix JSON struct tag

Signed-off-by: Julius Volz <julius.volz@gmail.com>
This commit is contained in:
Julius Volz 2020-06-04 10:07:33 +02:00 committed by GitHub
parent 9c3ee38683
commit 70b5e00ffc
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 37 additions and 5 deletions

View File

@ -409,6 +409,10 @@ type WebhookConfig struct {
// URL to send POST request to.
URL *URL `yaml:"url" json:"url"`
// MaxAlerts is the maximum number of alerts to be sent per webhook message.
// Alerts exceeding this threshold will be truncated. Setting this to 0
// allows an unlimited number of alerts.
MaxAlerts uint64 `yaml:"max_alerts" json:"max_alerts"`
}
// UnmarshalYAML implements the yaml.Unmarshaler interface.

View File

@ -69,12 +69,22 @@ type Message struct {
*template.Data
// The protocol version.
Version string `json:"version"`
GroupKey string `json:"groupKey"`
Version string `json:"version"`
GroupKey string `json:"groupKey"`
TruncatedAlerts uint64 `json:"truncatedAlerts"`
}
// truncateAlerts caps alerts at maxAlerts entries and reports how many
// entries were cut from the tail. A maxAlerts of 0 means "no limit": the
// input is returned as-is with a truncated count of 0. The returned slice
// is a prefix view of the input, not a copy.
func truncateAlerts(maxAlerts uint64, alerts []*types.Alert) ([]*types.Alert, uint64) {
	total := uint64(len(alerts))

	// Nothing to cut when the limit is disabled or not exceeded.
	if maxAlerts == 0 || total <= maxAlerts {
		return alerts, 0
	}

	return alerts[:maxAlerts], total - maxAlerts
}
// Notify implements the Notifier interface.
func (n *Notifier) Notify(ctx context.Context, alerts ...*types.Alert) (bool, error) {
alerts, numTruncated := truncateAlerts(n.conf.MaxAlerts, alerts)
data := notify.GetTemplateData(ctx, n.tmpl, alerts, n.logger)
groupKey, err := notify.ExtractGroupKey(ctx)
@ -83,9 +93,10 @@ func (n *Notifier) Notify(ctx context.Context, alerts ...*types.Alert) (bool, er
}
msg := &Message{
Version: "4",
Data: data,
GroupKey: groupKey.String(),
Version: "4",
Data: data,
GroupKey: groupKey.String(),
TruncatedAlerts: numTruncated,
}
var buf bytes.Buffer

View File

@ -24,6 +24,7 @@ import (
"github.com/prometheus/alertmanager/config"
"github.com/prometheus/alertmanager/notify/test"
"github.com/prometheus/alertmanager/types"
)
func TestWebhookRetry(t *testing.T) {
@ -47,3 +48,19 @@ func TestWebhookRetry(t *testing.T) {
require.Equal(t, expected, actual, fmt.Sprintf("error on status %d", statusCode))
}
}
// TestWebhookTruncateAlerts verifies that truncateAlerts keeps at most
// maxAlerts entries, reports the number of dropped alerts, and treats a
// limit of 0 as "unlimited".
func TestWebhookTruncateAlerts(t *testing.T) {
	alerts := make([]*types.Alert, 10)

	for _, tc := range []struct {
		name          string
		maxAlerts     uint64
		wantLen       int
		wantTruncated uint64
	}{
		{name: "zero means unlimited", maxAlerts: 0, wantLen: 10, wantTruncated: 0},
		{name: "limit below alert count", maxAlerts: 4, wantLen: 4, wantTruncated: 6},
		{name: "limit equal to alert count", maxAlerts: 10, wantLen: 10, wantTruncated: 0},
		{name: "limit above alert count", maxAlerts: 100, wantLen: 10, wantTruncated: 0},
	} {
		t.Run(tc.name, func(t *testing.T) {
			truncatedAlerts, numTruncated := truncateAlerts(tc.maxAlerts, alerts)
			require.Len(t, truncatedAlerts, tc.wantLen)
			// testify expects (expected, actual); keeping that order makes
			// failure output label the two values correctly.
			require.EqualValues(t, tc.wantTruncated, numTruncated)
		})
	}
}