mirror of
https://github.com/prometheus/alertmanager
synced 2025-01-02 19:42:03 +00:00
Add timeout option for webhook notifier. (#4137)
* Add timeout option for webhook notifier. Signed-off-by: Steve Simpson <steve@grafana.com> * Tweak error handling and de-pointer timeout field. Signed-off-by: Steve Simpson <steve@grafana.com> --------- Signed-off-by: Steve Simpson <steve@grafana.com>
This commit is contained in:
parent
eb7aba4e30
commit
0f65e8fa5f
@ -535,6 +535,10 @@ type WebhookConfig struct {
|
|||||||
// Alerts exceeding this threshold will be truncated. Setting this to 0
|
// Alerts exceeding this threshold will be truncated. Setting this to 0
|
||||||
// allows an unlimited number of alerts.
|
// allows an unlimited number of alerts.
|
||||||
MaxAlerts uint64 `yaml:"max_alerts" json:"max_alerts"`
|
MaxAlerts uint64 `yaml:"max_alerts" json:"max_alerts"`
|
||||||
|
|
||||||
|
// Timeout is the maximum time allowed to invoke the webhook. Setting this to 0
|
||||||
|
// does not impose a timeout.
|
||||||
|
Timeout time.Duration `yaml:"timeout" json:"timeout"`
|
||||||
}
|
}
|
||||||
|
|
||||||
// UnmarshalYAML implements the yaml.Unmarshaler interface.
|
// UnmarshalYAML implements the yaml.Unmarshaler interface.
|
||||||
|
@ -1594,6 +1594,13 @@ url_file: <filepath>
|
|||||||
# above this threshold are truncated. When leaving this at its default value of
|
# above this threshold are truncated. When leaving this at its default value of
|
||||||
# 0, all alerts are included.
|
# 0, all alerts are included.
|
||||||
[ max_alerts: <int> | default = 0 ]
|
[ max_alerts: <int> | default = 0 ]
|
||||||
|
|
||||||
|
# The maximum time to wait for a webhook request to complete, before failing the
|
||||||
|
# request and allowing it to be retried. The default value of 0s indicates that
|
||||||
|
# no timeout should be applied.
|
||||||
|
# NOTE: This will have no effect if set higher than the group_interval.
|
||||||
|
[ timeout: <duration> | default = 0s ]
|
||||||
|
|
||||||
```
|
```
|
||||||
|
|
||||||
The Alertmanager
|
The Alertmanager
|
||||||
|
@ -112,8 +112,17 @@ func (n *Notifier) Notify(ctx context.Context, alerts ...*types.Alert) (bool, er
|
|||||||
url = strings.TrimSpace(string(content))
|
url = strings.TrimSpace(string(content))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if n.conf.Timeout > 0 {
|
||||||
|
postCtx, cancel := context.WithTimeoutCause(ctx, n.conf.Timeout, fmt.Errorf("configured webhook timeout reached (%s)", n.conf.Timeout))
|
||||||
|
defer cancel()
|
||||||
|
ctx = postCtx
|
||||||
|
}
|
||||||
|
|
||||||
resp, err := notify.PostJSON(ctx, n.client, url, &buf)
|
resp, err := notify.PostJSON(ctx, n.client, url, &buf)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
if ctx.Err() != nil {
|
||||||
|
err = fmt.Errorf("%w: %w", err, context.Cause(ctx))
|
||||||
|
}
|
||||||
return true, notify.RedactURL(err)
|
return true, notify.RedactURL(err)
|
||||||
}
|
}
|
||||||
defer notify.Drain(resp)
|
defer notify.Drain(resp)
|
||||||
|
@ -464,3 +464,52 @@ receivers:
|
|||||||
|
|
||||||
t.Log(co.Check())
|
t.Log(co.Check())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestWebhookTimeout(t *testing.T) {
|
||||||
|
t.Parallel()
|
||||||
|
|
||||||
|
// This integration test uses an extended group_interval to check that
|
||||||
|
// the webhook level timeout has the desired effect, and that notification
|
||||||
|
// sending is retried in this case.
|
||||||
|
conf := `
|
||||||
|
route:
|
||||||
|
receiver: "default"
|
||||||
|
group_by: [alertname]
|
||||||
|
group_wait: 1s
|
||||||
|
group_interval: 1m
|
||||||
|
repeat_interval: 1m
|
||||||
|
|
||||||
|
receivers:
|
||||||
|
- name: "default"
|
||||||
|
webhook_configs:
|
||||||
|
- url: 'http://%s'
|
||||||
|
timeout: 500ms
|
||||||
|
`
|
||||||
|
|
||||||
|
at := NewAcceptanceTest(t, &AcceptanceOpts{
|
||||||
|
Tolerance: 150 * time.Millisecond,
|
||||||
|
})
|
||||||
|
|
||||||
|
co := at.Collector("webhook")
|
||||||
|
wh := NewWebhook(t, co)
|
||||||
|
|
||||||
|
wh.Func = func(ts float64) bool {
|
||||||
|
// Make some webhook requests slow enough to hit the webhook
|
||||||
|
// timeout, but not so slow as to hit the dispatcher timeout.
|
||||||
|
if ts < 3 {
|
||||||
|
time.Sleep(time.Second)
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
am := at.AlertmanagerCluster(fmt.Sprintf(conf, wh.Address()), 1)
|
||||||
|
|
||||||
|
am.Push(At(1), Alert("alertname", "test1"))
|
||||||
|
|
||||||
|
co.Want(Between(3, 4), Alert("alertname", "test1").Active(1))
|
||||||
|
|
||||||
|
at.Run()
|
||||||
|
|
||||||
|
t.Log(co.Check())
|
||||||
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user