mirror of
https://github.com/prometheus/alertmanager
synced 2025-01-03 04:02:13 +00:00
notify: refactor code to retry requests (#1974)
* notify: refactor code to retry requests Signed-off-by: Simon Pasquier <spasquie@redhat.com> * s/Process/Check/ Signed-off-by: Simon Pasquier <spasquie@redhat.com>
This commit is contained in:
parent
f45f870d2c
commit
655947d7e0
@ -35,6 +35,7 @@ type Notifier struct {
|
||||
tmpl *template.Template
|
||||
logger log.Logger
|
||||
client *http.Client
|
||||
retrier *notify.Retrier
|
||||
}
|
||||
|
||||
// New returns a new Hipchat notification handler.
|
||||
@ -48,6 +49,10 @@ func New(c *config.HipchatConfig, t *template.Template, l log.Logger) (*Notifier
|
||||
tmpl: t,
|
||||
logger: l,
|
||||
client: client,
|
||||
// Response codes 429 (rate limiting) and 5xx can potentially recover.
|
||||
// 2xx response codes indicate successful requests.
|
||||
// https://developer.atlassian.com/hipchat/guide/hipchat-rest-api/api-response-codes
|
||||
retrier: ¬ify.Retrier{RetryCodes: []int{http.StatusTooManyRequests}},
|
||||
}, nil
|
||||
}
|
||||
|
||||
@ -103,16 +108,5 @@ func (n *Notifier) Notify(ctx context.Context, as ...*types.Alert) (bool, error)
|
||||
}
|
||||
defer notify.Drain(resp)
|
||||
|
||||
return n.retry(resp.StatusCode)
|
||||
}
|
||||
|
||||
func (n *Notifier) retry(statusCode int) (bool, error) {
|
||||
// Response codes 429 (rate limiting) and 5xx can potentially recover.
|
||||
// 2xx response codes indicate successful requests.
|
||||
// https://developer.atlassian.com/hipchat/guide/hipchat-rest-api/api-response-codes
|
||||
if statusCode/100 != 2 {
|
||||
return (statusCode == 429 || statusCode/100 == 5), fmt.Errorf("unexpected status code %v", statusCode)
|
||||
}
|
||||
|
||||
return false, nil
|
||||
return n.retrier.Check(resp.StatusCode, nil)
|
||||
}
|
||||
|
@ -27,10 +27,17 @@ import (
|
||||
)
|
||||
|
||||
func TestHipchatRetry(t *testing.T) {
|
||||
notifier := new(Notifier)
|
||||
notifier, err := New(
|
||||
&config.HipchatConfig{
|
||||
HTTPConfig: &commoncfg.HTTPClientConfig{},
|
||||
},
|
||||
test.CreateTmpl(t),
|
||||
log.NewNopLogger(),
|
||||
)
|
||||
require.NoError(t, err)
|
||||
retryCodes := append(test.DefaultRetryCodes(), http.StatusTooManyRequests)
|
||||
for statusCode, expected := range test.RetryTests(retryCodes) {
|
||||
actual, _ := notifier.retry(statusCode)
|
||||
actual, _ := notifier.retrier.Check(statusCode, nil)
|
||||
require.Equal(t, expected, actual, fmt.Sprintf("error on status %d", statusCode))
|
||||
}
|
||||
}
|
||||
|
@ -18,8 +18,6 @@ import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"io/ioutil"
|
||||
"net/http"
|
||||
"strings"
|
||||
|
||||
@ -41,6 +39,7 @@ type Notifier struct {
|
||||
tmpl *template.Template
|
||||
logger log.Logger
|
||||
client *http.Client
|
||||
retrier *notify.Retrier
|
||||
}
|
||||
|
||||
// New returns a new OpsGenie notifier.
|
||||
@ -49,7 +48,13 @@ func New(c *config.OpsGenieConfig, t *template.Template, l log.Logger) (*Notifie
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return &Notifier{conf: c, tmpl: t, logger: l, client: client}, nil
|
||||
return &Notifier{
|
||||
conf: c,
|
||||
tmpl: t,
|
||||
logger: l,
|
||||
client: client,
|
||||
retrier: ¬ify.Retrier{RetryCodes: []int{http.StatusTooManyRequests}},
|
||||
}, nil
|
||||
}
|
||||
|
||||
type opsGenieCreateMessage struct {
|
||||
@ -88,7 +93,7 @@ func (n *Notifier) Notify(ctx context.Context, as ...*types.Alert) (bool, error)
|
||||
}
|
||||
defer notify.Drain(resp)
|
||||
|
||||
return n.retry(resp.StatusCode, resp.Body)
|
||||
return n.retrier.Check(resp.StatusCode, resp.Body)
|
||||
}
|
||||
|
||||
// Like Split but filter out empty strings.
|
||||
@ -191,20 +196,3 @@ func (n *Notifier) createRequest(ctx context.Context, as ...*types.Alert) (*http
|
||||
req.Header.Set("Authorization", fmt.Sprintf("GenieKey %s", apiKey))
|
||||
return req.WithContext(ctx), true, nil
|
||||
}
|
||||
|
||||
func (n *Notifier) retry(statusCode int, body io.Reader) (bool, error) {
|
||||
if statusCode/100 == 2 {
|
||||
return false, nil
|
||||
}
|
||||
|
||||
err := errors.Errorf("unexpected status code %v", statusCode)
|
||||
if body != nil {
|
||||
if bs, errRead := ioutil.ReadAll(body); errRead == nil {
|
||||
err = errors.Errorf("%s: %s", err, string(bs))
|
||||
}
|
||||
}
|
||||
|
||||
// https://docs.opsgenie.com/docs/response#section-response-codes
|
||||
// Response codes 429 (rate limiting) and 5xx are potentially recoverable
|
||||
return statusCode/100 == 5 || statusCode == 429, err
|
||||
}
|
||||
|
@ -14,10 +14,8 @@
|
||||
package opsgenie
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"fmt"
|
||||
"io"
|
||||
"io/ioutil"
|
||||
"net/http"
|
||||
"net/url"
|
||||
@ -36,45 +34,22 @@ import (
|
||||
)
|
||||
|
||||
func TestOpsGenieRetry(t *testing.T) {
|
||||
notifier := new(Notifier)
|
||||
notifier, err := New(
|
||||
&config.OpsGenieConfig{
|
||||
HTTPConfig: &commoncfg.HTTPClientConfig{},
|
||||
},
|
||||
test.CreateTmpl(t),
|
||||
log.NewNopLogger(),
|
||||
)
|
||||
require.NoError(t, err)
|
||||
|
||||
retryCodes := append(test.DefaultRetryCodes(), http.StatusTooManyRequests)
|
||||
for statusCode, expected := range test.RetryTests(retryCodes) {
|
||||
actual, _ := notifier.retry(statusCode, nil)
|
||||
actual, _ := notifier.retrier.Check(statusCode, nil)
|
||||
require.Equal(t, expected, actual, fmt.Sprintf("error on status %d", statusCode))
|
||||
}
|
||||
}
|
||||
|
||||
func TestOpsGenieErr(t *testing.T) {
|
||||
notifier := new(Notifier)
|
||||
for _, tc := range []struct {
|
||||
status int
|
||||
body io.Reader
|
||||
expected string
|
||||
}{
|
||||
{
|
||||
status: http.StatusUnprocessableEntity,
|
||||
body: nil,
|
||||
expected: "unexpected status code 422",
|
||||
},
|
||||
{
|
||||
status: http.StatusUnprocessableEntity,
|
||||
body: bytes.NewBuffer([]byte(`{"message":"Request body is not processable. Please check the errors.","errors":{"priority":"should be one of [ P1, P2, P3, P4, P5 ]"},"took":0.002,"requestId":"865a4f83-99d9-48c8-9550-42a375a3a387"}`)),
|
||||
expected: `unexpected status code 422: {"message":"Request body is not processable. Please check the errors.","errors":{"priority":"should be one of [ P1, P2, P3, P4, P5 ]"},"took":0.002,"requestId":"865a4f83-99d9-48c8-9550-42a375a3a387"}`,
|
||||
},
|
||||
{
|
||||
status: http.StatusInternalServerError,
|
||||
body: bytes.NewBuffer([]byte("internal error")),
|
||||
expected: "unexpected status code 500: internal error",
|
||||
},
|
||||
} {
|
||||
t.Run("", func(t *testing.T) {
|
||||
_, err := notifier.retry(tc.status, tc.body)
|
||||
require.Equal(t, err.Error(), tc.expected)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestOpsGenieRedactedURL(t *testing.T) {
|
||||
ctx, u, fn := test.GetContextWithCancelingURL()
|
||||
defer fn()
|
||||
|
@ -40,6 +40,7 @@ type Notifier struct {
|
||||
logger log.Logger
|
||||
apiV1 string // for tests.
|
||||
client *http.Client
|
||||
retrier *notify.Retrier
|
||||
}
|
||||
|
||||
// New returns a new PagerDuty notifier.
|
||||
@ -51,6 +52,13 @@ func New(c *config.PagerdutyConfig, t *template.Template, l log.Logger) (*Notifi
|
||||
n := &Notifier{conf: c, tmpl: t, logger: l, client: client}
|
||||
if c.ServiceKey != "" {
|
||||
n.apiV1 = "https://events.pagerduty.com/generic/2010-04-15/create_event.json"
|
||||
// Retrying can solve the issue on 403 (rate limiting) and 5xx response codes.
|
||||
// https://v2.developer.pagerduty.com/docs/trigger-events
|
||||
n.retrier = ¬ify.Retrier{RetryCodes: []int{http.StatusForbidden}, CustomDetailsFunc: errDetails}
|
||||
} else {
|
||||
// Retrying can solve the issue on 429 (rate limiting) and 5xx response codes.
|
||||
// https://v2.developer.pagerduty.com/docs/events-api-v2#api-response-codes--retry-logic
|
||||
n.retrier = ¬ify.Retrier{RetryCodes: []int{http.StatusTooManyRequests}, CustomDetailsFunc: errDetails}
|
||||
}
|
||||
return n, nil
|
||||
}
|
||||
@ -142,7 +150,7 @@ func (n *Notifier) notifyV1(
|
||||
}
|
||||
defer notify.Drain(resp)
|
||||
|
||||
return n.retryV1(resp)
|
||||
return n.retrier.Check(resp.StatusCode, resp.Body)
|
||||
}
|
||||
|
||||
func (n *Notifier) notifyV2(
|
||||
@ -210,7 +218,7 @@ func (n *Notifier) notifyV2(
|
||||
}
|
||||
defer notify.Drain(resp)
|
||||
|
||||
return n.retryV2(resp)
|
||||
return n.retrier.Check(resp.StatusCode, resp.Body)
|
||||
}
|
||||
|
||||
// Notify implements the Notifier interface.
|
||||
@ -248,44 +256,19 @@ func (n *Notifier) Notify(ctx context.Context, as ...*types.Alert) (bool, error)
|
||||
return n.notifyV2(ctx, eventType, key, data, details, as...)
|
||||
}
|
||||
|
||||
func pagerDutyErr(status int, body io.Reader) error {
|
||||
func errDetails(status int, body io.Reader) string {
|
||||
// See https://v2.developer.pagerduty.com/docs/trigger-events for the v1 events API.
|
||||
// See https://v2.developer.pagerduty.com/docs/send-an-event-events-api-v2 for the v2 events API.
|
||||
type pagerDutyResponse struct {
|
||||
if status != http.StatusBadRequest || body == nil {
|
||||
return ""
|
||||
}
|
||||
var pgr struct {
|
||||
Status string `json:"status"`
|
||||
Message string `json:"message"`
|
||||
Errors []string `json:"errors"`
|
||||
}
|
||||
if status == http.StatusBadRequest && body != nil {
|
||||
var r pagerDutyResponse
|
||||
if err := json.NewDecoder(body).Decode(&r); err == nil {
|
||||
return fmt.Errorf("%s: %s", r.Message, strings.Join(r.Errors, ","))
|
||||
if err := json.NewDecoder(body).Decode(&pgr); err != nil {
|
||||
return ""
|
||||
}
|
||||
}
|
||||
return fmt.Errorf("unexpected status code: %v", status)
|
||||
}
|
||||
|
||||
func (n *Notifier) retryV1(resp *http.Response) (bool, error) {
|
||||
// Retrying can solve the issue on 403 (rate limiting) and 5xx response codes.
|
||||
// 2xx response codes indicate a successful request.
|
||||
// https://v2.developer.pagerduty.com/docs/trigger-events
|
||||
statusCode := resp.StatusCode
|
||||
|
||||
if statusCode/100 != 2 {
|
||||
return (statusCode == http.StatusForbidden || statusCode/100 == 5), pagerDutyErr(statusCode, resp.Body)
|
||||
}
|
||||
return false, nil
|
||||
}
|
||||
|
||||
func (n *Notifier) retryV2(resp *http.Response) (bool, error) {
|
||||
// Retrying can solve the issue on 429 (rate limiting) and 5xx response codes.
|
||||
// 2xx response codes indicate a successful request.
|
||||
// https://v2.developer.pagerduty.com/docs/events-api-v2#api-response-codes--retry-logic
|
||||
statusCode := resp.StatusCode
|
||||
|
||||
if statusCode/100 != 2 {
|
||||
return (statusCode == http.StatusTooManyRequests || statusCode/100 == 5), pagerDutyErr(statusCode, resp.Body)
|
||||
}
|
||||
|
||||
return false, nil
|
||||
return fmt.Sprintf("%s: %s", pgr.Message, strings.Join(pgr.Errors, ","))
|
||||
}
|
||||
|
@ -29,27 +29,37 @@ import (
|
||||
)
|
||||
|
||||
func TestPagerDutyRetryV1(t *testing.T) {
|
||||
notifier := new(Notifier)
|
||||
notifier, err := New(
|
||||
&config.PagerdutyConfig{
|
||||
ServiceKey: config.Secret("01234567890123456789012345678901"),
|
||||
HTTPConfig: &commoncfg.HTTPClientConfig{},
|
||||
},
|
||||
test.CreateTmpl(t),
|
||||
log.NewNopLogger(),
|
||||
)
|
||||
require.NoError(t, err)
|
||||
|
||||
retryCodes := append(test.DefaultRetryCodes(), http.StatusForbidden)
|
||||
for statusCode, expected := range test.RetryTests(retryCodes) {
|
||||
resp := &http.Response{
|
||||
StatusCode: statusCode,
|
||||
}
|
||||
actual, _ := notifier.retryV1(resp)
|
||||
actual, _ := notifier.retrier.Check(statusCode, nil)
|
||||
require.Equal(t, expected, actual, fmt.Sprintf("retryv1 - error on status %d", statusCode))
|
||||
}
|
||||
}
|
||||
|
||||
func TestPagerDutyRetryV2(t *testing.T) {
|
||||
notifier := new(Notifier)
|
||||
notifier, err := New(
|
||||
&config.PagerdutyConfig{
|
||||
RoutingKey: config.Secret("01234567890123456789012345678901"),
|
||||
HTTPConfig: &commoncfg.HTTPClientConfig{},
|
||||
},
|
||||
test.CreateTmpl(t),
|
||||
log.NewNopLogger(),
|
||||
)
|
||||
require.NoError(t, err)
|
||||
|
||||
retryCodes := append(test.DefaultRetryCodes(), http.StatusTooManyRequests)
|
||||
for statusCode, expected := range test.RetryTests(retryCodes) {
|
||||
resp := &http.Response{
|
||||
StatusCode: statusCode,
|
||||
}
|
||||
actual, _ := notifier.retryV2(resp)
|
||||
actual, _ := notifier.retrier.Check(statusCode, nil)
|
||||
require.Equal(t, expected, actual, fmt.Sprintf("retryv2 - error on status %d", statusCode))
|
||||
}
|
||||
}
|
||||
@ -92,7 +102,7 @@ func TestPagerDutyRedactedURLV2(t *testing.T) {
|
||||
test.AssertNotifyLeaksNoSecret(t, ctx, notifier, key)
|
||||
}
|
||||
|
||||
func TestPagerDutyErr(t *testing.T) {
|
||||
func TestErrDetails(t *testing.T) {
|
||||
for _, tc := range []struct {
|
||||
status int
|
||||
body io.Reader
|
||||
@ -111,25 +121,23 @@ func TestPagerDutyErr(t *testing.T) {
|
||||
status: http.StatusBadRequest,
|
||||
body: bytes.NewBuffer([]byte(`{"status"}`)),
|
||||
|
||||
exp: "unexpected status code: 400",
|
||||
exp: "",
|
||||
},
|
||||
{
|
||||
status: http.StatusBadRequest,
|
||||
body: nil,
|
||||
|
||||
exp: "unexpected status code: 400",
|
||||
exp: "",
|
||||
},
|
||||
{
|
||||
status: http.StatusTooManyRequests,
|
||||
body: bytes.NewBuffer([]byte("")),
|
||||
|
||||
exp: "unexpected status code: 429",
|
||||
exp: "",
|
||||
},
|
||||
} {
|
||||
tc := tc
|
||||
t.Run("", func(t *testing.T) {
|
||||
err := pagerDutyErr(tc.status, tc.body)
|
||||
require.Contains(t, err.Error(), tc.exp)
|
||||
err := errDetails(tc.status, tc.body)
|
||||
require.Contains(t, err, tc.exp)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
@ -37,6 +37,7 @@ type Notifier struct {
|
||||
tmpl *template.Template
|
||||
logger log.Logger
|
||||
client *http.Client
|
||||
retrier *notify.Retrier
|
||||
apiURL string // for tests.
|
||||
}
|
||||
|
||||
@ -51,6 +52,7 @@ func New(c *config.PushoverConfig, t *template.Template, l log.Logger) (*Notifie
|
||||
tmpl: t,
|
||||
logger: l,
|
||||
client: client,
|
||||
retrier: ¬ify.Retrier{},
|
||||
apiURL: "https://api.pushover.net/1/messages.json",
|
||||
}, nil
|
||||
}
|
||||
@ -128,18 +130,5 @@ func (n *Notifier) Notify(ctx context.Context, as ...*types.Alert) (bool, error)
|
||||
}
|
||||
defer notify.Drain(resp)
|
||||
|
||||
return n.retry(resp.StatusCode)
|
||||
}
|
||||
|
||||
func (n *Notifier) retry(statusCode int) (bool, error) {
|
||||
// Only documented behaviour is that 2xx response codes are successful and
|
||||
// 4xx are unsuccessful, therefore assuming only 5xx are recoverable.
|
||||
// https://pushover.net/api#response
|
||||
if statusCode/100 == 5 {
|
||||
return true, fmt.Errorf("unexpected status code %v", statusCode)
|
||||
} else if statusCode/100 != 2 {
|
||||
return false, fmt.Errorf("unexpected status code %v", statusCode)
|
||||
}
|
||||
|
||||
return false, nil
|
||||
return n.retrier.Check(resp.StatusCode, nil)
|
||||
}
|
||||
|
@ -26,9 +26,16 @@ import (
|
||||
)
|
||||
|
||||
func TestPushoverRetry(t *testing.T) {
|
||||
notifier := new(Notifier)
|
||||
notifier, err := New(
|
||||
&config.PushoverConfig{
|
||||
HTTPConfig: &commoncfg.HTTPClientConfig{},
|
||||
},
|
||||
test.CreateTmpl(t),
|
||||
log.NewNopLogger(),
|
||||
)
|
||||
require.NoError(t, err)
|
||||
for statusCode, expected := range test.RetryTests(test.DefaultRetryCodes()) {
|
||||
actual, _ := notifier.retry(statusCode)
|
||||
actual, _ := notifier.retrier.Check(statusCode, nil)
|
||||
require.Equal(t, expected, actual, fmt.Sprintf("error on status %d", statusCode))
|
||||
}
|
||||
}
|
||||
|
@ -17,9 +17,6 @@ import (
|
||||
"bytes"
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"io/ioutil"
|
||||
"net/http"
|
||||
|
||||
"github.com/go-kit/kit/log"
|
||||
@ -37,6 +34,7 @@ type Notifier struct {
|
||||
tmpl *template.Template
|
||||
logger log.Logger
|
||||
client *http.Client
|
||||
retrier *notify.Retrier
|
||||
}
|
||||
|
||||
// New returns a new Slack notification handler.
|
||||
@ -51,6 +49,7 @@ func New(c *config.SlackConfig, t *template.Template, l log.Logger) (*Notifier,
|
||||
tmpl: t,
|
||||
logger: l,
|
||||
client: client,
|
||||
retrier: ¬ify.Retrier{},
|
||||
}, nil
|
||||
}
|
||||
|
||||
@ -181,22 +180,8 @@ func (n *Notifier) Notify(ctx context.Context, as ...*types.Alert) (bool, error)
|
||||
}
|
||||
defer notify.Drain(resp)
|
||||
|
||||
return n.retry(resp.StatusCode, resp.Body)
|
||||
}
|
||||
|
||||
func (n *Notifier) retry(statusCode int, body io.Reader) (bool, error) {
|
||||
// Only 5xx response codes are recoverable and 2xx codes are successful.
|
||||
// https://api.slack.com/incoming-webhooks#handling_errors
|
||||
// https://api.slack.com/changelog/2016-05-17-changes-to-errors-for-incoming-webhooks
|
||||
if statusCode/100 == 2 {
|
||||
return false, nil
|
||||
}
|
||||
|
||||
err := fmt.Errorf("unexpected status code %v", statusCode)
|
||||
if body != nil {
|
||||
if bs, errRead := ioutil.ReadAll(body); errRead == nil {
|
||||
err = fmt.Errorf("%s: %q", err, string(bs))
|
||||
}
|
||||
}
|
||||
return statusCode/100 == 5, err
|
||||
return n.retrier.Check(resp.StatusCode, resp.Body)
|
||||
}
|
||||
|
@ -14,10 +14,7 @@
|
||||
package slack
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"testing"
|
||||
|
||||
"github.com/go-kit/kit/log"
|
||||
@ -29,45 +26,18 @@ import (
|
||||
)
|
||||
|
||||
func TestSlackRetry(t *testing.T) {
|
||||
notifier := new(Notifier)
|
||||
for statusCode, expected := range test.RetryTests(test.DefaultRetryCodes()) {
|
||||
actual, _ := notifier.retry(statusCode, nil)
|
||||
require.Equal(t, expected, actual, fmt.Sprintf("error on status %d", statusCode))
|
||||
}
|
||||
}
|
||||
notifier, err := New(
|
||||
&config.SlackConfig{
|
||||
HTTPConfig: &commoncfg.HTTPClientConfig{},
|
||||
},
|
||||
test.CreateTmpl(t),
|
||||
log.NewNopLogger(),
|
||||
)
|
||||
require.NoError(t, err)
|
||||
|
||||
func TestSlackErr(t *testing.T) {
|
||||
notifier := new(Notifier)
|
||||
for _, tc := range []struct {
|
||||
status int
|
||||
body io.Reader
|
||||
expected string
|
||||
}{
|
||||
{
|
||||
status: http.StatusBadRequest,
|
||||
body: nil,
|
||||
expected: "unexpected status code 400",
|
||||
},
|
||||
{
|
||||
status: http.StatusBadRequest,
|
||||
body: bytes.NewBuffer([]byte("invalid_payload")),
|
||||
expected: "unexpected status code 400: \"invalid_payload\"",
|
||||
},
|
||||
{
|
||||
status: http.StatusNotFound,
|
||||
body: bytes.NewBuffer([]byte("channel_not_found")),
|
||||
expected: "unexpected status code 404: \"channel_not_found\"",
|
||||
},
|
||||
{
|
||||
status: http.StatusInternalServerError,
|
||||
body: bytes.NewBuffer([]byte("rollup_error")),
|
||||
expected: "unexpected status code 500: \"rollup_error\"",
|
||||
},
|
||||
} {
|
||||
t.Run("", func(t *testing.T) {
|
||||
_, err := notifier.retry(tc.status, tc.body)
|
||||
require.Contains(t, err.Error(), tc.expected)
|
||||
})
|
||||
for statusCode, expected := range test.RetryTests(test.DefaultRetryCodes()) {
|
||||
actual, _ := notifier.retrier.Check(statusCode, nil)
|
||||
require.Equal(t, expected, actual, fmt.Sprintf("error on status %d", statusCode))
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -24,6 +24,7 @@ import (
|
||||
|
||||
"github.com/go-kit/kit/log"
|
||||
"github.com/go-kit/kit/log/level"
|
||||
"github.com/pkg/errors"
|
||||
|
||||
"github.com/prometheus/alertmanager/template"
|
||||
"github.com/prometheus/alertmanager/types"
|
||||
@ -108,7 +109,7 @@ type Key string
|
||||
func ExtractGroupKey(ctx context.Context) (Key, error) {
|
||||
key, ok := GroupKey(ctx)
|
||||
if !ok {
|
||||
return "", fmt.Errorf("group key missing")
|
||||
return "", errors.Errorf("group key missing")
|
||||
}
|
||||
return Key(key), nil
|
||||
}
|
||||
@ -139,3 +140,57 @@ func GetTemplateData(ctx context.Context, tmpl *template.Template, alerts []*typ
|
||||
}
|
||||
return tmpl.Data(recv, groupLabels, alerts...)
|
||||
}
|
||||
|
||||
func readAll(r io.Reader) string {
|
||||
if r == nil {
|
||||
return ""
|
||||
}
|
||||
bs, err := ioutil.ReadAll(r)
|
||||
if err != nil {
|
||||
return ""
|
||||
}
|
||||
return string(bs)
|
||||
}
|
||||
|
||||
// Retrier knows when to retry an HTTP request to a receiver. 2xx status codes
|
||||
// are successful, anything else is a failure and only 5xx status codes should
|
||||
// be retried.
|
||||
type Retrier struct {
|
||||
// Function to return additional information in the error message.
|
||||
CustomDetailsFunc func(code int, body io.Reader) string
|
||||
// Additional HTTP status codes that should be retried.
|
||||
RetryCodes []int
|
||||
}
|
||||
|
||||
// Check returns a boolean indicating whether the request should be retried
|
||||
// and an optional error if the request has failed. If body is not nil, it will
|
||||
// be included in the error message.
|
||||
func (r *Retrier) Check(statusCode int, body io.Reader) (bool, error) {
|
||||
// 2xx responses are considered to be always successful.
|
||||
if statusCode/100 == 2 {
|
||||
return false, nil
|
||||
}
|
||||
|
||||
// 5xx responses are considered to be always retried.
|
||||
retry := statusCode/100 == 5
|
||||
if !retry {
|
||||
for _, code := range r.RetryCodes {
|
||||
if code == statusCode {
|
||||
retry = true
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
s := fmt.Sprintf("unexpected status code %v", statusCode)
|
||||
var details string
|
||||
if r.CustomDetailsFunc != nil {
|
||||
details = r.CustomDetailsFunc(statusCode, body)
|
||||
} else {
|
||||
details = readAll(body)
|
||||
}
|
||||
if details != "" {
|
||||
s = fmt.Sprintf("%s: %s", s, details)
|
||||
}
|
||||
return retry, errors.New(s)
|
||||
}
|
||||
|
@ -14,7 +14,11 @@
|
||||
package notify
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
"io"
|
||||
"io/ioutil"
|
||||
"net/http"
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/require"
|
||||
@ -80,3 +84,96 @@ func TestTruncate(t *testing.T) {
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
type brokenReader struct{}
|
||||
|
||||
func (b brokenReader) Read([]byte) (int, error) {
|
||||
return 0, fmt.Errorf("some error")
|
||||
}
|
||||
|
||||
func TestRetrierCheck(t *testing.T) {
|
||||
for _, tc := range []struct {
|
||||
retrier Retrier
|
||||
status int
|
||||
body io.Reader
|
||||
|
||||
retry bool
|
||||
expectedErr string
|
||||
}{
|
||||
{
|
||||
retrier: Retrier{},
|
||||
status: http.StatusOK,
|
||||
body: bytes.NewBuffer([]byte("ok")),
|
||||
|
||||
retry: false,
|
||||
},
|
||||
{
|
||||
retrier: Retrier{},
|
||||
status: http.StatusNoContent,
|
||||
|
||||
retry: false,
|
||||
},
|
||||
{
|
||||
retrier: Retrier{},
|
||||
status: http.StatusBadRequest,
|
||||
|
||||
retry: false,
|
||||
expectedErr: "unexpected status code 400",
|
||||
},
|
||||
{
|
||||
retrier: Retrier{RetryCodes: []int{http.StatusTooManyRequests}},
|
||||
status: http.StatusBadRequest,
|
||||
body: bytes.NewBuffer([]byte("invalid request")),
|
||||
|
||||
retry: false,
|
||||
expectedErr: "unexpected status code 400: invalid request",
|
||||
},
|
||||
{
|
||||
retrier: Retrier{RetryCodes: []int{http.StatusTooManyRequests}},
|
||||
status: http.StatusTooManyRequests,
|
||||
|
||||
retry: true,
|
||||
expectedErr: "unexpected status code 429",
|
||||
},
|
||||
{
|
||||
retrier: Retrier{},
|
||||
status: http.StatusServiceUnavailable,
|
||||
body: bytes.NewBuffer([]byte("retry later")),
|
||||
|
||||
retry: true,
|
||||
expectedErr: "unexpected status code 503: retry later",
|
||||
},
|
||||
{
|
||||
retrier: Retrier{},
|
||||
status: http.StatusBadGateway,
|
||||
body: &brokenReader{},
|
||||
|
||||
retry: true,
|
||||
expectedErr: "unexpected status code 502",
|
||||
},
|
||||
{
|
||||
retrier: Retrier{CustomDetailsFunc: func(status int, b io.Reader) string {
|
||||
if status != http.StatusServiceUnavailable {
|
||||
return "invalid"
|
||||
}
|
||||
bs, _ := ioutil.ReadAll(b)
|
||||
return fmt.Sprintf("server response is %q", string(bs))
|
||||
}},
|
||||
status: http.StatusServiceUnavailable,
|
||||
body: bytes.NewBuffer([]byte("retry later")),
|
||||
|
||||
retry: true,
|
||||
expectedErr: "unexpected status code 503: server response is \"retry later\"",
|
||||
},
|
||||
} {
|
||||
t.Run("", func(t *testing.T) {
|
||||
retry, err := tc.retrier.Check(tc.status, tc.body)
|
||||
require.Equal(t, tc.retry, retry)
|
||||
if tc.expectedErr == "" {
|
||||
require.NoError(t, err)
|
||||
return
|
||||
}
|
||||
require.EqualError(t, err, tc.expectedErr)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
@ -37,6 +37,7 @@ type Notifier struct {
|
||||
tmpl *template.Template
|
||||
logger log.Logger
|
||||
client *http.Client
|
||||
retrier *notify.Retrier
|
||||
}
|
||||
|
||||
// New returns a new VictorOps notifier.
|
||||
@ -50,6 +51,9 @@ func New(c *config.VictorOpsConfig, t *template.Template, l log.Logger) (*Notifi
|
||||
tmpl: t,
|
||||
logger: l,
|
||||
client: client,
|
||||
// Missing documentation therefore assuming only 5xx response codes are
|
||||
// recoverable.
|
||||
retrier: ¬ify.Retrier{},
|
||||
}, nil
|
||||
}
|
||||
|
||||
@ -80,7 +84,7 @@ func (n *Notifier) Notify(ctx context.Context, as ...*types.Alert) (bool, error)
|
||||
}
|
||||
defer notify.Drain(resp)
|
||||
|
||||
return n.retry(resp.StatusCode)
|
||||
return n.retrier.Check(resp.StatusCode, nil)
|
||||
}
|
||||
|
||||
// Create the JSON payload to be sent to the VictorOps API.
|
||||
@ -144,15 +148,3 @@ func (n *Notifier) createVictorOpsPayload(ctx context.Context, as ...*types.Aler
|
||||
}
|
||||
return &buf, nil
|
||||
}
|
||||
|
||||
func (n *Notifier) retry(statusCode int) (bool, error) {
|
||||
// Missing documentation therefore assuming only 5xx response codes are
|
||||
// recoverable.
|
||||
if statusCode/100 == 5 {
|
||||
return true, fmt.Errorf("unexpected status code %v", statusCode)
|
||||
} else if statusCode/100 != 2 {
|
||||
return false, fmt.Errorf("unexpected status code %v", statusCode)
|
||||
}
|
||||
|
||||
return false, nil
|
||||
}
|
||||
|
@ -83,9 +83,17 @@ func TestVictorOpsCustomFields(t *testing.T) {
|
||||
}
|
||||
|
||||
func TestVictorOpsRetry(t *testing.T) {
|
||||
notifier := new(Notifier)
|
||||
notifier, err := New(
|
||||
&config.VictorOpsConfig{
|
||||
APIKey: config.Secret("secret"),
|
||||
HTTPConfig: &commoncfg.HTTPClientConfig{},
|
||||
},
|
||||
test.CreateTmpl(t),
|
||||
log.NewNopLogger(),
|
||||
)
|
||||
require.NoError(t, err)
|
||||
for statusCode, expected := range test.RetryTests(test.DefaultRetryCodes()) {
|
||||
actual, _ := notifier.retry(statusCode)
|
||||
actual, _ := notifier.retrier.Check(statusCode, nil)
|
||||
require.Equal(t, expected, actual, fmt.Sprintf("error on status %d", statusCode))
|
||||
}
|
||||
}
|
||||
|
@ -18,6 +18,7 @@ import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
|
||||
"github.com/go-kit/kit/log"
|
||||
@ -39,6 +40,7 @@ type Notifier struct {
|
||||
tmpl *template.Template
|
||||
logger log.Logger
|
||||
client *http.Client
|
||||
retrier *notify.Retrier
|
||||
}
|
||||
|
||||
// New returns a new Webhook.
|
||||
@ -52,6 +54,13 @@ func New(conf *config.WebhookConfig, t *template.Template, l log.Logger) (*Notif
|
||||
tmpl: t,
|
||||
logger: l,
|
||||
client: client,
|
||||
// Webhooks are assumed to respond with 2xx response codes on a successful
|
||||
// request and 5xx response codes are assumed to be recoverable.
|
||||
retrier: ¬ify.Retrier{
|
||||
CustomDetailsFunc: func(int, io.Reader) string {
|
||||
return conf.URL.String()
|
||||
},
|
||||
},
|
||||
}, nil
|
||||
}
|
||||
|
||||
@ -97,15 +106,5 @@ func (n *Notifier) Notify(ctx context.Context, alerts ...*types.Alert) (bool, er
|
||||
}
|
||||
notify.Drain(resp)
|
||||
|
||||
return n.retry(resp.StatusCode)
|
||||
}
|
||||
|
||||
func (n *Notifier) retry(statusCode int) (bool, error) {
|
||||
// Webhooks are assumed to respond with 2xx response codes on a successful
|
||||
// request and 5xx response codes are assumed to be recoverable.
|
||||
if statusCode/100 != 2 {
|
||||
return (statusCode/100 == 5), fmt.Errorf("unexpected status code %v from %s", statusCode, n.conf.URL)
|
||||
}
|
||||
|
||||
return false, nil
|
||||
return n.retrier.Check(resp.StatusCode, nil)
|
||||
}
|
||||
|
@ -18,6 +18,8 @@ import (
|
||||
"net/url"
|
||||
"testing"
|
||||
|
||||
"github.com/go-kit/kit/log"
|
||||
commoncfg "github.com/prometheus/common/config"
|
||||
"github.com/stretchr/testify/require"
|
||||
|
||||
"github.com/prometheus/alertmanager/config"
|
||||
@ -27,11 +29,21 @@ import (
|
||||
func TestWebhookRetry(t *testing.T) {
|
||||
u, err := url.Parse("http://example.com")
|
||||
if err != nil {
|
||||
t.Fatalf("failed to parse URL: %v", err)
|
||||
require.NoError(t, err)
|
||||
}
|
||||
notifier, err := New(
|
||||
&config.WebhookConfig{
|
||||
URL: &config.URL{URL: u},
|
||||
HTTPConfig: &commoncfg.HTTPClientConfig{},
|
||||
},
|
||||
test.CreateTmpl(t),
|
||||
log.NewNopLogger(),
|
||||
)
|
||||
if err != nil {
|
||||
require.NoError(t, err)
|
||||
}
|
||||
notifier := &Notifier{conf: &config.WebhookConfig{URL: &config.URL{URL: u}}}
|
||||
for statusCode, expected := range test.RetryTests(test.DefaultRetryCodes()) {
|
||||
actual, _ := notifier.retry(statusCode)
|
||||
actual, _ := notifier.retrier.Check(statusCode, nil)
|
||||
require.Equal(t, expected, actual, fmt.Sprintf("error on status %d", statusCode))
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user