alertmanager/notify/notify_test.go

// Copyright 2015 Prometheus Team
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package notify
import (
"context"
"errors"
"fmt"
"io"
"log/slog"
"reflect"
"sort"
"strings"
"testing"
"time"
"github.com/prometheus/client_golang/prometheus"
prom_testutil "github.com/prometheus/client_golang/prometheus/testutil"
"github.com/prometheus/common/model"
"github.com/prometheus/common/promslog"
"github.com/stretchr/testify/require"
"github.com/prometheus/alertmanager/featurecontrol"
"github.com/prometheus/alertmanager/nflog"
"github.com/prometheus/alertmanager/nflog/nflogpb"
"github.com/prometheus/alertmanager/silence"
"github.com/prometheus/alertmanager/silence/silencepb"
"github.com/prometheus/alertmanager/timeinterval"
"github.com/prometheus/alertmanager/types"
)
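// sendResolved is a test stub whose SendResolved method reports the wrapped bool,
// controlling whether resolved alerts are sent.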
type sendResolved bool
func (s sendResolved) SendResolved() bool {
return bool(s)
}
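// notifierFunc adapts a plain function into a notifier for tests.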
type notifierFunc func(ctx context.Context, alerts ...*types.Alert) (bool, error)
func (f notifierFunc) Notify(ctx context.Context, alerts ...*types.Alert) (bool, error) {
return f(ctx, alerts...)
}
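// failStage is a stage whose Exec always returns an error.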
type failStage struct{}
func (s failStage) Exec(ctx context.Context, l *slog.Logger, as ...*types.Alert) (context.Context, []*types.Alert, error) {
return ctx, nil, fmt.Errorf("some error")
}
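// testNflog is an in-memory stub of the notification log: Query returns the
// configured result and error, Log delegates to logFunc, and GC and Snapshot are no-ops.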
type testNflog struct {
qres []*nflogpb.Entry
qerr error
logFunc func(r *nflogpb.Receiver, gkey string, firingAlerts, resolvedAlerts []uint64, expiry time.Duration) error
}
func (l *testNflog) Query(p ...nflog.QueryParam) ([]*nflogpb.Entry, error) {
return l.qres, l.qerr
}
func (l *testNflog) Log(r *nflogpb.Receiver, gkey string, firingAlerts, resolvedAlerts []uint64, expiry time.Duration) error {
return l.logFunc(r, gkey, firingAlerts, resolvedAlerts, expiry)
}
func (l *testNflog) GC() (int, error) {
return 0, nil
}
func (l *testNflog) Snapshot(w io.Writer) (int, error) {
return 0, nil
}
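// alertHashSet builds a set from the given alert hashes.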
func alertHashSet(hashes ...uint64) map[uint64]struct{} {
res := map[uint64]struct{}{}
for _, h := range hashes {
res[h] = struct{}{}
}
return res
}
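// TestDedupStageNeedsUpdate exercises the needsUpdate decision table across nflog
// entries, firing and resolved alert sets, repeat intervals, and the send-resolved setting.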
func TestDedupStageNeedsUpdate(t *testing.T) {
now := utcNow()
cases := []struct {
entry *nflogpb.Entry
firingAlerts map[uint64]struct{}
resolvedAlerts map[uint64]struct{}
repeat time.Duration
resolve bool
res bool
}{
{
// No matching nflog entry should update.
entry: nil,
firingAlerts: alertHashSet(2, 3, 4),
res: true,
}, {
// No matching nflog entry shouldn't update if no alerts are firing.
entry: nil,
resolvedAlerts: alertHashSet(2, 3, 4),
res: false,
}, {
// Different sets of firing alerts should update.
entry: &nflogpb.Entry{FiringAlerts: []uint64{1, 2, 3}},
firingAlerts: alertHashSet(2, 3, 4),
res: true,
}, {
// Zero timestamp in the nflog entry should always update.
entry: &nflogpb.Entry{
FiringAlerts: []uint64{1, 2, 3},
Timestamp: time.Time{},
},
firingAlerts: alertHashSet(1, 2, 3),
res: true,
}, {
// Identical sets of alerts shouldn't update before repeat_interval.
entry: &nflogpb.Entry{
FiringAlerts: []uint64{1, 2, 3},
Timestamp: now.Add(-9 * time.Minute),
},
repeat: 10 * time.Minute,
firingAlerts: alertHashSet(1, 2, 3),
res: false,
}, {
// Identical sets of alerts should update after repeat_interval.
entry: &nflogpb.Entry{
FiringAlerts: []uint64{1, 2, 3},
Timestamp: now.Add(-11 * time.Minute),
},
repeat: 10 * time.Minute,
firingAlerts: alertHashSet(1, 2, 3),
res: true,
}, {
// Different sets of resolved alerts without firing alerts shouldn't update after repeat_interval.
entry: &nflogpb.Entry{
ResolvedAlerts: []uint64{1, 2, 3},
Timestamp: now.Add(-11 * time.Minute),
},
repeat: 10 * time.Minute,
resolvedAlerts: alertHashSet(3, 4, 5),
resolve: true,
res: false,
}, {
// Different sets of resolved alerts shouldn't update when resolve is false.
entry: &nflogpb.Entry{
FiringAlerts: []uint64{1, 2},
ResolvedAlerts: []uint64{3},
Timestamp: now.Add(-9 * time.Minute),
},
repeat: 10 * time.Minute,
firingAlerts: alertHashSet(1),
resolvedAlerts: alertHashSet(2, 3),
resolve: false,
res: false,
}, {
// Different sets of resolved alerts should update when resolve is true.
entry: &nflogpb.Entry{
FiringAlerts: []uint64{1, 2},
ResolvedAlerts: []uint64{3},
Timestamp: now.Add(-9 * time.Minute),
},
repeat: 10 * time.Minute,
firingAlerts: alertHashSet(1),
resolvedAlerts: alertHashSet(2, 3),
resolve: true,
res: true,
}, {
// Empty set of firing alerts should update when resolve is false.
entry: &nflogpb.Entry{
FiringAlerts: []uint64{1, 2},
ResolvedAlerts: []uint64{3},
Timestamp: now.Add(-9 * time.Minute),
},
repeat: 10 * time.Minute,
firingAlerts: alertHashSet(),
resolvedAlerts: alertHashSet(1, 2, 3),
resolve: false,
res: true,
}, {
// Empty set of firing alerts should update when resolve is true.
entry: &nflogpb.Entry{
FiringAlerts: []uint64{1, 2},
ResolvedAlerts: []uint64{3},
Timestamp: now.Add(-9 * time.Minute),
},
repeat: 10 * time.Minute,
firingAlerts: alertHashSet(),
resolvedAlerts: alertHashSet(1, 2, 3),
resolve: true,
res: true,
},
}
for i, c := range cases {
t.Log("case", i)
s := &DedupStage{
now: func() time.Time { return now },
rs: sendResolved(c.resolve),
}
res := s.needsUpdate(c.entry, c.firingAlerts, c.resolvedAlerts, c.repeat)
require.Equal(t, c.res, res)
}
}
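// TestDedupStage covers the Exec error paths (missing group key, missing repeat
// interval, notification log failures) as well as the decision whether to pass alerts on.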
func TestDedupStage(t *testing.T) {
i := 0
now := utcNow()
s := &DedupStage{
hash: func(a *types.Alert) uint64 {
res := uint64(i)
i++
return res
},
now: func() time.Time {
return now
},
rs: sendResolved(false),
}
ctx := context.Background()
_, _, err := s.Exec(ctx, promslog.NewNopLogger())
require.EqualError(t, err, "group key missing")
ctx = WithGroupKey(ctx, "1")
_, _, err = s.Exec(ctx, promslog.NewNopLogger())
require.EqualError(t, err, "repeat interval missing")
ctx = WithRepeatInterval(ctx, time.Hour)
alerts := []*types.Alert{{}, {}, {}}
// Must catch notification log query errors.
s.nflog = &testNflog{
qerr: errors.New("bad things"),
}
ctx, _, err = s.Exec(ctx, promslog.NewNopLogger(), alerts...)
require.EqualError(t, err, "bad things")
// ... but skip ErrNotFound.
s.nflog = &testNflog{
qerr: nflog.ErrNotFound,
}
ctx, res, err := s.Exec(ctx, promslog.NewNopLogger(), alerts...)
require.NoError(t, err, "unexpected error on not found log entry")
require.Equal(t, alerts, res, "input alerts differ from result alerts")
s.nflog = &testNflog{
qerr: nil,
qres: []*nflogpb.Entry{
{FiringAlerts: []uint64{0, 1, 2}},
{FiringAlerts: []uint64{1, 2, 3}},
},
}
ctx, _, err = s.Exec(ctx, promslog.NewNopLogger(), alerts...)
require.Contains(t, err.Error(), "result size")
// Must return no error and no alerts when there is no need to update.
i = 0
s.nflog = &testNflog{
qerr: nflog.ErrNotFound,
qres: []*nflogpb.Entry{
{
FiringAlerts: []uint64{0, 1, 2},
Timestamp: now,
},
},
}
ctx, res, err = s.Exec(ctx, promslog.NewNopLogger(), alerts...)
require.NoError(t, err)
require.Nil(t, res, "unexpected alerts returned")
// Must return no error and all input alerts on changes.
i = 0
s.nflog = &testNflog{
qerr: nil,
qres: []*nflogpb.Entry{
{
FiringAlerts: []uint64{1, 2, 3, 4},
Timestamp: now,
},
},
}
_, res, err = s.Exec(ctx, promslog.NewNopLogger(), alerts...)
require.NoError(t, err)
require.Equal(t, alerts, res, "unexpected alerts returned")
}
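// TestMultiStage checks that stages run in order and that both the context and
// the alert slice are passed from one stage to the next.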
func TestMultiStage(t *testing.T) {
var (
alerts1 = []*types.Alert{{}}
alerts2 = []*types.Alert{{}, {}}
alerts3 = []*types.Alert{{}, {}, {}}
)
stage := MultiStage{
StageFunc(func(ctx context.Context, l *slog.Logger, alerts ...*types.Alert) (context.Context, []*types.Alert, error) {
if !reflect.DeepEqual(alerts, alerts1) {
t.Fatal("Input not equal to input of MultiStage")
}
//nolint:staticcheck // Ignore SA1029
ctx = context.WithValue(ctx, "key", "value")
return ctx, alerts2, nil
}),
StageFunc(func(ctx context.Context, l *slog.Logger, alerts ...*types.Alert) (context.Context, []*types.Alert, error) {
if !reflect.DeepEqual(alerts, alerts2) {
t.Fatal("Input not equal to output of previous stage")
}
v, ok := ctx.Value("key").(string)
if !ok || v != "value" {
t.Fatalf("Expected value %q for key %q but got %q", "value", "key", v)
}
return ctx, alerts3, nil
}),
}
_, alerts, err := stage.Exec(context.Background(), promslog.NewNopLogger(), alerts1...)
if err != nil {
t.Fatalf("Exec failed: %s", err)
}
if !reflect.DeepEqual(alerts, alerts3) {
t.Fatal("Output of MultiStage is not equal to the output of the last stage")
}
}
func TestMultiStageFailure(t *testing.T) {
var (
ctx = context.Background()
s1 = failStage{}
stage = MultiStage{s1}
)
_, _, err := stage.Exec(ctx, promslog.NewNopLogger(), nil)
if err.Error() != "some error" {
t.Fatal("Errors were not propagated correctly by MultiStage")
}
}
func TestRoutingStage(t *testing.T) {
var (
alerts1 = []*types.Alert{{}}
alerts2 = []*types.Alert{{}, {}}
)
stage := RoutingStage{
"name": StageFunc(func(ctx context.Context, l *slog.Logger, alerts ...*types.Alert) (context.Context, []*types.Alert, error) {
if !reflect.DeepEqual(alerts, alerts1) {
t.Fatal("Input not equal to input of RoutingStage")
}
return ctx, alerts2, nil
}),
"not": failStage{},
}
ctx := WithReceiverName(context.Background(), "name")
_, alerts, err := stage.Exec(ctx, promslog.NewNopLogger(), alerts1...)
if err != nil {
t.Fatalf("Exec failed: %s", err)
}
if !reflect.DeepEqual(alerts, alerts2) {
t.Fatal("Output of RoutingStage is not equal to the output of the inner stage")
}
}
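// TestRetryStageWithError checks that a retryable notifier error is retried until
// success, while an unrecoverable error fails the stage.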
func TestRetryStageWithError(t *testing.T) {
fail, retry := true, true
sent := []*types.Alert{}
i := Integration{
notifier: notifierFunc(func(ctx context.Context, alerts ...*types.Alert) (bool, error) {
if fail {
fail = false
return retry, errors.New("fail to deliver notification")
}
sent = append(sent, alerts...)
return false, nil
}),
rs: sendResolved(false),
}
r := NewRetryStage(i, "", NewMetrics(prometheus.NewRegistry(), featurecontrol.NoopFlags{}))
alerts := []*types.Alert{
{
Alert: model.Alert{
EndsAt: time.Now().Add(time.Hour),
},
},
}
ctx := context.Background()
ctx = WithFiringAlerts(ctx, []uint64{0})
// Notify with a recoverable error should retry and succeed.
resctx, res, err := r.Exec(ctx, promslog.NewNopLogger(), alerts...)
require.NoError(t, err)
require.Equal(t, alerts, res)
require.Equal(t, alerts, sent)
require.NotNil(t, resctx)
// Notify with an unrecoverable error should fail.
sent = sent[:0]
fail = true
retry = false
resctx, _, err = r.Exec(ctx, promslog.NewNopLogger(), alerts...)
require.Error(t, err)
require.NotNil(t, resctx)
}
func TestRetryStageWithErrorCode(t *testing.T) {
testcases := map[string]struct {
isNewErrorWithReason bool
reason Reason
reasonlabel string
expectedCount int
}{
"for clientError": {isNewErrorWithReason: true, reason: ClientErrorReason, reasonlabel: ClientErrorReason.String(), expectedCount: 1},
"for serverError": {isNewErrorWithReason: true, reason: ServerErrorReason, reasonlabel: ServerErrorReason.String(), expectedCount: 1},
"for unexpected code": {isNewErrorWithReason: false, reason: DefaultReason, reasonlabel: DefaultReason.String(), expectedCount: 1},
}
for _, testData := range testcases {
retry := false
testData := testData
i := Integration{
name: "test",
notifier: notifierFunc(func(ctx context.Context, alerts ...*types.Alert) (bool, error) {
if !testData.isNewErrorWithReason {
return retry, errors.New("fail to deliver notification")
}
return retry, NewErrorWithReason(testData.reason, errors.New("fail to deliver notification"))
}),
rs: sendResolved(false),
}
r := NewRetryStage(i, "", NewMetrics(prometheus.NewRegistry(), featurecontrol.NoopFlags{}))
alerts := []*types.Alert{
{
Alert: model.Alert{
EndsAt: time.Now().Add(time.Hour),
},
},
}
ctx := context.Background()
ctx = WithFiringAlerts(ctx, []uint64{0})
// Notify with a non-recoverable error.
resctx, _, err := r.Exec(ctx, promslog.NewNopLogger(), alerts...)
counter := r.metrics.numTotalFailedNotifications
require.Equal(t, testData.expectedCount, int(prom_testutil.ToFloat64(counter.WithLabelValues(r.integration.Name(), testData.reasonlabel))))
require.Error(t, err)
require.NotNil(t, resctx)
}
}
func TestRetryStageWithContextCanceled(t *testing.T) {
ctx, cancel := context.WithCancel(context.Background())
i := Integration{
name: "test",
notifier: notifierFunc(func(ctx context.Context, alerts ...*types.Alert) (bool, error) {
cancel()
return true, errors.New("request failed: context canceled")
}),
rs: sendResolved(false),
}
r := NewRetryStage(i, "", NewMetrics(prometheus.NewRegistry(), featurecontrol.NoopFlags{}))
alerts := []*types.Alert{
{
Alert: model.Alert{
EndsAt: time.Now().Add(time.Hour),
},
},
}
ctx = WithFiringAlerts(ctx, []uint64{0})
// The notifier returns a retryable error, but the canceled context aborts the retry.
resctx, _, err := r.Exec(ctx, promslog.NewNopLogger(), alerts...)
counter := r.metrics.numTotalFailedNotifications
require.Equal(t, 1, int(prom_testutil.ToFloat64(counter.WithLabelValues(r.integration.Name(), ContextCanceledReason.String()))))
require.Error(t, err)
require.NotNil(t, resctx)
}
func TestRetryStageNoResolved(t *testing.T) {
sent := []*types.Alert{}
i := Integration{
notifier: notifierFunc(func(ctx context.Context, alerts ...*types.Alert) (bool, error) {
sent = append(sent, alerts...)
return false, nil
}),
rs: sendResolved(false),
}
r := NewRetryStage(i, "", NewMetrics(prometheus.NewRegistry(), featurecontrol.NoopFlags{}))
alerts := []*types.Alert{
{
Alert: model.Alert{
EndsAt: time.Now().Add(-time.Hour),
},
},
{
Alert: model.Alert{
EndsAt: time.Now().Add(time.Hour),
},
},
}
ctx := context.Background()
resctx, res, err := r.Exec(ctx, promslog.NewNopLogger(), alerts...)
require.EqualError(t, err, "firing alerts missing")
require.Nil(t, res)
require.NotNil(t, resctx)
ctx = WithFiringAlerts(ctx, []uint64{0})
resctx, res, err = r.Exec(ctx, promslog.NewNopLogger(), alerts...)
require.NoError(t, err)
require.Equal(t, alerts, res)
require.Equal(t, []*types.Alert{alerts[1]}, sent)
require.NotNil(t, resctx)
// All alerts are resolved.
sent = sent[:0]
ctx = WithFiringAlerts(ctx, []uint64{})
alerts[1].Alert.EndsAt = time.Now().Add(-time.Hour)
resctx, res, err = r.Exec(ctx, promslog.NewNopLogger(), alerts...)
require.NoError(t, err)
require.Equal(t, alerts, res)
require.Equal(t, []*types.Alert{}, sent)
require.NotNil(t, resctx)
}
func TestRetryStageSendResolved(t *testing.T) {
sent := []*types.Alert{}
i := Integration{
notifier: notifierFunc(func(ctx context.Context, alerts ...*types.Alert) (bool, error) {
sent = append(sent, alerts...)
return false, nil
}),
rs: sendResolved(true),
}
r := NewRetryStage(i, "", NewMetrics(prometheus.NewRegistry(), featurecontrol.NoopFlags{}))
alerts := []*types.Alert{
{
Alert: model.Alert{
EndsAt: time.Now().Add(-time.Hour),
},
},
{
Alert: model.Alert{
EndsAt: time.Now().Add(time.Hour),
},
},
}
ctx := context.Background()
ctx = WithFiringAlerts(ctx, []uint64{0})
resctx, res, err := r.Exec(ctx, promslog.NewNopLogger(), alerts...)
require.NoError(t, err)
require.Equal(t, alerts, res)
require.Equal(t, alerts, sent)
require.NotNil(t, resctx)
// All alerts are resolved.
sent = sent[:0]
ctx = WithFiringAlerts(ctx, []uint64{})
alerts[1].Alert.EndsAt = time.Now().Add(-time.Hour)
resctx, res, err = r.Exec(ctx, promslog.NewNopLogger(), alerts...)
require.NoError(t, err)
require.Equal(t, alerts, res)
require.Equal(t, alerts, sent)
require.NotNil(t, resctx)
}
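// TestSetNotifiesStage verifies the required context values and that the firing and
// resolved alert hashes are written to the notification log with the expected expiry.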
func TestSetNotifiesStage(t *testing.T) {
tnflog := &testNflog{}
s := &SetNotifiesStage{
recv: &nflogpb.Receiver{GroupName: "test"},
nflog: tnflog,
}
alerts := []*types.Alert{{}, {}, {}}
ctx := context.Background()
resctx, res, err := s.Exec(ctx, promslog.NewNopLogger(), alerts...)
require.EqualError(t, err, "group key missing")
require.Nil(t, res)
require.NotNil(t, resctx)
ctx = WithGroupKey(ctx, "1")
resctx, res, err = s.Exec(ctx, promslog.NewNopLogger(), alerts...)
require.EqualError(t, err, "firing alerts missing")
require.Nil(t, res)
require.NotNil(t, resctx)
ctx = WithFiringAlerts(ctx, []uint64{0, 1, 2})
resctx, res, err = s.Exec(ctx, promslog.NewNopLogger(), alerts...)
require.EqualError(t, err, "resolved alerts missing")
require.Nil(t, res)
require.NotNil(t, resctx)
ctx = WithResolvedAlerts(ctx, []uint64{})
ctx = WithRepeatInterval(ctx, time.Hour)
tnflog.logFunc = func(r *nflogpb.Receiver, gkey string, firingAlerts, resolvedAlerts []uint64, expiry time.Duration) error {
require.Equal(t, s.recv, r)
require.Equal(t, "1", gkey)
require.Equal(t, []uint64{0, 1, 2}, firingAlerts)
require.Equal(t, []uint64{}, resolvedAlerts)
require.Equal(t, 2*time.Hour, expiry)
return nil
}
resctx, res, err = s.Exec(ctx, promslog.NewNopLogger(), alerts...)
require.NoError(t, err)
require.Equal(t, alerts, res)
require.NotNil(t, resctx)
ctx = WithFiringAlerts(ctx, []uint64{})
ctx = WithResolvedAlerts(ctx, []uint64{0, 1, 2})
tnflog.logFunc = func(r *nflogpb.Receiver, gkey string, firingAlerts, resolvedAlerts []uint64, expiry time.Duration) error {
require.Equal(t, s.recv, r)
require.Equal(t, "1", gkey)
require.Equal(t, []uint64{}, firingAlerts)
require.Equal(t, []uint64{0, 1, 2}, resolvedAlerts)
require.Equal(t, 2*time.Hour, expiry)
return nil
}
resctx, res, err = s.Exec(ctx, promslog.NewNopLogger(), alerts...)
require.NoError(t, err)
require.Equal(t, alerts, res)
require.NotNil(t, resctx)
}
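// TestMuteStage verifies that alerts matched by the muter are dropped and counted
// in the suppression metric.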
func TestMuteStage(t *testing.T) {
// Mute all label sets that have a "mute" key.
muter := types.MuteFunc(func(lset model.LabelSet) bool {
_, ok := lset["mute"]
return ok
})
metrics := NewMetrics(prometheus.NewRegistry(), featurecontrol.NoopFlags{})
stage := NewMuteStage(muter, metrics)
in := []model.LabelSet{
{},
{"test": "set"},
{"mute": "me"},
{"foo": "bar", "test": "set"},
{"foo": "bar", "mute": "me"},
{},
{"not": "muted"},
}
out := []model.LabelSet{
{},
{"test": "set"},
{"foo": "bar", "test": "set"},
{},
{"not": "muted"},
}
var inAlerts []*types.Alert
for _, lset := range in {
inAlerts = append(inAlerts, &types.Alert{
Alert: model.Alert{Labels: lset},
})
}
_, alerts, err := stage.Exec(context.Background(), promslog.NewNopLogger(), inAlerts...)
if err != nil {
t.Fatalf("Exec failed: %s", err)
}
var got []model.LabelSet
for _, a := range alerts {
got = append(got, a.Labels)
}
if !reflect.DeepEqual(got, out) {
t.Fatalf("Muting failed, expected: %v\ngot %v", out, got)
}
suppressed := int(prom_testutil.ToFloat64(metrics.numNotificationSuppressedTotal))
if (len(in) - len(got)) != suppressed {
t.Fatalf("Expected %d alerts counted in suppressed metric but got %d", (len(in) - len(got)), suppressed)
}
}
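// TestMuteStageWithSilences verifies muting via silences, including version tracking
// of markers and unmuting once the silence expires, checking the suppression metric
// after each round.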
func TestMuteStageWithSilences(t *testing.T) {
silences, err := silence.New(silence.Options{Retention: time.Hour})
if err != nil {
t.Fatal(err)
}
sil := &silencepb.Silence{
EndsAt: utcNow().Add(time.Hour),
Matchers: []*silencepb.Matcher{{Name: "mute", Pattern: "me"}},
}
if err = silences.Set(sil); err != nil {
t.Fatal(err)
}
reg := prometheus.NewRegistry()
marker := types.NewMarker(reg)
silencer := silence.NewSilencer(silences, marker, promslog.NewNopLogger())
metrics := NewMetrics(reg, featurecontrol.NoopFlags{})
stage := NewMuteStage(silencer, metrics)
in := []model.LabelSet{
{},
{"test": "set"},
{"mute": "me"},
{"foo": "bar", "test": "set"},
{"foo": "bar", "mute": "me"},
{},
{"not": "muted"},
}
out := []model.LabelSet{
{},
{"test": "set"},
{"foo": "bar", "test": "set"},
{},
{"not": "muted"},
}
var inAlerts []*types.Alert
for _, lset := range in {
inAlerts = append(inAlerts, &types.Alert{
Alert: model.Alert{Labels: lset},
})
}
// Set the second alert as previously silenced with an old version
// number. This is expected to get unsilenced by the stage.
marker.SetActiveOrSilenced(inAlerts[1].Fingerprint(), 0, []string{"123"}, nil)
_, alerts, err := stage.Exec(context.Background(), promslog.NewNopLogger(), inAlerts...)
if err != nil {
t.Fatalf("Exec failed: %s", err)
}
var got []model.LabelSet
for _, a := range alerts {
got = append(got, a.Labels)
}
if !reflect.DeepEqual(got, out) {
t.Fatalf("Muting failed, expected: %v\ngot %v", out, got)
}
suppressedRoundOne := int(prom_testutil.ToFloat64(metrics.numNotificationSuppressedTotal))
if (len(in) - len(got)) != suppressedRoundOne {
t.Fatalf("Expected %d alerts counted in suppressed metric but got %d", (len(in) - len(got)), suppressedRoundOne)
}
// Do it again to exercise the version tracking of silences.
_, alerts, err = stage.Exec(context.Background(), promslog.NewNopLogger(), inAlerts...)
if err != nil {
t.Fatalf("Exec failed: %s", err)
}
got = got[:0]
for _, a := range alerts {
got = append(got, a.Labels)
}
if !reflect.DeepEqual(got, out) {
t.Fatalf("Muting failed, expected: %v\ngot %v", out, got)
}
suppressedRoundTwo := int(prom_testutil.ToFloat64(metrics.numNotificationSuppressedTotal))
if (len(in) - len(got) + suppressedRoundOne) != suppressedRoundTwo {
t.Fatalf("Expected %d alerts counted in suppressed metric but got %d", (len(in) - len(got)), suppressedRoundTwo)
}
// Expire the silence and verify that no alerts are silenced now.
if err := silences.Expire(sil.Id); err != nil {
t.Fatal(err)
}
_, alerts, err = stage.Exec(context.Background(), promslog.NewNopLogger(), inAlerts...)
if err != nil {
t.Fatalf("Exec failed: %s", err)
}
got = got[:0]
for _, a := range alerts {
got = append(got, a.Labels)
}
if !reflect.DeepEqual(got, in) {
t.Fatalf("Unmuting failed, expected: %v\ngot %v", in, got)
}
suppressedRoundThree := int(prom_testutil.ToFloat64(metrics.numNotificationSuppressedTotal))
if (len(in) - len(got) + suppressedRoundTwo) != suppressedRoundThree {
t.Fatalf("Expected %d alerts counted in suppressed metric but got %d", (len(in) - len(got)), suppressedRoundThree)
}
}
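// TestTimeMuteStage verifies that alerts are muted within the configured mute time
// intervals, that the group is marked as muted, and that the suppression metric is
// incremented accordingly.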
func TestTimeMuteStage(t *testing.T) {
sydney, err := time.LoadLocation("Australia/Sydney")
if err != nil {
t.Fatalf("Failed to load location Australia/Sydney: %s", err)
}
eveningsAndWeekends := map[string][]timeinterval.TimeInterval{
"evenings": {{
Times: []timeinterval.TimeRange{{
StartMinute: 0, // 00:00
EndMinute: 540, // 09:00
}, {
StartMinute: 1020, // 17:00
EndMinute: 1440, // 24:00
}},
Location: &timeinterval.Location{Location: sydney},
}},
"weekends": {{
Weekdays: []timeinterval.WeekdayRange{{
InclusiveRange: timeinterval.InclusiveRange{Begin: 6, End: 6}, // Saturday
}, {
InclusiveRange: timeinterval.InclusiveRange{Begin: 0, End: 0}, // Sunday
}},
Location: &timeinterval.Location{Location: sydney},
}},
}
tests := []struct {
name string
intervals map[string][]timeinterval.TimeInterval
now time.Time
alerts []*types.Alert
mutedBy []string
}{{
name: "Should be muted outside working hours",
intervals: eveningsAndWeekends,
now: time.Date(2024, 1, 1, 0, 0, 0, 0, sydney),
alerts: []*types.Alert{{Alert: model.Alert{Labels: model.LabelSet{"foo": "bar"}}}},
mutedBy: []string{"evenings"},
}, {
name: "Should not be muted during workings hours",
intervals: eveningsAndWeekends,
now: time.Date(2024, 1, 1, 9, 0, 0, 0, sydney),
alerts: []*types.Alert{{Alert: model.Alert{Labels: model.LabelSet{"foo": "bar"}}}},
mutedBy: nil,
}, {
name: "Should be muted during weekends",
intervals: eveningsAndWeekends,
now: time.Date(2024, 1, 6, 10, 0, 0, 0, sydney),
alerts: []*types.Alert{{Alert: model.Alert{Labels: model.LabelSet{"foo": "bar"}}}},
mutedBy: []string{"weekends"},
}, {
name: "Should be muted at 12pm UTC on a weekday",
intervals: eveningsAndWeekends,
now: time.Date(2024, 1, 1, 10, 0, 0, 0, time.UTC),
alerts: []*types.Alert{{Alert: model.Alert{Labels: model.LabelSet{"foo": "bar"}}}},
mutedBy: []string{"evenings"},
}, {
name: "Should be muted at 12pm UTC on a weekend",
intervals: eveningsAndWeekends,
now: time.Date(2024, 1, 6, 10, 0, 0, 0, time.UTC),
alerts: []*types.Alert{{Alert: model.Alert{Labels: model.LabelSet{"foo": "bar"}}}},
mutedBy: []string{"evenings", "weekends"},
}}
for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
r := prometheus.NewRegistry()
marker := types.NewMarker(r)
metrics := NewMetrics(r, featurecontrol.NoopFlags{})
intervener := timeinterval.NewIntervener(test.intervals)
st := NewTimeMuteStage(intervener, marker, metrics)
// Get the names of all time intervals for the context.
muteTimeIntervalNames := make([]string, 0, len(test.intervals))
for name := range test.intervals {
muteTimeIntervalNames = append(muteTimeIntervalNames, name)
}
// Sort the names so we can compare mutedBy with test.mutedBy.
sort.Strings(muteTimeIntervalNames)
ctx := context.Background()
ctx = WithNow(ctx, test.now)
ctx = WithGroupKey(ctx, "group1")
ctx = WithActiveTimeIntervals(ctx, nil)
ctx = WithMuteTimeIntervals(ctx, muteTimeIntervalNames)
ctx = WithRouteID(ctx, "route1")
_, active, err := st.Exec(ctx, promslog.NewNopLogger(), test.alerts...)
require.NoError(t, err)
if len(test.mutedBy) == 0 {
// All alerts should be active.
require.Equal(t, len(test.alerts), len(active))
// The group should not be marked.
mutedBy, isMuted := marker.Muted("route1", "group1")
require.False(t, isMuted)
require.Empty(t, mutedBy)
// The metric for total suppressed notifications should not
// have been incremented, which means it will not be collected.
require.NoError(t, prom_testutil.GatherAndCompare(r, strings.NewReader(`
# HELP alertmanager_marked_alerts How many alerts by state are currently marked in the Alertmanager regardless of their expiry.
# TYPE alertmanager_marked_alerts gauge
alertmanager_marked_alerts{state="active"} 0
alertmanager_marked_alerts{state="suppressed"} 0
alertmanager_marked_alerts{state="unprocessed"} 0
`)))
} else {
// All alerts should be muted.
require.Empty(t, active)
// The group should be marked as muted.
mutedBy, isMuted := marker.Muted("route1", "group1")
require.True(t, isMuted)
require.Equal(t, test.mutedBy, mutedBy)
// Check the metric for total suppressed notifications.
require.NoError(t, prom_testutil.GatherAndCompare(r, strings.NewReader(fmt.Sprintf(`
# HELP alertmanager_marked_alerts How many alerts by state are currently marked in the Alertmanager regardless of their expiry.
# TYPE alertmanager_marked_alerts gauge
alertmanager_marked_alerts{state="active"} 0
alertmanager_marked_alerts{state="suppressed"} 0
alertmanager_marked_alerts{state="unprocessed"} 0
# HELP alertmanager_notifications_suppressed_total The total number of notifications suppressed for being silenced, inhibited, outside of active time intervals or within muted time intervals.
# TYPE alertmanager_notifications_suppressed_total counter
alertmanager_notifications_suppressed_total{reason="mute_time_interval"} %d
`, len(test.alerts)))))
}
})
}
}
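// TestTimeActiveStage verifies that alerts are muted outside the configured active
// time intervals.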
func TestTimeActiveStage(t *testing.T) {
sydney, err := time.LoadLocation("Australia/Sydney")
if err != nil {
t.Fatalf("Failed to load location Australia/Sydney: %s", err)
}
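// weekdays defines a single active time interval, Monday through Friday,
// 09:00-17:00 in the Australia/Sydney time zone.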
weekdays := map[string][]timeinterval.TimeInterval{
"weekdays": {{
Weekdays: []timeinterval.WeekdayRange{{
InclusiveRange: timeinterval.InclusiveRange{
Begin: 1, // Monday
End: 5, // Friday
},
}},
Times: []timeinterval.TimeRange{{
StartMinute: 540, // 09:00
EndMinute: 1020, // 17:00
}},
Location: &timeinterval.Location{Location: sydney},
}},
}
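// Each case evaluates the stage at a fixed point in time and expects either
// all alerts to pass through (mutedBy is nil) or the group to be muted by the
// named time interval.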
tests := []struct {
name string
intervals map[string][]timeinterval.TimeInterval
now time.Time
alerts []*types.Alert
mutedBy []string
}{{
name: "Should be muted outside working hours",
intervals: weekdays,
now: time.Date(2024, 1, 1, 0, 0, 0, 0, sydney),
alerts: []*types.Alert{{Alert: model.Alert{Labels: model.LabelSet{"foo": "bar"}}}},
mutedBy: []string{"weekdays"},
}, {
name: "Should not be muted during working hours",
intervals: weekdays,
now: time.Date(2024, 1, 1, 9, 0, 0, 0, sydney),
alerts: []*types.Alert{{Alert: model.Alert{Labels: model.LabelSet{"foo": "bar"}}}},
mutedBy: nil,
}, {
name: "Should be muted during weekends",
intervals: weekdays,
now: time.Date(2024, 1, 6, 10, 0, 0, 0, sydney),
alerts: []*types.Alert{{Alert: model.Alert{Labels: model.LabelSet{"foo": "bar"}}}},
mutedBy: []string{"weekdays"},
}, {
name: "Should be muted at 10am UTC",
intervals: weekdays,
now: time.Date(2024, 1, 6, 10, 0, 0, 0, time.UTC),
alerts: []*types.Alert{{Alert: model.Alert{Labels: model.LabelSet{"foo": "bar"}}}},
mutedBy: []string{"weekdays"},
}}
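// Each case gets its own registry and marker so the metric assertions below
// are isolated from the other cases.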
for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
r := prometheus.NewRegistry()
marker := types.NewMarker(r)
metrics := NewMetrics(r, featurecontrol.NoopFlags{})
intervener := timeinterval.NewIntervener(test.intervals)
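// The stage suppresses notifications whenever the evaluation time falls
// outside every configured active time interval.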
st := NewTimeActiveStage(intervener, marker, metrics)
// Get the names of all time intervals for the context.
activeTimeIntervalNames := make([]string, 0, len(test.intervals))
for name := range test.intervals {
activeTimeIntervalNames = append(activeTimeIntervalNames, name)
}
// Sort the names so we can compare mutedBy with test.mutedBy.
sort.Strings(activeTimeIntervalNames)
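// Populate the context with everything the stage consults: the evaluation
// time, group key, route ID, and the names of the active time intervals.
// No mute time intervals are set for this test.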
ctx := context.Background()
ctx = WithNow(ctx, test.now)
ctx = WithGroupKey(ctx, "group1")
ctx = WithActiveTimeIntervals(ctx, activeTimeIntervalNames)
ctx = WithMuteTimeIntervals(ctx, nil)
ctx = WithRouteID(ctx, "route1")
_, active, err := st.Exec(ctx, promslog.NewNopLogger(), test.alerts...)
require.NoError(t, err)
if len(test.mutedBy) == 0 {
// All alerts should be active.
require.Equal(t, len(test.alerts), len(active))
// The group should not be marked.
mutedBy, isMuted := marker.Muted("route1", "group1")
require.False(t, isMuted)
require.Empty(t, mutedBy)
// The suppressed notifications counter should not have been
// incremented, so it is absent from the gathered metrics.
require.NoError(t, prom_testutil.GatherAndCompare(r, strings.NewReader(`
# HELP alertmanager_marked_alerts How many alerts by state are currently marked in the Alertmanager regardless of their expiry.
# TYPE alertmanager_marked_alerts gauge
alertmanager_marked_alerts{state="active"} 0
alertmanager_marked_alerts{state="suppressed"} 0
alertmanager_marked_alerts{state="unprocessed"} 0
`)))
} else {
// All alerts should be muted.
require.Empty(t, active)
// The group should be marked as muted.
mutedBy, isMuted := marker.Muted("route1", "group1")
require.True(t, isMuted)
require.Equal(t, test.mutedBy, mutedBy)
// The suppressed notifications counter should have been incremented
// by the number of muted alerts.
require.NoError(t, prom_testutil.GatherAndCompare(r, strings.NewReader(fmt.Sprintf(`
# HELP alertmanager_marked_alerts How many alerts by state are currently marked in the Alertmanager regardless of their expiry.
# TYPE alertmanager_marked_alerts gauge
alertmanager_marked_alerts{state="active"} 0
alertmanager_marked_alerts{state="suppressed"} 0
alertmanager_marked_alerts{state="unprocessed"} 0
# HELP alertmanager_notifications_suppressed_total The total number of notifications suppressed for being silenced, inhibited, outside of active time intervals or within muted time intervals.
# TYPE alertmanager_notifications_suppressed_total counter
alertmanager_notifications_suppressed_total{reason="active_time_interval"} %d
`, len(test.alerts)))))
}
})
}
}
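// BenchmarkHashAlert measures the cost of hashing a single alert with a
// small, fixed label set.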
func BenchmarkHashAlert(b *testing.B) {
alert := &types.Alert{
Alert: model.Alert{
Labels: model.LabelSet{"foo": "the_first_value", "bar": "the_second_value", "another": "value"},
},
}
for i := 0; i < b.N; i++ {
hashAlert(alert)
}
}