alertmanager/nflog/nflog_test.go

384 lines
11 KiB
Go
Raw Permalink Normal View History

// Copyright 2016 Prometheus Team
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package nflog
2016-08-12 14:48:09 +00:00
import (
2018-02-07 15:36:47 +00:00
"bytes"
"io"
2016-08-12 14:48:09 +00:00
"os"
"path/filepath"
"sync"
2016-08-12 14:48:09 +00:00
"testing"
"time"
pb "github.com/prometheus/alertmanager/nflog/nflogpb"
"github.com/benbjohnson/clock"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/testutil"
2016-08-12 14:48:09 +00:00
"github.com/stretchr/testify/require"
"go.uber.org/atomic"
2016-08-12 14:48:09 +00:00
)
2018-02-07 15:36:47 +00:00
func TestLogGC(t *testing.T) {
mockClock := clock.NewMock()
now := mockClock.Now()
2016-08-12 14:48:09 +00:00
// We only care about key names and expiration timestamps.
newEntry := func(ts time.Time) *pb.MeshEntry {
return &pb.MeshEntry{
ExpiresAt: ts,
2016-08-12 14:48:09 +00:00
}
}
2018-02-07 15:36:47 +00:00
l := &Log{
st: state{
2016-08-12 14:48:09 +00:00
"a1": newEntry(now),
"a2": newEntry(now.Add(time.Second)),
"a3": newEntry(now.Add(-time.Second)),
},
clock: mockClock,
2016-11-21 14:22:35 +00:00
metrics: newMetrics(nil),
2016-08-12 14:48:09 +00:00
}
n, err := l.GC()
require.NoError(t, err, "unexpected error in garbage collection")
require.Equal(t, 2, n, "unexpected number of removed entries")
2018-02-07 15:36:47 +00:00
expected := state{
2016-08-12 14:48:09 +00:00
"a2": newEntry(now.Add(time.Second)),
}
require.Equal(t, l.st, expected, "unexpected state after garbage collection")
2016-08-12 14:48:09 +00:00
}
2018-02-07 15:36:47 +00:00
func TestLogSnapshot(t *testing.T) {
2016-08-12 14:48:09 +00:00
// Check whether storing and loading the snapshot is symmetric.
mockClock := clock.NewMock()
now := mockClock.Now().UTC()
2016-08-12 14:48:09 +00:00
cases := []struct {
entries []*pb.MeshEntry
}{
{
entries: []*pb.MeshEntry{
{
Entry: &pb.Entry{
GroupKey: []byte("d8e8fca2dc0f896fd7cb4cb0031ba249"),
Receiver: &pb.Receiver{GroupName: "abc", Integration: "test1", Idx: 1},
GroupHash: []byte("126a8a51b9d1bbd07fddc65819a542c3"),
Resolved: false,
Timestamp: now,
2016-08-12 14:48:09 +00:00
},
ExpiresAt: now,
2016-08-12 14:48:09 +00:00
}, {
Entry: &pb.Entry{
GroupKey: []byte("d8e8fca2dc0f8abce7cb4cb0031ba249"),
Receiver: &pb.Receiver{GroupName: "def", Integration: "test2", Idx: 29},
GroupHash: []byte("122c2331b9d1bbd07fddc65819a542c3"),
Resolved: true,
Timestamp: now,
2016-08-12 14:48:09 +00:00
},
ExpiresAt: now,
2016-08-12 14:48:09 +00:00
}, {
Entry: &pb.Entry{
GroupKey: []byte("aaaaaca2dc0f896fd7cb4cb0031ba249"),
Receiver: &pb.Receiver{GroupName: "ghi", Integration: "test3", Idx: 0},
GroupHash: []byte("126a8a51b9d1bbd07fddc6e3e3e542c3"),
Resolved: false,
Timestamp: now,
2016-08-12 14:48:09 +00:00
},
ExpiresAt: now,
2016-08-12 14:48:09 +00:00
},
},
},
}
for _, c := range cases {
f, err := os.CreateTemp("", "snapshot")
2016-08-12 14:48:09 +00:00
require.NoError(t, err, "creating temp file failed")
2018-02-07 15:36:47 +00:00
l1 := &Log{
st: state{},
2016-11-21 14:22:35 +00:00
metrics: newMetrics(nil),
}
2016-08-12 14:48:09 +00:00
// Setup internal state manually.
for _, e := range c.entries {
l1.st[stateKey(string(e.Entry.GroupKey), e.Entry.Receiver)] = e
2016-08-12 14:48:09 +00:00
}
_, err = l1.Snapshot(f)
require.NoError(t, err, "creating snapshot failed")
require.NoError(t, f.Close(), "closing snapshot file failed")
f, err = os.Open(f.Name())
require.NoError(t, err, "opening snapshot file failed")
// Check again against new nlog instance.
2018-02-07 15:36:47 +00:00
l2 := &Log{}
2016-08-12 14:48:09 +00:00
err = l2.loadSnapshot(f)
require.NoError(t, err, "error loading snapshot")
require.Equal(t, l1.st, l2.st, "state after loading snapshot did not match snapshotted state")
require.NoError(t, f.Close(), "closing snapshot file failed")
}
}
// TestWithMaintenance_SupportsCustomCallback runs Log.Maintenance with a
// caller-supplied maintenance function and a mock clock. It expects the
// function to be invoked once per elapsed interval and once more when the
// stop channel is closed, and it checks the maintenance counters afterwards.
func TestWithMaintenance_SupportsCustomCallback(t *testing.T) {
	// Only the file's path is needed below, so the handle is closed right
	// away; t.TempDir takes care of removing the file after the test.
	f, err := os.CreateTemp(t.TempDir(), "snapshot")
	require.NoError(t, err, "creating temp file failed")
	require.NoError(t, f.Close(), "closing temp file failed")

	stopc := make(chan struct{})
	reg := prometheus.NewPedanticRegistry()
	opts := Options{
		Metrics:      reg,
		SnapshotFile: f.Name(),
	}

	l, err := New(opts)
	// Check the error before touching l: on failure l may be nil and the
	// assignment below would panic instead of reporting the real problem.
	require.NoError(t, err)

	// Named mockClock so that the clock package is not shadowed.
	mockClock := clock.NewMock()
	l.clock = mockClock

	var calls atomic.Int32
	var wg sync.WaitGroup

	wg.Add(1)
	go func() {
		defer wg.Done()
		l.Maintenance(100*time.Millisecond, f.Name(), stopc, func() (int64, error) {
			calls.Add(1)
			return 0, nil
		})
	}()
	// Give the maintenance goroutine a chance to start before advancing time.
	gosched()

	// Before the first tick, no maintenance executed.
	mockClock.Add(99 * time.Millisecond)
	require.EqualValues(t, 0, calls.Load())

	// Tick once.
	mockClock.Add(1 * time.Millisecond)
	require.EqualValues(t, 1, calls.Load())

	// Stop the maintenance loop. We should get exactly one more execution of the maintenance func.
	close(stopc)
	wg.Wait()

	require.EqualValues(t, 2, calls.Load())

	// Check the maintenance metrics.
	require.NoError(t, testutil.GatherAndCompare(reg, bytes.NewBufferString(`
# HELP alertmanager_nflog_maintenance_errors_total How many maintenances were executed for the notification log that failed.
# TYPE alertmanager_nflog_maintenance_errors_total counter
alertmanager_nflog_maintenance_errors_total 0
# HELP alertmanager_nflog_maintenance_total How many maintenances were executed for the notification log.
# TYPE alertmanager_nflog_maintenance_total counter
alertmanager_nflog_maintenance_total 2
`), "alertmanager_nflog_maintenance_total", "alertmanager_nflog_maintenance_errors_total"))
}
2016-08-12 14:48:09 +00:00
func TestReplaceFile(t *testing.T) {
dir, err := os.MkdirTemp("", "replace_file")
2016-08-12 14:48:09 +00:00
require.NoError(t, err, "creating temp dir failed")
origFilename := filepath.Join(dir, "testfile")
of, err := os.Create(origFilename)
require.NoError(t, err, "creating file failed")
nf, err := openReplace(origFilename)
require.NoError(t, err, "opening replacement file failed")
_, err = nf.Write([]byte("test"))
require.NoError(t, err, "writing replace file failed")
require.NotEqual(t, nf.Name(), of.Name(), "replacement file must have different name while editing")
require.NoError(t, nf.Close(), "closing replacement file failed")
require.NoError(t, of.Close(), "closing original file failed")
ofr, err := os.Open(origFilename)
require.NoError(t, err, "opening original file failed")
defer ofr.Close()
res, err := io.ReadAll(ofr)
2016-08-12 14:48:09 +00:00
require.NoError(t, err, "reading original file failed")
require.Equal(t, "test", string(res), "unexpected file contents")
}
2018-02-07 15:36:47 +00:00
func TestStateMerge(t *testing.T) {
mockClock := clock.NewMock()
now := mockClock.Now()
2016-08-12 14:48:09 +00:00
// We only care about key names and timestamps for the
// merging logic.
newEntry := func(name string, ts, exp time.Time) *pb.MeshEntry {
2016-08-12 14:48:09 +00:00
return &pb.MeshEntry{
2018-02-07 15:36:47 +00:00
Entry: &pb.Entry{
Timestamp: ts,
GroupKey: []byte("key"),
Receiver: &pb.Receiver{
GroupName: name,
Idx: 1,
Integration: "integr",
},
},
ExpiresAt: exp,
2016-08-12 14:48:09 +00:00
}
}
exp := now.Add(time.Minute)
2016-08-12 14:48:09 +00:00
cases := []struct {
2018-02-07 15:36:47 +00:00
a, b state
final state
2016-08-12 14:48:09 +00:00
}{
{
2018-02-07 15:36:47 +00:00
a: state{
"key:a1/integr/1": newEntry("a1", now, exp),
"key:a2/integr/1": newEntry("a2", now, exp),
"key:a3/integr/1": newEntry("a3", now, exp),
2016-08-12 14:48:09 +00:00
},
2018-02-07 15:36:47 +00:00
b: state{
"key:b1/integr/1": newEntry("b1", now, exp), // new key, should be added
"key:b2/integr/1": newEntry("b2", now.Add(-time.Minute), now.Add(-time.Millisecond)), // new key, expired, should not be added
"key:a2/integr/1": newEntry("a2", now.Add(-time.Minute), exp), // older timestamp, should be dropped
"key:a3/integr/1": newEntry("a3", now.Add(time.Minute), exp), // newer timestamp, should overwrite
2016-08-12 14:48:09 +00:00
},
2018-02-07 15:36:47 +00:00
final: state{
"key:a1/integr/1": newEntry("a1", now, exp),
"key:a2/integr/1": newEntry("a2", now, exp),
"key:a3/integr/1": newEntry("a3", now.Add(time.Minute), exp),
"key:b1/integr/1": newEntry("b1", now, exp),
2016-08-12 14:48:09 +00:00
},
},
}
for _, c := range cases {
ca, cb := c.a.clone(), c.b.clone()
2018-02-07 15:36:47 +00:00
res := c.a.clone()
for _, e := range cb {
res.merge(e, now)
2018-02-07 15:36:47 +00:00
}
2016-08-12 14:48:09 +00:00
require.Equal(t, c.final, res, "Merge result should match expectation")
require.Equal(t, c.b, cb, "Merged state should remain unmodified")
require.NotEqual(t, c.final, ca, "Merge should not change original state")
2016-08-12 14:48:09 +00:00
}
}
2018-02-07 15:36:47 +00:00
func TestStateDataCoding(t *testing.T) {
2016-08-12 14:48:09 +00:00
// Check whether encoding and decoding the data is symmetric.
mockClock := clock.NewMock()
now := mockClock.Now().UTC()
2016-08-12 14:48:09 +00:00
cases := []struct {
entries []*pb.MeshEntry
}{
{
entries: []*pb.MeshEntry{
{
Entry: &pb.Entry{
GroupKey: []byte("d8e8fca2dc0f896fd7cb4cb0031ba249"),
Receiver: &pb.Receiver{GroupName: "abc", Integration: "test1", Idx: 1},
GroupHash: []byte("126a8a51b9d1bbd07fddc65819a542c3"),
Resolved: false,
Timestamp: now,
2016-08-12 14:48:09 +00:00
},
ExpiresAt: now,
2016-08-12 14:48:09 +00:00
}, {
Entry: &pb.Entry{
GroupKey: []byte("d8e8fca2dc0f8abce7cb4cb0031ba249"),
Receiver: &pb.Receiver{GroupName: "def", Integration: "test2", Idx: 29},
GroupHash: []byte("122c2331b9d1bbd07fddc65819a542c3"),
Resolved: true,
Timestamp: now,
2016-08-12 14:48:09 +00:00
},
ExpiresAt: now,
2016-08-12 14:48:09 +00:00
}, {
Entry: &pb.Entry{
GroupKey: []byte("aaaaaca2dc0f896fd7cb4cb0031ba249"),
Receiver: &pb.Receiver{GroupName: "ghi", Integration: "test3", Idx: 0},
GroupHash: []byte("126a8a51b9d1bbd07fddc6e3e3e542c3"),
Resolved: false,
Timestamp: now,
2016-08-12 14:48:09 +00:00
},
ExpiresAt: now,
2016-08-12 14:48:09 +00:00
},
},
},
}
for _, c := range cases {
// Create gossip data from input.
2018-02-07 15:36:47 +00:00
in := state{}
2016-08-12 14:48:09 +00:00
for _, e := range c.entries {
in[stateKey(string(e.Entry.GroupKey), e.Entry.Receiver)] = e
2016-08-12 14:48:09 +00:00
}
2018-02-07 15:36:47 +00:00
msg, err := in.MarshalBinary()
require.NoError(t, err)
2016-08-12 14:48:09 +00:00
2018-02-07 15:36:47 +00:00
out, err := decodeState(bytes.NewReader(msg))
2016-08-12 14:48:09 +00:00
require.NoError(t, err, "decoding message failed")
require.Equal(t, in, out, "decoded data doesn't match encoded data")
}
}
2017-11-01 19:38:00 +00:00
func TestQuery(t *testing.T) {
opts := Options{Retention: time.Second}
nl, err := New(opts)
2017-11-01 19:38:00 +00:00
if err != nil {
require.NoError(t, err, "constructing nflog failed")
}
recv := new(pb.Receiver)
// no key param
_, err = nl.Query(QGroupKey("key"))
require.EqualError(t, err, "no query parameters specified")
// no recv param
_, err = nl.Query(QReceiver(recv))
require.EqualError(t, err, "no query parameters specified")
// no entry
_, err = nl.Query(QGroupKey("nonexistentkey"), QReceiver(recv))
2017-11-01 19:38:00 +00:00
require.EqualError(t, err, "not found")
// existing entry
firingAlerts := []uint64{1, 2, 3}
resolvedAlerts := []uint64{4, 5}
err = nl.Log(recv, "key", firingAlerts, resolvedAlerts, 0)
2017-11-01 19:38:00 +00:00
require.NoError(t, err, "logging notification failed")
entries, err := nl.Query(QGroupKey("key"), QReceiver(recv))
require.NoError(t, err, "querying nflog failed")
2017-11-01 19:38:00 +00:00
entry := entries[0]
require.EqualValues(t, firingAlerts, entry.FiringAlerts)
require.EqualValues(t, resolvedAlerts, entry.ResolvedAlerts)
}
// TestStateDecodingError checks how decoding copes with erroneous data: a
// state holding an empty MeshEntry under an empty key is expected to marshal
// without error, while decoding the result must return ErrInvalidState.
func TestStateDecodingError(t *testing.T) {
	invalid := state{"": &pb.MeshEntry{}}

	encoded, marshalErr := invalid.MarshalBinary()
	require.NoError(t, marshalErr)

	_, decodeErr := decodeState(bytes.NewReader(encoded))
	require.Equal(t, ErrInvalidState, decodeErr)
}
// gosched yields to other goroutines by sleeping for one millisecond.
// runtime.Gosched() is not used because it does not "suspend" the current
// goroutine, so there is no guarantee that the main goroutine won't simply
// continue. For more see https://pkg.go.dev/runtime#Gosched.
func gosched() {
	const pause = time.Millisecond
	time.Sleep(pause)
}