alertmanager/test/acceptance/send_test.go
2015-11-10 14:08:20 +01:00

264 lines
7.5 KiB
Go

// Copyright 2015 Prometheus Team
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package test
import (
"fmt"
"testing"
"time"
. "github.com/prometheus/alertmanager/test"
)
// This file contains acceptance tests around the basic sending logic
// for notifications, which includes batching and ensuring that each
// notification is eventually sent at least once and ideally exactly
// once.
func TestMergeAlerts(t *testing.T) {
t.Parallel()
conf := `
route:
receiver: "default"
group_by: []
group_wait: 1s
group_interval: 1s
repeat_interval: 1s
receivers:
- name: "default"
webhook_configs:
- url: 'http://%s'
`
at := NewAcceptanceTest(t, &AcceptanceOpts{
Tolerance: 150 * time.Millisecond,
})
co := at.Collector("webhook")
wh := NewWebhook(co)
am := at.Alertmanager(fmt.Sprintf(conf, wh.Address()))
// Refresh an alert several times. The starting time must remain at the earliest
// point in time.
am.Push(At(1), Alert("alertname", "test").Active(1.1))
// Another Prometheus server might be sending later but with an earlier start time.
am.Push(At(1.2), Alert("alertname", "test").Active(1))
co.Want(Between(2, 2.5), Alert("alertname", "test").Active(1))
am.Push(At(2.1), Alert("alertname", "test").Annotate("ann", "v1").Active(2))
co.Want(Between(3, 3.5), Alert("alertname", "test").Annotate("ann", "v1").Active(1))
// Annotations are always overwritten by the alert that arrived most recently.
am.Push(At(3.6), Alert("alertname", "test").Annotate("ann", "v2").Active(1.5))
co.Want(Between(4, 4.5), Alert("alertname", "test").Annotate("ann", "v2").Active(1))
// If an alert is marked resolved twice, the latest point in time must be
// set as the eventual resolve time.
am.Push(At(4.6), Alert("alertname", "test").Annotate("ann", "v2").Active(3, 4.5))
am.Push(At(4.8), Alert("alertname", "test").Annotate("ann", "v3").Active(2.9, 4.8))
am.Push(At(4.8), Alert("alertname", "test").Annotate("ann", "v3").Active(2.9, 4.1))
co.Want(Between(5, 5.5), Alert("alertname", "test").Annotate("ann", "v3").Active(1, 4.8))
// Reactivate an alert after a previous occurrence has been resolved.
// No overlap, no merge must occur.
am.Push(At(5.3), Alert("alertname", "test"))
co.Want(Between(6, 6.5), Alert("alertname", "test").Active(5.3))
// Test against a bug which ocurrec after a restart. The previous occurrence of
// the alert was sent rather than the most recent one.
at.Do(At(6.7), func() {
am.Terminate()
am.Start()
})
// On restart the alert is flushed right away as the group_wait has already passed.
// However, it must be caught in the deduplication stage.
// The next attempt will be 1s later and won't be filtered in deduping.
co.Want(Between(7.7, 8), Alert("alertname", "test").Active(5.3))
at.Run()
}
func TestRepeat(t *testing.T) {
t.Parallel()
conf := `
route:
receiver: "default"
group_by: []
group_wait: 1s
group_interval: 1s
repeat_interval: 1s
receivers:
- name: "default"
webhook_configs:
- url: 'http://%s'
`
// Create a new acceptance test that instantiates new Alertmanagers
// with the given configuration and verifies times with the given
// tollerance.
at := NewAcceptanceTest(t, &AcceptanceOpts{
Tolerance: 150 * time.Millisecond,
})
// Create a collector to which alerts can be written and verified
// against a set of expected alert notifications.
co := at.Collector("webhook")
// Run something that satisfies the webhook interface to which the
// Alertmanager pushes as defined by its configuration.
wh := NewWebhook(co)
// Create a new Alertmanager process listening to a random port
am := at.Alertmanager(fmt.Sprintf(conf, wh.Address()))
// Declare pushes to be made to the Alertmanager at the given time.
// Times are provided in fractions of seconds.
am.Push(At(1), Alert("alertname", "test").Active(1))
at.Do(At(1.2), func() {
am.Terminate()
am.Start()
})
am.Push(At(3.5), Alert("alertname", "test").Active(1, 3))
// Declare which alerts are expected to arrive at the collector within
// the defined time intervals.
co.Want(Between(2, 2.5), Alert("alertname", "test").Active(1))
co.Want(Between(3, 3.5), Alert("alertname", "test").Active(1))
co.Want(Between(4, 4.5), Alert("alertname", "test").Active(1, 3))
// Start the flow as defined above and run the checks afterwards.
at.Run()
}
// TestRetry configures a receiver that fans out into two webhooks, one of
// which rejects requests for a while, and declares which notifications each
// webhook's collector is expected to receive.
func TestRetry(t *testing.T) {
	t.Parallel()

	// We create a notification config that fans out into two different
	// webhooks.
	// The succeeding one must still only receive the first successful
	// notifications. Sending to the failing one must eventually succeed.
	conf := `
route:
receiver: "default"
group_by: []
group_wait: 1s
group_interval: 1s
repeat_interval: 3s
receivers:
- name: "default"
webhook_configs:
- url: 'http://%s'
- url: 'http://%s'
`

	at := NewAcceptanceTest(t, &AcceptanceOpts{
		Tolerance: 150 * time.Millisecond,
	})

	co1 := at.Collector("webhook")
	wh1 := NewWebhook(co1)

	co2 := at.Collector("webhook_failing")
	wh2 := NewWebhook(co2)

	wh2.Func = func(ts float64) bool {
		// Fail the first two interval periods but eventually
		// succeed in the third interval after a few failed attempts.
		// NOTE(review): assumes a true return value makes the webhook
		// reject the request — confirm against the webhook helper.
		return ts < 4.5
	}

	am := at.Alertmanager(fmt.Sprintf(conf, wh1.Address(), wh2.Address()))

	am.Push(At(1), Alert("alertname", "test1"))

	co1.Want(Between(2, 2.5), Alert("alertname", "test1").Active(1))
	co1.Want(Between(5, 5.5), Alert("alertname", "test1").Active(1))

	co2.Want(Between(4.5, 5), Alert("alertname", "test1").Active(1))

	// Start the declared flow and run the checks afterwards. Without this
	// call the pushes and expectations above are never exercised.
	at.Run()
}
func TestBatching(t *testing.T) {
t.Parallel()
conf := `
route:
receiver: "default"
group_by: []
group_wait: 1s
group_interval: 1s
repeat_interval: 5s
receivers:
- name: "default"
webhook_configs:
- url: 'http://%s'
`
at := NewAcceptanceTest(t, &AcceptanceOpts{
Tolerance: 150 * time.Millisecond,
})
co := at.Collector("webhook")
wh := NewWebhook(co)
am := at.Alertmanager(fmt.Sprintf(conf, wh.Address()))
am.Push(At(1.1), Alert("alertname", "test1").Active(1))
am.Push(At(1.9), Alert("alertname", "test5").Active(1))
am.Push(At(2.3),
Alert("alertname", "test2").Active(1.5),
Alert("alertname", "test3").Active(1.5),
Alert("alertname", "test4").Active(1.6),
)
co.Want(Between(2.0, 2.5),
Alert("alertname", "test1").Active(1),
Alert("alertname", "test5").Active(1),
)
// Only expect the new ones with the next group interval.
co.Want(Between(3, 3.5),
Alert("alertname", "test2").Active(1.5),
Alert("alertname", "test3").Active(1.5),
Alert("alertname", "test4").Active(1.6),
)
// While no changes happen expect no additional notifications
// until the 5s repeat interval has ended.
// The last three notifications should sent with the first two even
// though their repeat interval has not yet passed. This way fragmented
// batches are unified and notification noise reduced.
co.Want(Between(7, 7.5),
Alert("alertname", "test1").Active(1),
Alert("alertname", "test5").Active(1),
Alert("alertname", "test2").Active(1.5),
Alert("alertname", "test3").Active(1.5),
Alert("alertname", "test4").Active(1.6),
)
at.Run()
}