alertmanager/test/acceptance/send_test.go

264 lines
7.5 KiB
Go
Raw Normal View History

2015-10-11 15:24:49 +00:00
// Copyright 2015 Prometheus Team
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package test
import (
"fmt"
"testing"
"time"
. "github.com/prometheus/alertmanager/test"
)
2015-10-12 05:35:22 +00:00
// This file contains acceptance tests around the basic sending logic
// for notifications, which includes batching and ensuring that each
// notification is eventually sent at least once and ideally exactly
// once.
2015-10-15 10:46:51 +00:00
func TestMergeAlerts(t *testing.T) {
t.Parallel()
conf := `
2015-10-19 14:52:54 +00:00
route:
2015-11-10 13:08:20 +00:00
receiver: "default"
group_by: []
2015-10-15 10:46:51 +00:00
group_wait: 1s
group_interval: 1s
repeat_interval: 1s
2015-11-10 13:08:20 +00:00
receivers:
2015-10-15 10:46:51 +00:00
- name: "default"
webhook_configs:
- url: 'http://%s'
`
at := NewAcceptanceTest(t, &AcceptanceOpts{
Tolerance: 150 * time.Millisecond,
})
co := at.Collector("webhook")
wh := NewWebhook(co)
am := at.Alertmanager(fmt.Sprintf(conf, wh.Address()))
// Refresh an alert several times. The starting time must remain at the earliest
// point in time.
am.Push(At(1), Alert("alertname", "test").Active(1.1))
// Another Prometheus server might be sending later but with an earlier start time.
am.Push(At(1.2), Alert("alertname", "test").Active(1))
2015-10-15 10:46:51 +00:00
co.Want(Between(2, 2.5), Alert("alertname", "test").Active(1))
am.Push(At(2.1), Alert("alertname", "test").Annotate("ann", "v1").Active(2))
co.Want(Between(3, 3.5), Alert("alertname", "test").Annotate("ann", "v1").Active(1))
// Annotations are always overwritten by the alert that arrived most recently.
2015-10-15 10:46:51 +00:00
am.Push(At(3.6), Alert("alertname", "test").Annotate("ann", "v2").Active(1.5))
co.Want(Between(4, 4.5), Alert("alertname", "test").Annotate("ann", "v2").Active(1))
// If an alert is marked resolved twice, the latest point in time must be
// set as the eventual resolve time.
am.Push(At(4.6), Alert("alertname", "test").Annotate("ann", "v2").Active(3, 4.5))
am.Push(At(4.8), Alert("alertname", "test").Annotate("ann", "v3").Active(2.9, 4.8))
am.Push(At(4.8), Alert("alertname", "test").Annotate("ann", "v3").Active(2.9, 4.1))
co.Want(Between(5, 5.5), Alert("alertname", "test").Annotate("ann", "v3").Active(1, 4.8))
// Reactivate an alert after a previous occurrence has been resolved.
// No overlap, no merge must occur.
am.Push(At(5.3), Alert("alertname", "test"))
co.Want(Between(6, 6.5), Alert("alertname", "test").Active(5.3))
// Test against a bug which ocurrec after a restart. The previous occurrence of
// the alert was sent rather than the most recent one.
at.Do(At(6.7), func() {
am.Terminate()
am.Start()
})
// On restart the alert is flushed right away as the group_wait has already passed.
// However, it must be caught in the deduplication stage.
// The next attempt will be 1s later and won't be filtered in deduping.
co.Want(Between(7.7, 8), Alert("alertname", "test").Active(5.3))
2015-10-15 10:46:51 +00:00
at.Run()
}
2015-10-12 05:35:22 +00:00
func TestRepeat(t *testing.T) {
t.Parallel()
conf := `
2015-10-19 14:52:54 +00:00
route:
2015-11-10 13:08:20 +00:00
receiver: "default"
group_by: []
group_wait: 1s
group_interval: 1s
repeat_interval: 1s
2015-11-10 13:08:20 +00:00
receivers:
- name: "default"
webhook_configs:
- url: 'http://%s'
`
// Create a new acceptance test that instantiates new Alertmanagers
// with the given configuration and verifies times with the given
// tollerance.
at := NewAcceptanceTest(t, &AcceptanceOpts{
Tolerance: 150 * time.Millisecond,
})
// Create a collector to which alerts can be written and verified
// against a set of expected alert notifications.
co := at.Collector("webhook")
// Run something that satisfies the webhook interface to which the
// Alertmanager pushes as defined by its configuration.
wh := NewWebhook(co)
// Create a new Alertmanager process listening to a random port
am := at.Alertmanager(fmt.Sprintf(conf, wh.Address()))
// Declare pushes to be made to the Alertmanager at the given time.
// Times are provided in fractions of seconds.
am.Push(At(1), Alert("alertname", "test").Active(1))
2015-10-07 14:18:55 +00:00
at.Do(At(1.2), func() {
am.Terminate()
am.Start()
})
am.Push(At(3.5), Alert("alertname", "test").Active(1, 3))
// Declare which alerts are expected to arrive at the collector within
// the defined time intervals.
co.Want(Between(2, 2.5), Alert("alertname", "test").Active(1))
co.Want(Between(3, 3.5), Alert("alertname", "test").Active(1))
2015-09-30 15:35:33 +00:00
co.Want(Between(4, 4.5), Alert("alertname", "test").Active(1, 3))
// Start the flow as defined above and run the checks afterwards.
at.Run()
}
2015-10-12 05:35:22 +00:00
func TestRetry(t *testing.T) {
t.Parallel()
2015-10-12 05:35:22 +00:00
// We create a notification config that fans out into two different
// webhooks.
// The succeeding one must still only receive the first successful
// notifications. Sending to the succeeding one must eventually succeed.
conf := `
2015-10-19 14:52:54 +00:00
route:
2015-11-10 13:08:20 +00:00
receiver: "default"
group_by: []
group_wait: 1s
group_interval: 1s
2015-10-12 05:35:22 +00:00
repeat_interval: 3s
2015-10-01 19:28:18 +00:00
2015-11-10 13:08:20 +00:00
receivers:
2015-10-01 19:28:18 +00:00
- name: "default"
webhook_configs:
- url: 'http://%s'
2015-10-12 05:35:22 +00:00
- url: 'http://%s'
2015-10-01 19:28:18 +00:00
`
at := NewAcceptanceTest(t, &AcceptanceOpts{
Tolerance: 150 * time.Millisecond,
})
2015-10-12 05:35:22 +00:00
co1 := at.Collector("webhook")
wh1 := NewWebhook(co1)
2015-10-01 19:28:18 +00:00
2015-10-12 05:35:22 +00:00
co2 := at.Collector("webhook_failing")
wh2 := NewWebhook(co2)
2015-10-01 19:28:18 +00:00
2015-10-12 05:35:22 +00:00
wh2.Func = func(ts float64) bool {
// Fail the first two interval periods but eventually
// succeed in the third interval after a few failed attempts.
return ts < 4.5
}
2015-10-01 19:28:18 +00:00
2015-10-12 05:35:22 +00:00
am := at.Alertmanager(fmt.Sprintf(conf, wh1.Address(), wh2.Address()))
2015-10-01 19:28:18 +00:00
2015-10-12 05:35:22 +00:00
am.Push(At(1), Alert("alertname", "test1"))
2015-10-01 19:28:18 +00:00
2015-10-12 05:35:22 +00:00
co1.Want(Between(2, 2.5), Alert("alertname", "test1").Active(1))
co1.Want(Between(5, 5.5), Alert("alertname", "test1").Active(1))
2015-10-01 19:28:18 +00:00
2015-10-12 05:35:22 +00:00
co2.Want(Between(4.5, 5), Alert("alertname", "test1").Active(1))
2015-10-01 19:28:18 +00:00
}
func TestBatching(t *testing.T) {
t.Parallel()
conf := `
2015-10-19 14:52:54 +00:00
route:
2015-11-10 13:08:20 +00:00
receiver: "default"
group_by: []
group_wait: 1s
group_interval: 1s
repeat_interval: 5s
2015-11-10 13:08:20 +00:00
receivers:
- name: "default"
webhook_configs:
- url: 'http://%s'
`
at := NewAcceptanceTest(t, &AcceptanceOpts{
Tolerance: 150 * time.Millisecond,
})
co := at.Collector("webhook")
wh := NewWebhook(co)
am := at.Alertmanager(fmt.Sprintf(conf, wh.Address()))
am.Push(At(1.1), Alert("alertname", "test1").Active(1))
am.Push(At(1.9), Alert("alertname", "test5").Active(1))
am.Push(At(2.3),
Alert("alertname", "test2").Active(1.5),
Alert("alertname", "test3").Active(1.5),
Alert("alertname", "test4").Active(1.6),
)
co.Want(Between(2.0, 2.5),
Alert("alertname", "test1").Active(1),
Alert("alertname", "test5").Active(1),
)
// Only expect the new ones with the next group interval.
co.Want(Between(3, 3.5),
Alert("alertname", "test2").Active(1.5),
Alert("alertname", "test3").Active(1.5),
Alert("alertname", "test4").Active(1.6),
)
// While no changes happen expect no additional notifications
// until the 5s repeat interval has ended.
2015-10-01 07:43:51 +00:00
// The last three notifications should sent with the first two even
// though their repeat interval has not yet passed. This way fragmented
2015-09-30 17:03:19 +00:00
// batches are unified and notification noise reduced.
co.Want(Between(7, 7.5),
Alert("alertname", "test1").Active(1),
Alert("alertname", "test5").Active(1),
Alert("alertname", "test2").Active(1.5),
Alert("alertname", "test3").Active(1.5),
Alert("alertname", "test4").Active(1.6),
)
at.Run()
}