Add variable interpolation to notification messages.
This includes the refactorings required to make the HTTP client replaceable (for testing) and to move the NotificationReq type definitions into the "notification" package. That package no longer needs to depend on "rules"; instead it uses a representation of the required data that includes only the necessary fields.
This commit is contained in:
parent
483f2be3b3
commit
3b970c5133
4
main.go
4
main.go
|
@ -81,7 +81,7 @@ type prometheus struct {
|
||||||
unwrittenSamples chan *extraction.Result
|
unwrittenSamples chan *extraction.Result
|
||||||
|
|
||||||
ruleManager rules.RuleManager
|
ruleManager rules.RuleManager
|
||||||
notifications chan rules.NotificationReqs
|
notifications chan notification.NotificationReqs
|
||||||
storage *metric.TieredStorage
|
storage *metric.TieredStorage
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -192,7 +192,7 @@ func main() {
|
||||||
targetManager := retrieval.NewTargetManager(unwrittenSamples, *concurrentRetrievalAllowance)
|
targetManager := retrieval.NewTargetManager(unwrittenSamples, *concurrentRetrievalAllowance)
|
||||||
targetManager.AddTargetsFromConfig(conf)
|
targetManager.AddTargetsFromConfig(conf)
|
||||||
|
|
||||||
notifications := make(chan rules.NotificationReqs, *notificationQueueCapacity)
|
notifications := make(chan notification.NotificationReqs, *notificationQueueCapacity)
|
||||||
|
|
||||||
// Queue depth will need to be exposed
|
// Queue depth will need to be exposed
|
||||||
ruleManager := rules.NewRuleManager(unwrittenSamples, notifications, conf.EvaluationInterval(), ts)
|
ruleManager := rules.NewRuleManager(unwrittenSamples, notifications, conf.EvaluationInterval(), ts)
|
||||||
|
|
|
@ -17,14 +17,15 @@ import (
|
||||||
"bytes"
|
"bytes"
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
"flag"
|
"flag"
|
||||||
|
"io"
|
||||||
"io/ioutil"
|
"io/ioutil"
|
||||||
"log"
|
"log"
|
||||||
"net/http"
|
"net/http"
|
||||||
|
"text/template"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
clientmodel "github.com/prometheus/client_golang/model"
|
clientmodel "github.com/prometheus/client_golang/model"
|
||||||
|
|
||||||
"github.com/prometheus/prometheus/rules"
|
|
||||||
"github.com/prometheus/prometheus/utility"
|
"github.com/prometheus/prometheus/utility"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -37,6 +38,29 @@ var (
|
||||||
deadline = flag.Duration("alertmanager.httpDeadline", 10*time.Second, "Alert manager HTTP API timeout.")
|
deadline = flag.Duration("alertmanager.httpDeadline", 10*time.Second, "Alert manager HTTP API timeout.")
|
||||||
)
|
)
|
||||||
|
|
||||||
|
// A request for sending a notification to the alert manager for a single alert
|
||||||
|
// vector element.
|
||||||
|
type NotificationReq struct {
|
||||||
|
// Short-form alert summary. May contain text/template-style interpolations.
|
||||||
|
Summary string
|
||||||
|
// Longer alert description. May contain text/template-style interpolations.
|
||||||
|
Description string
|
||||||
|
// Labels associated with this alert notification, including alert name.
|
||||||
|
Labels clientmodel.LabelSet
|
||||||
|
// Current value of alert
|
||||||
|
Value clientmodel.SampleValue
|
||||||
|
// Since when this alert has been active (pending or firing).
|
||||||
|
ActiveSince time.Time
|
||||||
|
// A textual representation of the rule that triggered the alert.
|
||||||
|
RuleString string
|
||||||
|
}
|
||||||
|
|
||||||
|
type NotificationReqs []*NotificationReq
|
||||||
|
|
||||||
|
// httpPoster is the single HTTP operation the notification handler needs from
// its client: issuing a POST request with the given body and content type.
type httpPoster interface {
	Post(url, bodyType string, body io.Reader) (*http.Response, error)
}
|
||||||
|
|
||||||
// NotificationHandler is responsible for dispatching alert notifications to an
|
// NotificationHandler is responsible for dispatching alert notifications to an
|
||||||
// alert manager service.
|
// alert manager service.
|
||||||
type NotificationHandler struct {
|
type NotificationHandler struct {
|
||||||
|
@ -45,13 +69,13 @@ type NotificationHandler struct {
|
||||||
// The URL of this Prometheus instance to include in notifications.
|
// The URL of this Prometheus instance to include in notifications.
|
||||||
prometheusUrl string
|
prometheusUrl string
|
||||||
// Buffer of notifications that have not yet been sent.
|
// Buffer of notifications that have not yet been sent.
|
||||||
pendingNotifications <-chan rules.NotificationReqs
|
pendingNotifications <-chan NotificationReqs
|
||||||
// HTTP client with custom timeout settings.
|
// HTTP client with custom timeout settings.
|
||||||
httpClient http.Client
|
httpClient httpPoster
|
||||||
}
|
}
|
||||||
|
|
||||||
// Construct a new NotificationHandler.
|
// Construct a new NotificationHandler.
|
||||||
func NewNotificationHandler(alertmanagerUrl string, prometheusUrl string, notificationReqs <-chan rules.NotificationReqs) *NotificationHandler {
|
func NewNotificationHandler(alertmanagerUrl string, prometheusUrl string, notificationReqs <-chan NotificationReqs) *NotificationHandler {
|
||||||
return &NotificationHandler{
|
return &NotificationHandler{
|
||||||
alertmanagerUrl: alertmanagerUrl,
|
alertmanagerUrl: alertmanagerUrl,
|
||||||
pendingNotifications: notificationReqs,
|
pendingNotifications: notificationReqs,
|
||||||
|
@ -60,21 +84,55 @@ func NewNotificationHandler(alertmanagerUrl string, prometheusUrl string, notifi
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Interpolate alert information into summary/description templates.
|
||||||
|
func interpolateMessage(msg string, labels clientmodel.LabelSet, value clientmodel.SampleValue) string {
|
||||||
|
t := template.New("message")
|
||||||
|
|
||||||
|
// Inject some convenience variables that are easier to remember for users
|
||||||
|
// who are not used to Go's templating system.
|
||||||
|
defs :=
|
||||||
|
"{{$labels := .Labels}}" +
|
||||||
|
"{{$value := .Value}}"
|
||||||
|
|
||||||
|
if _, err := t.Parse(defs + msg); err != nil {
|
||||||
|
log.Println("Error parsing template:", err)
|
||||||
|
return msg
|
||||||
|
}
|
||||||
|
|
||||||
|
l := map[string]string{}
|
||||||
|
for k, v := range labels {
|
||||||
|
l[string(k)] = string(v)
|
||||||
|
}
|
||||||
|
|
||||||
|
tmplData := struct {
|
||||||
|
Labels map[string]string
|
||||||
|
Value clientmodel.SampleValue
|
||||||
|
}{
|
||||||
|
Labels: l,
|
||||||
|
Value: value,
|
||||||
|
}
|
||||||
|
|
||||||
|
var buf bytes.Buffer
|
||||||
|
if err := t.Execute(&buf, &tmplData); err != nil {
|
||||||
|
log.Println("Error executing template:", err)
|
||||||
|
return msg
|
||||||
|
}
|
||||||
|
return buf.String()
|
||||||
|
}
|
||||||
|
|
||||||
// Send a list of notifications to the configured alert manager.
|
// Send a list of notifications to the configured alert manager.
|
||||||
func (n *NotificationHandler) sendNotifications(reqs rules.NotificationReqs) error {
|
func (n *NotificationHandler) sendNotifications(reqs NotificationReqs) error {
|
||||||
alerts := make([]map[string]interface{}, 0, len(reqs))
|
alerts := make([]map[string]interface{}, 0, len(reqs))
|
||||||
for _, req := range reqs {
|
for _, req := range reqs {
|
||||||
alerts = append(alerts, map[string]interface{}{
|
alerts = append(alerts, map[string]interface{}{
|
||||||
"Summary": req.Rule.Summary,
|
"Summary": interpolateMessage(req.Summary, req.Labels, req.Value),
|
||||||
"Description": req.Rule.Description,
|
"Description": interpolateMessage(req.Description, req.Labels, req.Value),
|
||||||
"Labels": req.ActiveAlert.Labels.Merge(clientmodel.LabelSet{
|
"Labels": req.Labels,
|
||||||
rules.AlertNameLabel: clientmodel.LabelValue(req.Rule.Name()),
|
|
||||||
}),
|
|
||||||
"Payload": map[string]interface{}{
|
"Payload": map[string]interface{}{
|
||||||
"Value": req.ActiveAlert.Value,
|
"Value": req.Value,
|
||||||
"ActiveSince": req.ActiveAlert.ActiveSince,
|
"ActiveSince": req.ActiveSince,
|
||||||
"GeneratorUrl": n.prometheusUrl,
|
"GeneratorUrl": n.prometheusUrl,
|
||||||
"AlertingRule": req.Rule.String(),
|
"AlertingRule": req.RuleString,
|
||||||
},
|
},
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,109 @@
|
||||||
|
// Copyright 2013 Prometheus Team
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
package notification
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bytes"
|
||||||
|
"io"
|
||||||
|
"io/ioutil"
|
||||||
|
"net/http"
|
||||||
|
"testing"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
clientmodel "github.com/prometheus/client_golang/model"
|
||||||
|
)
|
||||||
|
|
||||||
|
// testHttpPoster is a fake HTTP client for tests. It records the body of the
// most recent POST in message and signals on receivedPost once the body has
// been recorded.
type testHttpPoster struct {
	message      string
	receivedPost chan<- bool
}

// Post stores the request body as a string, signals receivedPost, and returns
// a response with an empty body and a nil error. The url and bodyType
// arguments are not inspected.
func (p *testHttpPoster) Post(url string, bodyType string, body io.Reader) (*http.Response, error) {
	// A read error is deliberately ignored: whatever was read before the
	// error is still recorded as the message.
	payload, _ := ioutil.ReadAll(body)
	p.message = string(payload)
	p.receivedPost <- true

	resp := &http.Response{
		Body: ioutil.NopCloser(new(bytes.Buffer)),
	}
	return resp, nil
}
|
||||||
|
|
||||||
|
type testNotificationScenario struct {
|
||||||
|
description string
|
||||||
|
summary string
|
||||||
|
message string
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *testNotificationScenario) test(i int, t *testing.T) {
|
||||||
|
notifications := make(chan NotificationReqs)
|
||||||
|
defer close(notifications)
|
||||||
|
h := NewNotificationHandler("alertmanager_url", "prometheus_url", notifications)
|
||||||
|
|
||||||
|
receivedPost := make(chan bool, 1)
|
||||||
|
poster := testHttpPoster{receivedPost: receivedPost}
|
||||||
|
h.httpClient = &poster
|
||||||
|
|
||||||
|
go h.Run()
|
||||||
|
|
||||||
|
notifications <- NotificationReqs{
|
||||||
|
{
|
||||||
|
Summary: s.summary,
|
||||||
|
Description: s.description,
|
||||||
|
Labels: clientmodel.LabelSet{
|
||||||
|
clientmodel.LabelName("instance"): clientmodel.LabelValue("testinstance"),
|
||||||
|
},
|
||||||
|
Value: clientmodel.SampleValue(1.0 / 3.0),
|
||||||
|
ActiveSince: time.Time{},
|
||||||
|
RuleString: "Test rule string",
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
<-receivedPost
|
||||||
|
if poster.message != s.message {
|
||||||
|
t.Fatalf("%d. Expected '%s', received '%s'", i, s.message, poster.message)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestNotificationHandler(t *testing.T) {
|
||||||
|
scenarios := []testNotificationScenario{
|
||||||
|
{
|
||||||
|
// Correct message.
|
||||||
|
summary: "{{$labels.instance}} = {{$value}}",
|
||||||
|
description: "The alert value for {{$labels.instance}} is {{$value}}",
|
||||||
|
message: `[{"Description":"The alert value for testinstance is 0.3333333333333333","Labels":{"instance":"testinstance"},"Payload":{"ActiveSince":"0001-01-01T00:00:00Z","AlertingRule":"Test rule string","GeneratorUrl":"prometheus_url","Value":"0.333333"},"Summary":"testinstance = 0.3333333333333333"}]`,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
// Bad message referring to unknown label.
|
||||||
|
summary: "{{$labels.badlabel}} = {{$value}}",
|
||||||
|
description: "The alert value for {{$labels.badlabel}} is {{$value}}",
|
||||||
|
message: `[{"Description":"The alert value for \u003cno value\u003e is 0.3333333333333333","Labels":{"instance":"testinstance"},"Payload":{"ActiveSince":"0001-01-01T00:00:00Z","AlertingRule":"Test rule string","GeneratorUrl":"prometheus_url","Value":"0.333333"},"Summary":"\u003cno value\u003e = 0.3333333333333333"}]`,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
// Bad message referring to unknown variable.
|
||||||
|
summary: "{{$labels.instance}} = {{$badvar}}",
|
||||||
|
description: "The alert value for {{$labels.instance}} is {{$badvar}}",
|
||||||
|
message: `[{"Description":"The alert value for {{$labels.instance}} is {{$badvar}}","Labels":{"instance":"testinstance"},"Payload":{"ActiveSince":"0001-01-01T00:00:00Z","AlertingRule":"Test rule string","GeneratorUrl":"prometheus_url","Value":"0.333333"},"Summary":"{{$labels.instance}} = {{$badvar}}"}]`,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
// Bad message referring to unknown struct field.
|
||||||
|
summary: "{{$labels.instance}} = {{.Val}}",
|
||||||
|
description: "The alert value for {{$labels.instance}} is {{.Val}}",
|
||||||
|
message: `[{"Description":"The alert value for {{$labels.instance}} is {{.Val}}","Labels":{"instance":"testinstance"},"Payload":{"ActiveSince":"0001-01-01T00:00:00Z","AlertingRule":"Test rule string","GeneratorUrl":"prometheus_url","Value":"0.333333"},"Summary":"{{$labels.instance}} = {{.Val}}"}]`,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
for i, s := range scenarios {
|
||||||
|
s.test(i, t)
|
||||||
|
}
|
||||||
|
}
|
|
@ -136,7 +136,7 @@ type target struct {
|
||||||
// Any base labels that are added to this target and its metrics.
|
// Any base labels that are added to this target and its metrics.
|
||||||
baseLabels clientmodel.LabelSet
|
baseLabels clientmodel.LabelSet
|
||||||
// The HTTP client used to scrape the target's endpoint.
|
// The HTTP client used to scrape the target's endpoint.
|
||||||
client http.Client
|
httpClient *http.Client
|
||||||
}
|
}
|
||||||
|
|
||||||
// Furnish a reasonably configured target for querying.
|
// Furnish a reasonably configured target for querying.
|
||||||
|
@ -145,7 +145,7 @@ func NewTarget(address string, deadline time.Duration, baseLabels clientmodel.La
|
||||||
address: address,
|
address: address,
|
||||||
Deadline: deadline,
|
Deadline: deadline,
|
||||||
baseLabels: baseLabels,
|
baseLabels: baseLabels,
|
||||||
client: utility.NewDeadlineClient(deadline),
|
httpClient: utility.NewDeadlineClient(deadline),
|
||||||
}
|
}
|
||||||
|
|
||||||
scheduler := &healthScheduler{
|
scheduler := &healthScheduler{
|
||||||
|
@ -220,7 +220,7 @@ func (t *target) scrape(timestamp time.Time, results chan<- *extraction.Result)
|
||||||
}
|
}
|
||||||
req.Header.Add("Accept", acceptHeader)
|
req.Header.Add("Accept", acceptHeader)
|
||||||
|
|
||||||
resp, err := t.client.Do(req)
|
resp, err := t.httpClient.Do(req)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
|
@ -22,6 +22,8 @@ import (
|
||||||
clientmodel "github.com/prometheus/client_golang/model"
|
clientmodel "github.com/prometheus/client_golang/model"
|
||||||
|
|
||||||
"github.com/prometheus/client_golang/extraction"
|
"github.com/prometheus/client_golang/extraction"
|
||||||
|
|
||||||
|
"github.com/prometheus/prometheus/utility"
|
||||||
)
|
)
|
||||||
|
|
||||||
func TestTargetScrapeUpdatesState(t *testing.T) {
|
func TestTargetScrapeUpdatesState(t *testing.T) {
|
||||||
|
@ -29,6 +31,7 @@ func TestTargetScrapeUpdatesState(t *testing.T) {
|
||||||
scheduler: literalScheduler{},
|
scheduler: literalScheduler{},
|
||||||
state: UNKNOWN,
|
state: UNKNOWN,
|
||||||
address: "bad schema",
|
address: "bad schema",
|
||||||
|
httpClient: utility.NewDeadlineClient(0),
|
||||||
}
|
}
|
||||||
testTarget.Scrape(time.Time{}, make(chan *extraction.Result, 2))
|
testTarget.Scrape(time.Time{}, make(chan *extraction.Result, 2))
|
||||||
if testTarget.state != UNREACHABLE {
|
if testTarget.state != UNREACHABLE {
|
||||||
|
@ -41,6 +44,7 @@ func TestTargetRecordScrapeHealth(t *testing.T) {
|
||||||
scheduler: literalScheduler{},
|
scheduler: literalScheduler{},
|
||||||
address: "http://example.url",
|
address: "http://example.url",
|
||||||
baseLabels: clientmodel.LabelSet{clientmodel.JobLabel: "testjob"},
|
baseLabels: clientmodel.LabelSet{clientmodel.JobLabel: "testjob"},
|
||||||
|
httpClient: utility.NewDeadlineClient(0),
|
||||||
}
|
}
|
||||||
|
|
||||||
now := time.Now()
|
now := time.Now()
|
||||||
|
|
|
@ -22,6 +22,7 @@ import (
|
||||||
clientmodel "github.com/prometheus/client_golang/model"
|
clientmodel "github.com/prometheus/client_golang/model"
|
||||||
|
|
||||||
"github.com/prometheus/prometheus/config"
|
"github.com/prometheus/prometheus/config"
|
||||||
|
"github.com/prometheus/prometheus/notification"
|
||||||
"github.com/prometheus/prometheus/storage/metric"
|
"github.com/prometheus/prometheus/storage/metric"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -38,29 +39,19 @@ type RuleManager interface {
|
||||||
AlertingRules() []*AlertingRule
|
AlertingRules() []*AlertingRule
|
||||||
}
|
}
|
||||||
|
|
||||||
// A request for sending an alert notification to the alert manager. This needs
|
|
||||||
// to be defined in this package to prevent a circular import between
|
|
||||||
// rules<->notification.
|
|
||||||
type NotificationReq struct {
|
|
||||||
Rule *AlertingRule
|
|
||||||
ActiveAlert Alert
|
|
||||||
}
|
|
||||||
|
|
||||||
type NotificationReqs []*NotificationReq
|
|
||||||
|
|
||||||
type ruleManager struct {
|
type ruleManager struct {
|
||||||
// Protects the rules list.
|
// Protects the rules list.
|
||||||
sync.Mutex
|
sync.Mutex
|
||||||
rules []Rule
|
rules []Rule
|
||||||
|
|
||||||
results chan<- *extraction.Result
|
results chan<- *extraction.Result
|
||||||
notifications chan<- NotificationReqs
|
notifications chan<- notification.NotificationReqs
|
||||||
done chan bool
|
done chan bool
|
||||||
interval time.Duration
|
interval time.Duration
|
||||||
storage *metric.TieredStorage
|
storage *metric.TieredStorage
|
||||||
}
|
}
|
||||||
|
|
||||||
func NewRuleManager(results chan<- *extraction.Result, notifications chan<- NotificationReqs, interval time.Duration, storage *metric.TieredStorage) RuleManager {
|
func NewRuleManager(results chan<- *extraction.Result, notifications chan<- notification.NotificationReqs, interval time.Duration, storage *metric.TieredStorage) RuleManager {
|
||||||
manager := &ruleManager{
|
manager := &ruleManager{
|
||||||
results: results,
|
results: results,
|
||||||
notifications: notifications,
|
notifications: notifications,
|
||||||
|
@ -102,16 +93,22 @@ func (m *ruleManager) queueAlertNotifications(rule *AlertingRule) {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
notifications := make(NotificationReqs, 0, len(activeAlerts))
|
notifications := make(notification.NotificationReqs, 0, len(activeAlerts))
|
||||||
for _, aa := range activeAlerts {
|
for _, aa := range activeAlerts {
|
||||||
if aa.State != FIRING {
|
if aa.State != FIRING {
|
||||||
// BUG: In the future, make AlertManager support pending alerts?
|
// BUG: In the future, make AlertManager support pending alerts?
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
notifications = append(notifications, &NotificationReq{
|
notifications = append(notifications, &notification.NotificationReq{
|
||||||
Rule: rule,
|
Summary: rule.Summary,
|
||||||
ActiveAlert: aa,
|
Description: rule.Description,
|
||||||
|
Labels: aa.Labels.Merge(clientmodel.LabelSet{
|
||||||
|
AlertNameLabel: clientmodel.LabelValue(rule.Name()),
|
||||||
|
}),
|
||||||
|
Value: aa.Value,
|
||||||
|
ActiveSince: aa.ActiveSince,
|
||||||
|
RuleString: rule.String(),
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
m.notifications <- notifications
|
m.notifications <- notifications
|
||||||
|
|
|
@ -21,8 +21,8 @@ import (
|
||||||
|
|
||||||
// NewDeadlineClient returns a new http.Client which will time out long running
|
// NewDeadlineClient returns a new http.Client which will time out long running
|
||||||
// requests.
|
// requests.
|
||||||
func NewDeadlineClient(timeout time.Duration) http.Client {
|
func NewDeadlineClient(timeout time.Duration) *http.Client {
|
||||||
return http.Client{
|
return &http.Client{
|
||||||
Transport: &http.Transport{
|
Transport: &http.Transport{
|
||||||
// We need to disable keepalive, because we set a deadline on the
|
// We need to disable keepalive, because we set a deadline on the
|
||||||
// underlying connection.
|
// underlying connection.
|
||||||
|
|
Loading…
Reference in New Issue