Add variable interpolation to notification messages.

This includes the refactorings required to make the HTTP client replaceable
(for testing) and to move the NotificationReq type definitions into the
"notification" package, so that the "notification" package no longer needs to
depend on "rules" and can instead use a representation of the required data
which only includes the necessary fields.
This commit is contained in:
Julius Volz 2013-08-09 19:32:55 +02:00
parent 483f2be3b3
commit 3b970c5133
7 changed files with 204 additions and 36 deletions

View File

@ -81,7 +81,7 @@ type prometheus struct {
unwrittenSamples chan *extraction.Result unwrittenSamples chan *extraction.Result
ruleManager rules.RuleManager ruleManager rules.RuleManager
notifications chan rules.NotificationReqs notifications chan notification.NotificationReqs
storage *metric.TieredStorage storage *metric.TieredStorage
} }
@ -192,7 +192,7 @@ func main() {
targetManager := retrieval.NewTargetManager(unwrittenSamples, *concurrentRetrievalAllowance) targetManager := retrieval.NewTargetManager(unwrittenSamples, *concurrentRetrievalAllowance)
targetManager.AddTargetsFromConfig(conf) targetManager.AddTargetsFromConfig(conf)
notifications := make(chan rules.NotificationReqs, *notificationQueueCapacity) notifications := make(chan notification.NotificationReqs, *notificationQueueCapacity)
// Queue depth will need to be exposed // Queue depth will need to be exposed
ruleManager := rules.NewRuleManager(unwrittenSamples, notifications, conf.EvaluationInterval(), ts) ruleManager := rules.NewRuleManager(unwrittenSamples, notifications, conf.EvaluationInterval(), ts)

View File

@ -17,14 +17,15 @@ import (
"bytes" "bytes"
"encoding/json" "encoding/json"
"flag" "flag"
"io"
"io/ioutil" "io/ioutil"
"log" "log"
"net/http" "net/http"
"text/template"
"time" "time"
clientmodel "github.com/prometheus/client_golang/model" clientmodel "github.com/prometheus/client_golang/model"
"github.com/prometheus/prometheus/rules"
"github.com/prometheus/prometheus/utility" "github.com/prometheus/prometheus/utility"
) )
@ -37,6 +38,29 @@ var (
deadline = flag.Duration("alertmanager.httpDeadline", 10*time.Second, "Alert manager HTTP API timeout.") deadline = flag.Duration("alertmanager.httpDeadline", 10*time.Second, "Alert manager HTTP API timeout.")
) )
// NotificationReq is a request for sending a notification to the alert manager
// for a single alert vector element. It carries plain data only, so that
// producers of notifications can fill it in field by field.
type NotificationReq struct {
// Summary is the short-form alert summary. It may contain text/template-style
// interpolations.
Summary string
// Description is the longer alert description. It may contain
// text/template-style interpolations.
Description string
// Labels holds the labels associated with this alert notification, including
// the alert name.
Labels clientmodel.LabelSet
// Value is the current value of the alert.
Value clientmodel.SampleValue
// ActiveSince is the time since which this alert has been active (pending or
// firing).
ActiveSince time.Time
// RuleString is a textual representation of the rule that triggered the alert.
RuleString string
}
// NotificationReqs is a batch of notification requests that is handed over as a
// single unit.
type NotificationReqs []*NotificationReq
// httpPoster is the one HTTP operation the notification handler depends on.
// Keeping it behind an interface lets tests swap in a fake implementation in
// place of a real HTTP client.
type httpPoster interface {
// Post sends body to url, declaring bodyType as the body's content type.
Post(url string, bodyType string, body io.Reader) (*http.Response, error)
}
// NotificationHandler is responsible for dispatching alert notifications to an // NotificationHandler is responsible for dispatching alert notifications to an
// alert manager service. // alert manager service.
type NotificationHandler struct { type NotificationHandler struct {
@ -45,13 +69,13 @@ type NotificationHandler struct {
// The URL of this Prometheus instance to include in notifications. // The URL of this Prometheus instance to include in notifications.
prometheusUrl string prometheusUrl string
// Buffer of notifications that have not yet been sent. // Buffer of notifications that have not yet been sent.
pendingNotifications <-chan rules.NotificationReqs pendingNotifications <-chan NotificationReqs
// HTTP client with custom timeout settings. // HTTP client with custom timeout settings.
httpClient http.Client httpClient httpPoster
} }
// Construct a new NotificationHandler. // Construct a new NotificationHandler.
func NewNotificationHandler(alertmanagerUrl string, prometheusUrl string, notificationReqs <-chan rules.NotificationReqs) *NotificationHandler { func NewNotificationHandler(alertmanagerUrl string, prometheusUrl string, notificationReqs <-chan NotificationReqs) *NotificationHandler {
return &NotificationHandler{ return &NotificationHandler{
alertmanagerUrl: alertmanagerUrl, alertmanagerUrl: alertmanagerUrl,
pendingNotifications: notificationReqs, pendingNotifications: notificationReqs,
@ -60,21 +84,55 @@ func NewNotificationHandler(alertmanagerUrl string, prometheusUrl string, notifi
} }
} }
// interpolateMessage expands text/template actions embedded in an alert
// summary or description. The template is executed against the alert's labels
// (converted to a plain string map) and its value, reachable both as
// .Labels/.Value and through the $labels/$value convenience variables. If msg
// cannot be parsed or executed as a template, the error is logged and msg is
// returned unchanged.
func interpolateMessage(msg string, labels clientmodel.LabelSet, value clientmodel.SampleValue) string {
	// Variable definitions prepended to every message, giving users who are
	// not used to Go's templating system names that are easier to remember.
	const preamble = "{{$labels := .Labels}}" + "{{$value := .Value}}"

	tmpl := template.New("message")
	if _, err := tmpl.Parse(preamble + msg); err != nil {
		log.Println("Error parsing template:", err)
		return msg
	}

	// Hand the template a map[string]string rather than the typed label set.
	labelMap := make(map[string]string, len(labels))
	for name, val := range labels {
		labelMap[string(name)] = string(val)
	}

	var data struct {
		Labels map[string]string
		Value  clientmodel.SampleValue
	}
	data.Labels = labelMap
	data.Value = value

	var out bytes.Buffer
	if err := tmpl.Execute(&out, &data); err != nil {
		log.Println("Error executing template:", err)
		return msg
	}
	return out.String()
}
// Send a list of notifications to the configured alert manager. // Send a list of notifications to the configured alert manager.
func (n *NotificationHandler) sendNotifications(reqs rules.NotificationReqs) error { func (n *NotificationHandler) sendNotifications(reqs NotificationReqs) error {
alerts := make([]map[string]interface{}, 0, len(reqs)) alerts := make([]map[string]interface{}, 0, len(reqs))
for _, req := range reqs { for _, req := range reqs {
alerts = append(alerts, map[string]interface{}{ alerts = append(alerts, map[string]interface{}{
"Summary": req.Rule.Summary, "Summary": interpolateMessage(req.Summary, req.Labels, req.Value),
"Description": req.Rule.Description, "Description": interpolateMessage(req.Description, req.Labels, req.Value),
"Labels": req.ActiveAlert.Labels.Merge(clientmodel.LabelSet{ "Labels": req.Labels,
rules.AlertNameLabel: clientmodel.LabelValue(req.Rule.Name()),
}),
"Payload": map[string]interface{}{ "Payload": map[string]interface{}{
"Value": req.ActiveAlert.Value, "Value": req.Value,
"ActiveSince": req.ActiveAlert.ActiveSince, "ActiveSince": req.ActiveSince,
"GeneratorUrl": n.prometheusUrl, "GeneratorUrl": n.prometheusUrl,
"AlertingRule": req.Rule.String(), "AlertingRule": req.RuleString,
}, },
}) })
} }

View File

@ -0,0 +1,109 @@
// Copyright 2013 Prometheus Team
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package notification
import (
"bytes"
"io"
"io/ioutil"
"net/http"
"testing"
"time"
clientmodel "github.com/prometheus/client_golang/model"
)
// testHttpPoster is a fake HTTP poster for tests. Instead of performing a
// request it records the posted body and signals that a post took place.
type testHttpPoster struct {
	// message holds the body of the most recent Post call.
	message string
	// receivedPost gets one value sent on it for every Post call.
	receivedPost chan<- bool
}

// Post stores the contents of body in p.message, sends true on
// p.receivedPost, and returns a response with an empty body and a nil error.
// The url and bodyType arguments are not inspected.
func (p *testHttpPoster) Post(url string, bodyType string, body io.Reader) (*http.Response, error) {
	// A read error is deliberately not reported: whatever was read up to
	// that point is still recorded as the message.
	payload, _ := ioutil.ReadAll(body)
	p.message = string(payload)
	p.receivedPost <- true

	resp := &http.Response{
		Body: ioutil.NopCloser(new(bytes.Buffer)),
	}
	return resp, nil
}
// testNotificationScenario is a single case for the notification handler test:
// the summary and description to put into the notification request, and the
// exact body the fake HTTP poster is expected to receive for it.
type testNotificationScenario struct {
// description is used as the Description of the notification request.
description string
// summary is used as the Summary of the notification request.
summary string
// message is the posted body expected to result from the request.
message string
}
// test runs one scenario: it starts a notification handler wired to a fake
// HTTP poster, queues a single notification built from the scenario's summary
// and description, waits until the fake has been posted to, and fails the
// test if the posted body differs from the scenario's expected message. The
// index i only identifies the scenario in the failure output.
func (s *testNotificationScenario) test(i int, t *testing.T) {
	reqs := make(chan NotificationReqs)
	defer close(reqs)

	// Buffered so the fake poster's signal never blocks the handler.
	posted := make(chan bool, 1)
	fake := &testHttpPoster{receivedPost: posted}

	handler := NewNotificationHandler("alertmanager_url", "prometheus_url", reqs)
	handler.httpClient = fake
	go handler.Run()

	req := &NotificationReq{
		Summary:     s.summary,
		Description: s.description,
		Labels: clientmodel.LabelSet{
			clientmodel.LabelName("instance"): clientmodel.LabelValue("testinstance"),
		},
		Value:       clientmodel.SampleValue(1.0 / 3.0),
		ActiveSince: time.Time{},
		RuleString:  "Test rule string",
	}
	reqs <- NotificationReqs{req}

	// Block until the handler has handed the notification to the fake.
	<-posted

	if got, want := fake.message, s.message; got != want {
		t.Fatalf("%d. Expected '%s', received '%s'", i, want, got)
	}
}
// TestNotificationHandler checks the body posted by the notification handler
// for a table of summary/description templates, covering both a valid message
// and several kinds of invalid ones.
func TestNotificationHandler(t *testing.T) {
	cases := []testNotificationScenario{
		// Valid templates: label and value are interpolated.
		{
			description: "The alert value for {{$labels.instance}} is {{$value}}",
			summary:     "{{$labels.instance}} = {{$value}}",
			message:     `[{"Description":"The alert value for testinstance is 0.3333333333333333","Labels":{"instance":"testinstance"},"Payload":{"ActiveSince":"0001-01-01T00:00:00Z","AlertingRule":"Test rule string","GeneratorUrl":"prometheus_url","Value":"0.333333"},"Summary":"testinstance = 0.3333333333333333"}]`,
		},
		// Reference to a label that is not set: "<no value>" is expected in
		// its place.
		{
			description: "The alert value for {{$labels.badlabel}} is {{$value}}",
			summary:     "{{$labels.badlabel}} = {{$value}}",
			message:     `[{"Description":"The alert value for \u003cno value\u003e is 0.3333333333333333","Labels":{"instance":"testinstance"},"Payload":{"ActiveSince":"0001-01-01T00:00:00Z","AlertingRule":"Test rule string","GeneratorUrl":"prometheus_url","Value":"0.333333"},"Summary":"\u003cno value\u003e = 0.3333333333333333"}]`,
		},
		// Reference to an undefined variable: the raw template text is
		// expected to be passed through.
		{
			description: "The alert value for {{$labels.instance}} is {{$badvar}}",
			summary:     "{{$labels.instance}} = {{$badvar}}",
			message:     `[{"Description":"The alert value for {{$labels.instance}} is {{$badvar}}","Labels":{"instance":"testinstance"},"Payload":{"ActiveSince":"0001-01-01T00:00:00Z","AlertingRule":"Test rule string","GeneratorUrl":"prometheus_url","Value":"0.333333"},"Summary":"{{$labels.instance}} = {{$badvar}}"}]`,
		},
		// Reference to an unknown struct field: the raw template text is
		// expected to be passed through.
		{
			description: "The alert value for {{$labels.instance}} is {{.Val}}",
			summary:     "{{$labels.instance}} = {{.Val}}",
			message:     `[{"Description":"The alert value for {{$labels.instance}} is {{.Val}}","Labels":{"instance":"testinstance"},"Payload":{"ActiveSince":"0001-01-01T00:00:00Z","AlertingRule":"Test rule string","GeneratorUrl":"prometheus_url","Value":"0.333333"},"Summary":"{{$labels.instance}} = {{.Val}}"}]`,
		},
	}

	for i := range cases {
		cases[i].test(i, t)
	}
}

View File

@ -136,7 +136,7 @@ type target struct {
// Any base labels that are added to this target and its metrics. // Any base labels that are added to this target and its metrics.
baseLabels clientmodel.LabelSet baseLabels clientmodel.LabelSet
// The HTTP client used to scrape the target's endpoint. // The HTTP client used to scrape the target's endpoint.
client http.Client httpClient *http.Client
} }
// Furnish a reasonably configured target for querying. // Furnish a reasonably configured target for querying.
@ -145,7 +145,7 @@ func NewTarget(address string, deadline time.Duration, baseLabels clientmodel.La
address: address, address: address,
Deadline: deadline, Deadline: deadline,
baseLabels: baseLabels, baseLabels: baseLabels,
client: utility.NewDeadlineClient(deadline), httpClient: utility.NewDeadlineClient(deadline),
} }
scheduler := &healthScheduler{ scheduler := &healthScheduler{
@ -220,7 +220,7 @@ func (t *target) scrape(timestamp time.Time, results chan<- *extraction.Result)
} }
req.Header.Add("Accept", acceptHeader) req.Header.Add("Accept", acceptHeader)
resp, err := t.client.Do(req) resp, err := t.httpClient.Do(req)
if err != nil { if err != nil {
return err return err
} }

View File

@ -22,6 +22,8 @@ import (
clientmodel "github.com/prometheus/client_golang/model" clientmodel "github.com/prometheus/client_golang/model"
"github.com/prometheus/client_golang/extraction" "github.com/prometheus/client_golang/extraction"
"github.com/prometheus/prometheus/utility"
) )
func TestTargetScrapeUpdatesState(t *testing.T) { func TestTargetScrapeUpdatesState(t *testing.T) {
@ -29,6 +31,7 @@ func TestTargetScrapeUpdatesState(t *testing.T) {
scheduler: literalScheduler{}, scheduler: literalScheduler{},
state: UNKNOWN, state: UNKNOWN,
address: "bad schema", address: "bad schema",
httpClient: utility.NewDeadlineClient(0),
} }
testTarget.Scrape(time.Time{}, make(chan *extraction.Result, 2)) testTarget.Scrape(time.Time{}, make(chan *extraction.Result, 2))
if testTarget.state != UNREACHABLE { if testTarget.state != UNREACHABLE {
@ -41,6 +44,7 @@ func TestTargetRecordScrapeHealth(t *testing.T) {
scheduler: literalScheduler{}, scheduler: literalScheduler{},
address: "http://example.url", address: "http://example.url",
baseLabels: clientmodel.LabelSet{clientmodel.JobLabel: "testjob"}, baseLabels: clientmodel.LabelSet{clientmodel.JobLabel: "testjob"},
httpClient: utility.NewDeadlineClient(0),
} }
now := time.Now() now := time.Now()

View File

@ -22,6 +22,7 @@ import (
clientmodel "github.com/prometheus/client_golang/model" clientmodel "github.com/prometheus/client_golang/model"
"github.com/prometheus/prometheus/config" "github.com/prometheus/prometheus/config"
"github.com/prometheus/prometheus/notification"
"github.com/prometheus/prometheus/storage/metric" "github.com/prometheus/prometheus/storage/metric"
) )
@ -38,29 +39,19 @@ type RuleManager interface {
AlertingRules() []*AlertingRule AlertingRules() []*AlertingRule
} }
// A request for sending an alert notification to the alert manager. This needs
// to be defined in this package to prevent a circular import between
// rules<->notification.
type NotificationReq struct {
Rule *AlertingRule
ActiveAlert Alert
}
type NotificationReqs []*NotificationReq
type ruleManager struct { type ruleManager struct {
// Protects the rules list. // Protects the rules list.
sync.Mutex sync.Mutex
rules []Rule rules []Rule
results chan<- *extraction.Result results chan<- *extraction.Result
notifications chan<- NotificationReqs notifications chan<- notification.NotificationReqs
done chan bool done chan bool
interval time.Duration interval time.Duration
storage *metric.TieredStorage storage *metric.TieredStorage
} }
func NewRuleManager(results chan<- *extraction.Result, notifications chan<- NotificationReqs, interval time.Duration, storage *metric.TieredStorage) RuleManager { func NewRuleManager(results chan<- *extraction.Result, notifications chan<- notification.NotificationReqs, interval time.Duration, storage *metric.TieredStorage) RuleManager {
manager := &ruleManager{ manager := &ruleManager{
results: results, results: results,
notifications: notifications, notifications: notifications,
@ -102,16 +93,22 @@ func (m *ruleManager) queueAlertNotifications(rule *AlertingRule) {
return return
} }
notifications := make(NotificationReqs, 0, len(activeAlerts)) notifications := make(notification.NotificationReqs, 0, len(activeAlerts))
for _, aa := range activeAlerts { for _, aa := range activeAlerts {
if aa.State != FIRING { if aa.State != FIRING {
// BUG: In the future, make AlertManager support pending alerts? // BUG: In the future, make AlertManager support pending alerts?
continue continue
} }
notifications = append(notifications, &NotificationReq{ notifications = append(notifications, &notification.NotificationReq{
Rule: rule, Summary: rule.Summary,
ActiveAlert: aa, Description: rule.Description,
Labels: aa.Labels.Merge(clientmodel.LabelSet{
AlertNameLabel: clientmodel.LabelValue(rule.Name()),
}),
Value: aa.Value,
ActiveSince: aa.ActiveSince,
RuleString: rule.String(),
}) })
} }
m.notifications <- notifications m.notifications <- notifications

View File

@ -21,8 +21,8 @@ import (
// NewDeadlineClient returns a new http.Client which will time out long running // NewDeadlineClient returns a new http.Client which will time out long running
// requests. // requests.
func NewDeadlineClient(timeout time.Duration) http.Client { func NewDeadlineClient(timeout time.Duration) *http.Client {
return http.Client{ return &http.Client{
Transport: &http.Transport{ Transport: &http.Transport{
// We need to disable keepalive, because we set a deadline on the // We need to disable keepalive, because we set a deadline on the
// underlying connection. // underlying connection.