diff --git a/main.go b/main.go index 9b350564d..31c1309bf 100644 --- a/main.go +++ b/main.go @@ -81,7 +81,7 @@ type prometheus struct { unwrittenSamples chan *extraction.Result ruleManager rules.RuleManager - notifications chan rules.NotificationReqs + notifications chan notification.NotificationReqs storage *metric.TieredStorage } @@ -192,7 +192,7 @@ func main() { targetManager := retrieval.NewTargetManager(unwrittenSamples, *concurrentRetrievalAllowance) targetManager.AddTargetsFromConfig(conf) - notifications := make(chan rules.NotificationReqs, *notificationQueueCapacity) + notifications := make(chan notification.NotificationReqs, *notificationQueueCapacity) // Queue depth will need to be exposed ruleManager := rules.NewRuleManager(unwrittenSamples, notifications, conf.EvaluationInterval(), ts) diff --git a/notification/notification.go b/notification/notification.go index 021e19a36..f13f13ba4 100644 --- a/notification/notification.go +++ b/notification/notification.go @@ -17,14 +17,15 @@ import ( "bytes" "encoding/json" "flag" + "io" "io/ioutil" "log" "net/http" + "text/template" "time" clientmodel "github.com/prometheus/client_golang/model" - "github.com/prometheus/prometheus/rules" "github.com/prometheus/prometheus/utility" ) @@ -37,6 +38,29 @@ var ( deadline = flag.Duration("alertmanager.httpDeadline", 10*time.Second, "Alert manager HTTP API timeout.") ) +// A request for sending a notification to the alert manager for a single alert +// vector element. +type NotificationReq struct { + // Short-form alert summary. May contain text/template-style interpolations. + Summary string + // Longer alert description. May contain text/template-style interpolations. + Description string + // Labels associated with this alert notification, including alert name. + Labels clientmodel.LabelSet + // Current value of alert + Value clientmodel.SampleValue + // Since when this alert has been active (pending or firing). + ActiveSince time.Time + // A textual representation of the rule that triggered the alert. + RuleString string +} + +type NotificationReqs []*NotificationReq + +type httpPoster interface { + Post(url string, bodyType string, body io.Reader) (*http.Response, error) +} + // NotificationHandler is responsible for dispatching alert notifications to an // alert manager service. type NotificationHandler struct { @@ -45,13 +69,13 @@ type NotificationHandler struct { // The URL of this Prometheus instance to include in notifications. prometheusUrl string // Buffer of notifications that have not yet been sent. - pendingNotifications <-chan rules.NotificationReqs + pendingNotifications <-chan NotificationReqs // HTTP client with custom timeout settings. - httpClient http.Client + httpClient httpPoster } // Construct a new NotificationHandler. -func NewNotificationHandler(alertmanagerUrl string, prometheusUrl string, notificationReqs <-chan rules.NotificationReqs) *NotificationHandler { +func NewNotificationHandler(alertmanagerUrl string, prometheusUrl string, notificationReqs <-chan NotificationReqs) *NotificationHandler { return &NotificationHandler{ alertmanagerUrl: alertmanagerUrl, pendingNotifications: notificationReqs, @@ -60,21 +84,55 @@ func NewNotificationHandler(alertmanagerUrl string, prometheusUrl string, notifi } } +// Interpolate alert information into summary/description templates. +func interpolateMessage(msg string, labels clientmodel.LabelSet, value clientmodel.SampleValue) string { + t := template.New("message") + + // Inject some convenience variables that are easier to remember for users + // who are not used to Go's templating system. + defs := + "{{$labels := .Labels}}" + + "{{$value := .Value}}" + + if _, err := t.Parse(defs + msg); err != nil { + log.Println("Error parsing template:", err) + return msg + } + + l := map[string]string{} + for k, v := range labels { + l[string(k)] = string(v) + } + + tmplData := struct { + Labels map[string]string + Value clientmodel.SampleValue + }{ + Labels: l, + Value: value, + } + + var buf bytes.Buffer + if err := t.Execute(&buf, &tmplData); err != nil { + log.Println("Error executing template:", err) + return msg + } + return buf.String() +} + // Send a list of notifications to the configured alert manager. -func (n *NotificationHandler) sendNotifications(reqs rules.NotificationReqs) error { +func (n *NotificationHandler) sendNotifications(reqs NotificationReqs) error { alerts := make([]map[string]interface{}, 0, len(reqs)) for _, req := range reqs { alerts = append(alerts, map[string]interface{}{ - "Summary": req.Rule.Summary, - "Description": req.Rule.Description, - "Labels": req.ActiveAlert.Labels.Merge(clientmodel.LabelSet{ - rules.AlertNameLabel: clientmodel.LabelValue(req.Rule.Name()), - }), + "Summary": interpolateMessage(req.Summary, req.Labels, req.Value), + "Description": interpolateMessage(req.Description, req.Labels, req.Value), + "Labels": req.Labels, "Payload": map[string]interface{}{ - "Value": req.ActiveAlert.Value, - "ActiveSince": req.ActiveAlert.ActiveSince, + "Value": req.Value, + "ActiveSince": req.ActiveSince, "GeneratorUrl": n.prometheusUrl, - "AlertingRule": req.Rule.String(), + "AlertingRule": req.RuleString, }, }) } diff --git a/notification/notification_test.go b/notification/notification_test.go new file mode 100644 index 000000000..0d707d827 --- /dev/null +++ b/notification/notification_test.go @@ -0,0 +1,109 @@ +// Copyright 2013 Prometheus Team +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package notification + +import ( + "bytes" + "io" + "io/ioutil" + "net/http" + "testing" + "time" + + clientmodel "github.com/prometheus/client_golang/model" +) + +type testHttpPoster struct { + message string + receivedPost chan<- bool +} + +func (p *testHttpPoster) Post(url string, bodyType string, body io.Reader) (*http.Response, error) { + var buf bytes.Buffer + buf.ReadFrom(body) + p.message = buf.String() + p.receivedPost <- true + return &http.Response{ + Body: ioutil.NopCloser(&bytes.Buffer{}), + }, nil +} + +type testNotificationScenario struct { + description string + summary string + message string +} + +func (s *testNotificationScenario) test(i int, t *testing.T) { + notifications := make(chan NotificationReqs) + defer close(notifications) + h := NewNotificationHandler("alertmanager_url", "prometheus_url", notifications) + + receivedPost := make(chan bool, 1) + poster := testHttpPoster{receivedPost: receivedPost} + h.httpClient = &poster + + go h.Run() + + notifications <- NotificationReqs{ + { + Summary: s.summary, + Description: s.description, + Labels: clientmodel.LabelSet{ + clientmodel.LabelName("instance"): clientmodel.LabelValue("testinstance"), + }, + Value: clientmodel.SampleValue(1.0 / 3.0), + ActiveSince: time.Time{}, + RuleString: "Test rule string", + }, + } + + <-receivedPost + if poster.message != s.message { + t.Fatalf("%d. Expected '%s', received '%s'", i, s.message, poster.message) + } +} + +func TestNotificationHandler(t *testing.T) { + scenarios := []testNotificationScenario{ + { + // Correct message. + summary: "{{$labels.instance}} = {{$value}}", + description: "The alert value for {{$labels.instance}} is {{$value}}", + message: `[{"Description":"The alert value for testinstance is 0.3333333333333333","Labels":{"instance":"testinstance"},"Payload":{"ActiveSince":"0001-01-01T00:00:00Z","AlertingRule":"Test rule string","GeneratorUrl":"prometheus_url","Value":"0.333333"},"Summary":"testinstance = 0.3333333333333333"}]`, + }, + { + // Bad message referring to unknown label. + summary: "{{$labels.badlabel}} = {{$value}}", + description: "The alert value for {{$labels.badlabel}} is {{$value}}", + message: `[{"Description":"The alert value for \u003cno value\u003e is 0.3333333333333333","Labels":{"instance":"testinstance"},"Payload":{"ActiveSince":"0001-01-01T00:00:00Z","AlertingRule":"Test rule string","GeneratorUrl":"prometheus_url","Value":"0.333333"},"Summary":"\u003cno value\u003e = 0.3333333333333333"}]`, + }, + { + // Bad message referring to unknown variable. + summary: "{{$labels.instance}} = {{$badvar}}", + description: "The alert value for {{$labels.instance}} is {{$badvar}}", + message: `[{"Description":"The alert value for {{$labels.instance}} is {{$badvar}}","Labels":{"instance":"testinstance"},"Payload":{"ActiveSince":"0001-01-01T00:00:00Z","AlertingRule":"Test rule string","GeneratorUrl":"prometheus_url","Value":"0.333333"},"Summary":"{{$labels.instance}} = {{$badvar}}"}]`, + }, + { + // Bad message referring to unknown struct field. + summary: "{{$labels.instance}} = {{.Val}}", + description: "The alert value for {{$labels.instance}} is {{.Val}}", + message: `[{"Description":"The alert value for {{$labels.instance}} is {{.Val}}","Labels":{"instance":"testinstance"},"Payload":{"ActiveSince":"0001-01-01T00:00:00Z","AlertingRule":"Test rule string","GeneratorUrl":"prometheus_url","Value":"0.333333"},"Summary":"{{$labels.instance}} = {{.Val}}"}]`, + }, + } + + for i, s := range scenarios { + s.test(i, t) + } +} diff --git a/retrieval/target.go b/retrieval/target.go index 51076c1cb..8bff1d7fd 100644 --- a/retrieval/target.go +++ b/retrieval/target.go @@ -136,7 +136,7 @@ type target struct { // Any base labels that are added to this target and its metrics. baseLabels clientmodel.LabelSet // The HTTP client used to scrape the target's endpoint. - client http.Client + httpClient *http.Client } // Furnish a reasonably configured target for querying. @@ -145,7 +145,7 @@ func NewTarget(address string, deadline time.Duration, baseLabels clientmodel.La address: address, Deadline: deadline, baseLabels: baseLabels, - client: utility.NewDeadlineClient(deadline), + httpClient: utility.NewDeadlineClient(deadline), } scheduler := &healthScheduler{ @@ -220,7 +220,7 @@ func (t *target) scrape(timestamp time.Time, results chan<- *extraction.Result) } req.Header.Add("Accept", acceptHeader) - resp, err := t.client.Do(req) + resp, err := t.httpClient.Do(req) if err != nil { return err } diff --git a/retrieval/target_test.go b/retrieval/target_test.go index 606641688..aa1b93711 100644 --- a/retrieval/target_test.go +++ b/retrieval/target_test.go @@ -22,6 +22,8 @@ import ( clientmodel "github.com/prometheus/client_golang/model" "github.com/prometheus/client_golang/extraction" + + "github.com/prometheus/prometheus/utility" ) func TestTargetScrapeUpdatesState(t *testing.T) { @@ -29,6 +31,7 @@ func TestTargetScrapeUpdatesState(t *testing.T) { scheduler: literalScheduler{}, state: UNKNOWN, address: "bad schema", + httpClient: utility.NewDeadlineClient(0), } testTarget.Scrape(time.Time{}, make(chan *extraction.Result, 2)) if testTarget.state != UNREACHABLE { @@ -41,6 +44,7 @@ func TestTargetRecordScrapeHealth(t *testing.T) { scheduler: literalScheduler{}, address: "http://example.url", baseLabels: clientmodel.LabelSet{clientmodel.JobLabel: "testjob"}, + httpClient: utility.NewDeadlineClient(0), } now := time.Now() diff --git a/rules/manager.go b/rules/manager.go index 055f2aeb7..c11c658de 100644 --- a/rules/manager.go +++ b/rules/manager.go @@ -22,6 +22,7 @@ import ( clientmodel "github.com/prometheus/client_golang/model" "github.com/prometheus/prometheus/config" + "github.com/prometheus/prometheus/notification" "github.com/prometheus/prometheus/storage/metric" ) @@ -38,29 +39,19 @@ type RuleManager interface { AlertingRules() []*AlertingRule } -// A request for sending an alert notification to the alert manager. This needs -// to be defined in this package to prevent a circular import between -// rules<->notification. -type NotificationReq struct { - Rule *AlertingRule - ActiveAlert Alert -} - -type NotificationReqs []*NotificationReq - type ruleManager struct { // Protects the rules list. sync.Mutex rules []Rule results chan<- *extraction.Result - notifications chan<- NotificationReqs + notifications chan<- notification.NotificationReqs done chan bool interval time.Duration storage *metric.TieredStorage } -func NewRuleManager(results chan<- *extraction.Result, notifications chan<- NotificationReqs, interval time.Duration, storage *metric.TieredStorage) RuleManager { +func NewRuleManager(results chan<- *extraction.Result, notifications chan<- notification.NotificationReqs, interval time.Duration, storage *metric.TieredStorage) RuleManager { manager := &ruleManager{ results: results, notifications: notifications, @@ -102,16 +93,22 @@ func (m *ruleManager) queueAlertNotifications(rule *AlertingRule) { return } - notifications := make(NotificationReqs, 0, len(activeAlerts)) + notifications := make(notification.NotificationReqs, 0, len(activeAlerts)) for _, aa := range activeAlerts { if aa.State != FIRING { // BUG: In the future, make AlertManager support pending alerts? continue } - notifications = append(notifications, &NotificationReq{ - Rule: rule, - ActiveAlert: aa, + notifications = append(notifications, ¬ification.NotificationReq{ + Summary: rule.Summary, + Description: rule.Description, + Labels: aa.Labels.Merge(clientmodel.LabelSet{ + AlertNameLabel: clientmodel.LabelValue(rule.Name()), + }), + Value: aa.Value, + ActiveSince: aa.ActiveSince, + RuleString: rule.String(), }) } m.notifications <- notifications diff --git a/utility/deadline_client.go b/utility/deadline_client.go index 782c66ec5..a1745f5d5 100644 --- a/utility/deadline_client.go +++ b/utility/deadline_client.go @@ -21,8 +21,8 @@ import ( // NewDeadlineClient returns a new http.Client which will time out long running // requests. -func NewDeadlineClient(timeout time.Duration) http.Client { - return http.Client{ +func NewDeadlineClient(timeout time.Duration) *http.Client { + return &http.Client{ Transport: &http.Transport{ // We need to disable keepalive, becasue we set a deadline on the // underlying connection.