Merge pull request #1807 from prometheus/am-label
Expand alert templates at eval time.
This commit is contained in:
commit
9c3129746c
|
@ -15,13 +15,16 @@ package rules
|
|||
|
||||
import (
|
||||
"fmt"
|
||||
"html/template"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
html_template "html/template"
|
||||
|
||||
"github.com/prometheus/common/log"
|
||||
"github.com/prometheus/common/model"
|
||||
|
||||
"github.com/prometheus/prometheus/promql"
|
||||
"github.com/prometheus/prometheus/template"
|
||||
"github.com/prometheus/prometheus/util/strutil"
|
||||
)
|
||||
|
||||
|
@ -63,8 +66,9 @@ func (s AlertState) String() string {
|
|||
|
||||
// Alert is the user-level representation of a single instance of an alerting rule.
|
||||
type Alert struct {
|
||||
State AlertState
|
||||
Labels model.LabelSet
|
||||
State AlertState
|
||||
Labels model.LabelSet
|
||||
Annotations model.LabelSet
|
||||
// The value at the last evaluation of the alerting expression.
|
||||
Value model.SampleValue
|
||||
// The interval during which the condition of this alert held true.
|
||||
|
@ -142,7 +146,7 @@ const resolvedRetention = 15 * time.Minute
|
|||
|
||||
// eval evaluates the rule expression and then creates pending alerts and fires
|
||||
// or removes previously pending alerts accordingly.
|
||||
func (r *AlertingRule) eval(ts model.Time, engine *promql.Engine) (model.Vector, error) {
|
||||
func (r *AlertingRule) eval(ts model.Time, engine *promql.Engine, externalURLPath string) (model.Vector, error) {
|
||||
query, err := engine.NewInstantQuery(r.vector.String(), ts)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
|
@ -160,6 +164,53 @@ func (r *AlertingRule) eval(ts model.Time, engine *promql.Engine) (model.Vector,
|
|||
resultFPs := map[model.Fingerprint]struct{}{}
|
||||
|
||||
for _, smpl := range res {
|
||||
// Provide the alert information to the template.
|
||||
l := make(map[string]string, len(smpl.Metric))
|
||||
for k, v := range smpl.Metric {
|
||||
l[string(k)] = string(v)
|
||||
}
|
||||
|
||||
tmplData := struct {
|
||||
Labels map[string]string
|
||||
Value float64
|
||||
}{
|
||||
Labels: l,
|
||||
Value: float64(smpl.Value),
|
||||
}
|
||||
// Inject some convenience variables that are easier to remember for users
|
||||
// who are not used to Go's templating system.
|
||||
defs := "{{$labels := .Labels}}{{$value := .Value}}"
|
||||
|
||||
expand := func(text model.LabelValue) model.LabelValue {
|
||||
tmpl := template.NewTemplateExpander(
|
||||
defs+string(text),
|
||||
"__alert_"+r.Name(),
|
||||
tmplData,
|
||||
ts,
|
||||
engine,
|
||||
externalURLPath,
|
||||
)
|
||||
result, err := tmpl.Expand()
|
||||
if err != nil {
|
||||
result = fmt.Sprintf("<error expanding template: %s>", err)
|
||||
log.Warnf("Error expanding alert template %v with data '%v': %s", r.Name(), tmplData, err)
|
||||
}
|
||||
return model.LabelValue(result)
|
||||
}
|
||||
|
||||
labels := make(model.LabelSet, len(smpl.Metric)+len(r.labels)+1)
|
||||
for ln, lv := range smpl.Metric {
|
||||
labels[ln] = lv
|
||||
}
|
||||
for ln, lv := range r.labels {
|
||||
labels[ln] = expand(lv)
|
||||
}
|
||||
labels[model.AlertNameLabel] = model.LabelValue(r.Name())
|
||||
|
||||
annotations := make(model.LabelSet, len(r.annotations))
|
||||
for an, av := range r.annotations {
|
||||
annotations[an] = expand(av)
|
||||
}
|
||||
fp := smpl.Metric.Fingerprint()
|
||||
resultFPs[fp] = struct{}{}
|
||||
|
||||
|
@ -171,10 +222,11 @@ func (r *AlertingRule) eval(ts model.Time, engine *promql.Engine) (model.Vector,
|
|||
delete(smpl.Metric, model.MetricNameLabel)
|
||||
|
||||
r.active[fp] = &Alert{
|
||||
Labels: model.LabelSet(smpl.Metric),
|
||||
ActiveAt: ts,
|
||||
State: StatePending,
|
||||
Value: smpl.Value,
|
||||
Labels: labels,
|
||||
Annotations: annotations,
|
||||
ActiveAt: ts,
|
||||
State: StatePending,
|
||||
Value: smpl.Value,
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -243,13 +295,7 @@ func (r *AlertingRule) currentAlerts() []*Alert {
|
|||
alerts := make([]*Alert, 0, len(r.active))
|
||||
|
||||
for _, a := range r.active {
|
||||
labels := r.labels.Clone()
|
||||
for ln, lv := range a.Labels {
|
||||
labels[ln] = lv
|
||||
}
|
||||
anew := *a
|
||||
anew.Labels = labels
|
||||
|
||||
alerts = append(alerts, &anew)
|
||||
}
|
||||
return alerts
|
||||
|
@ -273,7 +319,7 @@ func (r *AlertingRule) String() string {
|
|||
// HTMLSnippet returns an HTML snippet representing this alerting rule. The
|
||||
// resulting snippet is expected to be presented in a <pre> element, so that
|
||||
// line breaks and other returned whitespace are respected.
|
||||
func (r *AlertingRule) HTMLSnippet(pathPrefix string) template.HTML {
|
||||
func (r *AlertingRule) HTMLSnippet(pathPrefix string) html_template.HTML {
|
||||
alertMetric := model.Metric{
|
||||
model.MetricNameLabel: alertMetricName,
|
||||
alertNameLabel: model.LabelValue(r.name),
|
||||
|
@ -289,5 +335,5 @@ func (r *AlertingRule) HTMLSnippet(pathPrefix string) template.HTML {
|
|||
if len(r.annotations) > 0 {
|
||||
s += fmt.Sprintf("\n ANNOTATIONS %s", r.annotations)
|
||||
}
|
||||
return template.HTML(s)
|
||||
return html_template.HTML(s)
|
||||
}
|
||||
|
|
|
@ -32,7 +32,6 @@ import (
|
|||
"github.com/prometheus/prometheus/promql"
|
||||
"github.com/prometheus/prometheus/storage"
|
||||
"github.com/prometheus/prometheus/storage/local"
|
||||
"github.com/prometheus/prometheus/template"
|
||||
"github.com/prometheus/prometheus/util/strutil"
|
||||
)
|
||||
|
||||
|
@ -106,7 +105,7 @@ const (
|
|||
type Rule interface {
|
||||
Name() string
|
||||
// eval evaluates the rule, including any associated recording or alerting actions.
|
||||
eval(model.Time, *promql.Engine) (model.Vector, error)
|
||||
eval(model.Time, *promql.Engine, string) (model.Vector, error)
|
||||
// String returns a human-readable string representation of the rule.
|
||||
String() string
|
||||
// HTMLSnippet returns a human-readable string representation of the rule,
|
||||
|
@ -257,7 +256,7 @@ func (g *Group) eval() {
|
|||
|
||||
evalTotal.WithLabelValues(rtyp).Inc()
|
||||
|
||||
vector, err := rule.eval(now, g.opts.QueryEngine)
|
||||
vector, err := rule.eval(now, g.opts.QueryEngine, g.opts.ExternalURL.Path)
|
||||
if err != nil {
|
||||
// Canceled queries are intentional terminations of queries. This normally
|
||||
// happens on shutdown and thus we skip logging of any errors here.
|
||||
|
@ -310,55 +309,10 @@ func (g *Group) sendAlerts(rule *AlertingRule, timestamp model.Time) error {
|
|||
continue
|
||||
}
|
||||
|
||||
// Provide the alert information to the template.
|
||||
l := make(map[string]string, len(alert.Labels))
|
||||
for k, v := range alert.Labels {
|
||||
l[string(k)] = string(v)
|
||||
}
|
||||
|
||||
tmplData := struct {
|
||||
Labels map[string]string
|
||||
Value float64
|
||||
}{
|
||||
Labels: l,
|
||||
Value: float64(alert.Value),
|
||||
}
|
||||
// Inject some convenience variables that are easier to remember for users
|
||||
// who are not used to Go's templating system.
|
||||
defs := "{{$labels := .Labels}}{{$value := .Value}}"
|
||||
|
||||
expand := func(text model.LabelValue) model.LabelValue {
|
||||
tmpl := template.NewTemplateExpander(
|
||||
defs+string(text),
|
||||
"__alert_"+rule.Name(),
|
||||
tmplData,
|
||||
timestamp,
|
||||
g.opts.QueryEngine,
|
||||
g.opts.ExternalURL.Path,
|
||||
)
|
||||
result, err := tmpl.Expand()
|
||||
if err != nil {
|
||||
result = fmt.Sprintf("<error expanding template: %s>", err)
|
||||
log.Warnf("Error expanding alert template %v with data '%v': %s", rule.Name(), tmplData, err)
|
||||
}
|
||||
return model.LabelValue(result)
|
||||
}
|
||||
|
||||
labels := make(model.LabelSet, len(alert.Labels)+1)
|
||||
for ln, lv := range alert.Labels {
|
||||
labels[ln] = expand(lv)
|
||||
}
|
||||
labels[model.AlertNameLabel] = model.LabelValue(rule.Name())
|
||||
|
||||
annotations := make(model.LabelSet, len(rule.annotations))
|
||||
for an, av := range rule.annotations {
|
||||
annotations[an] = expand(av)
|
||||
}
|
||||
|
||||
a := &model.Alert{
|
||||
StartsAt: alert.ActiveAt.Add(rule.holdDuration).Time(),
|
||||
Labels: labels,
|
||||
Annotations: annotations,
|
||||
Labels: alert.Labels,
|
||||
Annotations: alert.Annotations,
|
||||
GeneratorURL: g.opts.ExternalURL.String() + strutil.GraphLinkForExpression(rule.vector.String()),
|
||||
}
|
||||
if alert.ResolvedAt != 0 {
|
||||
|
|
|
@ -27,8 +27,8 @@ import (
|
|||
func TestAlertingRule(t *testing.T) {
|
||||
suite, err := promql.NewTest(t, `
|
||||
load 5m
|
||||
http_requests{job="app-server", instance="0", group="canary"} 75 85 95 105 105 95 85
|
||||
http_requests{job="app-server", instance="1", group="canary"} 80 90 100 110 120 130 140
|
||||
http_requests{job="app-server", instance="0", group="canary", severity="overwrite-me"} 75 85 95 105 105 95 85
|
||||
http_requests{job="app-server", instance="1", group="canary", severity="overwrite-me"} 80 90 100 110 120 130 140
|
||||
`)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
|
@ -48,7 +48,7 @@ func TestAlertingRule(t *testing.T) {
|
|||
"HTTPRequestRateLow",
|
||||
expr,
|
||||
time.Minute,
|
||||
model.LabelSet{"severity": "critical"},
|
||||
model.LabelSet{"severity": "{{\"c\"}}ritical"},
|
||||
model.LabelSet{},
|
||||
)
|
||||
|
||||
|
@ -105,7 +105,7 @@ func TestAlertingRule(t *testing.T) {
|
|||
for i, test := range tests {
|
||||
evalTime := model.Time(0).Add(test.time)
|
||||
|
||||
res, err := rule.eval(evalTime, suite.QueryEngine())
|
||||
res, err := rule.eval(evalTime, suite.QueryEngine(), "")
|
||||
if err != nil {
|
||||
t.Fatalf("Error during alerting rule evaluation: %s", err)
|
||||
}
|
||||
|
|
|
@ -45,7 +45,7 @@ func (rule RecordingRule) Name() string {
|
|||
}
|
||||
|
||||
// eval evaluates the rule and then overrides the metric names and labels accordingly.
|
||||
func (rule RecordingRule) eval(timestamp model.Time, engine *promql.Engine) (model.Vector, error) {
|
||||
func (rule RecordingRule) eval(timestamp model.Time, engine *promql.Engine, _ string) (model.Vector, error) {
|
||||
query, err := engine.NewInstantQuery(rule.vector.String(), timestamp)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
|
|
|
@ -59,7 +59,7 @@ func TestRuleEval(t *testing.T) {
|
|||
|
||||
for _, test := range suite {
|
||||
rule := NewRecordingRule(test.name, test.expr, test.labels)
|
||||
result, err := rule.eval(now, engine)
|
||||
result, err := rule.eval(now, engine, "")
|
||||
if err != nil {
|
||||
t.Fatalf("Error evaluating %s", test.name)
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue