diff --git a/docs/configuration/recording_rules.md b/docs/configuration/recording_rules.md
index d70ffa0cbb..eda0214b35 100644
--- a/docs/configuration/recording_rules.md
+++ b/docs/configuration/recording_rules.md
@@ -123,6 +123,10 @@ expr: <string>
 # Alerts which have not yet fired for long enough are considered pending.
 [ for: <duration> | default = 0s ]
 
+# How long an alert will continue firing after the condition that triggered it
+# has cleared.
+[ keep_firing_for: <duration> | default = 0s ]
+
 # Labels to add or overwrite for each alert.
 labels:
   [ <labelname>: <tmpl_string> ]
diff --git a/model/rulefmt/rulefmt.go b/model/rulefmt/rulefmt.go
index f1d5f39257..30b3face0d 100644
--- a/model/rulefmt/rulefmt.go
+++ b/model/rulefmt/rulefmt.go
@@ -143,22 +143,24 @@ type RuleGroup struct {
 
 // Rule describes an alerting or recording rule.
 type Rule struct {
-	Record      string            `yaml:"record,omitempty"`
-	Alert       string            `yaml:"alert,omitempty"`
-	Expr        string            `yaml:"expr"`
-	For         model.Duration    `yaml:"for,omitempty"`
-	Labels      map[string]string `yaml:"labels,omitempty"`
-	Annotations map[string]string `yaml:"annotations,omitempty"`
+	Record        string            `yaml:"record,omitempty"`
+	Alert         string            `yaml:"alert,omitempty"`
+	Expr          string            `yaml:"expr"`
+	For           model.Duration    `yaml:"for,omitempty"`
+	KeepFiringFor model.Duration    `yaml:"keep_firing_for,omitempty"`
+	Labels        map[string]string `yaml:"labels,omitempty"`
+	Annotations   map[string]string `yaml:"annotations,omitempty"`
 }
 
 // RuleNode adds yaml.v3 layer to support line and column outputs for invalid rules.
 type RuleNode struct {
-	Record      yaml.Node         `yaml:"record,omitempty"`
-	Alert       yaml.Node         `yaml:"alert,omitempty"`
-	Expr        yaml.Node         `yaml:"expr"`
-	For         model.Duration    `yaml:"for,omitempty"`
-	Labels      map[string]string `yaml:"labels,omitempty"`
-	Annotations map[string]string `yaml:"annotations,omitempty"`
+	Record        yaml.Node         `yaml:"record,omitempty"`
+	Alert         yaml.Node         `yaml:"alert,omitempty"`
+	Expr          yaml.Node         `yaml:"expr"`
+	For           model.Duration    `yaml:"for,omitempty"`
+	KeepFiringFor model.Duration    `yaml:"keep_firing_for,omitempty"`
+	Labels        map[string]string `yaml:"labels,omitempty"`
+	Annotations   map[string]string `yaml:"annotations,omitempty"`
 }
 
 // Validate the rule and return a list of encountered errors.
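To make the documented field and the new `KeepFiringFor` struct member concrete, a minimal rule file using it could look like the following (group name, alert name, and threshold are invented for illustration):

```yaml
groups:
  - name: example
    rules:
      - alert: HighRequestRate
        expr: http_requests_total > 50
        for: 5m
        # Keep the alert firing for 10 minutes after the expression
        # stops returning results.
        keep_firing_for: 10m
        labels:
          severity: warning
```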
@@ -208,6 +210,12 @@ func (r *RuleNode) Validate() (nodes []WrappedError) {
 			node: &r.Record,
 		})
 	}
+	if r.KeepFiringFor != 0 {
+		nodes = append(nodes, WrappedError{
+			err:  fmt.Errorf("invalid field 'keep_firing_for' in recording rule"),
+			node: &r.Record,
+		})
+	}
 	if !model.IsValidMetricName(model.LabelValue(r.Record.Value)) {
 		nodes = append(nodes, WrappedError{
 			err: fmt.Errorf("invalid recording rule name: %s", r.Record.Value),
diff --git a/model/rulefmt/rulefmt_test.go b/model/rulefmt/rulefmt_test.go
index d79ee94e29..d6499538e2 100644
--- a/model/rulefmt/rulefmt_test.go
+++ b/model/rulefmt/rulefmt_test.go
@@ -73,12 +73,21 @@ func TestParseFileFailure(t *testing.T) {
 			filename: "invalid_label_name.bad.yaml",
 			errMsg:   "invalid label name",
 		},
+		{
+			filename: "record_and_for.bad.yaml",
+			errMsg:   "invalid field 'for' in recording rule",
+		},
+		{
+			filename: "record_and_keep_firing_for.bad.yaml",
+			errMsg:   "invalid field 'keep_firing_for' in recording rule",
+		},
 	}
 
 	for _, c := range table {
 		_, errs := ParseFile(filepath.Join("testdata", c.filename))
 		require.NotNil(t, errs, "Expected error parsing %s but got none", c.filename)
-		require.Error(t, errs[0], c.errMsg, "Expected error for %s.", c.filename)
+		require.Error(t, errs[0])
+		require.Containsf(t, errs[0].Error(), c.errMsg, "Expected error for %s.", c.filename)
 	}
 }
diff --git a/model/rulefmt/testdata/record_and_for.bad.yaml b/model/rulefmt/testdata/record_and_for.bad.yaml
new file mode 100644
index 0000000000..a15d428a08
--- /dev/null
+++ b/model/rulefmt/testdata/record_and_for.bad.yaml
@@ -0,0 +1,6 @@
+groups:
+  - name: yolo
+    rules:
+      - record: Hello
+        expr: 1
+        for: 1m
diff --git a/model/rulefmt/testdata/record_and_keep_firing_for.bad.yaml b/model/rulefmt/testdata/record_and_keep_firing_for.bad.yaml
new file mode 100644
index 0000000000..eb8192f05c
--- /dev/null
+++ b/model/rulefmt/testdata/record_and_keep_firing_for.bad.yaml
@@ -0,0 +1,6 @@
+groups:
+  - name: yolo
+    rules:
+      - record: Hello
+        expr: 1
+        keep_firing_for: 1m
diff --git a/rules/alerting.go b/rules/alerting.go
index d456662669..a7c94f7a7d 100644
--- a/rules/alerting.go
+++ b/rules/alerting.go
@@ -83,11 +83,12 @@ type Alert struct {
 	Value float64
 	// The interval during which the condition of this alert held true.
 	// ResolvedAt will be 0 to indicate a still active alert.
-	ActiveAt   time.Time
-	FiredAt    time.Time
-	ResolvedAt time.Time
-	LastSentAt time.Time
-	ValidUntil time.Time
+	ActiveAt        time.Time
+	FiredAt         time.Time
+	ResolvedAt      time.Time
+	LastSentAt      time.Time
+	ValidUntil      time.Time
+	KeepFiringSince time.Time
 }
 
 func (a *Alert) needsSending(ts time.Time, resendDelay time.Duration) bool {
@@ -112,6 +113,9 @@ type AlertingRule struct {
 	// The duration for which a labelset needs to persist in the expression
 	// output vector before an alert transitions from Pending to Firing state.
 	holdDuration time.Duration
+	// The amount of time that the alert should remain firing after its
+	// condition has resolved.
+	keepFiringFor time.Duration
 	// Extra labels to attach to the resulting alert sample vectors.
 	labels labels.Labels
 	// Non-identifying key/value pairs.
 	annotations labels.Labels
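The validation added above can be exercised directly against the new `.bad.yaml` fixtures. A minimal sketch, assuming it is run from the repository root with the Prometheus module on the import path (`rulefmt.ParseFile` is the same entry point the test uses):

```go
package main

import (
	"fmt"

	"github.com/prometheus/prometheus/model/rulefmt"
)

func main() {
	// A recording rule carrying keep_firing_for fails validation at parse
	// time, mirroring the rulefmt test case above.
	_, errs := rulefmt.ParseFile("model/rulefmt/testdata/record_and_keep_firing_for.bad.yaml")
	for _, err := range errs {
		fmt.Println(err) // contains: invalid field 'keep_firing_for' in recording rule
	}
}
```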
@@ -142,7 +146,7 @@ type AlertingRule struct {
 // NewAlertingRule constructs a new AlertingRule.
 func NewAlertingRule(
-	name string, vec parser.Expr, hold time.Duration,
+	name string, vec parser.Expr, hold, keepFiringFor time.Duration,
 	labels, annotations, externalLabels labels.Labels, externalURL string,
 	restored bool, logger log.Logger,
 ) *AlertingRule {
@@ -152,6 +156,7 @@ func NewAlertingRule(
 		name:           name,
 		vector:         vec,
 		holdDuration:   hold,
+		keepFiringFor:  keepFiringFor,
 		labels:         labels,
 		annotations:    annotations,
 		externalLabels: el,
@@ -201,6 +206,12 @@ func (r *AlertingRule) HoldDuration() time.Duration {
 	return r.holdDuration
 }
 
+// KeepFiringFor returns the duration an alerting rule should keep firing for
+// after its condition has resolved.
+func (r *AlertingRule) KeepFiringFor() time.Duration {
+	return r.keepFiringFor
+}
+
 // Labels returns the labels of the alerting rule.
 func (r *AlertingRule) Labels() labels.Labels {
 	return r.labels
@@ -404,16 +415,36 @@ func (r *AlertingRule) Eval(ctx context.Context, ts time.Time, query QueryFunc,
 	// Check if any pending alerts should be removed or fire now. Write out alert timeseries.
 	for fp, a := range r.active {
 		if _, ok := resultFPs[fp]; !ok {
+			// There are no firing alerts for this fingerprint. The alert is
+			// no longer firing.
+
+			// Use the keepFiringFor value to determine if the alert should
+			// keep firing.
+			var keepFiring bool
+			if a.State == StateFiring && r.keepFiringFor > 0 {
+				if a.KeepFiringSince.IsZero() {
+					a.KeepFiringSince = ts
+				}
+				if ts.Sub(a.KeepFiringSince) < r.keepFiringFor {
+					keepFiring = true
+				}
+			}
+
 			// If the alert was previously firing, keep it around for a given
 			// retention time so it is reported as resolved to the AlertManager.
 			if a.State == StatePending || (!a.ResolvedAt.IsZero() && ts.Sub(a.ResolvedAt) > resolvedRetention) {
 				delete(r.active, fp)
 			}
-			if a.State != StateInactive {
+			if a.State != StateInactive && !keepFiring {
 				a.State = StateInactive
 				a.ResolvedAt = ts
 			}
-			continue
+			if !keepFiring {
+				continue
+			}
+		} else {
+			// The alert is firing; reset keepFiringSince.
+			a.KeepFiringSince = time.Time{}
 		}
 
 		numActivePending++
@@ -512,11 +543,12 @@ func (r *AlertingRule) sendAlerts(ctx context.Context, ts time.Time, resendDelay
 
 func (r *AlertingRule) String() string {
 	ar := rulefmt.Rule{
-		Alert:       r.name,
-		Expr:        r.vector.String(),
-		For:         model.Duration(r.holdDuration),
-		Labels:      r.labels.Map(),
-		Annotations: r.annotations.Map(),
+		Alert:         r.name,
+		Expr:          r.vector.String(),
+		For:           model.Duration(r.holdDuration),
+		KeepFiringFor: model.Duration(r.keepFiringFor),
+		Labels:        r.labels.Map(),
+		Annotations:   r.annotations.Map(),
 	}
 
 	byt, err := yaml.Marshal(ar)
diff --git a/rules/alerting_test.go b/rules/alerting_test.go
index 4f5f5e683a..036bfae1cf 100644
--- a/rules/alerting_test.go
+++ b/rules/alerting_test.go
@@ -66,7 +66,7 @@ func TestAlertingRuleState(t *testing.T) {
 	}
 
 	for i, test := range tests {
-		rule := NewAlertingRule(test.name, nil, 0, labels.EmptyLabels(), labels.EmptyLabels(), labels.EmptyLabels(), "", true, nil)
+		rule := NewAlertingRule(test.name, nil, 0, 0, labels.EmptyLabels(), labels.EmptyLabels(), labels.EmptyLabels(), "", true, nil)
 		rule.active = test.active
 		got := rule.State()
 		require.Equal(t, test.want, got, "test case %d unexpected AlertState, want:%d got:%d", i, test.want, got)
@@ -76,7 +76,7 @@ func TestAlertingRuleState(t *testing.T) {
 func TestAlertingRuleLabelsUpdate(t *testing.T) {
 	suite, err := promql.NewTest(t, `
 		load 1m
-			http_requests{job="app-server", instance="0"} 75 85 70 70
+			http_requests{job="app-server", instance="0"} 75 85 70 70 stale
 	`)
 	require.NoError(t, err)
 	defer suite.Close()
@@ -90,6 +90,7 @@ func TestAlertingRuleLabelsUpdate(t *testing.T) {
 		"HTTPRequestRateLow",
 		expr,
 		time.Minute,
+		0,
 		// Basing alerting rule labels off of a value that can change is a very bad idea.
 		// If an alert is going back and forth between two label values it will never fire.
 		// Instead, you should write two alerts with constant labels.
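The decision logic added to `Eval` above can be hard to follow inside the larger loop. The following standalone sketch replays the same rule in plain Go (not Prometheus API; the timeline is invented and the pending state is ignored for brevity): once the condition clears, the alert keeps firing until `keepFiringFor` has elapsed since the first failed evaluation, and the timer resets whenever the condition holds again.

```go
package main

import (
	"fmt"
	"time"
)

func main() {
	const keepFiringFor = time.Minute

	var keepFiringSince time.Time // zero while the condition still holds
	base := time.Unix(0, 0)

	// One evaluation per minute: condition true twice, then cleared.
	conditions := []bool{true, true, false, false, false}

	for i, ok := range conditions {
		ts := base.Add(time.Duration(i) * time.Minute)
		keepFiring := false
		if ok {
			// Condition is true again: reset the keep-firing timer.
			keepFiringSince = time.Time{}
		} else {
			if keepFiringSince.IsZero() {
				// First evaluation after the condition cleared.
				keepFiringSince = ts
			}
			keepFiring = ts.Sub(keepFiringSince) < keepFiringFor
		}
		fmt.Printf("t=%v condition=%v firing=%v\n", ts.Sub(base), ok, ok || keepFiring)
	}
	// The alert stays firing for one extra evaluation after the condition
	// clears, then resolves -- the behavior verified by TestKeepFiringFor below.
}
```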
@@ -173,6 +174,10 @@ func TestAlertingRuleLabelsUpdate(t *testing.T) {
 		require.Equal(t, result, filteredRes)
 	}
+	evalTime := baseTime.Add(time.Duration(len(results)) * time.Minute)
+	res, err := rule.Eval(suite.Context(), evalTime, EngineQueryFunc(suite.QueryEngine(), suite.Storage()), nil, 0)
+	require.NoError(t, err)
+	require.Equal(t, 0, len(res))
 }
 
 func TestAlertingRuleExternalLabelsInTemplate(t *testing.T) {
@@ -192,6 +197,7 @@ func TestAlertingRuleExternalLabelsInTemplate(t *testing.T) {
 		"ExternalLabelDoesNotExist",
 		expr,
 		time.Minute,
+		0,
 		labels.FromStrings("templated_label", "There are {{ len $externalLabels }} external Labels, of which foo is {{ $externalLabels.foo }}."),
 		labels.EmptyLabels(),
 		labels.EmptyLabels(),
@@ -202,6 +208,7 @@ func TestAlertingRuleExternalLabelsInTemplate(t *testing.T) {
 		"ExternalLabelExists",
 		expr,
 		time.Minute,
+		0,
 		labels.FromStrings("templated_label", "There are {{ len $externalLabels }} external Labels, of which foo is {{ $externalLabels.foo }}."),
 		labels.EmptyLabels(),
 		labels.FromStrings("foo", "bar", "dings", "bums"),
@@ -286,6 +293,7 @@ func TestAlertingRuleExternalURLInTemplate(t *testing.T) {
 		"ExternalURLDoesNotExist",
 		expr,
 		time.Minute,
+		0,
 		labels.FromStrings("templated_label", "The external URL is {{ $externalURL }}."),
 		labels.EmptyLabels(),
 		labels.EmptyLabels(),
@@ -296,6 +304,7 @@ func TestAlertingRuleExternalURLInTemplate(t *testing.T) {
 		"ExternalURLExists",
 		expr,
 		time.Minute,
+		0,
 		labels.FromStrings("templated_label", "The external URL is {{ $externalURL }}."),
 		labels.EmptyLabels(),
 		labels.EmptyLabels(),
@@ -380,6 +389,7 @@ func TestAlertingRuleEmptyLabelFromTemplate(t *testing.T) {
 		"EmptyLabel",
 		expr,
 		time.Minute,
+		0,
 		labels.FromStrings("empty_label", ""),
 		labels.EmptyLabels(),
 		labels.EmptyLabels(),
@@ -436,6 +446,7 @@ func TestAlertingRuleQueryInTemplate(t *testing.T) {
 		"ruleWithQueryInTemplate",
 		expr,
 		time.Minute,
+		0,
 		labels.FromStrings("label", "value"),
 		labels.FromStrings("templated_label", `{{- with "sort(sum(http_requests) by (instance))" | query -}}
 {{- range $i,$v := . -}}
@@ -480,7 +491,7 @@ instance: {{ $v.Labels.instance }}, value: {{ printf "%.0f" $v.Value }};
 func BenchmarkAlertingRuleAtomicField(b *testing.B) {
 	b.ReportAllocs()
-	rule := NewAlertingRule("bench", nil, 0, labels.EmptyLabels(), labels.EmptyLabels(), labels.EmptyLabels(), "", true, nil)
+	rule := NewAlertingRule("bench", nil, 0, 0, labels.EmptyLabels(), labels.EmptyLabels(), labels.EmptyLabels(), "", true, nil)
 	done := make(chan struct{})
 
 	go func() {
 		for i := 0; i < b.N; i++ {
@@ -518,6 +529,7 @@ func TestAlertingRuleDuplicate(t *testing.T) {
 		"foo",
 		expr,
 		time.Minute,
+		0,
 		labels.FromStrings("test", "test"),
 		labels.EmptyLabels(),
 		labels.EmptyLabels(),
@@ -564,6 +576,7 @@ func TestAlertingRuleLimit(t *testing.T) {
 		"foo",
 		expr,
 		time.Minute,
+		0,
 		labels.FromStrings("test", "test"),
 		labels.EmptyLabels(),
 		labels.EmptyLabels(),
@@ -636,6 +649,7 @@ func TestQueryForStateSeries(t *testing.T) {
 		"TestRule",
 		nil,
 		time.Minute,
+		0,
 		labels.FromStrings("severity", "critical"),
 		labels.EmptyLabels(), labels.EmptyLabels(), "", true, nil,
 	)
@@ -669,6 +683,7 @@ func TestSendAlertsDontAffectActiveAlerts(t *testing.T) {
 		"TestRule",
 		nil,
 		time.Minute,
+		0,
 		labels.FromStrings("severity", "critical"),
 		labels.EmptyLabels(), labels.EmptyLabels(), "", true, nil,
 	)
@@ -712,3 +727,171 @@ func TestSendAlertsDontAffectActiveAlerts(t *testing.T) {
 	// But the labels with the AlertingRule should not be changed.
 	require.Equal(t, labels.FromStrings("a1", "1"), rule.active[h].Labels)
 }
+
+func TestKeepFiringFor(t *testing.T) {
+	suite, err := promql.NewTest(t, `
+		load 1m
+			http_requests{job="app-server", instance="0"} 75 85 70 70 10x5
+	`)
+	require.NoError(t, err)
+	defer suite.Close()
+
+	require.NoError(t, suite.Run())
+
+	expr, err := parser.ParseExpr(`http_requests > 50`)
+	require.NoError(t, err)
+
+	rule := NewAlertingRule(
+		"HTTPRequestRateHigh",
+		expr,
+		time.Minute,
+		time.Minute,
+		labels.EmptyLabels(),
+		labels.EmptyLabels(), labels.EmptyLabels(), "", true, nil,
+	)
+
+	results := []promql.Vector{
+		{
+			promql.Sample{
+				Metric: labels.FromStrings(
+					"__name__", "ALERTS",
+					"alertname", "HTTPRequestRateHigh",
+					"alertstate", "pending",
+					"instance", "0",
+					"job", "app-server",
+				),
+				Point: promql.Point{V: 1},
+			},
+		},
+		{
+			promql.Sample{
+				Metric: labels.FromStrings(
+					"__name__", "ALERTS",
+					"alertname", "HTTPRequestRateHigh",
+					"alertstate", "firing",
+					"instance", "0",
+					"job", "app-server",
+				),
+				Point: promql.Point{V: 1},
+			},
+		},
+		{
+			promql.Sample{
+				Metric: labels.FromStrings(
+					"__name__", "ALERTS",
+					"alertname", "HTTPRequestRateHigh",
+					"alertstate", "firing",
+					"instance", "0",
+					"job", "app-server",
+				),
+				Point: promql.Point{V: 1},
+			},
+		},
+		{
+			promql.Sample{
+				Metric: labels.FromStrings(
+					"__name__", "ALERTS",
+					"alertname", "HTTPRequestRateHigh",
+					"alertstate", "firing",
+					"instance", "0",
+					"job", "app-server",
+				),
+				Point: promql.Point{V: 1},
+			},
+		},
+		// From now on the alert should keep firing.
+		{
+			promql.Sample{
+				Metric: labels.FromStrings(
+					"__name__", "ALERTS",
+					"alertname", "HTTPRequestRateHigh",
+					"alertstate", "firing",
+					"instance", "0",
+					"job", "app-server",
+				),
+				Point: promql.Point{V: 1},
+			},
+		},
+	}
+
+	baseTime := time.Unix(0, 0)
+	for i, result := range results {
+		t.Logf("case %d", i)
+		evalTime := baseTime.Add(time.Duration(i) * time.Minute)
+		result[0].Point.T = timestamp.FromTime(evalTime)
+		res, err := rule.Eval(suite.Context(), evalTime, EngineQueryFunc(suite.QueryEngine(), suite.Storage()), nil, 0)
+		require.NoError(t, err)
+
+		var filteredRes promql.Vector // After removing 'ALERTS_FOR_STATE' samples.
+		for _, smpl := range res {
+			smplName := smpl.Metric.Get("__name__")
+			if smplName == "ALERTS" {
+				filteredRes = append(filteredRes, smpl)
+			} else {
+				// If not 'ALERTS', it has to be 'ALERTS_FOR_STATE'.
+				require.Equal(t, "ALERTS_FOR_STATE", smplName)
+			}
+		}
+
+		require.Equal(t, result, filteredRes)
+	}
+	evalTime := baseTime.Add(time.Duration(len(results)) * time.Minute)
+	res, err := rule.Eval(suite.Context(), evalTime, EngineQueryFunc(suite.QueryEngine(), suite.Storage()), nil, 0)
+	require.NoError(t, err)
+	require.Equal(t, 0, len(res))
+}
+
+func TestPendingAndKeepFiringFor(t *testing.T) {
+	suite, err := promql.NewTest(t, `
+		load 1m
+			http_requests{job="app-server", instance="0"} 75 10x10
+	`)
+	require.NoError(t, err)
+	defer suite.Close()
+
+	require.NoError(t, suite.Run())
+
+	expr, err := parser.ParseExpr(`http_requests > 50`)
+	require.NoError(t, err)
+
+	rule := NewAlertingRule(
+		"HTTPRequestRateHigh",
+		expr,
+		time.Minute,
+		time.Minute,
+		labels.EmptyLabels(),
+		labels.EmptyLabels(), labels.EmptyLabels(), "", true, nil,
+	)
+
+	result := promql.Sample{
+		Metric: labels.FromStrings(
+			"__name__", "ALERTS",
+			"alertname", "HTTPRequestRateHigh",
+			"alertstate", "pending",
+			"instance", "0",
+			"job", "app-server",
+		),
+		Point: promql.Point{V: 1},
+	}
+
+	baseTime := time.Unix(0, 0)
+	result.Point.T = timestamp.FromTime(baseTime)
+	res, err := rule.Eval(suite.Context(), baseTime, EngineQueryFunc(suite.QueryEngine(), suite.Storage()), nil, 0)
+	require.NoError(t, err)
+
+	require.Len(t, res, 2)
+	for _, smpl := range res {
+		smplName := smpl.Metric.Get("__name__")
+		if smplName == "ALERTS" {
+			require.Equal(t, result, smpl)
+		} else {
+			// If not 'ALERTS', it has to be 'ALERTS_FOR_STATE'.
+			require.Equal(t, "ALERTS_FOR_STATE", smplName)
+		}
+	}
+
+	evalTime := baseTime.Add(time.Minute)
+	res, err = rule.Eval(suite.Context(), evalTime, EngineQueryFunc(suite.QueryEngine(), suite.Storage()), nil, 0)
+	require.NoError(t, err)
+	require.Equal(t, 0, len(res))
+}
diff --git a/rules/manager.go b/rules/manager.go
index d1ad8afdc5..6f6ce2cfe4 100644
--- a/rules/manager.go
+++ b/rules/manager.go
@@ -1119,6 +1119,7 @@ func (m *Manager) LoadGroups(
 				r.Alert.Value,
 				expr,
 				time.Duration(r.For),
+				time.Duration(r.KeepFiringFor),
 				labels.FromMap(r.Labels),
 				labels.FromMap(r.Annotations),
 				externalLabels,
diff --git a/rules/manager_test.go b/rules/manager_test.go
index 788aa0af38..6f0dd0ddaa 100644
--- a/rules/manager_test.go
+++ b/rules/manager_test.go
@@ -66,6 +66,7 @@ func TestAlertingRule(t *testing.T) {
 		"HTTPRequestRateLow",
 		expr,
 		time.Minute,
+		0,
 		labels.FromStrings("severity", "{{\"c\"}}ritical"),
 		labels.EmptyLabels(), labels.EmptyLabels(), "", true, nil,
 	)
@@ -209,6 +210,7 @@ func TestForStateAddSamples(t *testing.T) {
 		"HTTPRequestRateLow",
 		expr,
 		time.Minute,
+		0,
 		labels.FromStrings("severity", "{{\"c\"}}ritical"),
 		labels.EmptyLabels(), labels.EmptyLabels(), "", true, nil,
 	)
@@ -383,6 +385,7 @@ func TestForStateRestore(t *testing.T) {
 		"HTTPRequestRateLow",
 		expr,
 		alertForDuration,
+		0,
 		labels.FromStrings("severity", "critical"),
 		labels.EmptyLabels(), labels.EmptyLabels(), "", true, nil,
 	)
@@ -449,6 +452,7 @@ func TestForStateRestore(t *testing.T) {
 		"HTTPRequestRateLow",
 		expr,
 		alertForDuration,
+		0,
 		labels.FromStrings("severity", "critical"),
 		labels.EmptyLabels(), labels.EmptyLabels(), "", false, nil,
 	)
@@ -615,13 +619,13 @@ func readSeriesSet(ss storage.SeriesSet) (map[string][]promql.Point, error) {
 func TestCopyState(t *testing.T) {
 	oldGroup := &Group{
 		rules: []Rule{
-			NewAlertingRule("alert", nil, 0, labels.EmptyLabels(), labels.EmptyLabels(), labels.EmptyLabels(), "", true, nil),
+			NewAlertingRule("alert", nil, 0, 0, labels.EmptyLabels(), labels.EmptyLabels(), labels.EmptyLabels(), "", true, nil),
 			NewRecordingRule("rule1", nil, labels.EmptyLabels()),
 			NewRecordingRule("rule2", nil, labels.EmptyLabels()),
 			NewRecordingRule("rule3", nil, labels.FromStrings("l1", "v1")),
 			NewRecordingRule("rule3", nil, labels.FromStrings("l1", "v2")),
 			NewRecordingRule("rule3", nil, labels.FromStrings("l1", "v3")),
-			NewAlertingRule("alert2", nil, 0, labels.FromStrings("l2", "v1"), labels.EmptyLabels(), labels.EmptyLabels(), "", true, nil),
+			NewAlertingRule("alert2", nil, 0, 0, labels.FromStrings("l2", "v1"), labels.EmptyLabels(), labels.EmptyLabels(), "", true, nil),
 		},
 		seriesInPreviousEval: []map[string]labels.Labels{
 			{},
@@ -640,10 +644,10 @@ func TestCopyState(t *testing.T) {
 			NewRecordingRule("rule3", nil, labels.FromStrings("l1", "v0")),
 			NewRecordingRule("rule3", nil, labels.FromStrings("l1", "v1")),
 			NewRecordingRule("rule3", nil, labels.FromStrings("l1", "v2")),
-			NewAlertingRule("alert", nil, 0, labels.EmptyLabels(), labels.EmptyLabels(), labels.EmptyLabels(), "", true, nil),
+			NewAlertingRule("alert", nil, 0, 0, labels.EmptyLabels(), labels.EmptyLabels(), labels.EmptyLabels(), "", true, nil),
 			NewRecordingRule("rule1", nil, labels.EmptyLabels()),
-			NewAlertingRule("alert2", nil, 0, labels.FromStrings("l2", "v0"), labels.EmptyLabels(), labels.EmptyLabels(), "", true, nil),
-			NewAlertingRule("alert2", nil, 0, labels.FromStrings("l2", "v1"), labels.EmptyLabels(), labels.EmptyLabels(), "", true, nil),
+			NewAlertingRule("alert2", nil, 0, 0, labels.FromStrings("l2", "v0"), labels.EmptyLabels(), labels.EmptyLabels(), "", true, nil),
+			NewAlertingRule("alert2", nil, 0, 0, labels.FromStrings("l2", "v1"), labels.EmptyLabels(), labels.EmptyLabels(), "", true, nil),
 			NewRecordingRule("rule4", nil, labels.EmptyLabels()),
 		},
 		seriesInPreviousEval: make([]map[string]labels.Labels, 8),
@@ -875,7 +879,7 @@ func TestNotify(t *testing.T) {
 	expr, err := parser.ParseExpr("a > 1")
 	require.NoError(t, err)
-	rule := NewAlertingRule("aTooHigh", expr, 0, labels.Labels{}, labels.Labels{}, labels.EmptyLabels(), "", true, log.NewNopLogger())
+	rule := NewAlertingRule("aTooHigh", expr, 0, 0, labels.Labels{}, labels.Labels{}, labels.EmptyLabels(), "", true, log.NewNopLogger())
 	group := NewGroup(GroupOptions{
 		Name:     "alert",
 		Interval: time.Second,
@@ -1147,7 +1151,7 @@ func TestGroupHasAlertingRules(t *testing.T) {
 			group: &Group{
 				name: "HasAlertingRule",
 				rules: []Rule{
-					NewAlertingRule("alert", nil, 0, labels.EmptyLabels(), labels.EmptyLabels(), labels.EmptyLabels(), "", true, nil),
+					NewAlertingRule("alert", nil, 0, 0, labels.EmptyLabels(), labels.EmptyLabels(), labels.EmptyLabels(), "", true, nil),
 					NewRecordingRule("record", nil, labels.EmptyLabels()),
 				},
 			},
diff --git a/web/api/v1/api.go b/web/api/v1/api.go
index 894a8666a6..f560693d40 100644
--- a/web/api/v1/api.go
+++ b/web/api/v1/api.go
@@ -1111,11 +1111,12 @@ type AlertDiscovery struct {
 // Alert has info for an alert.
 type Alert struct {
-	Labels      labels.Labels `json:"labels"`
-	Annotations labels.Labels `json:"annotations"`
-	State       string        `json:"state"`
-	ActiveAt    *time.Time    `json:"activeAt,omitempty"`
-	Value       string        `json:"value"`
+	Labels          labels.Labels `json:"labels"`
+	Annotations     labels.Labels `json:"annotations"`
+	State           string        `json:"state"`
+	ActiveAt        *time.Time    `json:"activeAt,omitempty"`
+	KeepFiringSince *time.Time    `json:"keepFiringSince,omitempty"`
+	Value           string        `json:"value"`
 }
 
 func (api *API) alerts(r *http.Request) apiFuncResult {
@@ -1144,6 +1145,9 @@ func rulesAlertsToAPIAlerts(rulesAlerts []*rules.Alert) []*Alert {
 			ActiveAt:    &ruleAlert.ActiveAt,
 			Value:       strconv.FormatFloat(ruleAlert.Value, 'e', -1, 64),
 		}
+		if !ruleAlert.KeepFiringSince.IsZero() {
+			apiAlerts[i].KeepFiringSince = &ruleAlert.KeepFiringSince
+		}
 	}
 
 	return apiAlerts
@@ -1241,6 +1245,7 @@ type AlertingRule struct {
 	Name          string        `json:"name"`
 	Query         string        `json:"query"`
 	Duration      float64       `json:"duration"`
+	KeepFiringFor float64       `json:"keepFiringFor"`
 	Labels        labels.Labels `json:"labels"`
 	Annotations   labels.Labels `json:"annotations"`
 	Alerts        []*Alert      `json:"alerts"`
@@ -1303,6 +1308,7 @@ func (api *API) rules(r *http.Request) apiFuncResult {
 				Name:          rule.Name(),
 				Query:         rule.Query().String(),
 				Duration:      rule.HoldDuration().Seconds(),
+				KeepFiringFor: rule.KeepFiringFor().Seconds(),
 				Labels:        rule.Labels(),
 				Annotations:   rule.Annotations(),
 				Alerts:        rulesAlertsToAPIAlerts(rule.ActiveAlerts()),
diff --git a/web/api/v1/api_test.go b/web/api/v1/api_test.go
index 7e2dcbd8bb..919fad34b8 100644
--- a/web/api/v1/api_test.go
+++ b/web/api/v1/api_test.go
@@ -209,6 +209,7 @@ func (m rulesRetrieverMock) AlertingRules() []*rules.AlertingRule {
 		"test_metric3",
 		expr1,
 		time.Second,
+		0,
 		labels.Labels{},
 		labels.Labels{},
 		labels.Labels{},
@@ -220,6 +221,7 @@ func (m rulesRetrieverMock) AlertingRules() []*rules.AlertingRule {
 		"test_metric4",
 		expr2,
 		time.Second,
+		0,
 		labels.Labels{},
 		labels.Labels{},
 		labels.Labels{},
diff --git a/web/ui/react-app/src/pages/alerts/AlertContents.tsx b/web/ui/react-app/src/pages/alerts/AlertContents.tsx
index 2837399f7c..a619f69fc6 100644
--- a/web/ui/react-app/src/pages/alerts/AlertContents.tsx
+++ b/web/ui/react-app/src/pages/alerts/AlertContents.tsx
@@ -29,6 +29,7 @@ export interface Alert {
   value: string;
   annotations: Record<string, string>;
   activeAt: string;
+  keepFiringSince: string;
 }
 
 interface RuleGroup {
diff --git a/web/ui/react-app/src/pages/alerts/CollapsibleAlertPanel.tsx b/web/ui/react-app/src/pages/alerts/CollapsibleAlertPanel.tsx
index ef45c205f9..676046efd5 100644
--- a/web/ui/react-app/src/pages/alerts/CollapsibleAlertPanel.tsx
+++ b/web/ui/react-app/src/pages/alerts/CollapsibleAlertPanel.tsx
@@ -43,6 +43,11 @@ const CollapsibleAlertPanel: FC<CollapsibleAlertPanelProps> = ({ rule, showAnnotations }) => {
               <div>for: {formatDuration(rule.duration * 1000)}</div>
             </div>
           )}
+          {rule.keepFiringFor > 0 && (
+            <div>
+              <div>keep_firing_for: {formatDuration(rule.keepFiringFor * 1000)}</div>
+            </div>
+          )}
           {rule.labels && Object.keys(rule.labels).length > 0 && (
             <div>
               <div>labels:</div>
@@ -91,9 +96,14 @@ const CollapsibleAlertPanel: FC<CollapsibleAlertPanelProps> = ({ rule, showAnnotations }) => {
                 <td>
-                  <h5 className="m-0">
+                  <h5 className="m-0 d-flex align-items-center">
                     <Badge color={alertColors[alert.state]} className="px-3">{alert.state}</Badge>
+                    {alert.keepFiringSince && (
+                      <Badge color="secondary" className="px-3 ml-2">Stabilizing</Badge>
+                    )}
                   </h5>
                 </td>
                 <td>{alert.activeAt}</td>
diff --git a/web/ui/react-app/src/pages/rules/RulesContent.tsx b/web/ui/react-app/src/pages/rules/RulesContent.tsx
index e7adfee39a..ef4a7ad8f8 100644
--- a/web/ui/react-app/src/pages/rules/RulesContent.tsx
+++ b/web/ui/react-app/src/pages/rules/RulesContent.tsx
@@ -96,6 +96,11 @@ export const RulesContent: FC<RulesContentProps> = ({ response }) => {
                       <div>for: {formatDuration(r.duration * 1000)}</div>
                     </div>
                   )}
+                  {r.keepFiringFor > 0 && (
+                    <div>
+                      <div>keep_firing_for: {formatDuration(r.keepFiringFor * 1000)}</div>
+                    </div>
+                  )}
                   {r.labels && Object.keys(r.labels).length > 0 && (
                     <div>
                       <div>labels:</div>
diff --git a/web/ui/react-app/src/types/types.ts b/web/ui/react-app/src/types/types.ts
index 21f52b5fa2..a30439c77e 100644
--- a/web/ui/react-app/src/types/types.ts
+++ b/web/ui/react-app/src/types/types.ts
@@ -26,6 +26,7 @@ export type Rule = {
   alerts: Alert[];
   annotations: Record<string, string>;
   duration: number;
+  keepFiringFor: number;
   evaluationTime: string;
   health: string;
   labels: Record<string, string>;
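Putting the API changes together: an alerting rule returned by `/api/v1/rules` now carries `keepFiringFor` (in seconds), and each active alert gains an optional `keepFiringSince` timestamp that is only set while the alert is being held in the firing state. An abridged, hand-written response fragment (values invented, fields not touched by this diff omitted) might look like:

```json
{
  "name": "HighRequestRate",
  "query": "http_requests_total > 50",
  "duration": 300,
  "keepFiringFor": 600,
  "labels": { "severity": "warning" },
  "annotations": {},
  "alerts": [
    {
      "labels": { "alertname": "HighRequestRate", "severity": "warning" },
      "annotations": {},
      "state": "firing",
      "activeAt": "2023-01-01T00:00:00Z",
      "keepFiringSince": "2023-01-01T00:15:00Z",
      "value": "7e+01"
    }
  ]
}
```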