Implement alerts dashboard and expression console links.

This commit is contained in:
Julius Volz 2013-06-13 16:10:05 +02:00 committed by Julius Volz
parent b838f45616
commit 0226d1ac7a
20 changed files with 321 additions and 39 deletions

View File

@ -230,7 +230,7 @@ func main() {
PrometheusStatus: &web.PrometheusStatus{
BuildInfo: BuildInfo,
Config: conf.String(),
Rules: ruleManager.Rules(),
RuleManager: ruleManager,
TargetPools: targetManager.Pools(),
Flags: flags,
Birth: time.Now(),
@ -238,6 +238,10 @@ func main() {
CurationState: curationState,
}
alertsHandler := &web.AlertsHandler{
RuleManager: ruleManager,
}
databasesHandler := &web.DatabasesHandler{
Incoming: databaseStates,
}
@ -252,6 +256,7 @@ func main() {
StatusHandler: statusHandler,
MetricsHandler: metricsService,
DatabasesHandler: databasesHandler,
AlertsHandler: alertsHandler,
}
prometheus := prometheus{

View File

@ -94,6 +94,14 @@ func (l LabelSet) ToMetric() Metric {
return metric
}
// ToLabelSet returns a new LabelSet holding a copy of every label/value pair
// of the metric. The result is always non-nil and does not alias m.
func (m Metric) ToLabelSet() LabelSet {
	copied := make(LabelSet, len(m))
	for name, val := range m {
		copied[name] = val
	}
	return copied
}
// A Metric is similar to a LabelSet, but the key difference is that a Metric is
// a singleton and refers to one and only one stream of samples.
type Metric map[LabelName]LabelValue

View File

@ -15,19 +15,24 @@ package rules
import (
"fmt"
"html/template"
"sync"
"time"
"github.com/prometheus/prometheus/model"
"github.com/prometheus/prometheus/rules/ast"
"github.com/prometheus/prometheus/stats"
"github.com/prometheus/prometheus/storage/metric"
"github.com/prometheus/prometheus/utility"
"time"
)
// States that active alerts can be in.
type alertState int
type AlertState int
func (s alertState) String() string {
func (s AlertState) String() string {
switch s {
case INACTIVE:
return "inactive"
case PENDING:
return "pending"
case FIRING:
@ -38,32 +43,35 @@ func (s alertState) String() string {
}
const (
PENDING alertState = iota
INACTIVE AlertState = iota
PENDING
FIRING
)
// alert is used to track active (pending/firing) alerts over time.
type alert struct {
// Alert is used to track active (pending/firing) alerts over time.
type Alert struct {
// The name of the alert.
name string
Name string
// The vector element labelset triggering this alert.
metric model.Metric
Labels model.LabelSet
// The state of the alert (PENDING or FIRING).
state alertState
State AlertState
// The time when the alert first transitioned into PENDING state.
activeSince time.Time
ActiveSince time.Time
// The value of the alert expression for this vector element.
Value model.SampleValue
}
// sample returns a Sample suitable for recording the alert.
func (a alert) sample(timestamp time.Time, value model.SampleValue) model.Sample {
func (a Alert) sample(timestamp time.Time, value model.SampleValue) model.Sample {
recordedMetric := model.Metric{}
for label, value := range a.metric {
for label, value := range a.Labels {
recordedMetric[label] = value
}
recordedMetric[model.MetricNameLabel] = model.AlertMetricName
recordedMetric[model.AlertNameLabel] = model.LabelValue(a.name)
recordedMetric[model.AlertStateLabel] = model.LabelValue(a.state.String())
recordedMetric[model.AlertNameLabel] = model.LabelValue(a.Name)
recordedMetric[model.AlertStateLabel] = model.LabelValue(a.State.String())
return model.Sample{
Metric: recordedMetric,
@ -83,37 +91,51 @@ type AlertingRule struct {
holdDuration time.Duration
// Extra labels to attach to the resulting alert sample vectors.
labels model.LabelSet
// Protects the below.
mutex sync.Mutex
// A map of alerts which are currently active (PENDING or FIRING), keyed by
// the fingerprint of the labelset they correspond to.
activeAlerts map[model.Fingerprint]*alert
activeAlerts map[model.Fingerprint]*Alert
}
func (rule AlertingRule) Name() string { return rule.name }
func (rule *AlertingRule) Name() string { return rule.name }
func (rule AlertingRule) EvalRaw(timestamp time.Time, storage *metric.TieredStorage) (ast.Vector, error) {
func (rule *AlertingRule) EvalRaw(timestamp time.Time, storage *metric.TieredStorage) (ast.Vector, error) {
return ast.EvalVectorInstant(rule.vector, timestamp, storage, stats.NewTimerGroup())
}
func (rule AlertingRule) Eval(timestamp time.Time, storage *metric.TieredStorage) (ast.Vector, error) {
func (rule *AlertingRule) Eval(timestamp time.Time, storage *metric.TieredStorage) (ast.Vector, error) {
// Get the raw value of the rule expression.
exprResult, err := rule.EvalRaw(timestamp, storage)
if err != nil {
return nil, err
}
rule.mutex.Lock()
defer rule.mutex.Unlock()
// Create pending alerts for any new vector elements in the alert expression.
resultFingerprints := utility.Set{}
for _, sample := range exprResult {
fp := *model.NewFingerprintFromMetric(sample.Metric)
resultFingerprints.Add(fp)
if _, ok := rule.activeAlerts[fp]; !ok {
rule.activeAlerts[fp] = &alert{
name: rule.name,
metric: sample.Metric,
state: PENDING,
activeSince: timestamp,
alert, ok := rule.activeAlerts[fp]
if !ok {
	// First time this vector element shows up: start tracking it as a
	// PENDING alert. The metric name is stripped from the stored labels;
	// previously the stripped copy was discarded and a fresh, unstripped
	// copy was stored instead. delete is a no-op when the key is absent,
	// so no existence check is needed.
	labels := sample.Metric.ToLabelSet()
	delete(labels, model.MetricNameLabel)
	rule.activeAlerts[fp] = &Alert{
		Name:        rule.name,
		Labels:      labels,
		State:       PENDING,
		ActiveSince: timestamp,
		Value:       sample.Value,
	}
} else {
	// Already tracked: only refresh the latest expression value.
	alert.Value = sample.Value
}
}
@ -127,9 +149,9 @@ func (rule AlertingRule) Eval(timestamp time.Time, storage *metric.TieredStorage
continue
}
if activeAlert.state == PENDING && timestamp.Sub(activeAlert.activeSince) >= rule.holdDuration {
if activeAlert.State == PENDING && timestamp.Sub(activeAlert.ActiveSince) >= rule.holdDuration {
vector = append(vector, activeAlert.sample(timestamp, 0))
activeAlert.state = FIRING
activeAlert.State = FIRING
}
vector = append(vector, activeAlert.sample(timestamp, 1))
@ -138,7 +160,7 @@ func (rule AlertingRule) Eval(timestamp time.Time, storage *metric.TieredStorage
return vector, nil
}
func (rule AlertingRule) ToDotGraph() string {
func (rule *AlertingRule) ToDotGraph() string {
graph := fmt.Sprintf(`digraph "Rules" {
%#p[shape="box",label="ALERT %s IF FOR %s"];
%#p -> %#p;
@ -147,8 +169,47 @@ func (rule AlertingRule) ToDotGraph() string {
return graph
}
func (rule AlertingRule) String() string {
return fmt.Sprintf("ALERT %s IF %s FOR %s WITH %s\n", rule.name, rule.vector, utility.DurationToString(rule.holdDuration), rule.labels)
func (rule *AlertingRule) String() string {
return fmt.Sprintf("ALERT %s IF %s FOR %s WITH %s", rule.name, rule.vector, utility.DurationToString(rule.holdDuration), rule.labels)
}
// HTMLSnippet returns the rule definition as an HTML fragment in which the
// alert name and the alert expression are links to the expression console.
//
// The result is typed template.HTML and therefore bypasses the template
// engine's auto-escaping, so every piece of rule text that ends up as visible
// content is HTML-escaped here (expressions routinely contain '<', '>' and
// '"'). The href values are escaped by ConsoleLinkForExpression itself.
func (rule *AlertingRule) HTMLSnippet() template.HTML {
	// Selector for the synthetic time series recorded for this alert.
	alertMetric := model.Metric{
		model.MetricNameLabel: model.AlertMetricName,
		model.AlertNameLabel:  model.LabelValue(rule.name),
	}
	vectorExpr := rule.vector.String()
	return template.HTML(fmt.Sprintf(
		`ALERT <a href="%s">%s</a> IF <a href="%s">%s</a> FOR %s WITH %s`,
		ConsoleLinkForExpression(alertMetric.String()),
		template.HTMLEscapeString(rule.name),
		ConsoleLinkForExpression(vectorExpr),
		template.HTMLEscapeString(vectorExpr),
		template.HTMLEscapeString(utility.DurationToString(rule.holdDuration)),
		template.HTMLEscapeString(fmt.Sprint(rule.labels))))
}
// State returns the highest state found among the rule's active alerts, or
// INACTIVE when there are none. The rule's mutex is held while the active
// alerts are inspected.
func (rule *AlertingRule) State() AlertState {
	rule.mutex.Lock()
	defer rule.mutex.Unlock()

	worst := INACTIVE
	for _, a := range rule.activeAlerts {
		if worst < a.State {
			worst = a.State
		}
	}
	return worst
}
// ActiveAlerts returns copies of all currently tracked alerts, in no
// particular order (the alerts are kept in a map). The rule's mutex is held
// while the copies are made.
func (rule *AlertingRule) ActiveAlerts() []Alert {
	rule.mutex.Lock()
	defer rule.mutex.Unlock()

	snapshot := make([]Alert, len(rule.activeAlerts))
	next := 0
	for _, active := range rule.activeAlerts {
		snapshot[next] = *active
		next++
	}
	return snapshot
}
// Construct a new AlertingRule.
@ -158,6 +219,6 @@ func NewAlertingRule(name string, vector ast.VectorNode, holdDuration time.Durat
vector: vector,
holdDuration: holdDuration,
labels: labels,
activeAlerts: map[model.Fingerprint]*alert{},
activeAlerts: map[model.Fingerprint]*Alert{},
}
}

View File

@ -321,7 +321,12 @@ func (node *VectorFunctionCall) String() string {
}
// String returns the textual form of the aggregation. The "BY (...)" clause
// is emitted only when there is at least one grouping label.
func (node *VectorAggregation) String() string {
	aggrString := fmt.Sprintf("%s(%s)", node.aggrType, node.vector)
	if len(node.groupBy) == 0 {
		return aggrString
	}
	return fmt.Sprintf("%s BY (%s)", aggrString, node.groupBy)
}
func (node *VectorArithExpr) String() string {

View File

@ -15,6 +15,8 @@ package rules
import (
"fmt"
"html"
"github.com/prometheus/prometheus/model"
"github.com/prometheus/prometheus/rules/ast"
"github.com/prometheus/prometheus/utility"
@ -111,3 +113,7 @@ func NewMatrix(vector ast.Node, intervalStr string) (ast.MatrixNode, error) {
vectorLiteral := vector.(*ast.VectorLiteral)
return ast.NewMatrixLiteral(vectorLiteral, interval), nil
}
// ConsoleLinkForExpression returns an HTML-escaped relative link to the graph
// page whose URL fragment holds the graph options for a single expression
// (the Go-quoted expression plus "tab":1).
func ConsoleLinkForExpression(expr string) string {
	graphOptions := fmt.Sprintf(`[{"expr":%q,"tab":1}]`, expr)
	link := "graph#" + graphOptions
	return html.EscapeString(link)
}

View File

@ -28,9 +28,16 @@ type Result struct {
}
// RuleManager loads rules from the configuration, drives their periodic
// evaluation and exposes the loaded rules to other components.
type RuleManager interface {
// AddRulesFromConfig loads and adds rules from the rule files specified in
// the configuration.
AddRulesFromConfig(config config.Config) error
// Run starts the rule manager's periodic rule evaluation.
Run()
// Stop stops the rule manager's rule evaluation cycles.
Stop()
// Rules returns all rules.
Rules() []Rule
// AlertingRules returns all alerting rules.
AlertingRules() []*AlertingRule
}
type ruleManager struct {
@ -127,3 +134,16 @@ func (m *ruleManager) Rules() []Rule {
copy(rules, m.rules)
return rules
}
// AlertingRules returns those of the manager's rules that are alerting rules,
// in the order in which they are stored. The manager is locked while the
// rules are scanned; the result is never nil.
func (m *ruleManager) AlertingRules() []*AlertingRule {
	m.Lock()
	defer m.Unlock()

	alertingRules := []*AlertingRule{}
	for _, candidate := range m.rules {
		switch typed := candidate.(type) {
		case *AlertingRule:
			alertingRules = append(alertingRules, typed)
		}
	}
	return alertingRules
}

View File

@ -15,11 +15,13 @@ package rules
import (
"fmt"
"html/template"
"time"
"github.com/prometheus/prometheus/model"
"github.com/prometheus/prometheus/rules/ast"
"github.com/prometheus/prometheus/stats"
"github.com/prometheus/prometheus/storage/metric"
"time"
)
// A RecordingRule records its vector expression into new timeseries.
@ -71,6 +73,17 @@ func (rule RecordingRule) String() string {
return fmt.Sprintf("%s%s = %s\n", rule.name, rule.labels, rule.vector)
}
// HTMLSnippet returns the rule definition as an HTML fragment in which the
// recorded metric name and the rule expression are links to the expression
// console.
//
// The result is typed template.HTML and therefore bypasses the template
// engine's auto-escaping, so the name, labels and expression are HTML-escaped
// here before being used as visible text. The href values are escaped by
// ConsoleLinkForExpression itself.
func (rule RecordingRule) HTMLSnippet() template.HTML {
	ruleExpr := rule.vector.String()
	return template.HTML(fmt.Sprintf(
		`<a href="%s">%s</a>%s = <a href="%s">%s</a>`,
		ConsoleLinkForExpression(rule.name),
		template.HTMLEscapeString(rule.name),
		template.HTMLEscapeString(fmt.Sprint(rule.labels)),
		ConsoleLinkForExpression(ruleExpr),
		template.HTMLEscapeString(ruleExpr)))
}
// Construct a new RecordingRule.
func NewRecordingRule(name string, labels model.LabelSet, vector ast.VectorNode, permanent bool) *RecordingRule {
return &RecordingRule{

View File

@ -14,9 +14,11 @@
package rules
import (
"html/template"
"time"
"github.com/prometheus/prometheus/rules/ast"
"github.com/prometheus/prometheus/storage/metric"
"time"
)
// A Rule encapsulates a vector expression which is evaluated at a specified
@ -33,4 +35,7 @@ type Rule interface {
ToDotGraph() string
// String returns a human-readable string representation of the rule.
String() string
// HTMLSnippet returns a human-readable string representation of the rule,
// decorated with HTML elements for use in the web frontend.
HTMLSnippet() template.HTML
}

View File

@ -13,7 +13,7 @@
all: blob-stamp
blob-stamp: static/generated/protocol_buffer.descriptor
blob-stamp: static/generated/protocol_buffer.descriptor templates/*
$(MAKE) -C blob
touch $@

61
web/alerts.go Normal file
View File

@ -0,0 +1,61 @@
// Copyright 2013 Prometheus Team
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package web
import (
"github.com/prometheus/prometheus/rules"
"net/http"
"sort"
"sync"
)
// AlertStatus is the data passed to the "alerts" template.
type AlertStatus struct {
// AlertingRules are the alerting rules to render, in display order.
AlertingRules []*rules.AlertingRule
}
// AlertsHandler is the HTTP handler that renders the alerts page.
type AlertsHandler struct {
// RuleManager supplies the alerting rules shown on the page.
RuleManager rules.RuleManager
// mutex is held for the whole of ServeHTTP, so requests are handled one at
// a time.
mutex sync.Mutex
}
// byAlertStateSorter orders alerting rules by descending state, so that rules
// in a higher state sort first. Its methods are the three required by
// sort.Sort.
type byAlertStateSorter struct {
	alerts []*rules.AlertingRule
}

// Len returns the number of rules being sorted.
func (s byAlertStateSorter) Len() int {
	return len(s.alerts)
}

// Less reports whether rule i is in a higher state than rule j.
func (s byAlertStateSorter) Less(i, j int) bool {
	left := s.alerts[i].State()
	right := s.alerts[j].State()
	return right < left
}

// Swap exchanges the rules at positions i and j.
func (s byAlertStateSorter) Swap(i, j int) {
	moved := s.alerts[i]
	s.alerts[i] = s.alerts[j]
	s.alerts[j] = moved
}
// ServeHTTP renders the "alerts" template with all alerting rules, sorted so
// that rules in a higher state come first. The handler's mutex is held for
// the duration of the request.
func (h *AlertsHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
	h.mutex.Lock()
	defer h.mutex.Unlock()

	sorter := byAlertStateSorter{alerts: h.RuleManager.AlertingRules()}
	sort.Sort(sorter)

	executeTemplate(w, "alerts", AlertStatus{AlertingRules: sorter.alerts})
}

48
web/static/css/alerts.css Normal file
View File

@ -0,0 +1,48 @@
/* Styles for the alerts page (templates/alerts.html). */

/* Container around one alerting rule (header plus details). */
.alert_wrapper {
padding: 2px;
}
/* Rule header; the pointer cursor signals that it can be clicked
   (alerts.js toggles the element that follows it). */
.alert_header {
padding: 3px;
cursor: pointer;
}
/* Rule details; hidden until the header is clicked. */
.alert_content {
padding: 3px;
display: none;
}
/* Header background by rule state; the template adds the state as a class. */
.alert_header.firing {
background-color: #ff7673;
}
.alert_header.pending {
background-color: #ffcf40;
}
.alert_header.inactive {
background-color: #92ed6b;
}
/* The rule definition shown at the top of the details. */
.alert_description {
margin-left: 3px;
padding: 8px 0 8px 0;
}
/* Table of the rule's active alerts. */
.alert_active_elements {
border: 1px solid #dddddd;
}
.alert_active_elements th {
background-color: #dddddd;
padding: 0 5px 0 5px;
}
.alert_active_elements td {
background-color: #eebbbb;
padding: 0 5px 0 5px;
}
/* Highlight the row under the mouse pointer. */
.alert_active_elements tr:hover td {
background-color: #ffcf40;
}

5
web/static/js/alerts.js Normal file
View File

@ -0,0 +1,5 @@
// Wires up the alerts page: clicking an alert header shows or hides the
// element that immediately follows it.
function init() {
  var toggleDetails = function() {
    $(this).next().toggle();
  };
  $(".alert_header").click(toggleDetails);
}

// Run init once the document is ready.
$(init);

View File

@ -519,6 +519,7 @@ function parseGraphOptionsFromUrl() {
return options;
}
// NOTE: This needs to be kept in sync with rules/helpers.go:ConsoleLinkForExpression!
function storeGraphOptionsInUrl() {
var allGraphsOptions = [];
for (var i = 0; i < graphs.length; i++) {
@ -559,4 +560,5 @@ function init() {
}
})
}
$(init);

View File

@ -27,7 +27,7 @@ type PrometheusStatus struct {
Config string
Curation metric.CurationState
Flags map[string]string
Rules []rules.Rule
RuleManager rules.RuleManager
TargetPools map[string]*retrieval.TargetPool
Birth time.Time

View File

@ -13,6 +13,7 @@
<a href="/graph">Graph &amp; Console</a>
<a href="/">Status</a>
<a href="/databases">Databases</a>
<a href="/alerts">Alerts</a>
{{ define "user_dashboard_link" }}{{ end }}
{{ template "user_dashboard_link" .}}

41
web/templates/alerts.html Normal file
View File

@ -0,0 +1,41 @@
{{define "head"}}
<link type="text/css" rel="stylesheet" href="/static/css/alerts.css">
<script src="/static/js/alerts.js"></script>
{{end}}
{{define "content"}}
<h2>Alerts</h2>
<div class="grouping_box">
{{range .AlertingRules}}
{{$activeAlerts := .ActiveAlerts}}
<div class="alert_wrapper">
<div class="alert_header {{.State}}">
{{.Name}} ({{len $activeAlerts}} active)
</div>
<div class="alert_content">
<div class="alert_description">
<b>Rule:</b> {{.HTMLSnippet}}
</div>
{{if $activeAlerts}}
<table class="alert_active_elements">
<tr>
<th>Labels</th>
<th>State</th>
<th>Active Since</th>
<th>Value</th>
</tr>
{{range $activeAlerts}}
<tr>
<td>{{.Labels}}</td>
<td>{{.State}}</td>
<td>{{.ActiveSince}}</td>
<td>{{.Value}}</td>
</tr>
{{end}}
</table>
{{end}}
</div>
</div>
{{end}}
</div>
{{end}}

View File

@ -1,8 +1,8 @@
{{define "head"}}<!-- nix -->{{end}}
{{define "content"}}
<div class="grouping_box">
<h2>Database Information</h2>
<div class="grouping_box">
{{range .States}}
<div class="grouping_box">
<h3>{{.Name}}</h3>

View File

@ -2,7 +2,6 @@
<script src="/static/vendor/jqueryui/jquery-ui.min.js"></script>
<link rel="stylesheet" href="/static/vendor/jqueryui/themes/base/jquery-ui.css" />
<link type="text/css" rel="stylesheet" href="/static/css/prometheus.css">
<link type="text/css" rel="stylesheet" href="/static/css/graph.css">
<script src="/static/vendor/jquery-simple-datetimepicker/jquery.simple-dtpicker.js"></script>

View File

@ -35,7 +35,7 @@
<h2>Rules</h2>
<div class="grouping_box">
<pre>{{range .Rules}}{{.String}}{{end}}</pre>
<pre>{{range .RuleManager.Rules}}{{.HTMLSnippet}}<br/>{{end}}</pre>
</div>
<h2>Targets</h2>

View File

@ -38,6 +38,7 @@ type WebService struct {
StatusHandler *StatusHandler
DatabasesHandler *DatabasesHandler
MetricsHandler *api.MetricsService
AlertsHandler *AlertsHandler
}
func (w WebService) ServeForever() error {
@ -56,6 +57,7 @@ func (w WebService) ServeForever() error {
exp.Handle("/", w.StatusHandler)
exp.Handle("/databases", w.DatabasesHandler)
exp.Handle("/alerts", w.AlertsHandler)
exp.HandleFunc("/graph", graphHandler)
exp.Handle("/api/", gorest.Handle())