Add comments, rename a method
parent bf6abac8f4
commit 0cf3c6a9ef
@@ -159,7 +159,7 @@ func (n *Handler) nextBatch() []*model.Alert {
 
 // Run dispatches notifications continuously.
 func (n *Handler) Run() {
-    // Just warn one in the beginning to prevent nosiy logs.
+    // Just warn once in the beginning to prevent noisy logs.
     if n.opts.AlertmanagerURL == "" {
         log.Warnf("No AlertManager configured, not dispatching any alerts")
     }
@@ -63,11 +63,13 @@ func (s AlertState) String() string {
 
 // Alert is the user-level representation of a single instance of an alerting rule.
 type Alert struct {
     State  AlertState
     Labels model.LabelSet
-    Value      model.SampleValue
-    ActiveAt   model.Time
-    ResolvedAt model.Time
+    // The value at the last evaluation of the alerting expression.
+    Value model.SampleValue
+    // The interval during which the condition of this alert held true.
+    // ResolvedAt will be 0 to indicate a still active alert.
+    ActiveAt, ResolvedAt model.Time
 }
 
 // An AlertingRule generates alerts from its vector expression.
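
The new field comments establish a convention the rest of the diff relies on: ResolvedAt stays zero while an alert is still active. A minimal, self-contained sketch of that check, using stand-in types rather than the model.Time-based ones in this commit:

package main

import (
    "fmt"
    "time"
)

// Stand-in for the Alert struct above; time.Time replaces model.Time here
// purely for illustration.
type alert struct {
    Name       string
    ActiveAt   time.Time
    ResolvedAt time.Time // zero value means the alert is still active
}

// activeOnly mirrors the idea behind ActiveAlerts: keep only alerts whose
// ResolvedAt has not been set yet.
func activeOnly(alerts []alert) []alert {
    var res []alert
    for _, a := range alerts {
        if a.ResolvedAt.IsZero() {
            res = append(res, a)
        }
    }
    return res
}

func main() {
    now := time.Now()
    alerts := []alert{
        {Name: "HighLatency", ActiveAt: now},
        {Name: "DiskFull", ActiveAt: now.Add(-time.Hour), ResolvedAt: now},
    }
    fmt.Println(len(activeOnly(alerts))) // 1
}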
@@ -109,7 +111,6 @@ func (rule *AlertingRule) Name() string {
 }
 
 func (r *AlertingRule) sample(alert *Alert, ts model.Time, set bool) *model.Sample {
-    // Build alert labels in order they can be overwritten.
     metric := model.Metric(r.labels.Clone())
 
     for ln, lv := range alert.Labels {
@@ -180,7 +181,7 @@ func (r *AlertingRule) eval(ts model.Time, engine *promql.Engine) (model.Vector,
         if a.State != StateInactive {
             vec = append(vec, r.sample(a, ts, false))
         }
-        // If the alert was previously firing, keep it aroud for a given
+        // If the alert was previously firing, keep it around for a given
         // retention time so it is reported as resolved to the AlertManager.
         if a.State == StatePending || (a.ResolvedAt != 0 && ts.Sub(a.ResolvedAt) > resolvedRetention) {
             delete(r.active, fp)
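
The comment fixed here describes a retention rule: a resolved alert is kept in the rule's active set for a grace period so it can still be reported as resolved, and is dropped only afterwards. A sketch of that check under assumed types, with time.Time in place of model.Time and an illustrative 15-minute value rather than the commit's actual resolvedRetention constant:

package main

import (
    "fmt"
    "time"
)

// Illustrative value only; the actual resolvedRetention constant is not shown
// in this diff.
const resolvedRetention = 15 * time.Minute

// shouldDrop reports whether a resolved alert has been kept long enough to be
// removed from the active set.
func shouldDrop(resolvedAt, now time.Time) bool {
    return !resolvedAt.IsZero() && now.Sub(resolvedAt) > resolvedRetention
}

func main() {
    now := time.Now()
    fmt.Println(shouldDrop(now.Add(-5*time.Minute), now))  // false: still within retention
    fmt.Println(shouldDrop(now.Add(-30*time.Minute), now)) // true: past retention
}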
@@ -203,6 +204,8 @@ func (r *AlertingRule) eval(ts model.Time, engine *promql.Engine) (model.Vector,
     return vec, nil
 }
 
+// State returns the maximum state of alert instances for this rule.
+// StateFiring > StatePending > StateInactive
 func (r *AlertingRule) State() AlertState {
     r.mtx.Lock()
     defer r.mtx.Unlock()
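
The added doc comment leans on the ordering StateFiring > StatePending > StateInactive. A self-contained sketch of how that ordering makes "maximum state" a plain numeric comparison, assuming the states are ascending iota constants (which the comparison implies, though the declarations are not part of this diff):

package main

import "fmt"

type AlertState int

// Ascending order is what makes ">" meaningful for states.
const (
    StateInactive AlertState = iota
    StatePending
    StateFiring
)

// maxState returns the highest state among the given alert instances.
func maxState(states ...AlertState) AlertState {
    maxSoFar := StateInactive
    for _, s := range states {
        if s > maxSoFar {
            maxSoFar = s
        }
    }
    return maxSoFar
}

func main() {
    fmt.Println(maxState(StateInactive, StatePending, StateFiring) == StateFiring) // true
}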
@@ -219,7 +222,7 @@ func (r *AlertingRule) State() AlertState {
 // ActiveAlerts returns a slice of active alerts.
 func (r *AlertingRule) ActiveAlerts() []*Alert {
     var res []*Alert
-    for _, a := range r.recentAlerts() {
+    for _, a := range r.currentAlerts() {
         if a.ResolvedAt == 0 {
             res = append(res, a)
         }
@@ -227,7 +230,9 @@ func (r *AlertingRule) ActiveAlerts() []*Alert {
     return res
 }
 
-func (r *AlertingRule) recentAlerts() []*Alert {
+// currentAlerts returns all instances of alerts for this rule. This may include
+// inactive alerts that were previously firing.
+func (r *AlertingRule) currentAlerts() []*Alert {
     r.mtx.Lock()
     defer r.mtx.Unlock()
 
@@ -99,6 +99,7 @@ type Rule interface {
     HTMLSnippet(pathPrefix string) html_template.HTML
 }
 
+// Group is a set of rules that have a logical relation.
 type Group struct {
     name     string
     interval time.Duration
@@ -160,6 +161,7 @@ func (g *Group) fingerprint() model.Fingerprint {
     return l.Fingerprint()
 }
 
+// offset returns until the next consistently slotted evaluation interval.
 func (g *Group) offset() time.Duration {
     now := time.Now().UnixNano()
 
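
The new comment on offset describes "consistently slotted" scheduling: evaluations align to wall-clock multiples of the group interval, shifted per group so that groups do not all fire at once. A sketch of such a calculation; the shift parameter stands in for whatever per-group value (for example, a fingerprint-derived offset) the real code uses, so this is an assumption about the mechanism, not the commit's implementation:

package main

import (
    "fmt"
    "time"
)

// offsetUntilNextSlot returns how long to wait until the next evaluation slot.
// Slots are aligned to multiples of interval since the Unix epoch, shifted by
// a fixed per-group amount so that different groups land in different slots.
func offsetUntilNextSlot(now time.Time, interval, shift time.Duration) time.Duration {
    n := now.UnixNano()
    base := n - n%int64(interval)
    next := base + int64(shift%interval)
    if next <= n {
        next += int64(interval)
    }
    return time.Duration(next - n)
}

func main() {
    d := offsetUntilNextSlot(time.Now(), 15*time.Second, 3*time.Second)
    fmt.Println("next evaluation in", d)
}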
@@ -175,6 +177,7 @@ func (g *Group) offset() time.Duration {
     return time.Duration(next - now)
 }
 
+// copyState copies the alerting rule state from the given group.
 func (g *Group) copyState(from *Group) {
     for _, fromRule := range from.rules {
         far, ok := fromRule.(*AlertingRule)
@@ -193,6 +196,9 @@ func (g *Group) copyState(from *Group) {
     }
 }
 
+// eval runs a single evaluation cycle in which all rules are evaluated in parallel.
+// In the future a single group will be evaluated sequentially to properly handle
+// rule dependency.
 func (g *Group) eval() {
     var (
         now = model.Now()
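
The new comment on eval documents that all rules in a group are currently evaluated in parallel, with sequential evaluation planned for rule dependencies. A minimal sketch of that fan-out/wait pattern; the rule type here is a placeholder, not the package's Rule interface:

package main

import (
    "fmt"
    "sync"
)

type rule struct{ name string }

// eval stands in for a single rule evaluation.
func (r rule) eval() { fmt.Println("evaluating", r.name) }

// evalGroup evaluates every rule in its own goroutine and waits for all of
// them to finish, mirroring the "parallel within a group" behaviour described
// in the comment.
func evalGroup(rules []rule) {
    var wg sync.WaitGroup
    for _, r := range rules {
        wg.Add(1)
        go func(r rule) {
            defer wg.Done()
            r.eval()
        }(r)
    }
    wg.Wait()
}

func main() {
    evalGroup([]rule{{"InstanceDown"}, {"HighErrorRate"}})
}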
@@ -239,10 +245,11 @@ func (g *Group) eval() {
     wg.Wait()
 }
 
+// sendAlerts sends alert notifications for the given rule.
 func (g *Group) sendAlerts(rule *AlertingRule, timestamp model.Time) error {
     var alerts model.Alerts
 
-    for _, alert := range rule.recentAlerts() {
+    for _, alert := range rule.currentAlerts() {
         // Only send actually firing alerts.
         if alert.State == StatePending {
             continue
@@ -407,6 +414,9 @@ func (m *Manager) ApplyConfig(conf *config.Config) bool {
     return true
 }
 
+// loadGroups reads groups from a list of files.
+// As there's currently no group syntax a single group named "default" containing
+// all rules will be returned.
 func (m *Manager) loadGroups(filenames ...string) (map[string]*Group, error) {
     groups := map[string]*Group{}
 
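
The new comment on loadGroups notes that, without a group syntax in rule files yet, every rule ends up in a single group named "default". A toy sketch of that collapsing step, with plain strings standing in for parsed rules and file contents (the real function parses files and returns *Group values):

package main

import "fmt"

// loadGroups collapses the rules parsed from each file into one "default"
// group, as the comment above describes for the current file format.
func loadGroups(rulesPerFile map[string][]string) map[string][]string {
    groups := map[string][]string{}
    for _, rules := range rulesPerFile {
        groups["default"] = append(groups["default"], rules...)
    }
    return groups
}

func main() {
    groups := loadGroups(map[string][]string{
        "alerts.rules":  {"InstanceDown", "HighErrorRate"},
        "records.rules": {"job:requests:rate5m"},
    })
    fmt.Println(len(groups), len(groups["default"])) // 1 3
}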