Log snapshot sizes on maintenance (#1155)

* Log snapshot sizes on maintenance

* Add metrics for snapshot sizes

This change adds two new gauges that report the size, in bytes, of the most recent snapshot:

  - alertmanager_nflog_snapshot_size_bytes
  - alertmanager_silences_snapshot_size_bytes
This commit is contained in:
pasquier-s 2018-01-10 14:53:57 +01:00 committed by stuart nelson
parent 7b787dab05
commit a7d4e4ea7c
2 changed files with 24 additions and 7 deletions

View File

@ -14,7 +14,7 @@
// Package nflog implements a garbage-collected and snapshottable append-only log of
// active/resolved notifications. Each log entry stores the active/resolved state,
// the notified receiver, and a hash digest of the notification's identifying contents.
// The log can be queried along different paramters.
// The log can be queried along different parameters.
package nflog
import (
@ -117,6 +117,7 @@ type nlog struct {
type metrics struct {
gcDuration prometheus.Summary
snapshotDuration prometheus.Summary
snapshotSize prometheus.Gauge
queriesTotal prometheus.Counter
queryErrorsTotal prometheus.Counter
queryDuration prometheus.Histogram
@ -133,6 +134,10 @@ func newMetrics(r prometheus.Registerer) *metrics {
Name: "alertmanager_nflog_snapshot_duration_seconds",
Help: "Duration of the last notification log snapshot.",
})
m.snapshotSize = prometheus.NewGauge(prometheus.GaugeOpts{
Name: "alertmanager_nflog_snapshot_size_bytes",
Help: "Size of the last notification log snapshot in bytes.",
})
m.queriesTotal = prometheus.NewCounter(prometheus.CounterOpts{
Name: "alertmanager_nflog_queries_total",
Help: "Number of notification log queries were received.",
@ -284,8 +289,12 @@ func (l *nlog) run() {
f := func() error {
start := l.now()
var size int
level.Info(l.logger).Log("msg", "Running maintenance")
defer level.Info(l.logger).Log("msg", "Maintenance done", "duration", l.now().Sub(start))
defer func() {
level.Info(l.logger).Log("msg", "Maintenance done", "duration", l.now().Sub(start), "size", size)
l.metrics.snapshotSize.Set(float64(size))
}()
if _, err := l.GC(); err != nil {
return err
@ -297,8 +306,7 @@ func (l *nlog) run() {
if err != nil {
return err
}
// TODO(fabxc): potentially expose snapshot size in log message.
if _, err := l.Snapshot(f); err != nil {
if size, err = l.Snapshot(f); err != nil {
return err
}
return f.Close()

View File

@ -111,6 +111,7 @@ type Silences struct {
type metrics struct {
gcDuration prometheus.Summary
snapshotDuration prometheus.Summary
snapshotSize prometheus.Gauge
queriesTotal prometheus.Counter
queryErrorsTotal prometheus.Counter
queryDuration prometheus.Histogram
@ -147,6 +148,10 @@ func newMetrics(r prometheus.Registerer, s *Silences) *metrics {
Name: "alertmanager_silences_snapshot_duration_seconds",
Help: "Duration of the last silence snapshot.",
})
m.snapshotSize = prometheus.NewGauge(prometheus.GaugeOpts{
Name: "alertmanager_silences_snapshot_size_bytes",
Help: "Size of the last silence snapshot in bytes.",
})
m.queriesTotal = prometheus.NewCounter(prometheus.CounterOpts{
Name: "alertmanager_silences_queries_total",
Help: "How many silence queries were received.",
@ -169,6 +174,7 @@ func newMetrics(r prometheus.Registerer, s *Silences) *metrics {
r.MustRegister(
m.gcDuration,
m.snapshotDuration,
m.snapshotSize,
m.queriesTotal,
m.queryErrorsTotal,
m.queryDuration,
@ -259,8 +265,12 @@ func (s *Silences) Maintenance(interval time.Duration, snapf string, stopc <-cha
f := func() error {
start := s.now()
var size int
level.Info(s.logger).Log("msg", "Running maintenance")
defer level.Info(s.logger).Log("msg", "Maintenance done", "duration", s.now().Sub(start))
defer func() {
level.Info(s.logger).Log("msg", "Maintenance done", "duration", s.now().Sub(start), "size", size)
s.metrics.snapshotSize.Set(float64(size))
}()
if _, err := s.GC(); err != nil {
return err
@ -272,8 +282,7 @@ func (s *Silences) Maintenance(interval time.Duration, snapf string, stopc <-cha
if err != nil {
return err
}
// TODO(fabxc): potentially expose snapshot size in log message.
if _, err := s.Snapshot(f); err != nil {
if size, err = s.Snapshot(f); err != nil {
return err
}
return f.Close()