diff --git a/nflog/nflog.go b/nflog/nflog.go index 5066f918..4eb67a32 100644 --- a/nflog/nflog.go +++ b/nflog/nflog.go @@ -14,7 +14,7 @@ // Package nflog implements a garbage-collected and snapshottable append-only log of // active/resolved notifications. Each log entry stores the active/resolved state, // the notified receiver, and a hash digest of the notification's identifying contents. -// The log can be queried along different paramters. +// The log can be queried along different parameters. package nflog import ( @@ -117,6 +117,7 @@ type nlog struct { type metrics struct { gcDuration prometheus.Summary snapshotDuration prometheus.Summary + snapshotSize prometheus.Gauge queriesTotal prometheus.Counter queryErrorsTotal prometheus.Counter queryDuration prometheus.Histogram @@ -133,6 +134,10 @@ func newMetrics(r prometheus.Registerer) *metrics { Name: "alertmanager_nflog_snapshot_duration_seconds", Help: "Duration of the last notification log snapshot.", }) + m.snapshotSize = prometheus.NewGauge(prometheus.GaugeOpts{ + Name: "alertmanager_nflog_snapshot_size_bytes", + Help: "Size of the last notification log snapshot in bytes.", + }) m.queriesTotal = prometheus.NewCounter(prometheus.CounterOpts{ Name: "alertmanager_nflog_queries_total", Help: "Number of notification log queries were received.", @@ -284,8 +289,12 @@ func (l *nlog) run() { f := func() error { start := l.now() + var size int level.Info(l.logger).Log("msg", "Running maintenance") - defer level.Info(l.logger).Log("msg", "Maintenance done", "duration", l.now().Sub(start)) + defer func() { + level.Info(l.logger).Log("msg", "Maintenance done", "duration", l.now().Sub(start), "size", size) + l.metrics.snapshotSize.Set(float64(size)) + }() if _, err := l.GC(); err != nil { return err @@ -297,8 +306,7 @@ func (l *nlog) run() { if err != nil { return err } - // TODO(fabxc): potentially expose snapshot size in log message. - if _, err := l.Snapshot(f); err != nil { + if size, err = l.Snapshot(f); err != nil { return err } return f.Close() diff --git a/silence/silence.go b/silence/silence.go index 020d8469..8dcfe335 100644 --- a/silence/silence.go +++ b/silence/silence.go @@ -111,6 +111,7 @@ type Silences struct { type metrics struct { gcDuration prometheus.Summary snapshotDuration prometheus.Summary + snapshotSize prometheus.Gauge queriesTotal prometheus.Counter queryErrorsTotal prometheus.Counter queryDuration prometheus.Histogram @@ -147,6 +148,10 @@ func newMetrics(r prometheus.Registerer, s *Silences) *metrics { Name: "alertmanager_silences_snapshot_duration_seconds", Help: "Duration of the last silence snapshot.", }) + m.snapshotSize = prometheus.NewGauge(prometheus.GaugeOpts{ + Name: "alertmanager_silences_snapshot_size_bytes", + Help: "Size of the last silence snapshot in bytes.", + }) m.queriesTotal = prometheus.NewCounter(prometheus.CounterOpts{ Name: "alertmanager_silences_queries_total", Help: "How many silence queries were received.", @@ -169,6 +174,7 @@ func newMetrics(r prometheus.Registerer, s *Silences) *metrics { r.MustRegister( m.gcDuration, m.snapshotDuration, + m.snapshotSize, m.queriesTotal, m.queryErrorsTotal, m.queryDuration, @@ -259,8 +265,12 @@ func (s *Silences) Maintenance(interval time.Duration, snapf string, stopc <-cha f := func() error { start := s.now() + var size int level.Info(s.logger).Log("msg", "Running maintenance") - defer level.Info(s.logger).Log("msg", "Maintenance done", "duration", s.now().Sub(start)) + defer func() { + level.Info(s.logger).Log("msg", "Maintenance done", "duration", s.now().Sub(start), "size", size) + s.metrics.snapshotSize.Set(float64(size)) + }() if _, err := s.GC(); err != nil { return err @@ -272,8 +282,7 @@ func (s *Silences) Maintenance(interval time.Duration, snapf string, stopc <-cha if err != nil { return err } - // TODO(fabxc): potentially expose snapshot size in log message. - if _, err := s.Snapshot(f); err != nil { + if size, err = s.Snapshot(f); err != nil { return err } return f.Close()