Log snapshot sizes on maintenance (#1155)
* Log snapshot sizes on maintenance
* Add metrics for snapshot sizes

This change adds 2 new gauges for tracking the last snapshots' sizes:
- alertmanager_nflog_snapshot_size_bytes
- alertmanager_silences_snapshot_size_bytes
This commit is contained in:
parent
7b787dab05
commit
a7d4e4ea7c
|
@ -14,7 +14,7 @@
|
|||
// Package nflog implements a garbage-collected and snapshottable append-only log of
|
||||
// active/resolved notifications. Each log entry stores the active/resolved state,
|
||||
// the notified receiver, and a hash digest of the notification's identifying contents.
|
||||
// The log can be queried along different paramters.
|
||||
// The log can be queried along different parameters.
|
||||
package nflog
|
||||
|
||||
import (
|
||||
|
@ -117,6 +117,7 @@ type nlog struct {
|
|||
type metrics struct {
|
||||
gcDuration prometheus.Summary
|
||||
snapshotDuration prometheus.Summary
|
||||
snapshotSize prometheus.Gauge
|
||||
queriesTotal prometheus.Counter
|
||||
queryErrorsTotal prometheus.Counter
|
||||
queryDuration prometheus.Histogram
|
||||
|
@ -133,6 +134,10 @@ func newMetrics(r prometheus.Registerer) *metrics {
|
|||
Name: "alertmanager_nflog_snapshot_duration_seconds",
|
||||
Help: "Duration of the last notification log snapshot.",
|
||||
})
|
||||
m.snapshotSize = prometheus.NewGauge(prometheus.GaugeOpts{
|
||||
Name: "alertmanager_nflog_snapshot_size_bytes",
|
||||
Help: "Size of the last notification log snapshot in bytes.",
|
||||
})
|
||||
m.queriesTotal = prometheus.NewCounter(prometheus.CounterOpts{
|
||||
Name: "alertmanager_nflog_queries_total",
|
||||
Help: "Number of notification log queries were received.",
|
||||
|
@ -284,8 +289,12 @@ func (l *nlog) run() {
|
|||
|
||||
f := func() error {
|
||||
start := l.now()
|
||||
var size int
|
||||
level.Info(l.logger).Log("msg", "Running maintenance")
|
||||
defer level.Info(l.logger).Log("msg", "Maintenance done", "duration", l.now().Sub(start))
|
||||
defer func() {
|
||||
level.Info(l.logger).Log("msg", "Maintenance done", "duration", l.now().Sub(start), "size", size)
|
||||
l.metrics.snapshotSize.Set(float64(size))
|
||||
}()
|
||||
|
||||
if _, err := l.GC(); err != nil {
|
||||
return err
|
||||
|
@ -297,8 +306,7 @@ func (l *nlog) run() {
|
|||
if err != nil {
|
||||
return err
|
||||
}
|
||||
// TODO(fabxc): potentially expose snapshot size in log message.
|
||||
if _, err := l.Snapshot(f); err != nil {
|
||||
if size, err = l.Snapshot(f); err != nil {
|
||||
return err
|
||||
}
|
||||
return f.Close()
|
||||
|
|
|
@ -111,6 +111,7 @@ type Silences struct {
|
|||
type metrics struct {
|
||||
gcDuration prometheus.Summary
|
||||
snapshotDuration prometheus.Summary
|
||||
snapshotSize prometheus.Gauge
|
||||
queriesTotal prometheus.Counter
|
||||
queryErrorsTotal prometheus.Counter
|
||||
queryDuration prometheus.Histogram
|
||||
|
@ -147,6 +148,10 @@ func newMetrics(r prometheus.Registerer, s *Silences) *metrics {
|
|||
Name: "alertmanager_silences_snapshot_duration_seconds",
|
||||
Help: "Duration of the last silence snapshot.",
|
||||
})
|
||||
m.snapshotSize = prometheus.NewGauge(prometheus.GaugeOpts{
|
||||
Name: "alertmanager_silences_snapshot_size_bytes",
|
||||
Help: "Size of the last silence snapshot in bytes.",
|
||||
})
|
||||
m.queriesTotal = prometheus.NewCounter(prometheus.CounterOpts{
|
||||
Name: "alertmanager_silences_queries_total",
|
||||
Help: "How many silence queries were received.",
|
||||
|
@ -169,6 +174,7 @@ func newMetrics(r prometheus.Registerer, s *Silences) *metrics {
|
|||
r.MustRegister(
|
||||
m.gcDuration,
|
||||
m.snapshotDuration,
|
||||
m.snapshotSize,
|
||||
m.queriesTotal,
|
||||
m.queryErrorsTotal,
|
||||
m.queryDuration,
|
||||
|
@ -259,8 +265,12 @@ func (s *Silences) Maintenance(interval time.Duration, snapf string, stopc <-cha
|
|||
|
||||
f := func() error {
|
||||
start := s.now()
|
||||
var size int
|
||||
level.Info(s.logger).Log("msg", "Running maintenance")
|
||||
defer level.Info(s.logger).Log("msg", "Maintenance done", "duration", s.now().Sub(start))
|
||||
defer func() {
|
||||
level.Info(s.logger).Log("msg", "Maintenance done", "duration", s.now().Sub(start), "size", size)
|
||||
s.metrics.snapshotSize.Set(float64(size))
|
||||
}()
|
||||
|
||||
if _, err := s.GC(); err != nil {
|
||||
return err
|
||||
|
@ -272,8 +282,7 @@ func (s *Silences) Maintenance(interval time.Duration, snapf string, stopc <-cha
|
|||
if err != nil {
|
||||
return err
|
||||
}
|
||||
// TODO(fabxc): potentially expose snapshot size in log message.
|
||||
if _, err := s.Snapshot(f); err != nil {
|
||||
if size, err = s.Snapshot(f); err != nil {
|
||||
return err
|
||||
}
|
||||
return f.Close()
|
||||
|
|
Loading…
Reference in New Issue