Storage: Add crash recovery metric 'started_dirty'

...to indicate when crash recovery was invoked during Prometheus startup. Fixes #1918.
2025-02-21 23:26:59 +00:00 · 2016-08-27 21:39:27 +02:00 · 2016-08-27 21:39:27 +02:00 · e618af5d0b
commit e618af5d0b
parent 0ac2dbe6aa
1 changed files with 12 additions and 0 deletions
--- a/storage/local/persistence.go
+++ b/storage/local/persistence.go
@ -115,6 +115,7 @@ type persistence struct {
 	indexingBatchDuration prometheus.Summary
 	checkpointDuration    prometheus.Gauge
 	dirtyCounter          prometheus.Counter
+	startedDirty          prometheus.Gauge

 	dirtyMtx       sync.Mutex     // Protects dirty and becameDirty.
 	dirty          bool           // true if persistence was started in dirty state.
@ -245,6 +246,12 @@ func newPersistence(
 			Name:      "inconsistencies_total",
 			Help:      "A counter incremented each time an inconsistency in the local storage is detected. If this is greater zero, restart the server as soon as possible.",
 		}),
+		startedDirty: prometheus.NewGauge(prometheus.GaugeOpts{
+			Namespace: namespace,
+			Subsystem: subsystem,
+			Name:      "started_dirty",
+			Help:      "Whether the local storage was found to be dirty (and crash recovery occurred) during Prometheus startup.",
+		}),
 		dirty:          dirty,
 		pedanticChecks: pedanticChecks,
 		dirtyFileName:  dirtyPath,
@ -291,6 +298,7 @@ func (p *persistence) Describe(ch chan<- *prometheus.Desc) {
 	p.indexingBatchDuration.Describe(ch)
 	ch <- p.checkpointDuration.Desc()
 	ch <- p.dirtyCounter.Desc()
+	ch <- p.startedDirty.Desc()
 }

 // Collect implements prometheus.Collector.
@ -303,6 +311,7 @@ func (p *persistence) Collect(ch chan<- prometheus.Metric) {
 	p.indexingBatchDuration.Collect(ch)
 	ch <- p.checkpointDuration
 	ch <- p.dirtyCounter
+	ch <- p.startedDirty
 }

 // isDirty returns the dirty flag in a goroutine-safe way.
@ -700,10 +709,13 @@ func (p *persistence) loadSeriesMapAndHeads() (sm *seriesMap, chunksToPersist in
 	defer func() {
 		if p.dirty {
 			log.Warn("Persistence layer appears dirty.")
+			p.startedDirty.Set(1)
 			err = p.recoverFromCrash(fingerprintToSeries)
 			if err != nil {
 				sm = nil
 			}
+		} else {
+			p.startedDirty.Set(0)
 		}
 	}()