From ed5f68f3824aadfbf3eda6de5d95d043da6db438 Mon Sep 17 00:00:00 2001 From: beorn7 Date: Thu, 6 Apr 2017 01:36:29 +0200 Subject: [PATCH] storage: Increment s.persistErrors on all persist errors Fixes #2091 --- storage/local/storage.go | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/storage/local/storage.go b/storage/local/storage.go index 574808055..d96185c08 100644 --- a/storage/local/storage.go +++ b/storage/local/storage.go @@ -231,7 +231,7 @@ func NewMemorySeriesStorage(o *MemorySeriesStorageOptions) *MemorySeriesStorage Namespace: namespace, Subsystem: subsystem, Name: "persist_errors_total", - Help: "The total number of errors while persisting chunks.", + Help: "The total number of errors while writing to the persistence layer.", }), queuedChunksToPersist: prometheus.NewCounter(prometheus.CounterOpts{ Namespace: namespace, @@ -1449,6 +1449,7 @@ func (s *MemorySeriesStorage) loop() { s.dirtySeries.Set(0) err := s.persistence.checkpointSeriesMapAndHeads(s.fpToSeries, s.fpLocker) if err != nil { + s.persistErrors.Inc() log.Errorln("Error while checkpointing:", err) } // If a checkpoint takes longer than checkpointInterval, unluckily timed @@ -1713,14 +1714,20 @@ func (s *MemorySeriesStorage) maintainArchivedSeries(fp model.Fingerprint, befor newFirstTime, _, _, allDropped, err := s.persistence.dropAndPersistChunks(fp, beforeTime, nil) if err != nil { + // TODO(beorn7): Should quarantine the series. + s.persistErrors.Inc() log.Error("Error dropping persisted chunks: ", err) } if allDropped { - s.persistence.purgeArchivedMetric(fp) // Ignoring error. Nothing we can do. + if err := s.persistence.purgeArchivedMetric(fp); err != nil { + s.persistErrors.Inc() + // purgeArchivedMetric logs the error already. + } s.seriesOps.WithLabelValues(archivePurge).Inc() return } if err := s.persistence.updateArchivedTimeRange(fp, newFirstTime, lastTime); err != nil { + s.persistErrors.Inc() log.Errorf("Error updating archived time range for fingerprint %v: %s", fp, err) } }