tsdb/wlog: unregister metrics on WL close

Thanos can create and destroy TSDBs dynamically, and once a TSDB
disappears its files are deleted. Unregister the WL metrics in
Close() so that they do not outlive the TSDB's files; otherwise,
calculating the size of the WAL fails with errors like:

```
msg: "Failed to calculate size of "wal" dir", "err": "lstat
/tsdbdir/wal: no such file or directory", "caller": "wlog.go:271"
```

Signed-off-by: Giedrius Statkevičius <giedrius.statkevicius@vinted.com>
This commit is contained in:
Giedrius Statkevičius 2024-04-11 11:30:05 +03:00
parent 8b72ed77f8
commit 3b8fe00767
1 changed file with 21 additions and 1 deletion

View File

@ -228,10 +228,28 @@ type wlMetrics struct {
currentSegment prometheus.Gauge currentSegment prometheus.Gauge
writesFailed prometheus.Counter writesFailed prometheus.Counter
walFileSize prometheus.GaugeFunc walFileSize prometheus.GaugeFunc
r prometheus.Registerer
}
func (w *wlMetrics) Unregister() {
if w.r == nil {
return
}
w.r.Unregister(w.fsyncDuration)
w.r.Unregister(w.pageFlushes)
w.r.Unregister(w.pageCompletions)
w.r.Unregister(w.truncateFail)
w.r.Unregister(w.truncateTotal)
w.r.Unregister(w.currentSegment)
w.r.Unregister(w.writesFailed)
w.r.Unregister(w.walFileSize)
} }
func newWLMetrics(w *WL, r prometheus.Registerer) *wlMetrics { func newWLMetrics(w *WL, r prometheus.Registerer) *wlMetrics {
m := &wlMetrics{} m := &wlMetrics{
r: r,
}
m.fsyncDuration = prometheus.NewSummary(prometheus.SummaryOpts{ m.fsyncDuration = prometheus.NewSummary(prometheus.SummaryOpts{
Name: "fsync_duration_seconds", Name: "fsync_duration_seconds",
@ -877,6 +895,8 @@ func (w *WL) Close() (err error) {
if err := w.segment.Close(); err != nil { if err := w.segment.Close(); err != nil {
level.Error(w.logger).Log("msg", "close previous segment", "err", err) level.Error(w.logger).Log("msg", "close previous segment", "err", err)
} }
w.metrics.Unregister()
w.closed = true w.closed = true
return nil return nil
} }