Delete temp checkpoint folder on error. (#415)

This commit is contained in:
Krasi Georgiev 2019-01-07 11:43:33 +03:00 committed by GitHub
parent 296f943ec4
commit 8d991bdc1e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 43 additions and 2 deletions

View File

@ -128,7 +128,7 @@ func Checkpoint(w *wal.WAL, from, to int, keep func(id uint64) bool, mint int64)
defer sgmReader.Close() defer sgmReader.Close()
} }
cpdir := filepath.Join(w.Dir(), fmt.Sprintf("checkpoint.%06d", to)) cpdir := filepath.Join(w.Dir(), fmt.Sprintf(checkpointPrefix+"%06d", to))
cpdirtmp := cpdir + ".tmp" cpdirtmp := cpdir + ".tmp"
if err := os.MkdirAll(cpdirtmp, 0777); err != nil { if err := os.MkdirAll(cpdirtmp, 0777); err != nil {
@ -139,6 +139,12 @@ func Checkpoint(w *wal.WAL, from, to int, keep func(id uint64) bool, mint int64)
return nil, errors.Wrap(err, "open checkpoint") return nil, errors.Wrap(err, "open checkpoint")
} }
// Ensures that an early return caused by an error doesn't leave any tmp files.
defer func() {
cp.Close()
os.RemoveAll(cpdirtmp)
}()
r := wal.NewReader(sgmReader) r := wal.NewReader(sgmReader)
var ( var (

View File

@ -15,11 +15,14 @@
package tsdb package tsdb
import ( import (
"fmt"
"io/ioutil" "io/ioutil"
"os" "os"
"path/filepath" "path/filepath"
"strings"
"testing" "testing"
"github.com/pkg/errors"
"github.com/prometheus/tsdb/fileutil" "github.com/prometheus/tsdb/fileutil"
"github.com/prometheus/tsdb/labels" "github.com/prometheus/tsdb/labels"
"github.com/prometheus/tsdb/testutil" "github.com/prometheus/tsdb/testutil"
@ -180,3 +183,30 @@ func TestCheckpoint(t *testing.T) {
{Ref: 4, Labels: labels.FromStrings("a", "b", "c", "4")}, {Ref: 4, Labels: labels.FromStrings("a", "b", "c", "4")},
}, series) }, series)
} }
func TestCheckpointNoTmpFolderAfterError(t *testing.T) {
// Create a new wal with an invalid records.
dir, err := ioutil.TempDir("", "test_checkpoint")
testutil.Ok(t, err)
defer os.RemoveAll(dir)
w, err := wal.NewSize(nil, nil, dir, 64*1024)
testutil.Ok(t, err)
testutil.Ok(t, w.Log([]byte{99}))
w.Close()
// Run the checkpoint and since the wal contains an invalid records this should return an error.
_, err = Checkpoint(w, 0, 1, nil, 0)
testutil.NotOk(t, err)
// Walk the wal dir to make sure there are no tmp folder left behind after the error.
err = filepath.Walk(w.Dir(), func(path string, info os.FileInfo, err error) error {
if err != nil {
return errors.Wrapf(err, "access err %q: %v\n", path, err)
}
if info.IsDir() && strings.HasSuffix(info.Name(), ".tmp") {
return fmt.Errorf("wal dir contains temporary folder:%s", info.Name())
}
return nil
})
testutil.Ok(t, err)
}

View File

@ -164,6 +164,7 @@ type WAL struct {
page *page // active page page *page // active page
stopc chan chan struct{} stopc chan chan struct{}
actorc chan func() actorc chan func()
closed bool // To allow calling Close() more than once without blocking.
fsyncDuration prometheus.Summary fsyncDuration prometheus.Summary
pageFlushes prometheus.Counter pageFlushes prometheus.Counter
@ -584,6 +585,10 @@ func (w *WAL) Close() (err error) {
w.mtx.Lock() w.mtx.Lock()
defer w.mtx.Unlock() defer w.mtx.Unlock()
if w.closed {
return nil
}
// Flush the last page and zero out all its remaining size. // Flush the last page and zero out all its remaining size.
// We must not flush an empty page as it would falsely signal // We must not flush an empty page as it would falsely signal
// the segment is done if we start writing to it again after opening. // the segment is done if we start writing to it again after opening.
@ -603,7 +608,7 @@ func (w *WAL) Close() (err error) {
if err := w.segment.Close(); err != nil { if err := w.segment.Close(); err != nil {
level.Error(w.logger).Log("msg", "close previous segment", "err", err) level.Error(w.logger).Log("msg", "close previous segment", "err", err)
} }
w.closed = true
return nil return nil
} }