Delete temp checkpoint folder on error. (#415)
This commit is contained in:
parent
296f943ec4
commit
8d991bdc1e
|
@ -128,7 +128,7 @@ func Checkpoint(w *wal.WAL, from, to int, keep func(id uint64) bool, mint int64)
|
||||||
defer sgmReader.Close()
|
defer sgmReader.Close()
|
||||||
}
|
}
|
||||||
|
|
||||||
cpdir := filepath.Join(w.Dir(), fmt.Sprintf("checkpoint.%06d", to))
|
cpdir := filepath.Join(w.Dir(), fmt.Sprintf(checkpointPrefix+"%06d", to))
|
||||||
cpdirtmp := cpdir + ".tmp"
|
cpdirtmp := cpdir + ".tmp"
|
||||||
|
|
||||||
if err := os.MkdirAll(cpdirtmp, 0777); err != nil {
|
if err := os.MkdirAll(cpdirtmp, 0777); err != nil {
|
||||||
|
@ -139,6 +139,12 @@ func Checkpoint(w *wal.WAL, from, to int, keep func(id uint64) bool, mint int64)
|
||||||
return nil, errors.Wrap(err, "open checkpoint")
|
return nil, errors.Wrap(err, "open checkpoint")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Ensures that an early return caused by an error doesn't leave any tmp files.
|
||||||
|
defer func() {
|
||||||
|
cp.Close()
|
||||||
|
os.RemoveAll(cpdirtmp)
|
||||||
|
}()
|
||||||
|
|
||||||
r := wal.NewReader(sgmReader)
|
r := wal.NewReader(sgmReader)
|
||||||
|
|
||||||
var (
|
var (
|
||||||
|
|
|
@ -15,11 +15,14 @@
|
||||||
package tsdb
|
package tsdb
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"fmt"
|
||||||
"io/ioutil"
|
"io/ioutil"
|
||||||
"os"
|
"os"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
|
"strings"
|
||||||
"testing"
|
"testing"
|
||||||
|
|
||||||
|
"github.com/pkg/errors"
|
||||||
"github.com/prometheus/tsdb/fileutil"
|
"github.com/prometheus/tsdb/fileutil"
|
||||||
"github.com/prometheus/tsdb/labels"
|
"github.com/prometheus/tsdb/labels"
|
||||||
"github.com/prometheus/tsdb/testutil"
|
"github.com/prometheus/tsdb/testutil"
|
||||||
|
@ -180,3 +183,30 @@ func TestCheckpoint(t *testing.T) {
|
||||||
{Ref: 4, Labels: labels.FromStrings("a", "b", "c", "4")},
|
{Ref: 4, Labels: labels.FromStrings("a", "b", "c", "4")},
|
||||||
}, series)
|
}, series)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestCheckpointNoTmpFolderAfterError(t *testing.T) {
|
||||||
|
// Create a new wal with an invalid records.
|
||||||
|
dir, err := ioutil.TempDir("", "test_checkpoint")
|
||||||
|
testutil.Ok(t, err)
|
||||||
|
defer os.RemoveAll(dir)
|
||||||
|
w, err := wal.NewSize(nil, nil, dir, 64*1024)
|
||||||
|
testutil.Ok(t, err)
|
||||||
|
testutil.Ok(t, w.Log([]byte{99}))
|
||||||
|
w.Close()
|
||||||
|
|
||||||
|
// Run the checkpoint and since the wal contains an invalid records this should return an error.
|
||||||
|
_, err = Checkpoint(w, 0, 1, nil, 0)
|
||||||
|
testutil.NotOk(t, err)
|
||||||
|
|
||||||
|
// Walk the wal dir to make sure there are no tmp folder left behind after the error.
|
||||||
|
err = filepath.Walk(w.Dir(), func(path string, info os.FileInfo, err error) error {
|
||||||
|
if err != nil {
|
||||||
|
return errors.Wrapf(err, "access err %q: %v\n", path, err)
|
||||||
|
}
|
||||||
|
if info.IsDir() && strings.HasSuffix(info.Name(), ".tmp") {
|
||||||
|
return fmt.Errorf("wal dir contains temporary folder:%s", info.Name())
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
})
|
||||||
|
testutil.Ok(t, err)
|
||||||
|
}
|
||||||
|
|
|
@ -164,6 +164,7 @@ type WAL struct {
|
||||||
page *page // active page
|
page *page // active page
|
||||||
stopc chan chan struct{}
|
stopc chan chan struct{}
|
||||||
actorc chan func()
|
actorc chan func()
|
||||||
|
closed bool // To allow calling Close() more than once without blocking.
|
||||||
|
|
||||||
fsyncDuration prometheus.Summary
|
fsyncDuration prometheus.Summary
|
||||||
pageFlushes prometheus.Counter
|
pageFlushes prometheus.Counter
|
||||||
|
@ -584,6 +585,10 @@ func (w *WAL) Close() (err error) {
|
||||||
w.mtx.Lock()
|
w.mtx.Lock()
|
||||||
defer w.mtx.Unlock()
|
defer w.mtx.Unlock()
|
||||||
|
|
||||||
|
if w.closed {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
// Flush the last page and zero out all its remaining size.
|
// Flush the last page and zero out all its remaining size.
|
||||||
// We must not flush an empty page as it would falsely signal
|
// We must not flush an empty page as it would falsely signal
|
||||||
// the segment is done if we start writing to it again after opening.
|
// the segment is done if we start writing to it again after opening.
|
||||||
|
@ -603,7 +608,7 @@ func (w *WAL) Close() (err error) {
|
||||||
if err := w.segment.Close(); err != nil {
|
if err := w.segment.Close(); err != nil {
|
||||||
level.Error(w.logger).Log("msg", "close previous segment", "err", err)
|
level.Error(w.logger).Log("msg", "close previous segment", "err", err)
|
||||||
}
|
}
|
||||||
|
w.closed = true
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue