prometheus/wal/wal_test.go

723 lines
16 KiB
Go

// Copyright 2017 The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package wal
import (
"bytes"
"encoding/binary"
"fmt"
"hash/crc32"
"io"
"io/ioutil"
"math/rand"
"os"
"path"
"path/filepath"
"sync"
"testing"
"time"
"github.com/prometheus/tsdb/testutil"
)
type record struct {
t recType
b []byte
}
var data = make([]byte, 100000)
var testReaderCases = []struct {
t []record
exp [][]byte
fail bool
}{
// Sequence of valid records.
{
t: []record{
{recFull, data[0:200]},
{recFirst, data[200:300]},
{recLast, data[300:400]},
{recFirst, data[400:800]},
{recMiddle, data[800:900]},
{recPageTerm, make([]byte, pageSize-900-recordHeaderSize*5-1)}, // exactly lines up with page boundary.
{recLast, data[900:900]},
{recFirst, data[900:1000]},
{recMiddle, data[1000:1200]},
{recMiddle, data[1200:30000]},
{recMiddle, data[30000:30001]},
{recMiddle, data[30001:30001]},
{recLast, data[30001:32000]},
},
exp: [][]byte{
data[0:200],
data[200:400],
data[400:900],
data[900:32000],
},
},
// Exactly at the limit of one page minus the header size
{
t: []record{
{recFull, data[0 : pageSize-recordHeaderSize]},
},
exp: [][]byte{
data[:pageSize-recordHeaderSize],
},
},
// More than a full page, this exceeds our buffer and can never happen
// when written by the WAL.
{
t: []record{
{recFull, data[0 : pageSize+1]},
},
fail: true,
},
// Invalid orders of record types.
{
t: []record{{recMiddle, data[:200]}},
fail: true,
},
{
t: []record{{recLast, data[:200]}},
fail: true,
},
{
t: []record{
{recFirst, data[:200]},
{recFull, data[200:400]},
},
fail: true,
},
{
t: []record{
{recFirst, data[:100]},
{recMiddle, data[100:200]},
{recFull, data[200:400]},
},
fail: true,
},
// Non-zero data after page termination.
{
t: []record{
{recFull, data[:100]},
{recPageTerm, append(make([]byte, 1000), 1)},
},
exp: [][]byte{data[:100]},
fail: true,
},
}
func encodedRecord(t recType, b []byte) []byte {
if t == recPageTerm {
return append([]byte{0}, b...)
}
r := make([]byte, recordHeaderSize)
r[0] = byte(t)
binary.BigEndian.PutUint16(r[1:], uint16(len(b)))
binary.BigEndian.PutUint32(r[3:], crc32.Checksum(b, castagnoliTable))
return append(r, b...)
}
// TestReader feeds the reader a stream of encoded records with different types.
func TestReader(t *testing.T) {
for i, c := range testReaderCases {
t.Logf("test %d", i)
var buf []byte
for _, r := range c.t {
buf = append(buf, encodedRecord(r.t, r.b)...)
}
r := NewReader(bytes.NewReader(buf))
for j := 0; r.Next(); j++ {
t.Logf("record %d", j)
rec := r.Record()
if j >= len(c.exp) {
t.Fatal("received more records than inserted")
}
testutil.Equals(t, c.exp[j], rec, "Bytes within record did not match expected Bytes")
}
if !c.fail && r.Err() != nil {
t.Fatalf("unexpected error: %s", r.Err())
}
if c.fail && r.Err() == nil {
t.Fatalf("expected error but got none")
}
}
}
func TestReader_Live(t *testing.T) {
for i, c := range testReaderCases {
t.Logf("test %d", i)
dir, err := ioutil.TempDir("", fmt.Sprintf("live_reader_%d", i))
t.Logf("created dir %s", dir)
testutil.Ok(t, err)
defer os.RemoveAll(dir)
// we're never going to have more than a single segment file per test case right now
f, err := os.Create(path.Join(dir, "00000000"))
testutil.Ok(t, err)
// live reader doesn't work on readers created from bytes buffers,
// since we need to be able to write more data to the thing we're
// reading from after the reader has been created
wg := sync.WaitGroup{}
// make sure the reader doesn't start until at least one record is written
wg.Add(1)
go func() {
for i, rec := range c.t {
rec := encodedRecord(rec.t, rec.b)
n, err := f.Write(rec)
testutil.Ok(t, err)
testutil.Assert(t, n > 0, "no bytes were written to wal")
if i == 0 {
wg.Done()
}
}
}()
sr, err := OpenReadSegment(SegmentName(dir, 0))
testutil.Ok(t, err)
lr := NewLiveReader(sr)
j := 0
wg.Wait()
caseLoop:
for {
for ; lr.Next(); j++ {
rec := lr.Record()
t.Log("j: ", j)
testutil.Equals(t, c.exp[j], rec, "Bytes within record did not match expected Bytes")
if j == len(c.exp)-1 {
break caseLoop
}
}
// Because reads and writes are happening concurrently, unless we get an error we should
// attempt to read records again.
if j == 0 && lr.Err() == nil {
continue
}
if !c.fail && lr.Err() != nil {
t.Fatalf("unexpected error: %s", lr.Err())
}
if c.fail && lr.Err() == nil {
t.Fatalf("expected error but got none:\n\tinput: %+v", c.t)
}
if lr.Err() != nil {
t.Log("err: ", lr.Err())
break
}
}
}
}
func TestWAL_FuzzWriteRead_Live(t *testing.T) {
const count = 500
var input [][]byte
lock := sync.RWMutex{}
var recs [][]byte
var index int
// Get size of segment.
getSegmentSize := func(dir string, index int) (int64, error) {
i := int64(-1)
fi, err := os.Stat(SegmentName(dir, index))
if err == nil {
i = fi.Size()
}
return i, err
}
readSegment := func(r *LiveReader) {
for r.Next() {
rec := r.Record()
lock.RLock()
l := len(input)
lock.RUnlock()
if index >= l {
t.Fatalf("read too many records")
}
lock.RLock()
if !bytes.Equal(input[index], rec) {
t.Fatalf("record %d (len %d) does not match (expected len %d)",
index, len(rec), len(input[index]))
}
lock.RUnlock()
index++
}
if r.Err() != io.EOF {
testutil.Ok(t, r.Err())
}
}
dir, err := ioutil.TempDir("", "wal_fuzz_live")
t.Log("created dir: ", dir)
testutil.Ok(t, err)
defer func() {
os.RemoveAll(dir)
}()
w, err := NewSize(nil, nil, dir, 128*pageSize)
testutil.Ok(t, err)
go func() {
for i := 0; i < count; i++ {
var sz int64
switch i % 5 {
case 0, 1:
sz = 50
case 2, 3:
sz = pageSize
default:
sz = pageSize * 8
}
rec := make([]byte, rand.Int63n(sz))
_, err := rand.Read(rec)
testutil.Ok(t, err)
lock.Lock()
input = append(input, rec)
lock.Unlock()
recs = append(recs, rec)
// Randomly batch up records.
if rand.Intn(4) < 3 {
testutil.Ok(t, w.Log(recs...))
recs = recs[:0]
}
}
testutil.Ok(t, w.Log(recs...))
}()
m, _, err := w.Segments()
testutil.Ok(t, err)
seg, err := OpenReadSegment(SegmentName(dir, m))
testutil.Ok(t, err)
r := NewLiveReader(seg)
segmentTicker := time.NewTicker(100 * time.Millisecond)
readTicker := time.NewTicker(10 * time.Millisecond)
for {
select {
case <-segmentTicker.C:
// check if new segments exist
_, last, err := w.Segments()
testutil.Ok(t, err)
if last > seg.i {
for {
readSegment(r)
if r.Err() != io.EOF {
testutil.Ok(t, r.Err())
}
size, err := getSegmentSize(dir, seg.i)
testutil.Ok(t, err)
// make sure we've read all of the current segment before rotating
if r.TotalRead() == size {
break
}
}
seg, err = OpenReadSegment(SegmentName(dir, seg.i+1))
testutil.Ok(t, err)
r = NewLiveReader(seg)
}
case <-readTicker.C:
readSegment(r)
}
if index == count {
break
}
}
testutil.Ok(t, r.Err())
}
func TestWAL_FuzzWriteRead(t *testing.T) {
const count = 25000
dir, err := ioutil.TempDir("", "walfuzz")
testutil.Ok(t, err)
defer os.RemoveAll(dir)
w, err := NewSize(nil, nil, dir, 128*pageSize)
testutil.Ok(t, err)
var input [][]byte
var recs [][]byte
for i := 0; i < count; i++ {
var sz int
switch i % 5 {
case 0, 1:
sz = 50
case 2, 3:
sz = pageSize
default:
sz = 8 * pageSize
}
rec := make([]byte, rand.Intn(sz))
_, err := rand.Read(rec)
testutil.Ok(t, err)
input = append(input, rec)
recs = append(recs, rec)
// Randomly batch up records.
if rand.Intn(4) < 3 {
testutil.Ok(t, w.Log(recs...))
recs = recs[:0]
}
}
testutil.Ok(t, w.Log(recs...))
m, n, err := w.Segments()
testutil.Ok(t, err)
rc, err := NewSegmentsRangeReader(SegmentRange{Dir: dir, First: m, Last: n})
testutil.Ok(t, err)
defer rc.Close()
rdr := NewReader(rc)
for i := 0; rdr.Next(); i++ {
rec := rdr.Record()
if i >= len(input) {
t.Fatal("read too many records")
}
if !bytes.Equal(input[i], rec) {
t.Fatalf("record %d (len %d) does not match (expected len %d)",
i, len(rec), len(input[i]))
}
}
testutil.Ok(t, rdr.Err())
}
func TestWAL_Repair(t *testing.T) {
for name, test := range map[string]struct {
corrSgm int // Which segment to corrupt.
corrFunc func(f *os.File) // Func that applies the corruption.
intactRecs int // Total expected records left after the repair.
}{
"torn_last_record": {
2,
func(f *os.File) {
_, err := f.Seek(pageSize*2, 0)
testutil.Ok(t, err)
_, err = f.Write([]byte{byte(recFirst)})
testutil.Ok(t, err)
},
8,
},
// Ensures that the page buffer is big enough to fit
// an entire page size without panicing.
// https://github.com/prometheus/tsdb/pull/414
"bad_header": {
1,
func(f *os.File) {
_, err := f.Seek(pageSize, 0)
testutil.Ok(t, err)
_, err = f.Write([]byte{byte(recPageTerm)})
testutil.Ok(t, err)
},
4,
},
"bad_fragment_sequence": {
1,
func(f *os.File) {
_, err := f.Seek(pageSize, 0)
testutil.Ok(t, err)
_, err = f.Write([]byte{byte(recLast)})
testutil.Ok(t, err)
},
4,
},
"bad_fragment_flag": {
1,
func(f *os.File) {
_, err := f.Seek(pageSize, 0)
testutil.Ok(t, err)
_, err = f.Write([]byte{123})
testutil.Ok(t, err)
},
4,
},
"bad_checksum": {
1,
func(f *os.File) {
_, err := f.Seek(pageSize+4, 0)
testutil.Ok(t, err)
_, err = f.Write([]byte{0})
testutil.Ok(t, err)
},
4,
},
"bad_length": {
1,
func(f *os.File) {
_, err := f.Seek(pageSize+2, 0)
testutil.Ok(t, err)
_, err = f.Write([]byte{0})
testutil.Ok(t, err)
},
4,
},
"bad_content": {
1,
func(f *os.File) {
_, err := f.Seek(pageSize+100, 0)
testutil.Ok(t, err)
_, err = f.Write([]byte("beef"))
testutil.Ok(t, err)
},
4,
},
} {
t.Run(name, func(t *testing.T) {
dir, err := ioutil.TempDir("", "wal_repair")
testutil.Ok(t, err)
defer os.RemoveAll(dir)
// We create 3 segments with 3 records each and
// then corrupt a given record in a given segment.
// As a result we want a repaired WAL with given intact records.
w, err := NewSize(nil, nil, dir, 3*pageSize)
testutil.Ok(t, err)
var records [][]byte
for i := 1; i <= 9; i++ {
b := make([]byte, pageSize-recordHeaderSize)
b[0] = byte(i)
records = append(records, b)
testutil.Ok(t, w.Log(b))
}
testutil.Ok(t, w.Close())
f, err := os.OpenFile(SegmentName(dir, test.corrSgm), os.O_RDWR, 0666)
testutil.Ok(t, err)
// Apply corruption function.
test.corrFunc(f)
testutil.Ok(t, f.Close())
w, err = New(nil, nil, dir)
testutil.Ok(t, err)
sr, err := NewSegmentsReader(dir)
testutil.Ok(t, err)
r := NewReader(sr)
for r.Next() {
}
testutil.NotOk(t, r.Err())
testutil.Ok(t, sr.Close())
testutil.Ok(t, w.Repair(r.Err()))
sr, err = NewSegmentsReader(dir)
testutil.Ok(t, err)
r = NewReader(sr)
var result [][]byte
for r.Next() {
var b []byte
result = append(result, append(b, r.Record()...))
}
testutil.Ok(t, r.Err())
testutil.Equals(t, test.intactRecs, len(result), "Wrong number of intact records")
for i, r := range result {
if !bytes.Equal(records[i], r) {
t.Fatalf("record %d diverges: want %x, got %x", i, records[i][:10], r[:10])
}
}
})
}
}
// TestCorruptAndCarryOn writes a multi-segment WAL; corrupts the first segment and
// ensures that an error during reading that segment are correctly repaired before
// moving to write more records to the WAL.
func TestCorruptAndCarryOn(t *testing.T) {
dir, err := ioutil.TempDir("", "wal_repair")
testutil.Ok(t, err)
defer os.RemoveAll(dir)
var (
logger = testutil.NewLogger(t)
segmentSize = pageSize * 3
recordSize = (pageSize / 3) - recordHeaderSize
)
// Produce a WAL with a two segments of 3 pages with 3 records each,
// so when we truncate the file we're guaranteed to split a record.
{
w, err := NewSize(logger, nil, dir, segmentSize)
testutil.Ok(t, err)
for i := 0; i < 18; i++ {
buf := make([]byte, recordSize)
_, err := rand.Read(buf)
testutil.Ok(t, err)
err = w.Log(buf)
testutil.Ok(t, err)
}
err = w.Close()
testutil.Ok(t, err)
}
// Check all the segments are the correct size.
{
segments, err := listSegments(dir)
testutil.Ok(t, err)
for _, segment := range segments {
f, err := os.OpenFile(filepath.Join(dir, fmt.Sprintf("%08d", segment.index)), os.O_RDONLY, 0666)
testutil.Ok(t, err)
fi, err := f.Stat()
testutil.Ok(t, err)
t.Log("segment", segment.index, "size", fi.Size())
testutil.Equals(t, int64(segmentSize), fi.Size())
err = f.Close()
testutil.Ok(t, err)
}
}
// Truncate the first file, splitting the middle record in the second
// page in half, leaving 4 valid records.
{
f, err := os.OpenFile(filepath.Join(dir, fmt.Sprintf("%08d", 0)), os.O_RDWR, 0666)
testutil.Ok(t, err)
fi, err := f.Stat()
testutil.Ok(t, err)
testutil.Equals(t, int64(segmentSize), fi.Size())
err = f.Truncate(int64(segmentSize / 2))
testutil.Ok(t, err)
err = f.Close()
testutil.Ok(t, err)
}
// Now try and repair this WAL, and write 5 more records to it.
{
sr, err := NewSegmentsReader(dir)
testutil.Ok(t, err)
reader := NewReader(sr)
i := 0
for ; i < 4 && reader.Next(); i++ {
testutil.Equals(t, recordSize, len(reader.Record()))
}
testutil.Equals(t, 4, i, "not enough records")
testutil.Assert(t, !reader.Next(), "unexpected record")
corruptionErr := reader.Err()
testutil.Assert(t, corruptionErr != nil, "expected error")
err = sr.Close()
testutil.Ok(t, err)
w, err := NewSize(logger, nil, dir, segmentSize)
testutil.Ok(t, err)
err = w.Repair(corruptionErr)
testutil.Ok(t, err)
for i := 0; i < 5; i++ {
buf := make([]byte, recordSize)
_, err := rand.Read(buf)
testutil.Ok(t, err)
err = w.Log(buf)
testutil.Ok(t, err)
}
err = w.Close()
testutil.Ok(t, err)
}
// Replay the WAL. Should get 9 records.
{
sr, err := NewSegmentsReader(dir)
testutil.Ok(t, err)
reader := NewReader(sr)
i := 0
for ; i < 9 && reader.Next(); i++ {
testutil.Equals(t, recordSize, len(reader.Record()))
}
testutil.Equals(t, 9, i, "wrong number of records")
testutil.Assert(t, !reader.Next(), "unexpected record")
testutil.Equals(t, nil, reader.Err())
}
}
func BenchmarkWAL_LogBatched(b *testing.B) {
dir, err := ioutil.TempDir("", "bench_logbatch")
testutil.Ok(b, err)
defer os.RemoveAll(dir)
w, err := New(nil, nil, "testdir")
testutil.Ok(b, err)
defer w.Close()
var buf [2048]byte
var recs [][]byte
b.SetBytes(2048)
for i := 0; i < b.N; i++ {
recs = append(recs, buf[:])
if len(recs) < 1000 {
continue
}
err := w.Log(recs...)
testutil.Ok(b, err)
recs = recs[:0]
}
// Stop timer to not count fsync time on close.
// If it's counted batched vs. single benchmarks are very similar but
// do not show burst throughput well.
b.StopTimer()
}
func BenchmarkWAL_Log(b *testing.B) {
dir, err := ioutil.TempDir("", "bench_logsingle")
testutil.Ok(b, err)
defer os.RemoveAll(dir)
w, err := New(nil, nil, "testdir")
testutil.Ok(b, err)
defer w.Close()
var buf [2048]byte
b.SetBytes(2048)
for i := 0; i < b.N; i++ {
err := w.Log(buf[:])
testutil.Ok(b, err)
}
// Stop timer to not count fsync time on close.
// If it's counted batched vs. single benchmarks are very similar but
// do not show burst throughput well.
b.StopTimer()
}