From bb0e74b3433ed254d28972334174dcad53422777 Mon Sep 17 00:00:00 2001 From: Goutham Veeramachaneni Date: Fri, 2 Feb 2018 13:16:23 +0530 Subject: [PATCH] Fix reader for large index files. Currently the offsets are cast into uint32 even though the index can grow larger than 4GiB. Signed-off-by: Goutham Veeramachaneni --- index/index.go | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/index/index.go b/index/index.go index f6b680b36..a880dbff0 100644 --- a/index/index.go +++ b/index/index.go @@ -143,7 +143,11 @@ func NewWriter(fn string) (*Writer, error) { if err != nil { return nil, err } - defer df.Close() // close for flatform windows + defer df.Close() // Close for platform windows. + + if err := os.RemoveAll(fn); err != nil { + return nil, errors.Wrap(err, "remove any existing index at path") + } f, err := os.OpenFile(fn, os.O_CREATE|os.O_WRONLY, 0666) if err != nil { @@ -530,8 +534,8 @@ type Reader struct { c io.Closer // Cached hashmaps of section offsets. - labels map[string]uint32 - postings map[labels.Label]uint32 + labels map[string]uint64 + postings map[labels.Label]uint64 // Cache of read symbols. Strings that are returned when reading from the // block are always backed by true strings held in here rather than // strings that are backed by byte slices from the mmap'd index file. This @@ -595,8 +599,8 @@ func newReader(b ByteSlice, c io.Closer, version int) (*Reader, error) { b: b, c: c, symbols: map[uint32]string{}, - labels: map[string]uint32{}, - postings: map[labels.Label]uint32{}, + labels: map[string]uint64{}, + postings: map[labels.Label]uint64{}, crc32: newCRC32(), version: version, } @@ -617,7 +621,7 @@ func newReader(b ByteSlice, c io.Closer, version int) (*Reader, error) { } var err error - err = r.readOffsetTable(r.toc.labelIndicesTable, func(key []string, off uint32) error { + err = r.readOffsetTable(r.toc.labelIndicesTable, func(key []string, off uint64) error { if len(key) != 1 { return errors.Errorf("unexpected key length %d", len(key)) } @@ -627,7 +631,7 @@ func newReader(b ByteSlice, c io.Closer, version int) (*Reader, error) { if err != nil { return nil, errors.Wrap(err, "read label index table") } - err = r.readOffsetTable(r.toc.postingsTable, func(key []string, off uint32) error { + err = r.readOffsetTable(r.toc.postingsTable, func(key []string, off uint64) error { if len(key) != 2 { return errors.Errorf("unexpected key length %d", len(key)) } @@ -780,7 +784,7 @@ func (r *Reader) readSymbols(off int) error { // readOffsetTable reads an offset table at the given position calls f for each // found entry.f // If f returns an error it stops decoding and returns the received error, -func (r *Reader) readOffsetTable(off uint64, f func([]string, uint32) error) error { +func (r *Reader) readOffsetTable(off uint64, f func([]string, uint64) error) error { d := r.decbufAt(int(off)) cnt := d.be32() @@ -791,7 +795,7 @@ func (r *Reader) readOffsetTable(off uint64, f func([]string, uint32) error) err for i := 0; i < keyCount; i++ { keys = append(keys, d.uvarintStr()) } - o := uint32(d.uvarint()) + o := d.uvarint64() if d.err() != nil { break }