Make floats exact again.

This should do the right thing for the old delta chunks, too.
2024-12-26 00:23:18 +00:00 · 2015-03-06 16:03:03 +01:00 · 2015-03-06 16:03:03 +01:00 · 23ba8a5516
commit 23ba8a5516
parent a8d4f8af9a
4 changed files with 60 additions and 77 deletions
--- a/storage/local/delta.go
+++ b/storage/local/delta.go
@@ -124,10 +124,11 @@ func (c deltaEncodedChunk) add(s *metric.SamplePair) []chunk {
 		return []chunk{&c, overflowChunks[0]}
 	}

+	baseValue := c.baseValue()
 	// TODO(beorn7): Once https://github.com/prometheus/prometheus/issues/481 is
 	// fixed, we should panic here if dt is negative.
 	dt := s.Timestamp - c.baseTime()
-	dv := s.Value - c.baseValue()
+	dv := s.Value - baseValue
 	tb := c.timeBytes()
 	vb := c.valueBytes()

@@ -139,18 +140,23 @@ func (c deltaEncodedChunk) add(s *metric.SamplePair) []chunk {
 		return transcodeAndAdd(newDeltaEncodedChunk(tb, d4, false, cap(c)), &c, s)
 	}
 	// float32->float64.
-	if !c.isInt() && vb == d4 && !isFloat32(dv) {
+	if !c.isInt() && vb == d4 && baseValue+clientmodel.SampleValue(float32(dv)) != s.Value {
 		return transcodeAndAdd(newDeltaEncodedChunk(tb, d8, false, cap(c)), &c, s)
 	}
-	if tb < d8 || vb < d8 {
-		// Maybe more bytes per sample.
-		ntb := bytesNeededForUnsignedTimestampDelta(dt)
-		nvb := bytesNeededForSampleValueDelta(dv, c.isInt())
-		if ntb > tb || nvb > vb {
-			ntb = max(ntb, tb)
-			nvb = max(nvb, vb)
-			return transcodeAndAdd(newDeltaEncodedChunk(ntb, nvb, c.isInt(), cap(c)), &c, s)
-		}
+
+	var ntb, nvb deltaBytes
+	if tb < d8 {
+		// Maybe more bytes for timestamp.
+		ntb = bytesNeededForUnsignedTimestampDelta(dt)
+	}
+	if c.isInt() && vb < d8 {
+		// Maybe more bytes for sample value.
+		nvb = bytesNeededForIntegerSampleValueDelta(dv)
+	}
+	if ntb > tb || nvb > vb {
+		ntb = max(ntb, tb)
+		nvb = max(nvb, vb)
+		return transcodeAndAdd(newDeltaEncodedChunk(ntb, nvb, c.isInt(), cap(c)), &c, s)
 	}
 	offset := len(c)
 	c = c[:offset+sampleSize]
--- a/storage/local/delta_helpers.go
+++ b/storage/local/delta_helpers.go
@@ -55,25 +55,19 @@ func bytesNeededForSignedTimestampDelta(deltaT clientmodel.Timestamp) deltaBytes
 	}
 }

-func bytesNeededForSampleValueDelta(deltaV clientmodel.SampleValue, isInt bool) deltaBytes {
-	if isInt {
-		switch {
-		case deltaV < math.MinInt32 || deltaV > math.MaxInt32:
-			return d8
-		case deltaV < math.MinInt16 || deltaV > math.MaxInt16:
-			return d4
-		case deltaV < math.MinInt8 || deltaV > math.MaxInt8:
-			return d2
-		case deltaV != 0:
-			return d1
-		default:
-			return d0
-		}
-	}
-	if clientmodel.SampleValue(float32(deltaV)) != deltaV {
+func bytesNeededForIntegerSampleValueDelta(deltaV clientmodel.SampleValue) deltaBytes {
+	switch {
+	case deltaV < math.MinInt32 || deltaV > math.MaxInt32:
 		return d8
+	case deltaV < math.MinInt16 || deltaV > math.MaxInt16:
+		return d4
+	case deltaV < math.MinInt8 || deltaV > math.MaxInt8:
+		return d2
+	case deltaV != 0:
+		return d1
+	default:
+		return d0
 	}
-	return d4
 }

 func max(a, b deltaBytes) deltaBytes {
@@ -88,8 +82,3 @@ func isInt64(v clientmodel.SampleValue) bool {
 	// Note: Using math.Modf is slower than the conversion approach below.
 	return clientmodel.SampleValue(int64(v)) == v
 }
-
-// isFloat32 returns true if v can be represented as an float32.
-func isFloat32(v clientmodel.SampleValue) bool {
-	return clientmodel.SampleValue(float32(v)) == v
-}
--- a/storage/local/doubledelta.go
+++ b/storage/local/doubledelta.go
@@ -171,40 +171,49 @@ func (c doubleDeltaEncodedChunk) add(s *metric.SamplePair) []chunk {
 		return []chunk{&c, overflowChunks[0]}
 	}

-	dt := s.Timestamp - c.baseTime() - clientmodel.Timestamp(c.len())*c.baseTimeDelta()
-	dv := s.Value - c.baseValue() - clientmodel.SampleValue(c.len())*c.baseValueDelta()
+	projectedTime := c.baseTime() + clientmodel.Timestamp(c.len())*c.baseTimeDelta()
+	ddt := s.Timestamp - projectedTime
+
+	projectedValue := c.baseValue() + clientmodel.SampleValue(c.len())*c.baseValueDelta()
+	ddv := s.Value - projectedValue

 	// If the new sample is incompatible with the current encoding, reencode the
 	// existing chunk data into new chunk(s).
 	//
 	// int->float.
-	if c.isInt() && !isInt64(dv) {
+	if c.isInt() && !isInt64(ddv) {
 		return transcodeAndAdd(newDoubleDeltaEncodedChunk(tb, d4, false, cap(c)), &c, s)
 	}
 	// float32->float64.
-	if !c.isInt() && vb == d4 && !isFloat32(dv) {
+	if !c.isInt() && vb == d4 && projectedValue+clientmodel.SampleValue(float32(ddv)) != s.Value {
 		return transcodeAndAdd(newDoubleDeltaEncodedChunk(tb, d8, false, cap(c)), &c, s)
 	}
-	if tb < d8 || vb < d8 {
-		// Maybe more bytes per sample.
-		ntb := bytesNeededForSignedTimestampDelta(dt)
-		nvb := bytesNeededForSampleValueDelta(dv, c.isInt())
-		if ntb > tb || nvb > vb {
-			ntb = max(ntb, tb)
-			nvb = max(nvb, vb)
-			return transcodeAndAdd(newDoubleDeltaEncodedChunk(ntb, nvb, c.isInt(), cap(c)), &c, s)
-		}
+
+	var ntb, nvb deltaBytes
+	if tb < d8 {
+		// Maybe more bytes for timestamp.
+		ntb = bytesNeededForSignedTimestampDelta(ddt)
 	}
+	if c.isInt() && vb < d8 {
+		// Maybe more bytes for sample value.
+		nvb = bytesNeededForIntegerSampleValueDelta(ddv)
+	}
+	if ntb > tb || nvb > vb { // Widen the sample layout, but stay double-delta encoded.
+		ntb = max(ntb, tb)
+		nvb = max(nvb, vb)
+		return transcodeAndAdd(newDoubleDeltaEncodedChunk(ntb, nvb, c.isInt(), cap(c)), &c, s)
+	}
+
 	offset := len(c)
 	c = c[:offset+sampleSize]

 	switch tb {
 	case d1:
-		c[offset] = byte(dt)
+		c[offset] = byte(ddt)
 	case d2:
-		binary.LittleEndian.PutUint16(c[offset:], uint16(dt))
+		binary.LittleEndian.PutUint16(c[offset:], uint16(ddt))
 	case d4:
-		binary.LittleEndian.PutUint32(c[offset:], uint32(dt))
+		binary.LittleEndian.PutUint32(c[offset:], uint32(ddt))
 	case d8:
 		// Store the absolute value (no delta) in case of d8.
 		binary.LittleEndian.PutUint64(c[offset:], uint64(s.Timestamp))
@@ -219,11 +228,11 @@ func (c doubleDeltaEncodedChunk) add(s *metric.SamplePair) []chunk {
 		case d0:
 			// No-op. Constant delta is stored as base value.
 		case d1:
-			c[offset] = byte(dv)
+			c[offset] = byte(ddv)
 		case d2:
-			binary.LittleEndian.PutUint16(c[offset:], uint16(dv))
+			binary.LittleEndian.PutUint16(c[offset:], uint16(ddv))
 		case d4:
-			binary.LittleEndian.PutUint32(c[offset:], uint32(dv))
+			binary.LittleEndian.PutUint32(c[offset:], uint32(ddv))
 		// d8 must not happen. Those samples are encoded as float64.
 		default:
 			panic("invalid number of bytes for integer delta")
@@ -231,7 +240,7 @@ func (c doubleDeltaEncodedChunk) add(s *metric.SamplePair) []chunk {
 	} else {
 		switch vb {
 		case d4:
-			binary.LittleEndian.PutUint32(c[offset:], math.Float32bits(float32(dv)))
+			binary.LittleEndian.PutUint32(c[offset:], math.Float32bits(float32(ddv)))
 		case d8:
 			// Store the absolute value (no delta) in case of d8.
 			binary.LittleEndian.PutUint64(c[offset:], math.Float64bits(float64(s.Value)))
--- a/storage/local/storage_test.go
+++ b/storage/local/storage_test.go
@@ -15,7 +15,6 @@ package local

 import (
 	"fmt"
-	"math"
 	"math/rand"
 	"reflect"
 	"testing"
@@ -30,14 +29,6 @@ import (
 	"github.com/prometheus/prometheus/utility/test"
 )

-const (
-	epsilon = 0.000001 // Relative error allowed for sample values.
-)
-
-var (
-	minNormal = math.Float64frombits(0x0010000000000000) // The smallest positive normal value of type float64.
-)
-
 func TestGetFingerprintsForLabelMatchers(t *testing.T) {
 	storage, closer := NewTestStorage(t, 1)
 	defer closer.Close()
@@ -221,7 +212,7 @@ func testChunk(t *testing.T, chunkType byte) {
 			if samples[i].Timestamp != v.Timestamp {
 				t.Errorf("%d. Got %v; want %v", i, v.Timestamp, samples[i].Timestamp)
 			}
-			if !almostEqual(samples[i].Value, v.Value) {
+			if samples[i].Value != v.Value {
 				t.Errorf("%d. Got %v; want %v", i, v.Value, samples[i].Value)
 			}
 		}
@@ -655,7 +646,7 @@ func TestFuzzChunkType1(t *testing.T) {
 //
 // go test -race -cpu 8 -test=short -bench BenchmarkFuzzChunkType
 func benchmarkFuzz(b *testing.B, chunkType byte) {
-	const samplesPerRun = 20000
+	const samplesPerRun = 100000
 	rand.Seed(42)
 	directory := test.NewTemporaryDirectory("test_storage", b)
 	defer directory.Close()
@@ -837,7 +828,7 @@ func verifyStorage(t testing.TB, s Storage, samples clientmodel.Samples, maxAge
 		}
 		want := sample.Value
 		got := found[0].Value
-		if !almostEqual(want, got) || sample.Timestamp != found[0].Timestamp {
+		if want != got || sample.Timestamp != found[0].Timestamp {
 			t.Errorf(
 				"Value (or timestamp) mismatch, want %f (at time %v), got %f (at time %v).",
 				want, sample.Timestamp, got, found[0].Timestamp,
@@ -938,15 +929,3 @@ func TestChunkMaps(t *testing.T) {
 	}

 }
-
-func almostEqual(a, b clientmodel.SampleValue) bool {
-	// Cf. http://floating-point-gui.de/errors/comparison/
-	if a == b {
-		return true
-	}
-	diff := math.Abs(float64(a - b))
-	if a == 0 || b == 0 || diff < minNormal {
-		return diff < epsilon*minNormal
-	}
-	return diff/(math.Abs(float64(a))+math.Abs(float64(b))) < epsilon
-}