enhance promtool tsdb analyze command (#12869)

Improve promtool tsdb analyze:

- Make it more suitable for variable-size float chunks.
- Add support for histogram chunks.
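
For illustration (not part of the original commit message): instead of the old fixed chart of chunk "fullness" relative to 120 samples, the analysis now prints a min/avg/max line and a bucketed distribution per measurement. A hypothetical session is sketched below; the data directory, block ID, and all counts are invented, and it is assumed here that this code path is reached via promtool's extended analysis flag:

$ promtool tsdb analyze --extended <data dir> <block id>

Compaction analysis:

samples per float chunk (min/avg/max): 4/102/120
[  1,  10]:  24 #
[ 11,  20]:  12
...
[111, 120]: 244 ####################

Equivalent distributions follow in the same format for bytes per float chunk and for samples, bytes, and buckets per histogram chunk.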

---------

Signed-off-by: Ziqi Zhao <zhaoziqi9146@gmail.com>
Ziqi Zhao <zhaoziqi9146@gmail.com>, committed via GitHub on 2023-10-15 02:34:50 +08:00
commit 1a6edff882 (parent c9fce2c6c6)
2 changed files with 131 additions and 18 deletions

cmd/promtool/tsdb.go

@@ -18,7 +18,6 @@ import (
 	"context"
 	"fmt"
 	"io"
-	"math"
 	"os"
 	"path/filepath"
 	"runtime"
@@ -620,10 +619,12 @@ func analyzeCompaction(ctx context.Context, block tsdb.BlockReader, indexr tsdb.
 		err = tsdb_errors.NewMulti(err, chunkr.Close()).Err()
 	}()
 
-	const maxSamplesPerChunk = 120
-	nBuckets := 10
-	histogram := make([]int, nBuckets)
 	totalChunks := 0
+	floatChunkSamplesCount := make([]int, 0)
+	floatChunkSize := make([]int, 0)
+	histogramChunkSamplesCount := make([]int, 0)
+	histogramChunkSize := make([]int, 0)
+	histogramChunkBucketsCount := make([]int, 0)
 	var builder labels.ScratchBuilder
 	for postingsr.Next() {
 		var chks []chunks.Meta
@@ -637,26 +638,56 @@ func analyzeCompaction(ctx context.Context, block tsdb.BlockReader, indexr tsdb.
 			if err != nil {
 				return err
 			}
-			chunkSize := math.Min(float64(chk.NumSamples()), maxSamplesPerChunk)
-			// Calculate the bucket for the chunk and increment it in the histogram.
-			bucket := int(math.Ceil(float64(nBuckets)*chunkSize/maxSamplesPerChunk)) - 1
-			histogram[bucket]++
+			switch chk.Encoding() {
+			case chunkenc.EncXOR:
+				floatChunkSamplesCount = append(floatChunkSamplesCount, chk.NumSamples())
+				floatChunkSize = append(floatChunkSize, len(chk.Bytes()))
+			case chunkenc.EncFloatHistogram:
+				histogramChunkSamplesCount = append(histogramChunkSamplesCount, chk.NumSamples())
+				histogramChunkSize = append(histogramChunkSize, len(chk.Bytes()))
+				fhchk, ok := chk.(*chunkenc.FloatHistogramChunk)
+				if !ok {
+					return fmt.Errorf("chunk is not FloatHistogramChunk")
+				}
+				it := fhchk.Iterator(nil)
+				bucketCount := 0
+				for it.Next() == chunkenc.ValFloatHistogram {
+					_, f := it.AtFloatHistogram()
+					bucketCount += len(f.PositiveBuckets)
+					bucketCount += len(f.NegativeBuckets)
+				}
+				histogramChunkBucketsCount = append(histogramChunkBucketsCount, bucketCount)
+			case chunkenc.EncHistogram:
+				histogramChunkSamplesCount = append(histogramChunkSamplesCount, chk.NumSamples())
+				histogramChunkSize = append(histogramChunkSize, len(chk.Bytes()))
+				hchk, ok := chk.(*chunkenc.HistogramChunk)
+				if !ok {
+					return fmt.Errorf("chunk is not HistogramChunk")
+				}
+				it := hchk.Iterator(nil)
+				bucketCount := 0
+				for it.Next() == chunkenc.ValHistogram {
+					_, f := it.AtHistogram()
+					bucketCount += len(f.PositiveBuckets)
+					bucketCount += len(f.NegativeBuckets)
+				}
+				histogramChunkBucketsCount = append(histogramChunkBucketsCount, bucketCount)
+			}
 			totalChunks++
 		}
 	}
 
 	fmt.Printf("\nCompaction analysis:\n")
-	fmt.Println("Fullness: Amount of samples in chunks (100% is 120 samples)")
-	// Normalize absolute counts to percentages and print them out.
-	for bucket, count := range histogram {
-		percentage := 100.0 * count / totalChunks
-		fmt.Printf("%7d%%: ", (bucket+1)*10)
-		for j := 0; j < percentage; j++ {
-			fmt.Printf("#")
-		}
-		fmt.Println()
-	}
+	fmt.Println()
+	displayHistogram("samples per float chunk", floatChunkSamplesCount, totalChunks)
+	displayHistogram("bytes per float chunk", floatChunkSize, totalChunks)
+	displayHistogram("samples per histogram chunk", histogramChunkSamplesCount, totalChunks)
+	displayHistogram("bytes per histogram chunk", histogramChunkSize, totalChunks)
+	displayHistogram("buckets per histogram chunk", histogramChunkBucketsCount, totalChunks)
 
 	return nil
 }
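
A side note on the API used above (a sketch, not part of the diff): the new switch needs only Encoding(), NumSamples(), and Bytes() from chunkenc, plus the typed iterators for native histogram chunks. A minimal, runnable illustration of those calls for a float (XOR) chunk, assuming a module that depends on github.com/prometheus/prometheus:

package main

import (
	"fmt"

	"github.com/prometheus/prometheus/tsdb/chunkenc"
)

func main() {
	chk := chunkenc.NewXORChunk() // float samples are XOR-encoded
	app, err := chk.Appender()
	if err != nil {
		panic(err)
	}
	// Append 120 dummy samples, the nominal chunk capacity the old code assumed.
	for ts := int64(0); ts < 120; ts++ {
		app.Append(ts, float64(ts))
	}
	// Exactly the two quantities the analysis records for each float chunk.
	fmt.Println(chk.Encoding(), chk.NumSamples(), len(chk.Bytes()))
}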
@@ -732,3 +763,42 @@ func backfillOpenMetrics(path, outputDir string, humanReadable, quiet bool, maxB
 	return checkErr(backfill(5000, inputFile.Bytes(), outputDir, humanReadable, quiet, maxBlockDuration))
 }
+
+func displayHistogram(dataType string, datas []int, total int) {
+	slices.Sort(datas)
+	start, end, step := generateBucket(datas[0], datas[len(datas)-1])
+	sum := 0
+	buckets := make([]int, (end-start)/step+1)
+	maxCount := 0
+	for _, c := range datas {
+		sum += c
+		buckets[(c-start)/step]++
+		if buckets[(c-start)/step] > maxCount {
+			maxCount = buckets[(c-start)/step]
+		}
+	}
+	avg := sum / len(datas)
+	fmt.Printf("%s (min/avg/max): %d/%d/%d\n", dataType, datas[0], avg, datas[len(datas)-1])
+	maxLeftLen := strconv.Itoa(len(fmt.Sprintf("%d", end)))
+	maxRightLen := strconv.Itoa(len(fmt.Sprintf("%d", end+step)))
+	maxCountLen := strconv.Itoa(len(fmt.Sprintf("%d", maxCount)))
+	for bucket, count := range buckets {
+		percentage := 100.0 * count / total
+		fmt.Printf("[%"+maxLeftLen+"d, %"+maxRightLen+"d]: %"+maxCountLen+"d %s\n", bucket*step+start+1, (bucket+1)*step+start, count, strings.Repeat("#", percentage))
+	}
+	fmt.Println()
+}
+
+func generateBucket(min, max int) (start, end, step int) {
+	s := (max - min) / 10
+	step = 10
+	for step < s && step <= 10000 {
+		step *= 10
+	}
+	start = min - min%step
+	end = max - max%step + step
+	return
+}
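
To make the bucketing concrete (illustration, not part of the commit): for min=101 and max=141 the spread (max-min)/10 is 4, so step stays at 10, start rounds min down to 100, and end rounds max up to 150; for wider ranges the loop grows step by powers of ten. A standalone sketch, with generateBucket copied verbatim from the hunk above:

package main

import "fmt"

// generateBucket is copied verbatim from the diff above.
func generateBucket(min, max int) (start, end, step int) {
	s := (max - min) / 10
	step = 10
	for step < s && step <= 10000 {
		step *= 10
	}
	start = min - min%step
	end = max - max%step + step
	return
}

func main() {
	fmt.Println(generateBucket(101, 141)) // 100 150 10 -- the case in the new test
	fmt.Println(generateBucket(3, 97000)) // 0 100000 10000 -- step grew to 10000
}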

cmd/promtool/tsdb_test.go (new file, 43 lines)

@@ -0,0 +1,43 @@
+// Copyright 2017 The Prometheus Authors
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package main
+
+import (
+	"testing"
+
+	"github.com/stretchr/testify/require"
+)
+
+func TestGenerateBucket(t *testing.T) {
+	tcs := []struct {
+		min, max         int
+		start, end, step int
+	}{
+		{
+			min:   101,
+			max:   141,
+			start: 100,
+			end:   150,
+			step:  10,
+		},
+	}
+
+	for _, tc := range tcs {
+		start, end, step := generateBucket(tc.min, tc.max)
+
+		require.Equal(t, tc.start, start)
+		require.Equal(t, tc.end, end)
+		require.Equal(t, tc.step, step)
+	}
+}
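
The new helper can be exercised on its own with standard Go tooling, e.g. go test ./cmd/promtool/ -run TestGenerateBucket -v from the repository root.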