Enhance promtool tsdb analyze command (#12869)

Improve promtool tsdb analyze

- Make it more suitable for variable size float chunks.
- Add support for histogram chunks.

---------

Signed-off-by: Ziqi Zhao <zhaoziqi9146@gmail.com>
This commit is contained in:
Ziqi Zhao 2023-10-15 02:34:50 +08:00 committed by GitHub
parent c9fce2c6c6
commit 1a6edff882
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 131 additions and 18 deletions

View File

@ -18,7 +18,6 @@ import (
"context"
"fmt"
"io"
"math"
"os"
"path/filepath"
"runtime"
@ -620,10 +619,12 @@ func analyzeCompaction(ctx context.Context, block tsdb.BlockReader, indexr tsdb.
err = tsdb_errors.NewMulti(err, chunkr.Close()).Err()
}()
const maxSamplesPerChunk = 120
nBuckets := 10
histogram := make([]int, nBuckets)
totalChunks := 0
floatChunkSamplesCount := make([]int, 0)
floatChunkSize := make([]int, 0)
histogramChunkSamplesCount := make([]int, 0)
histogramChunkSize := make([]int, 0)
histogramChunkBucketsCount := make([]int, 0)
var builder labels.ScratchBuilder
for postingsr.Next() {
var chks []chunks.Meta
@ -637,26 +638,56 @@ func analyzeCompaction(ctx context.Context, block tsdb.BlockReader, indexr tsdb.
if err != nil {
return err
}
chunkSize := math.Min(float64(chk.NumSamples()), maxSamplesPerChunk)
// Calculate the bucket for the chunk and increment it in the histogram.
bucket := int(math.Ceil(float64(nBuckets)*chunkSize/maxSamplesPerChunk)) - 1
histogram[bucket]++
switch chk.Encoding() {
case chunkenc.EncXOR:
floatChunkSamplesCount = append(floatChunkSamplesCount, chk.NumSamples())
floatChunkSize = append(floatChunkSize, len(chk.Bytes()))
case chunkenc.EncFloatHistogram:
histogramChunkSamplesCount = append(histogramChunkSamplesCount, chk.NumSamples())
histogramChunkSize = append(histogramChunkSize, len(chk.Bytes()))
fhchk, ok := chk.(*chunkenc.FloatHistogramChunk)
if !ok {
return fmt.Errorf("chunk is not FloatHistogramChunk")
}
it := fhchk.Iterator(nil)
bucketCount := 0
for it.Next() == chunkenc.ValFloatHistogram {
_, f := it.AtFloatHistogram()
bucketCount += len(f.PositiveBuckets)
bucketCount += len(f.NegativeBuckets)
}
histogramChunkBucketsCount = append(histogramChunkBucketsCount, bucketCount)
case chunkenc.EncHistogram:
histogramChunkSamplesCount = append(histogramChunkSamplesCount, chk.NumSamples())
histogramChunkSize = append(histogramChunkSize, len(chk.Bytes()))
hchk, ok := chk.(*chunkenc.HistogramChunk)
if !ok {
return fmt.Errorf("chunk is not HistogramChunk")
}
it := hchk.Iterator(nil)
bucketCount := 0
for it.Next() == chunkenc.ValHistogram {
_, f := it.AtHistogram()
bucketCount += len(f.PositiveBuckets)
bucketCount += len(f.NegativeBuckets)
}
histogramChunkBucketsCount = append(histogramChunkBucketsCount, bucketCount)
}
totalChunks++
}
}
fmt.Printf("\nCompaction analysis:\n")
fmt.Println("Fullness: Amount of samples in chunks (100% is 120 samples)")
// Normalize absolute counts to percentages and print them out.
for bucket, count := range histogram {
percentage := 100.0 * count / totalChunks
fmt.Printf("%7d%%: ", (bucket+1)*10)
for j := 0; j < percentage; j++ {
fmt.Printf("#")
}
fmt.Println()
}
fmt.Println()
displayHistogram("samples per float chunk", floatChunkSamplesCount, totalChunks)
displayHistogram("bytes per float chunk", floatChunkSize, totalChunks)
displayHistogram("samples per histogram chunk", histogramChunkSamplesCount, totalChunks)
displayHistogram("bytes per histogram chunk", histogramChunkSize, totalChunks)
displayHistogram("buckets per histogram chunk", histogramChunkBucketsCount, totalChunks)
return nil
}
@ -732,3 +763,42 @@ func backfillOpenMetrics(path, outputDir string, humanReadable, quiet bool, maxB
return checkErr(backfill(5000, inputFile.Bytes(), outputDir, humanReadable, quiet, maxBlockDuration))
}
// displayHistogram prints a text histogram of datas to standard output.
//
// dataType is the label used in the summary line. datas holds the observed
// values and is sorted in place. total is the divisor used to scale each
// bucket's bar: a bucket holding count values is drawn with 100*count/total
// '#' characters (integer division).
//
// When datas is empty there is nothing to bucket, so "<dataType>: N/A" is
// printed instead of indexing into an empty slice.
func displayHistogram(dataType string, datas []int, total int) {
	if len(datas) == 0 {
		fmt.Printf("%s: N/A\n\n", dataType)
		return
	}

	slices.Sort(datas)
	lowest, highest := datas[0], datas[len(datas)-1]
	start, end, step := generateBucket(lowest, highest)

	sum := 0
	maxCount := 0
	buckets := make([]int, (end-start)/step+1)
	for _, c := range datas {
		sum += c
		idx := (c - start) / step
		buckets[idx]++
		if buckets[idx] > maxCount {
			maxCount = buckets[idx]
		}
	}
	avg := sum / len(datas)
	fmt.Printf("%s (min/avg/max): %d/%d/%d\n", dataType, lowest, avg, highest)

	// Column widths are derived from the largest numbers that can appear so
	// that every row lines up.
	maxLeftLen := strconv.Itoa(len(fmt.Sprintf("%d", end)))
	maxRightLen := strconv.Itoa(len(fmt.Sprintf("%d", end+step)))
	maxCountLen := strconv.Itoa(len(fmt.Sprintf("%d", maxCount)))
	rowFormat := "[%" + maxLeftLen + "d, %" + maxRightLen + "d]: %" + maxCountLen + "d %s\n"

	for bucket, count := range buckets {
		// Avoid dividing by zero when the caller has no total to scale by.
		percentage := 0
		if total > 0 {
			percentage = 100 * count / total
		}
		// A value c is counted in bucket (c-start)/step, i.e. the bucket
		// covers [start+bucket*step, start+(bucket+1)*step-1]. Print exactly
		// that range so the label matches the values counted in the row.
		lower := bucket*step + start
		upper := lower + step - 1
		fmt.Printf(rowFormat, lower, upper, count, strings.Repeat("#", percentage))
	}
	fmt.Println()
}
// generateBucket derives histogram bucket boundaries for values in [min, max].
//
// step starts at 10 and is multiplied by 10 until it is at least a tenth of
// the value range or has grown past 10000. start is min rounded down to a
// multiple of step; end is max rounded down to a multiple of step, plus one
// further step.
func generateBucket(min, max int) (start, end, step int) {
	tenth := (max - min) / 10
	for step = 10; step < tenth && step <= 10000; step *= 10 {
	}
	start = min - min%step
	end = max - max%step + step
	return start, end, step
}

43
cmd/promtool/tsdb_test.go Normal file
View File

@ -0,0 +1,43 @@
// Copyright 2017 The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package main
import (
"testing"
"github.com/stretchr/testify/require"
)
// TestGenerateBucket checks the start, end and step returned by
// generateBucket for a table of min/max inputs.
func TestGenerateBucket(t *testing.T) {
	type bounds struct {
		start, end, step int
	}
	cases := []struct {
		min, max int
		want     bounds
	}{
		{min: 101, max: 141, want: bounds{start: 100, end: 150, step: 10}},
	}

	for _, c := range cases {
		gotStart, gotEnd, gotStep := generateBucket(c.min, c.max)
		require.Equal(t, c.want.start, gotStart)
		require.Equal(t, c.want.end, gotEnd)
		require.Equal(t, c.want.step, gotStep)
	}
}