diff --git a/docs/feature_flags.md b/docs/feature_flags.md index 1cf54c47f..f580c959f 100644 --- a/docs/feature_flags.md +++ b/docs/feature_flags.md @@ -125,7 +125,61 @@ histogram (albeit via the text format). With this flag enabled, Prometheus will still ingest those conventional histograms that do not come with a corresponding native histogram. However, if a native histogram is present, Prometheus will ignore the corresponding conventional histogram, with the -notable exception of exemplars, which are always ingested. +notable exception of exemplars, which are always ingested. To keep the +conventional histograms as well, enable `scrape_classic_histograms` in the +scrape job. + +_Note about the format of `le` and `quantile` label values:_ + +In certain situations, the protobuf parsing changes the number formatting of +the `le` labels of conventional histograms and the `quantile` labels of +summaries. Typically, this happens if the scraped target is instrumented with +[client_golang](https://github.com/prometheus/client_golang) provided that +[promhttp.HandlerOpts.EnableOpenMetrics](https://pkg.go.dev/github.com/prometheus/client_golang/prometheus/promhttp#HandlerOpts) +is set to `false`. In such a case, integer label values are represented in the +text format as such, e.g. `quantile="1"` or `le="2"`. However, the protobuf parsing +changes the representation to float-like (following the OpenMetrics +specification), so the examples above become `quantile="1.0"` and `le="2.0"` after +ingestion into Prometheus, which changes the identity of the metric compared to +what was ingested before via the text format. + +The effect of this change is that alerts, recording rules and dashboards that +directly reference label values as whole numbers such as `le="1"` will stop +working. 
+ +Aggregation by the `le` and `quantile` labels for vectors that contain the old and +new formatting will lead to unexpected results, and range vectors that span the +transition between the different formatting will contain additional series. +The most common use case for both is the quantile calculation via +`histogram_quantile`, e.g. +`histogram_quantile(0.95, sum by (le) (rate(histogram_bucket[10m])))`. +The `histogram_quantile` function already tries to mitigate the effects to some +extent, but there will be inaccuracies, in particular for shorter ranges that +cover only a few samples. + +Ways to deal with this change either globally or on a per-metric basis: + +- Fix references to integer `le` and `quantile` label values, but otherwise do +nothing and accept that some queries that span the transition time will produce +inaccurate or unexpected results. +_This is the recommended solution, to get consistently normalized label values._ +Also, Prometheus 3.0 is expected to enforce normalization of these label values. +- Use `metric_relabel_configs` to retain the old labels when scraping targets. +This should **only** be applied to metrics that currently produce such labels. + + +```yaml + metric_relabel_configs: + - source_labels: + - quantile + target_label: quantile + regex: (\d+)\.0+ + - source_labels: + - le + - __name__ + target_label: le + regex: (\d+)\.0+;.*_bucket +``` ## OTLP Receiver diff --git a/scrape/scrape_test.go b/scrape/scrape_test.go index 7be3f3461..08cf37ede 100644 --- a/scrape/scrape_test.go +++ b/scrape/scrape_test.go @@ -3679,6 +3679,131 @@ func TestTargetScrapeIntervalAndTimeoutRelabel(t *testing.T) { require.Equal(t, "750ms", sp.ActiveTargets()[0].labels.Get(model.ScrapeTimeoutLabel)) } +// Testing whether we can remove trailing .0 from histogram 'le' and summary 'quantile' labels. 
+func TestLeQuantileReLabel(t *testing.T) { + simpleStorage := teststorage.New(t) + defer simpleStorage.Close() + + config := &config.ScrapeConfig{ + JobName: "test", + MetricRelabelConfigs: []*relabel.Config{ + { + SourceLabels: model.LabelNames{"le", "__name__"}, + Regex: relabel.MustNewRegexp("(\\d+)\\.0+;.*_bucket"), + Replacement: relabel.DefaultRelabelConfig.Replacement, + Separator: relabel.DefaultRelabelConfig.Separator, + TargetLabel: "le", + Action: relabel.Replace, + }, + { + SourceLabels: model.LabelNames{"quantile"}, + Regex: relabel.MustNewRegexp("(\\d+)\\.0+"), + Replacement: relabel.DefaultRelabelConfig.Replacement, + Separator: relabel.DefaultRelabelConfig.Separator, + TargetLabel: "quantile", + Action: relabel.Replace, + }, + }, + SampleLimit: 100, + Scheme: "http", + ScrapeInterval: model.Duration(100 * time.Millisecond), + ScrapeTimeout: model.Duration(100 * time.Millisecond), + } + + metricsText := ` +# HELP test_histogram This is a histogram with default buckets +# TYPE test_histogram histogram +test_histogram_bucket{address="0.0.0.0",port="5001",le="0.005"} 0 +test_histogram_bucket{address="0.0.0.0",port="5001",le="0.01"} 0 +test_histogram_bucket{address="0.0.0.0",port="5001",le="0.025"} 0 +test_histogram_bucket{address="0.0.0.0",port="5001",le="0.05"} 0 +test_histogram_bucket{address="0.0.0.0",port="5001",le="0.1"} 0 +test_histogram_bucket{address="0.0.0.0",port="5001",le="0.25"} 0 +test_histogram_bucket{address="0.0.0.0",port="5001",le="0.5"} 0 +test_histogram_bucket{address="0.0.0.0",port="5001",le="1.0"} 0 +test_histogram_bucket{address="0.0.0.0",port="5001",le="2.5"} 0 +test_histogram_bucket{address="0.0.0.0",port="5001",le="5.0"} 0 +test_histogram_bucket{address="0.0.0.0",port="5001",le="10.0"} 0 +test_histogram_bucket{address="0.0.0.0",port="5001",le="+Inf"} 0 +test_histogram_sum{address="0.0.0.0",port="5001"} 0 +test_histogram_count{address="0.0.0.0",port="5001"} 0 +# HELP test_summary Number of inflight requests sampled at a regular 
interval. Quantile buckets keep track of inflight requests over the last 60s. +# TYPE test_summary summary +test_summary{quantile="0.5"} 0 +test_summary{quantile="0.9"} 0 +test_summary{quantile="0.95"} 0 +test_summary{quantile="0.99"} 0 +test_summary{quantile="1.0"} 1 +test_summary_sum 1 +test_summary_count 199 +` + + // The expected "le" values do not have the trailing ".0". + expectedLeValues := []string{"0.005", "0.01", "0.025", "0.05", "0.1", "0.25", "0.5", "1", "2.5", "5", "10", "+Inf"} + + // The expected "quantile" values do not have the trailing ".0". + expectedQuantileValues := []string{"0.5", "0.9", "0.95", "0.99", "1"} + + scrapeCount := 0 + scraped := make(chan bool) + + ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + fmt.Fprint(w, metricsText) + scrapeCount++ + if scrapeCount > 2 { + close(scraped) + } + })) + defer ts.Close() + + sp, err := newScrapePool(config, simpleStorage, 0, nil, &Options{}, newTestScrapeMetrics(t)) + require.NoError(t, err) + defer sp.stop() + + testURL, err := url.Parse(ts.URL) + require.NoError(t, err) + sp.Sync([]*targetgroup.Group{ + { + Targets: []model.LabelSet{{model.AddressLabel: model.LabelValue(testURL.Host)}}, + }, + }) + require.Equal(t, 1, len(sp.ActiveTargets())) + + select { + case <-time.After(5 * time.Second): + t.Fatalf("target was not scraped") + case <-scraped: + } + + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + q, err := simpleStorage.Querier(time.Time{}.UnixNano(), time.Now().UnixNano()) + require.NoError(t, err) + defer q.Close() + + checkValues := func(labelName string, expectedValues []string, series storage.SeriesSet) { + foundLeValues := map[string]bool{} + + for series.Next() { + s := series.At() + v := s.Labels().Get(labelName) + require.NotContains(t, foundLeValues, v, "duplicate label value found") + foundLeValues[v] = true + } + + require.Equal(t, len(expectedValues), len(foundLeValues), "number of label values not as 
expected") + for _, v := range expectedValues { + require.Contains(t, foundLeValues, v, "label value not found") + } + } + + series := q.Select(ctx, false, nil, labels.MustNewMatcher(labels.MatchRegexp, "__name__", "test_histogram_bucket")) + checkValues("le", expectedLeValues, series) + + series = q.Select(ctx, false, nil, labels.MustNewMatcher(labels.MatchRegexp, "__name__", "test_summary")) + checkValues("quantile", expectedQuantileValues, series) +} + func TestScrapeLoopRunCreatesStaleMarkersOnFailedScrapeForTimestampedMetrics(t *testing.T) { appender := &collectResultAppender{} var (