Merge pull request #15164 from machine424/quantile

feat: normalize "le" and "quantile" labels values upon ingestion
This commit is contained in:
Ayoub Mrini 2024-10-19 21:13:03 +02:00 committed by GitHub
commit d8c1605930
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 73 additions and 61 deletions

View File

@ -86,58 +86,6 @@ the corresponding classic histogram, with the notable exception of exemplars,
which are always ingested. To keep the classic histograms as well, enable
`always_scrape_classic_histograms` in the scrape job.
_Note about the format of `le` and `quantile` label values:_
In certain situations, the protobuf parsing changes the number formatting of
the `le` labels of classic histograms and the `quantile` labels of
summaries. Typically, this happens if the scraped target is instrumented with
[client_golang](https://github.com/prometheus/client_golang) provided that
[promhttp.HandlerOpts.EnableOpenMetrics](https://pkg.go.dev/github.com/prometheus/client_golang/prometheus/promhttp#HandlerOpts)
is set to `false`. In such a case, integer label values are represented in the
text format as such, e.g. `quantile="1"` or `le="2"`. However, the protobuf parsing
changes the representation to float-like (following the OpenMetrics
specification), so the examples above become `quantile="1.0"` and `le="2.0"` after
ingestion into Prometheus, which changes the identity of the metric compared to
what was ingested before via the text format.
The effect of this change is that alerts, recording rules and dashboards that
directly reference label values as whole numbers such as `le="1"` will stop
working.
Aggregation by the `le` and `quantile` labels for vectors that contain the old and
new formatting will lead to unexpected results, and range vectors that span the
transition between the different formatting will contain additional series.
The most common use case for both is the quantile calculation via
`histogram_quantile`, e.g.
`histogram_quantile(0.95, sum by (le) (rate(histogram_bucket[10m])))`.
The `histogram_quantile` function already tries to mitigate the effects to some
extent, but there will be inaccuracies, in particular for shorter ranges that
cover only a few samples.
Ways to deal with this change either globally or on a per metric basis:
- Fix references to integer `le`, `quantile` label values, but otherwise do
nothing and accept that some queries that span the transition time will produce
inaccurate or unexpected results.
_This is the recommended solution, to get consistently normalized label values._
Also Prometheus 3.0 is expected to enforce normalization of these label values.
- Use `metric_relabel_configs` to retain the old labels when scraping targets.
This should **only** be applied to metrics that currently produce such labels.
<!-- The following config snippet is unit tested in scrape/scrape_test.go. -->
```yaml
metric_relabel_configs:
- source_labels:
- quantile
target_label: quantile
regex: (\d+)\.0+
- source_labels:
- le
- __name__
target_label: le
regex: (\d+)\.0+;.*_bucket
```
## Experimental PromQL functions
`--enable-feature=promql-experimental-functions`

View File

@ -25,8 +25,7 @@ import (
"github.com/prometheus/prometheus/model/labels"
)
// Parser parses samples from a byte slice of samples in the official
// Prometheus and OpenMetrics text exposition formats.
// Parser parses samples from a byte slice of samples in different exposition formats.
type Parser interface {
// Series returns the bytes of a series with a simple float64 as a
// value, the timestamp if set, and the value of the current sample.
@ -60,6 +59,8 @@ type Parser interface {
// Metric writes the labels of the current sample into the passed labels.
// It returns the string from which the metric was parsed.
// The values of the "le" labels of classic histograms and "quantile" labels
// of summaries should follow the OpenMetrics formatting rules.
Metric(l *labels.Labels) string
// Exemplar writes the exemplar of the current sample into the passed

View File

@ -22,6 +22,7 @@ import (
"fmt"
"io"
"math"
"strconv"
"strings"
"unicode/utf8"
@ -210,7 +211,7 @@ func (p *OpenMetricsParser) Metric(l *labels.Labels) string {
label := unreplace(s[a:b])
c := p.offsets[i+2] - p.start
d := p.offsets[i+3] - p.start
value := unreplace(s[c:d])
value := normalizeFloatsInLabelValues(p.mtype, label, unreplace(s[c:d]))
p.builder.Add(label, value)
}
@ -723,3 +724,15 @@ func (p *OpenMetricsParser) getFloatValue(t token, after string) (float64, error
}
return val, nil
}
// normalizeFloatsInLabelValues rewrites the value v of label l so that it
// follows the OpenMetrics float formatting rules, but only for the "quantile"
// label of summaries and the "le" label of classic histograms.
// Any other metric type/label combination, as well as values that cannot be
// parsed as a float, are returned unchanged.
func normalizeFloatsInLabelValues(t model.MetricType, l, v string) string {
	switch {
	case t == model.MetricTypeSummary && l == model.QuantileLabel:
		// Summary quantile: candidate for normalization.
	case t == model.MetricTypeHistogram && l == model.BucketLabel:
		// Classic histogram bucket boundary: candidate for normalization.
	default:
		return v
	}

	parsed, err := strconv.ParseFloat(v, 64)
	if err != nil {
		// Not a float; keep the value exactly as it was exposed.
		return v
	}
	return formatOpenMetricsFloat(parsed)
}

View File

@ -74,6 +74,7 @@ foo_total{a="b"} 17.0 1520879607.789 # {id="counter-test"} 5
foo_created{a="b"} 1520872607.123
foo_total{le="c"} 21.0
foo_created{le="c"} 1520872621.123
foo_total{le="1"} 10.0
# HELP bar Summary with CT at the end, making sure we find CT even if it's multiple lines a far
# TYPE bar summary
bar_count 17.0
@ -97,6 +98,7 @@ something_count 18
something_sum 324789.4
something_created 1520430001
something_bucket{le="0.0"} 1
something_bucket{le="1"} 2
something_bucket{le="+Inf"} 18
# HELP yum Summary with _created between sum and quantiles
# TYPE yum summary
@ -130,7 +132,7 @@ foobar{quantile="0.99"} 150.1`
}, {
m: `go_gc_duration_seconds{quantile="0"}`,
v: 4.9351e-05,
lset: labels.FromStrings("__name__", "go_gc_duration_seconds", "quantile", "0"),
lset: labels.FromStrings("__name__", "go_gc_duration_seconds", "quantile", "0.0"),
}, {
m: `go_gc_duration_seconds{quantile="0.25"}`,
v: 7.424100000000001e-05,
@ -302,6 +304,10 @@ foobar{quantile="0.99"} 150.1`
v: 21.0,
lset: labels.FromStrings("__name__", "foo_total", "le", "c"),
ct: int64p(1520872621123),
}, {
m: `foo_total{le="1"}`,
v: 10.0,
lset: labels.FromStrings("__name__", "foo_total", "le", "1"),
}, {
m: "bar",
help: "Summary with CT at the end, making sure we find CT even if it's multiple lines a far",
@ -385,6 +391,11 @@ foobar{quantile="0.99"} 150.1`
v: 1,
lset: labels.FromStrings("__name__", "something_bucket", "le", "0.0"),
ct: int64p(1520430001000),
}, {
m: `something_bucket{le="1"}`,
v: 2,
lset: labels.FromStrings("__name__", "something_bucket", "le", "1.0"),
ct: int64p(1520430001000),
}, {
m: `something_bucket{le="+Inf"}`,
v: 18,
@ -492,7 +503,7 @@ func TestUTF8OpenMetricsParse(t *testing.T) {
}, {
m: `{"go.gc_duration_seconds",quantile="0"}`,
v: 4.9351e-05,
lset: labels.FromStrings("__name__", "go.gc_duration_seconds", "quantile", "0"),
lset: labels.FromStrings("__name__", "go.gc_duration_seconds", "quantile", "0.0"),
ct: int64p(1520872607123),
}, {
m: `{"go.gc_duration_seconds",quantile="0.25"}`,

View File

@ -239,7 +239,8 @@ func (p *PromParser) Metric(l *labels.Labels) string {
label := unreplace(s[a:b])
c := p.offsets[i+2] - p.start
d := p.offsets[i+3] - p.start
value := unreplace(s[c:d])
value := normalizeFloatsInLabelValues(p.mtype, label, unreplace(s[c:d]))
p.builder.Add(label, value)
}

View File

@ -31,6 +31,13 @@ go_gc_duration_seconds{quantile="0.25",} 7.424100000000001e-05
go_gc_duration_seconds{quantile="0.5",a="b"} 8.3835e-05
go_gc_duration_seconds{quantile="0.8", a="b"} 8.3835e-05
go_gc_duration_seconds{ quantile="0.9", a="b"} 8.3835e-05
# HELP prometheus_http_request_duration_seconds Histogram of latencies for HTTP requests.
# TYPE prometheus_http_request_duration_seconds histogram
prometheus_http_request_duration_seconds_bucket{handler="/",le="1"} 423
prometheus_http_request_duration_seconds_bucket{handler="/",le="2"} 1423
prometheus_http_request_duration_seconds_bucket{handler="/",le="+Inf"} 1423
prometheus_http_request_duration_seconds_sum{handler="/"} 2000
prometheus_http_request_duration_seconds_count{handler="/"} 1423
# Hrandom comment starting with prefix of HELP
#
wind_speed{A="2",c="3"} 12345
@ -50,7 +57,8 @@ some:aggregate:rate5m{a_b="c"} 1
go_goroutines 33 123123
_metric_starting_with_underscore 1
testmetric{_label_starting_with_underscore="foo"} 1
testmetric{label="\"bar\""} 1`
testmetric{label="\"bar\""} 1
testmetric{le="10"} 1`
input += "\n# HELP metric foo\x00bar"
input += "\nnull_byte_metric{a=\"abc\x00\"} 1"
@ -64,7 +72,7 @@ testmetric{label="\"bar\""} 1`
}, {
m: `go_gc_duration_seconds{quantile="0"}`,
v: 4.9351e-05,
lset: labels.FromStrings("__name__", "go_gc_duration_seconds", "quantile", "0"),
lset: labels.FromStrings("__name__", "go_gc_duration_seconds", "quantile", "0.0"),
}, {
m: `go_gc_duration_seconds{quantile="0.25",}`,
v: 7.424100000000001e-05,
@ -81,6 +89,32 @@ testmetric{label="\"bar\""} 1`
m: `go_gc_duration_seconds{ quantile="0.9", a="b"}`,
v: 8.3835e-05,
lset: labels.FromStrings("__name__", "go_gc_duration_seconds", "quantile", "0.9", "a", "b"),
}, {
m: "prometheus_http_request_duration_seconds",
help: "Histogram of latencies for HTTP requests.",
}, {
m: "prometheus_http_request_duration_seconds",
typ: model.MetricTypeHistogram,
}, {
m: `prometheus_http_request_duration_seconds_bucket{handler="/",le="1"}`,
v: 423,
lset: labels.FromStrings("__name__", "prometheus_http_request_duration_seconds_bucket", "handler", "/", "le", "1.0"),
}, {
m: `prometheus_http_request_duration_seconds_bucket{handler="/",le="2"}`,
v: 1423,
lset: labels.FromStrings("__name__", "prometheus_http_request_duration_seconds_bucket", "handler", "/", "le", "2.0"),
}, {
m: `prometheus_http_request_duration_seconds_bucket{handler="/",le="+Inf"}`,
v: 1423,
lset: labels.FromStrings("__name__", "prometheus_http_request_duration_seconds_bucket", "handler", "/", "le", "+Inf"),
}, {
m: `prometheus_http_request_duration_seconds_sum{handler="/"}`,
v: 2000,
lset: labels.FromStrings("__name__", "prometheus_http_request_duration_seconds_sum", "handler", "/"),
}, {
m: `prometheus_http_request_duration_seconds_count{handler="/"}`,
v: 1423,
lset: labels.FromStrings("__name__", "prometheus_http_request_duration_seconds_count", "handler", "/"),
}, {
comment: "# Hrandom comment starting with prefix of HELP",
}, {
@ -151,6 +185,10 @@ testmetric{label="\"bar\""} 1`
m: "testmetric{label=\"\\\"bar\\\"\"}",
v: 1,
lset: labels.FromStrings("__name__", "testmetric", "label", `"bar"`),
}, {
m: `testmetric{le="10"}`,
v: 1,
lset: labels.FromStrings("__name__", "testmetric", "le", "10"),
}, {
m: "metric",
help: "foo\x00bar",
@ -197,7 +235,7 @@ func TestUTF8PromParse(t *testing.T) {
}, {
m: `{"go.gc_duration_seconds",quantile="0"}`,
v: 4.9351e-05,
lset: labels.FromStrings("__name__", "go.gc_duration_seconds", "quantile", "0"),
lset: labels.FromStrings("__name__", "go.gc_duration_seconds", "quantile", "0.0"),
}, {
m: `{"go.gc_duration_seconds",quantile="0.25",}`,
v: 7.424100000000001e-05,