From b50c5d42feb66dfb74d70fcdc0eac9d1a15006e2 Mon Sep 17 00:00:00 2001 From: Arve Knudsen Date: Wed, 21 Aug 2024 15:22:38 +0200 Subject: [PATCH] OTLP receiver: Warn when encountering invalid exponential histograms Signed-off-by: Arve Knudsen --- CHANGELOG.md | 1 + .../prometheusremotewrite/histograms.go | 21 ++++++---- .../prometheusremotewrite/metrics_to_prw.go | 11 ++++-- .../metrics_to_prw_test.go | 39 ++++++++++++++++++- storage/remote/write_handler.go | 9 ++++- 5 files changed, 67 insertions(+), 14 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0036a4a76..6fa8410c9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,7 @@ ## unreleased * [FEATURE] OTLP receiver: Add new option `otlp.promote_resource_attributes`, for any OTel resource attributes that should be promoted to metric labels. #14200 +* [ENHANCEMENT] OTLP receiver: Warn when encountering exponential histograms with zero count and non-zero sum. #14706 * [BUGFIX] tsdb/wlog.Watcher.readSegmentForGC: Only count unknown record types against record_decode_failures_total metric. #14042 ## 2.54.0-rc.1 / 2024-08-05 diff --git a/storage/remote/otlptranslator/prometheusremotewrite/histograms.go b/storage/remote/otlptranslator/prometheusremotewrite/histograms.go index 73528019d..ec93387fc 100644 --- a/storage/remote/otlptranslator/prometheusremotewrite/histograms.go +++ b/storage/remote/otlptranslator/prometheusremotewrite/histograms.go @@ -26,6 +26,7 @@ import ( "github.com/prometheus/prometheus/model/value" "github.com/prometheus/prometheus/prompb" + "github.com/prometheus/prometheus/util/annotations" ) const defaultZeroThreshold = 1e-128 @@ -33,13 +34,15 @@ const defaultZeroThreshold = 1e-128 // addExponentialHistogramDataPoints adds OTel exponential histogram data points to the corresponding time series // as native histogram samples. func (c *PrometheusConverter) addExponentialHistogramDataPoints(dataPoints pmetric.ExponentialHistogramDataPointSlice, - resource pcommon.Resource, settings Settings, promName string) error { + resource pcommon.Resource, settings Settings, promName string) (annotations.Annotations, error) { + var annots annotations.Annotations for x := 0; x < dataPoints.Len(); x++ { pt := dataPoints.At(x) - histogram, err := exponentialToNativeHistogram(pt) + histogram, ws, err := exponentialToNativeHistogram(pt) + annots.Merge(ws) if err != nil { - return err + return annots, err } lbls := createAttributes( @@ -58,15 +61,16 @@ func (c *PrometheusConverter) addExponentialHistogramDataPoints(dataPoints pmetr ts.Exemplars = append(ts.Exemplars, exemplars...) } - return nil + return annots, nil } // exponentialToNativeHistogram translates OTel Exponential Histogram data point // to Prometheus Native Histogram. -func exponentialToNativeHistogram(p pmetric.ExponentialHistogramDataPoint) (prompb.Histogram, error) { +func exponentialToNativeHistogram(p pmetric.ExponentialHistogramDataPoint) (prompb.Histogram, annotations.Annotations, error) { + var annots annotations.Annotations scale := p.Scale() if scale < -4 { - return prompb.Histogram{}, + return prompb.Histogram{}, annots, fmt.Errorf("cannot convert exponential to native histogram."+ " Scale must be >= -4, was %d", scale) } @@ -114,8 +118,11 @@ func exponentialToNativeHistogram(p pmetric.ExponentialHistogramDataPoint) (prom h.Sum = p.Sum() } h.Count = &prompb.Histogram_CountInt{CountInt: p.Count()} + if p.Count() == 0 && h.Sum != 0 { + annots.Add(fmt.Errorf("exponential histogram data point has zero count, but non-zero sum: %f", h.Sum)) + } } - return h, nil + return h, annots, nil } // convertBucketsLayout translates OTel Exponential Histogram dense buckets diff --git a/storage/remote/otlptranslator/prometheusremotewrite/metrics_to_prw.go b/storage/remote/otlptranslator/prometheusremotewrite/metrics_to_prw.go index a3a789723..9d7680080 100644 --- a/storage/remote/otlptranslator/prometheusremotewrite/metrics_to_prw.go +++ b/storage/remote/otlptranslator/prometheusremotewrite/metrics_to_prw.go @@ -27,6 +27,7 @@ import ( "github.com/prometheus/prometheus/prompb" prometheustranslator "github.com/prometheus/prometheus/storage/remote/otlptranslator/prometheus" + "github.com/prometheus/prometheus/util/annotations" ) type Settings struct { @@ -53,7 +54,7 @@ func NewPrometheusConverter() *PrometheusConverter { } // FromMetrics converts pmetric.Metrics to Prometheus remote write format. -func (c *PrometheusConverter) FromMetrics(md pmetric.Metrics, settings Settings) (errs error) { +func (c *PrometheusConverter) FromMetrics(md pmetric.Metrics, settings Settings) (annots annotations.Annotations, errs error) { resourceMetricsSlice := md.ResourceMetrics() for i := 0; i < resourceMetricsSlice.Len(); i++ { resourceMetrics := resourceMetricsSlice.At(i) @@ -107,12 +108,14 @@ func (c *PrometheusConverter) FromMetrics(md pmetric.Metrics, settings Settings) errs = multierr.Append(errs, fmt.Errorf("empty data points. %s is dropped", metric.Name())) break } - errs = multierr.Append(errs, c.addExponentialHistogramDataPoints( + ws, err := c.addExponentialHistogramDataPoints( dataPoints, resource, settings, promName, - )) + ) + annots.Merge(ws) + errs = multierr.Append(errs, err) case pmetric.MetricTypeSummary: dataPoints := metric.Summary().DataPoints() if dataPoints.Len() == 0 { @@ -128,7 +131,7 @@ func (c *PrometheusConverter) FromMetrics(md pmetric.Metrics, settings Settings) addResourceTargetInfo(resource, settings, mostRecentTimestamp, c) } - return + return annots, errs } func isSameMetric(ts *prompb.TimeSeries, lbls []prompb.Label) bool { diff --git a/storage/remote/otlptranslator/prometheusremotewrite/metrics_to_prw_test.go b/storage/remote/otlptranslator/prometheusremotewrite/metrics_to_prw_test.go index 37ac67774..bdc1c9d0b 100644 --- a/storage/remote/otlptranslator/prometheusremotewrite/metrics_to_prw_test.go +++ b/storage/remote/otlptranslator/prometheusremotewrite/metrics_to_prw_test.go @@ -27,6 +27,41 @@ import ( "go.opentelemetry.io/collector/pdata/pmetric/pmetricotlp" ) +func TestFromMetrics(t *testing.T) { + t.Run("exponential histogram warnings for zero count and non-zero sum", func(t *testing.T) { + request := pmetricotlp.NewExportRequest() + rm := request.Metrics().ResourceMetrics().AppendEmpty() + generateAttributes(rm.Resource().Attributes(), "resource", 10) + + metrics := rm.ScopeMetrics().AppendEmpty().Metrics() + ts := pcommon.NewTimestampFromTime(time.Now()) + + for i := 1; i <= 10; i++ { + m := metrics.AppendEmpty() + m.SetEmptyExponentialHistogram() + m.SetName(fmt.Sprintf("histogram-%d", i)) + m.ExponentialHistogram().SetAggregationTemporality(pmetric.AggregationTemporalityCumulative) + h := m.ExponentialHistogram().DataPoints().AppendEmpty() + h.SetTimestamp(ts) + + h.SetCount(0) + h.SetSum(155) + + generateAttributes(h.Attributes(), "series", 10) + } + + converter := NewPrometheusConverter() + annots, err := converter.FromMetrics(request.Metrics(), Settings{}) + require.NoError(t, err) + require.NotEmpty(t, annots) + ws, infos := annots.AsStrings("", 0, 0) + require.Empty(t, infos) + require.Equal(t, []string{ + "exponential histogram data point has zero count, but non-zero sum: 155.000000", + }, ws) + }) +} + func BenchmarkPrometheusConverter_FromMetrics(b *testing.B) { for _, resourceAttributeCount := range []int{0, 5, 50} { b.Run(fmt.Sprintf("resource attribute count: %v", resourceAttributeCount), func(b *testing.B) { @@ -49,7 +84,9 @@ func BenchmarkPrometheusConverter_FromMetrics(b *testing.B) { for i := 0; i < b.N; i++ { converter := NewPrometheusConverter() - require.NoError(b, converter.FromMetrics(payload.Metrics(), Settings{})) + annots, err := converter.FromMetrics(payload.Metrics(), Settings{}) + require.NoError(b, err) + require.Empty(b, annots) require.NotNil(b, converter.TimeSeries()) } }) diff --git a/storage/remote/write_handler.go b/storage/remote/write_handler.go index aba79a561..8ac759046 100644 --- a/storage/remote/write_handler.go +++ b/storage/remote/write_handler.go @@ -502,12 +502,17 @@ func (h *otlpWriteHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) { otlpCfg := h.configFunc().OTLPConfig converter := otlptranslator.NewPrometheusConverter() - if err := converter.FromMetrics(req.Metrics(), otlptranslator.Settings{ + annots, err := converter.FromMetrics(req.Metrics(), otlptranslator.Settings{ AddMetricSuffixes: true, PromoteResourceAttributes: otlpCfg.PromoteResourceAttributes, - }); err != nil { + }) + if err != nil { level.Warn(h.logger).Log("msg", "Error translating OTLP metrics to Prometheus write request", "err", err) } + ws, _ := annots.AsStrings("", 0, 0) + if len(ws) > 0 { + level.Warn(h.logger).Log("msg", "Warnings translating OTLP metrics to Prometheus write request", "warnings", ws) + } err = h.rwHandler.write(r.Context(), &prompb.WriteRequest{ Timeseries: converter.TimeSeries(),