feature: add native histogram support to latency metrics (#3737)

Note that this does not stop exposing the classic histogram metrics; for now
it is up to the scrape config to decide whether to keep the classic
histograms, the native histograms, or both.

Signed-off-by: György Krajcsovits <gyorgy.krajcsovits@grafana.com>
George Krajcsovits 2024-02-29 15:53:47 +01:00 committed by GitHub
parent d1fe4b7f6f
commit d85bef20d9
6 changed files with 38 additions and 17 deletions
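
As context for the diffs below, here is a minimal, self-contained sketch (not part of the patch) of the pattern applied in each file: the classic Buckets are kept so existing dashboards and queries keep working, while the NativeHistogram* fields of prometheus.HistogramOpts additionally expose a native (exponential-bucket) histogram to scrapers that support it. The metric name and the helper function are hypothetical; the bucket factor, bucket cap, and reset interval mirror the values used in this commit.

package main

import (
	"time"

	"github.com/prometheus/client_golang/prometheus"
)

// newLatencyHistogram is a hypothetical helper illustrating the options
// used throughout this commit.
func newLatencyHistogram(reg prometheus.Registerer) prometheus.Histogram {
	h := prometheus.NewHistogram(prometheus.HistogramOpts{
		Name:    "example_request_duration_seconds", // hypothetical metric name
		Help:    "Duration of example requests.",
		Buckets: prometheus.DefBuckets, // classic buckets remain exposed

		// Native histogram settings, mirroring the values in this commit:
		// consecutive bucket boundaries differ by a factor of at most 1.1
		// (roughly 10% resolution), at most 100 buckets are tracked, and a
		// full reset of the native histogram happens at most once per hour
		// when that limit is hit.
		NativeHistogramBucketFactor:     1.1,
		NativeHistogramMaxBucketNumber:  100,
		NativeHistogramMinResetDuration: 1 * time.Hour,
	})
	reg.MustRegister(h)
	return h
}

func main() {
	h := newLatencyHistogram(prometheus.NewRegistry())
	h.Observe(0.42) // record a 420ms observation
}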

@@ -70,9 +70,13 @@ func NewChannel(
         ConstLabels: prometheus.Labels{"key": key},
     })
     oversizeGossipDuration := prometheus.NewHistogram(prometheus.HistogramOpts{
-        Name:        "alertmanager_oversize_gossip_message_duration_seconds",
-        Help:        "Duration of oversized gossip message requests.",
-        ConstLabels: prometheus.Labels{"key": key},
+        Name:                            "alertmanager_oversize_gossip_message_duration_seconds",
+        Help:                            "Duration of oversized gossip message requests.",
+        ConstLabels:                     prometheus.Labels{"key": key},
+        Buckets:                         prometheus.DefBuckets,
+        NativeHistogramBucketFactor:     1.1,
+        NativeHistogramMaxBucketNumber:  100,
+        NativeHistogramMinResetDuration: 1 * time.Hour,
     })
     reg.MustRegister(oversizeGossipDuration, oversizeGossipMessageFailureTotal, oversizeGossipMessageDroppedTotal, oversizeGossipMessageSentTotal)

@@ -104,9 +104,12 @@ func newDelegate(l log.Logger, reg prometheus.Registerer, p *Peer, retransmit in
     }, []string{"peer"},
     )
     nodePingDuration := prometheus.NewHistogramVec(prometheus.HistogramOpts{
-        Name:    "alertmanager_cluster_pings_seconds",
-        Help:    "Histogram of latencies for ping messages.",
-        Buckets: []float64{.005, .01, .025, .05, .1, .25, .5},
+        Name:                            "alertmanager_cluster_pings_seconds",
+        Help:                            "Histogram of latencies for ping messages.",
+        Buckets:                         []float64{.005, .01, .025, .05, .1, .25, .5},
+        NativeHistogramBucketFactor:     1.1,
+        NativeHistogramMaxBucketNumber:  100,
+        NativeHistogramMinResetDuration: 1 * time.Hour,
     }, []string{"peer"},
     )

@@ -64,9 +64,12 @@ import (
 var (
     requestDuration = prometheus.NewHistogramVec(
         prometheus.HistogramOpts{
-            Name:    "alertmanager_http_request_duration_seconds",
-            Help:    "Histogram of latencies for HTTP requests.",
-            Buckets: []float64{.05, 0.1, .25, .5, .75, 1, 2, 5, 20, 60},
+            Name:                            "alertmanager_http_request_duration_seconds",
+            Help:                            "Histogram of latencies for HTTP requests.",
+            Buckets:                         []float64{.05, 0.1, .25, .5, .75, 1, 2, 5, 20, 60},
+            NativeHistogramBucketFactor:     1.1,
+            NativeHistogramMaxBucketNumber:  100,
+            NativeHistogramMinResetDuration: 1 * time.Hour,
         },
         []string{"handler", "method"},
     )

@@ -139,8 +139,12 @@ func newMetrics(r prometheus.Registerer) *metrics {
         Help: "Number notification log received queries that failed.",
     })
     m.queryDuration = prometheus.NewHistogram(prometheus.HistogramOpts{
-        Name: "alertmanager_nflog_query_duration_seconds",
-        Help: "Duration of notification log query evaluation.",
+        Name:                            "alertmanager_nflog_query_duration_seconds",
+        Help:                            "Duration of notification log query evaluation.",
+        Buckets:                         prometheus.DefBuckets,
+        NativeHistogramBucketFactor:     1.1,
+        NativeHistogramMaxBucketNumber:  100,
+        NativeHistogramMinResetDuration: 1 * time.Hour,
     })
     m.propagatedMessagesTotal = prometheus.NewCounter(prometheus.CounterOpts{
         Name: "alertmanager_nflog_gossip_messages_propagated_total",

@@ -291,10 +291,13 @@ func NewMetrics(r prometheus.Registerer, ff featurecontrol.Flagger) *Metrics {
             Help: "The total number of notifications suppressed for being silenced, inhibited, outside of active time intervals or within muted time intervals.",
         }, []string{"reason"}),
         notificationLatencySeconds: prometheus.NewHistogramVec(prometheus.HistogramOpts{
-            Namespace: "alertmanager",
-            Name:      "notification_latency_seconds",
-            Help:      "The latency of notifications in seconds.",
-            Buckets:   []float64{1, 5, 10, 15, 20},
+            Namespace:                       "alertmanager",
+            Name:                            "notification_latency_seconds",
+            Help:                            "The latency of notifications in seconds.",
+            Buckets:                         []float64{1, 5, 10, 15, 20},
+            NativeHistogramBucketFactor:     1.1,
+            NativeHistogramMaxBucketNumber:  100,
+            NativeHistogramMinResetDuration: 1 * time.Hour,
         }, labels),
         ff: ff,
     }

@@ -271,8 +271,12 @@ func newMetrics(r prometheus.Registerer, s *Silences) *metrics {
         Help: "How many silence received queries did not succeed.",
     })
     m.queryDuration = prometheus.NewHistogram(prometheus.HistogramOpts{
-        Name: "alertmanager_silences_query_duration_seconds",
-        Help: "Duration of silence query evaluation.",
+        Name:                            "alertmanager_silences_query_duration_seconds",
+        Help:                            "Duration of silence query evaluation.",
+        Buckets:                         prometheus.DefBuckets,
+        NativeHistogramBucketFactor:     1.1,
+        NativeHistogramMaxBucketNumber:  100,
+        NativeHistogramMinResetDuration: 1 * time.Hour,
     })
     m.propagatedMessagesTotal = prometheus.NewCounter(prometheus.CounterOpts{
         Name: "alertmanager_silences_gossip_messages_propagated_total",