From 48a101be1b59dce6f9285b40d686be24f7793473 Mon Sep 17 00:00:00 2001 From: Julien Pivotto Date: Wed, 8 Sep 2021 13:57:33 +0200 Subject: [PATCH] Allow to tune the scrape tolerance (#9283) * Allow to tune the scrape tolerance In most classic monitoring use cases, a difference of a few milliseconds can be ignored. In Prometheus, however, a few milliseconds can make a big difference. Currently, Prometheus will ignore up to 2ms of difference in the timestamp alignment. It turns out that for users who can afford a 10ms difference, there are a lot of resources and disk space to be saved, as shown in this graph, which shows the bytes / samples over a production Prometheus server. You can clearly see the switch from 2ms to 10ms tolerance. This pull request enables the adjustment of the scrape timestamp alignment tolerance. Signed-off-by: Julien Pivotto * Fix golint Signed-off-by: Julien Pivotto --- cmd/prometheus/main.go | 5 ++++- scrape/scrape.go | 10 +++++----- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/cmd/prometheus/main.go b/cmd/prometheus/main.go index 9251765cf..6eb850f38 100644 --- a/cmd/prometheus/main.go +++ b/cmd/prometheus/main.go @@ -295,9 +295,12 @@ func main() { a.Flag("rules.alert.resend-delay", "Minimum amount of time to wait before resending an alert to Alertmanager."). Default("1m").SetValue(&cfg.resendDelay) - a.Flag("scrape.adjust-timestamps", "Adjust scrape timestamps by up to 2ms to align them to the intended schedule. See https://github.com/prometheus/prometheus/issues/7846 for more context. Experimental. This flag will be removed in a future release."). + a.Flag("scrape.adjust-timestamps", "Adjust scrape timestamps by up to `scrape.timestamp-tolerance` to align them to the intended schedule. See https://github.com/prometheus/prometheus/issues/7846 for more context. Experimental. This flag will be removed in a future release."). 
Hidden().Default("true").BoolVar(&scrape.AlignScrapeTimestamps) + a.Flag("scrape.timestamp-tolerance", "Timestamp tolerance. See https://github.com/prometheus/prometheus/issues/7846 for more context. Experimental. This flag will be removed in a future release."). + Hidden().Default("2ms").DurationVar(&scrape.ScrapeTimestampTolerance) + a.Flag("alertmanager.notification-queue-capacity", "The capacity of the queue for pending Alertmanager notifications."). Default("10000").IntVar(&cfg.notifier.QueueCapacity) diff --git a/scrape/scrape.go b/scrape/scrape.go index ad0d632a3..9ae9932c6 100644 --- a/scrape/scrape.go +++ b/scrape/scrape.go @@ -49,10 +49,10 @@ import ( "github.com/prometheus/prometheus/storage" ) -// Temporary tolerance for scrape appends timestamps alignment, to enable better -// compression at the TSDB level. +// ScrapeTimestampTolerance is the tolerance for scrape appends timestamps +// alignment, to enable better compression at the TSDB level. // See https://github.com/prometheus/prometheus/issues/7846 -const scrapeTimestampTolerance = 2 * time.Millisecond +var ScrapeTimestampTolerance = 2 * time.Millisecond // AlignScrapeTimestamps enables the tolerance for scrape appends timestamps described above. var AlignScrapeTimestamps = true @@ -1159,14 +1159,14 @@ mainLoop: // Calling Round ensures the time used is the wall clock, as otherwise .Sub // and .Add on time.Time behave differently (see time package docs). scrapeTime := time.Now().Round(0) - if AlignScrapeTimestamps && sl.interval > 100*scrapeTimestampTolerance { + if AlignScrapeTimestamps && sl.interval > 100*ScrapeTimestampTolerance { // For some reason, a tick might have been skipped, in which case we // would call alignedScrapeTime.Add(interval) multiple times. for scrapeTime.Sub(alignedScrapeTime) >= sl.interval { alignedScrapeTime = alignedScrapeTime.Add(sl.interval) } // Align the scrape time if we are in the tolerance boundaries. 
- if scrapeTime.Sub(alignedScrapeTime) <= scrapeTimestampTolerance { + if scrapeTime.Sub(alignedScrapeTime) <= ScrapeTimestampTolerance { scrapeTime = alignedScrapeTime } }