Allow tuning the scrape tolerance (#9283)
* Allow tuning the scrape tolerance In most classic monitoring use cases, a difference of a few milliseconds can be ignored. In Prometheus, however, a difference of a few milliseconds can have a large impact. Currently, Prometheus ignores up to 2ms of difference when aligning scrape timestamps. It turns out that for users who can afford a 10ms difference, there are significant resource and disk space savings to be had, as shown in this graph of bytes per sample on a production Prometheus server. You can clearly see the switch from a 2ms to a 10ms tolerance. This pull request makes the scrape timestamp alignment tolerance adjustable. Signed-off-by: Julien Pivotto <roidelapluie@inuits.eu> * Fix golint Signed-off-by: Julien Pivotto <roidelapluie@inuits.eu>
This commit is contained in:
parent
2327236bb5
commit
48a101be1b
|
@ -295,9 +295,12 @@ func main() {
|
|||
a.Flag("rules.alert.resend-delay", "Minimum amount of time to wait before resending an alert to Alertmanager.").
|
||||
Default("1m").SetValue(&cfg.resendDelay)
|
||||
|
||||
a.Flag("scrape.adjust-timestamps", "Adjust scrape timestamps by up to 2ms to align them to the intended schedule. See https://github.com/prometheus/prometheus/issues/7846 for more context. Experimental. This flag will be removed in a future release.").
|
||||
a.Flag("scrape.adjust-timestamps", "Adjust scrape timestamps by up to `scrape.timestamp-tolerance` to align them to the intended schedule. See https://github.com/prometheus/prometheus/issues/7846 for more context. Experimental. This flag will be removed in a future release.").
|
||||
Hidden().Default("true").BoolVar(&scrape.AlignScrapeTimestamps)
|
||||
|
||||
a.Flag("scrape.timestamp-tolerance", "Timestamp tolerance. See https://github.com/prometheus/prometheus/issues/7846 for more context. Experimental. This flag will be removed in a future release.").
|
||||
Hidden().Default("2ms").DurationVar(&scrape.ScrapeTimestampTolerance)
|
||||
|
||||
a.Flag("alertmanager.notification-queue-capacity", "The capacity of the queue for pending Alertmanager notifications.").
|
||||
Default("10000").IntVar(&cfg.notifier.QueueCapacity)
|
||||
|
||||
|
|
|
@ -49,10 +49,10 @@ import (
|
|||
"github.com/prometheus/prometheus/storage"
|
||||
)
|
||||
|
||||
// Temporary tolerance for scrape appends timestamps alignment, to enable better
|
||||
// compression at the TSDB level.
|
||||
// ScrapeTimestampTolerance is the tolerance for scrape appends timestamps
|
||||
// alignment, to enable better compression at the TSDB level.
|
||||
// See https://github.com/prometheus/prometheus/issues/7846
|
||||
const scrapeTimestampTolerance = 2 * time.Millisecond
|
||||
var ScrapeTimestampTolerance = 2 * time.Millisecond
|
||||
|
||||
// AlignScrapeTimestamps enables the scrape timestamp alignment tolerance described above.
|
||||
var AlignScrapeTimestamps = true
|
||||
|
@ -1159,14 +1159,14 @@ mainLoop:
|
|||
// Calling Round ensures the time used is the wall clock, as otherwise .Sub
|
||||
// and .Add on time.Time behave differently (see time package docs).
|
||||
scrapeTime := time.Now().Round(0)
|
||||
if AlignScrapeTimestamps && sl.interval > 100*scrapeTimestampTolerance {
|
||||
if AlignScrapeTimestamps && sl.interval > 100*ScrapeTimestampTolerance {
|
||||
// For some reason, a tick might have been skipped, in which case we
|
||||
// would call alignedScrapeTime.Add(interval) multiple times.
|
||||
for scrapeTime.Sub(alignedScrapeTime) >= sl.interval {
|
||||
alignedScrapeTime = alignedScrapeTime.Add(sl.interval)
|
||||
}
|
||||
// Align the scrape time if we are in the tolerance boundaries.
|
||||
if scrapeTime.Sub(alignedScrapeTime) <= scrapeTimestampTolerance {
|
||||
if scrapeTime.Sub(alignedScrapeTime) <= ScrapeTimestampTolerance {
|
||||
scrapeTime = alignedScrapeTime
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue