// Copyright 2015 The Prometheus Authors // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. // +build !nontp package collector import ( "fmt" "net" "sync" "time" "github.com/beevik/ntp" "github.com/go-kit/kit/log" "github.com/prometheus/client_golang/prometheus" "gopkg.in/alecthomas/kingpin.v2" ) const ( hour24 = 24 * time.Hour // `time` does not export `Day` as Day != 24h because of DST ntpSubsystem = "ntp" ) var ( ntpServer = kingpin.Flag("collector.ntp.server", "NTP server to use for ntp collector").Default("127.0.0.1").String() ntpProtocolVersion = kingpin.Flag("collector.ntp.protocol-version", "NTP protocol version").Default("4").Int() ntpServerIsLocal = kingpin.Flag("collector.ntp.server-is-local", "Certify that collector.ntp.server address is not a public ntp server").Default("false").Bool() ntpIPTTL = kingpin.Flag("collector.ntp.ip-ttl", "IP TTL to use while sending NTP query").Default("1").Int() // 3.46608s ~ 1.5s + PHI * (1 << maxPoll), where 1.5s is MAXDIST from ntp.org, it is 1.0 in RFC5905 // max-distance option is used as-is without phi*(1< 4 { return nil, fmt.Errorf("invalid NTP protocol version %d; must be 2, 3, or 4", *ntpProtocolVersion) } if *ntpOffsetTolerance < 0 { return nil, fmt.Errorf("offset tolerance must be non-negative") } return &ntpCollector{ stratum: typedDesc{prometheus.NewDesc( prometheus.BuildFQName(namespace, ntpSubsystem, "stratum"), "NTPD stratum.", nil, nil, ), prometheus.GaugeValue}, leap: typedDesc{prometheus.NewDesc( prometheus.BuildFQName(namespace, ntpSubsystem, "leap"), "NTPD leap second indicator, 2 bits.", nil, nil, ), prometheus.GaugeValue}, rtt: typedDesc{prometheus.NewDesc( prometheus.BuildFQName(namespace, ntpSubsystem, "rtt_seconds"), "RTT to NTPD.", nil, nil, ), prometheus.GaugeValue}, offset: typedDesc{prometheus.NewDesc( prometheus.BuildFQName(namespace, ntpSubsystem, "offset_seconds"), "ClockOffset between NTP and local clock.", nil, nil, ), prometheus.GaugeValue}, reftime: typedDesc{prometheus.NewDesc( prometheus.BuildFQName(namespace, ntpSubsystem, "reference_timestamp_seconds"), "NTPD ReferenceTime, UNIX timestamp.", nil, nil, ), prometheus.GaugeValue}, rootDelay: typedDesc{prometheus.NewDesc( prometheus.BuildFQName(namespace, ntpSubsystem, "root_delay_seconds"), "NTPD RootDelay.", nil, nil, ), prometheus.GaugeValue}, rootDispersion: typedDesc{prometheus.NewDesc( prometheus.BuildFQName(namespace, ntpSubsystem, "root_dispersion_seconds"), "NTPD RootDispersion.", nil, nil, ), prometheus.GaugeValue}, sanity: typedDesc{prometheus.NewDesc( prometheus.BuildFQName(namespace, ntpSubsystem, "sanity"), "NTPD sanity according to RFC5905 heuristics and configured limits.", nil, nil, ), prometheus.GaugeValue}, logger: logger, }, nil } func (c *ntpCollector) Update(ch chan<- prometheus.Metric) error { resp, err := ntp.QueryWithOptions(*ntpServer, ntp.QueryOptions{ Version: *ntpProtocolVersion, TTL: *ntpIPTTL, Timeout: time.Second, // default `ntpdate` timeout }) if err != nil { return fmt.Errorf("couldn't get SNTP reply: %w", err) } ch <- c.stratum.mustNewConstMetric(float64(resp.Stratum)) ch <- c.leap.mustNewConstMetric(float64(resp.Leap)) ch <- c.rtt.mustNewConstMetric(resp.RTT.Seconds()) ch <- c.offset.mustNewConstMetric(resp.ClockOffset.Seconds()) if resp.ReferenceTime.Unix() > 0 { // Go Zero is 0001-01-01 00:00:00 UTC // NTP Zero is 1900-01-01 00:00:00 UTC // UNIX Zero is 1970-01-01 00:00:00 UTC // so let's keep ALL ancient `reftime` values as zero ch <- c.reftime.mustNewConstMetric(float64(resp.ReferenceTime.UnixNano()) / 1e9) } else { ch <- c.reftime.mustNewConstMetric(0) } ch <- c.rootDelay.mustNewConstMetric(resp.RootDelay.Seconds()) ch <- c.rootDispersion.mustNewConstMetric(resp.RootDispersion.Seconds()) // Here is SNTP packet sanity check that is exposed to move burden of // configuration from node_exporter user to the developer. maxerr := *ntpOffsetTolerance leapMidnightMutex.Lock() if resp.Leap == ntp.LeapAddSecond || resp.Leap == ntp.LeapDelSecond { // state of leapMidnight is cached as leap flag is dropped right after midnight leapMidnight = resp.Time.Truncate(hour24).Add(hour24) } if leapMidnight.Add(-hour24).Before(resp.Time) && resp.Time.Before(leapMidnight.Add(hour24)) { // tolerate leap smearing maxerr += time.Second } leapMidnightMutex.Unlock() if resp.Validate() == nil && resp.RootDistance <= *ntpMaxDistance && resp.MinError <= maxerr { ch <- c.sanity.mustNewConstMetric(1) } else { ch <- c.sanity.mustNewConstMetric(0) } return nil }