diff --git a/AUTHORS.md b/AUTHORS.md index 84944206..bc6c4c16 100644 --- a/AUTHORS.md +++ b/AUTHORS.md @@ -16,6 +16,7 @@ The following individuals have contributed code to this repository * Björn Rabenstein * Brian Brazil * Daniel Speichert +* Ed Schouten * Eric Ripa * Fabian Reinartz * Franklin Wise diff --git a/README.md b/README.md index f20fc720..ee806e1a 100644 --- a/README.md +++ b/README.md @@ -42,6 +42,7 @@ Name | Description | OS ---------|-------------|---- bonding | Exposes the number of configured and active slaves of Linux bonding interfaces. | Linux devstat | Exposes device statistics | Dragonfly, FreeBSD +drbd | Exposes Distributed Replicated Block Device statistics | Linux gmond | Exposes statistics from Ganglia. | _any_ interrupts | Exposes detailed interrupts statistics. | Linux, OpenBSD ipvs | Exposes IPVS status from `/proc/net/ip_vs` and stats from `/proc/net/ip_vs_stats`. | Linux diff --git a/collector/drbd_linux.go b/collector/drbd_linux.go new file mode 100644 index 00000000..d5f4cd5e --- /dev/null +++ b/collector/drbd_linux.go @@ -0,0 +1,213 @@ +// Copyright 2016 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package collector + +import ( + "bufio" + "fmt" + "os" + "strconv" + "strings" + + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/common/log" +) + +// Numerical metric provided by /proc/drbd. +type drbdNumericalMetric struct { + desc *prometheus.Desc + valueType prometheus.ValueType + multiplier float64 +} + +func newDRBDNumericalMetric(name string, desc string, valueType prometheus.ValueType, multiplier float64) drbdNumericalMetric { + return drbdNumericalMetric{ + desc: prometheus.NewDesc( + prometheus.BuildFQName(Namespace, "drbd", name), + desc, + []string{"device"}, nil), + valueType: valueType, + multiplier: multiplier, + } +} + +// String pair metric provided by /proc/drbd. +type drbdStringPairMetric struct { + desc *prometheus.Desc + valueOkay string +} + +func (metric *drbdStringPairMetric) isOkay(value string) float64 { + if value == metric.valueOkay { + return 1 + } + return 0 +} + +func newDRBDStringPairMetric(name string, desc string, valueOkay string) drbdStringPairMetric { + return drbdStringPairMetric{ + desc: prometheus.NewDesc( + prometheus.BuildFQName(Namespace, "drbd", name), + desc, + []string{"device", "node"}, nil), + valueOkay: valueOkay, + } +} + +var ( + drbdNumericalMetrics = map[string]drbdNumericalMetric{ + "ns": newDRBDNumericalMetric( + "network_sent_bytes_total", + "Total number of bytes sent via the network.", + prometheus.CounterValue, + 1024), + "nr": newDRBDNumericalMetric( + "network_received_bytes_total", + "Total number of bytes received via the network.", + prometheus.CounterValue, + 1), + "dw": newDRBDNumericalMetric( + "disk_written_bytes_total", + "Net data written on local hard disk; in bytes.", + prometheus.CounterValue, + 1024), + "dr": newDRBDNumericalMetric( + "disk_read_bytes_total", + "Net data read from local hard disk; in bytes.", + prometheus.CounterValue, + 1024), + "al": newDRBDNumericalMetric( + "activitylog_writes_total", + "Number of updates of the activity log area of the meta data.", + prometheus.CounterValue, + 1), + "bm": newDRBDNumericalMetric( + "bitmap_writes_total", + "Number of updates of the bitmap area of the meta data.", + prometheus.CounterValue, + 1), + "lo": newDRBDNumericalMetric( + "local_pending", + "Number of open requests to the local I/O sub-system.", + prometheus.GaugeValue, + 1), + "pe": newDRBDNumericalMetric( + "remote_pending", + "Number of requests sent to the peer, but that have not yet been answered by the latter.", + prometheus.GaugeValue, + 1), + "ua": newDRBDNumericalMetric( + "remote_unacknowledged", + "Number of requests received by the peer via the network connection, but that have not yet been answered.", + prometheus.GaugeValue, + 1), + "ap": newDRBDNumericalMetric( + "application_pending", + "Number of block I/O requests forwarded to DRBD, but not yet answered by DRBD.", + prometheus.GaugeValue, + 1), + "ep": newDRBDNumericalMetric( + "epochs", + "Number of Epochs currently on the fly.", + prometheus.GaugeValue, + 1), + "oos": newDRBDNumericalMetric( + "out_of_sync_bytes", + "Amount of data known to be out of sync; in bytes.", + prometheus.GaugeValue, + 1024), + } + drbdStringPairMetrics = map[string]drbdStringPairMetric{ + "ro": newDRBDStringPairMetric( + "node_role_is_primary", + "Whether the role of the node is in the primary state.", + "Primary"), + "ds": newDRBDStringPairMetric( + "disk_state_is_up_to_date", + "Whether the disk of the node is up to date.", + "UpToDate"), + } + + drbdConnected = prometheus.NewDesc( + prometheus.BuildFQName(Namespace, "drbd", "connected"), + "Whether DRBD is connected to the peer.", + []string{"device"}, nil) +) + +type drbdCollector struct{} + +func init() { + Factories["drbd"] = newDRBDCollector +} + +func newDRBDCollector() (Collector, error) { + return &drbdCollector{}, nil +} + +func (c *drbdCollector) Update(ch chan<- prometheus.Metric) (err error) { + statsFile := procFilePath("drbd") + file, err := os.Open(statsFile) + if err != nil { + if os.IsNotExist(err) { + log.Debugf("Not collecting DRBD statistics, as %s does not exist: %s", statsFile, err) + return nil + } + return err + } + defer file.Close() + + scanner := bufio.NewScanner(file) + scanner.Split(bufio.ScanWords) + device := "unknown" + for scanner.Scan() { + field := scanner.Text() + if kv := strings.Split(field, ":"); len(kv) == 2 { + if id, err := strconv.ParseUint(kv[0], 10, 64); err == nil && kv[1] == "" { + device = fmt.Sprintf("drbd%d", id) + } else if metric, ok := drbdNumericalMetrics[kv[0]]; ok { + // Numerical value. + value, err := strconv.ParseFloat(kv[1], 64) + if err != nil { + return err + } + ch <- prometheus.MustNewConstMetric( + metric.desc, metric.valueType, + value*metric.multiplier, device) + } else if metric, ok := drbdStringPairMetrics[kv[0]]; ok { + // String pair value. + values := strings.Split(kv[1], "/") + ch <- prometheus.MustNewConstMetric( + metric.desc, prometheus.GaugeValue, + metric.isOkay(values[0]), device, "local") + ch <- prometheus.MustNewConstMetric( + metric.desc, prometheus.GaugeValue, + metric.isOkay(values[1]), device, "remote") + } else if kv[0] == "cs" { + // Connection state. + var connected float64 + if kv[1] == "Connected" { + connected = 1 + } + ch <- prometheus.MustNewConstMetric( + drbdConnected, prometheus.GaugeValue, + connected, device) + } else { + log.Debugf("Don't know how to process key-value pair [%s: %q]", kv[0], kv[1]) + } + } else { + log.Debugf("Don't know how to process string %q", field) + } + } + return scanner.Err() +} diff --git a/collector/fixtures/e2e-output.txt b/collector/fixtures/e2e-output.txt index c2c8560e..9276aad6 100644 --- a/collector/fixtures/e2e-output.txt +++ b/collector/fixtures/e2e-output.txt @@ -375,6 +375,53 @@ node_disk_writes_merged{device="nvme0n1"} 43950 node_disk_writes_merged{device="sda"} 1.1134226e+07 node_disk_writes_merged{device="sr0"} 0 node_disk_writes_merged{device="vda"} 2.0711856e+07 +# HELP node_drbd_activitylog_writes_total Number of updates of the activity log area of the meta data. +# TYPE node_drbd_activitylog_writes_total counter +node_drbd_activitylog_writes_total{device="drbd1"} 1100 +# HELP node_drbd_application_pending Number of block I/O requests forwarded to DRBD, but not yet answered by DRBD. +# TYPE node_drbd_application_pending gauge +node_drbd_application_pending{device="drbd1"} 12348 +# HELP node_drbd_bitmap_writes_total Number of updates of the bitmap area of the meta data. +# TYPE node_drbd_bitmap_writes_total counter +node_drbd_bitmap_writes_total{device="drbd1"} 221 +# HELP node_drbd_connected Whether DRBD is connected to the peer. +# TYPE node_drbd_connected gauge +node_drbd_connected{device="drbd1"} 1 +# HELP node_drbd_disk_read_bytes_total Net data read from local hard disk; in bytes. +# TYPE node_drbd_disk_read_bytes_total counter +node_drbd_disk_read_bytes_total{device="drbd1"} 1.2154539008e+11 +# HELP node_drbd_disk_state_is_up_to_date Whether the disk of the node is up to date. +# TYPE node_drbd_disk_state_is_up_to_date gauge +node_drbd_disk_state_is_up_to_date{device="drbd1",node="local"} 1 +node_drbd_disk_state_is_up_to_date{device="drbd1",node="remote"} 1 +# HELP node_drbd_disk_written_bytes_total Net data written on local hard disk; in bytes. +# TYPE node_drbd_disk_written_bytes_total counter +node_drbd_disk_written_bytes_total{device="drbd1"} 2.8941845504e+10 +# HELP node_drbd_epochs Number of Epochs currently on the fly. +# TYPE node_drbd_epochs gauge +node_drbd_epochs{device="drbd1"} 1 +# HELP node_drbd_local_pending Number of open requests to the local I/O sub-system. +# TYPE node_drbd_local_pending gauge +node_drbd_local_pending{device="drbd1"} 12345 +# HELP node_drbd_network_received_bytes_total Total number of bytes received via the network. +# TYPE node_drbd_network_received_bytes_total counter +node_drbd_network_received_bytes_total{device="drbd1"} 1.0961011e+07 +# HELP node_drbd_network_sent_bytes_total Total number of bytes sent via the network. +# TYPE node_drbd_network_sent_bytes_total counter +node_drbd_network_sent_bytes_total{device="drbd1"} 1.7740228608e+10 +# HELP node_drbd_node_role_is_primary Whether the role of the node is in the primary state. +# TYPE node_drbd_node_role_is_primary gauge +node_drbd_node_role_is_primary{device="drbd1",node="local"} 1 +node_drbd_node_role_is_primary{device="drbd1",node="remote"} 1 +# HELP node_drbd_out_of_sync_bytes Amount of data known to be out of sync; in bytes. +# TYPE node_drbd_out_of_sync_bytes gauge +node_drbd_out_of_sync_bytes{device="drbd1"} 1.2645376e+07 +# HELP node_drbd_remote_pending Number of requests sent to the peer, but that have not yet been answered by the latter. +# TYPE node_drbd_remote_pending gauge +node_drbd_remote_pending{device="drbd1"} 12346 +# HELP node_drbd_remote_unacknowledged Number of requests received by the peer via the network connection, but that have not yet been answered. +# TYPE node_drbd_remote_unacknowledged gauge +node_drbd_remote_unacknowledged{device="drbd1"} 12347 # HELP node_entropy_available_bits Bits of available entropy. # TYPE node_entropy_available_bits gauge node_entropy_available_bits 1337 diff --git a/collector/fixtures/proc/drbd b/collector/fixtures/proc/drbd new file mode 100644 index 00000000..77d16440 --- /dev/null +++ b/collector/fixtures/proc/drbd @@ -0,0 +1,5 @@ +version: 8.4.3 (api:1/proto:86-101) +srcversion: 1A9F77B1CA5FF92235C2213 + + 1: cs:Connected ro:Primary/Primary ds:UpToDate/UpToDate C r----- + ns:17324442 nr:10961011 dw:28263521 dr:118696670 al:1100 bm:221 lo:12345 pe:12346 ua:12347 ap:12348 ep:1 wo:d oos:12349 diff --git a/end-to-end-test.sh b/end-to-end-test.sh index 3faf104f..37e72b8a 100755 --- a/end-to-end-test.sh +++ b/end-to-end-test.sh @@ -5,6 +5,7 @@ set -euf -o pipefail collectors=$(cat << COLLECTORS conntrack diskstats + drbd entropy filefd hwmon