Merge pull request #109 from cherti/mdadm
adding collector for linux-mdadm-software-raid
This commit is contained in:
commit
a10f5b8093
|
@ -34,6 +34,7 @@ netstat | Exposes network statistics from `/proc/net/netstat`. This is the same
|
|||
stat | Exposes various statistics from `/proc/stat`. This includes CPU usage, boot time, forks and interrupts.
|
||||
textfile | Exposes statistics read from local disk. The `--collector.textfile.directory` flag must be set.
|
||||
time | Exposes the current system time.
|
||||
mdadm | Exposes statistics about devices in `/proc/mdstat` (does nothing if no `/proc/mdstat` is present).
|
||||
|
||||
|
||||
### Disabled by default
|
||||
|
|
|
@ -0,0 +1,26 @@
|
|||
Personalities : [linear] [multipath] [raid0] [raid1] [raid6] [raid5] [raid4] [raid10]
|
||||
md3 : active raid6 sda1[8] sdh1[7] sdg1[6] sdf1[5] sde1[11] sdd1[3] sdc1[10] sdb1[9]
|
||||
5853468288 blocks super 1.2 level 6, 64k chunk, algorithm 2 [8/8] [UUUUUUUU]
|
||||
|
||||
md127 : active raid1 sdi2[0] sdj2[1]
|
||||
312319552 blocks [2/2] [UU]
|
||||
|
||||
md0 : active raid1 sdi1[0] sdj1[1]
|
||||
248896 blocks [2/2] [UU]
|
||||
|
||||
md4 : inactive raid1 sda3[0] sdb3[1]
|
||||
4883648 blocks [2/2] [UU]
|
||||
|
||||
md6 : active raid1 sdb2[2] sda2[0]
|
||||
195310144 blocks [2/1] [U_]
|
||||
[=>...................] recovery = 8.5% (16775552/195310144) finish=17.0min speed=259783K/sec
|
||||
|
||||
md8 : active raid1 sdb1[1] sda1[0]
|
||||
195310144 blocks [2/2] [UU]
|
||||
[=>...................] resync = 8.5% (16775552/195310144) finish=17.0min speed=259783K/sec
|
||||
|
||||
md7 : active raid6 sdb1[0] sde1[3] sdd1[2] sdc1[1]
|
||||
7813735424 blocks super 1.2 level 6, 512k chunk, algorithm 2 [4/3] [U_UU]
|
||||
bitmap: 0/30 pages [0KB], 65536KB chunk
|
||||
|
||||
unused devices: <none>
|
|
@ -0,0 +1,279 @@
|
|||
// +build !nomdadm
|
||||
|
||||
package collector
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"os"
|
||||
"regexp"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
"github.com/prometheus/log"
|
||||
)
|
||||
|
||||
// statusfile is the location of the kernel's md status file that the
// collector reads on every update.
var statusfile = "/proc/mdstat"

var (
	// statuslineRE matches the status line of an md-device section and
	// captures, in this order: the size in blocks and the two numbers of the
	// "[n/m]" disk counter that precedes the "[UU_]" disk state field.
	statuslineRE = regexp.MustCompile(`(\d+) blocks .*\[(\d+)/(\d+)\] \[[U_]+\]`)

	// buildlineRE matches the "(synced/total)" pair of a recovery/resync
	// progress line and captures the first number of the pair.
	buildlineRE = regexp.MustCompile(`\((\d+)/\d+\)`)
)
|
||||
|
||||
// mdStatus holds the values parsed from one md-device section of an
// mdstat file. The field order is relied upon by positional composite
// literals elsewhere in the package, so it must not be changed.
type mdStatus struct {
	// mdName is the device name taken from the first word of the device line.
	mdName string
	// isActive is true when the device line reports the state "active".
	isActive bool
	// disksActive and disksTotal are the two disk counters parsed from the
	// status line.
	disksActive, disksTotal int64
	// blocksTotal is the size of the device in blocks; blocksSynced is the
	// number of blocks already synced, which equals blocksTotal when no
	// recovery or resync is in progress.
	blocksTotal, blocksSynced int64
}
|
||||
|
||||
// mdadmCollector exports metrics about the md-devices listed in the mdstat
// status file. It has no fields.
type mdadmCollector struct{}
|
||||
|
||||
func init() {
|
||||
Factories["mdadm"] = NewMdadmCollector
|
||||
}
|
||||
|
||||
func evalStatusline(statusline string) (active, total, size int64, err error) {
|
||||
matches := statuslineRE.FindStringSubmatch(statusline)
|
||||
|
||||
// +1 to make it more obvious that the whole string containing the info is also returned as matches[0].
|
||||
if len(matches) < 3+1 {
|
||||
return 0, 0, 0, fmt.Errorf("too few matches found in statusline: %s", statusline)
|
||||
} else {
|
||||
if len(matches) > 3+1 {
|
||||
return 0, 0, 0, fmt.Errorf("too many matches found in statusline: %s", statusline)
|
||||
}
|
||||
}
|
||||
|
||||
size, err = strconv.ParseInt(matches[1], 10, 64)
|
||||
if err != nil {
|
||||
return 0, 0, 0, fmt.Errorf("%s in statusline: %s", err, statusline)
|
||||
}
|
||||
|
||||
total, err = strconv.ParseInt(matches[2], 10, 64)
|
||||
if err != nil {
|
||||
return 0, 0, 0, fmt.Errorf("%s in statusline: %s", err, statusline)
|
||||
}
|
||||
active, err = strconv.ParseInt(matches[3], 10, 64)
|
||||
if err != nil {
|
||||
return 0, 0, 0, fmt.Errorf("%s in statusline: %s", err, statusline)
|
||||
}
|
||||
|
||||
return active, total, size, nil
|
||||
}
|
||||
|
||||
// Gets the size that has already been synced out of the sync-line.
|
||||
func evalBuildline(buildline string) (int64, error) {
|
||||
matches := buildlineRE.FindStringSubmatch(buildline)
|
||||
|
||||
// +1 to make it more obvious that the whole string containing the info is also returned as matches[0].
|
||||
if len(matches) < 1+1 {
|
||||
return 0, fmt.Errorf("too few matches found in buildline: %s", buildline)
|
||||
}
|
||||
|
||||
if len(matches) > 1+1 {
|
||||
return 0, fmt.Errorf("too many matches found in buildline: %s", buildline)
|
||||
}
|
||||
|
||||
syncedSize, err := strconv.ParseInt(matches[1], 10, 64)
|
||||
|
||||
if err != nil {
|
||||
return 0, fmt.Errorf("%s in buildline: %s", err, buildline)
|
||||
}
|
||||
|
||||
return syncedSize, nil
|
||||
}
|
||||
|
||||
// Parses an mdstat-file and returns a struct with the relevant infos.
|
||||
func parseMdstat(mdStatusFilePath string) ([]mdStatus, error) {
|
||||
content, err := ioutil.ReadFile(mdStatusFilePath)
|
||||
if err != nil {
|
||||
return []mdStatus{}, fmt.Errorf("error parsing %s: %s", statusfile, err)
|
||||
}
|
||||
|
||||
mdStatusFile := string(content)
|
||||
|
||||
lines := strings.Split(mdStatusFile, "\n")
|
||||
var currentMD string
|
||||
|
||||
// Each md has at least the deviceline, statusline and one empty line afterwards
|
||||
// so we will have probably something of the order len(lines)/3 devices
|
||||
// so we use that for preallocation.
|
||||
estimateMDs := len(lines) / 3
|
||||
mdStates := make([]mdStatus, 0, estimateMDs)
|
||||
|
||||
for i, l := range lines {
|
||||
if l == "" {
|
||||
// Skip entirely empty lines.
|
||||
continue
|
||||
}
|
||||
|
||||
if l[0] == ' ' {
|
||||
// Those lines are not the beginning of a md-section.
|
||||
continue
|
||||
}
|
||||
|
||||
if strings.HasPrefix(l, "Personalities") || strings.HasPrefix(l, "unused") {
|
||||
// We aren't interested in lines with general info.
|
||||
continue
|
||||
}
|
||||
|
||||
mainLine := strings.Split(l, " ")
|
||||
if len(mainLine) < 3 {
|
||||
return mdStates, fmt.Errorf("error parsing mdline: %s", l)
|
||||
}
|
||||
currentMD = mainLine[0] // name of md-device
|
||||
isActive := (mainLine[2] == "active") // activity status of said md-device
|
||||
|
||||
if len(lines) <= i+3 {
|
||||
return mdStates, fmt.Errorf("error parsing %s: entry for %s has fewer lines than expected", statusfile, currentMD)
|
||||
}
|
||||
|
||||
active, total, size, err := evalStatusline(lines[i+1]) // parse statusline, always present
|
||||
|
||||
if err != nil {
|
||||
return mdStates, fmt.Errorf("error parsing %s: %s", statusfile, err)
|
||||
}
|
||||
|
||||
// Now get the number of synced blocks.
|
||||
var syncedBlocks int64
|
||||
|
||||
// Get the line number of the syncing-line.
|
||||
var j int
|
||||
if strings.Contains(lines[i+2], "bitmap") { // then skip the bitmap line
|
||||
j = i + 3
|
||||
} else {
|
||||
j = i + 2
|
||||
}
|
||||
|
||||
// If device is syncing at the moment, get the number of currently synced bytes,
|
||||
// otherwise that number equals the size of the device.
|
||||
if strings.Contains(lines[j], "recovery") || strings.Contains(lines[j], "resync") {
|
||||
syncedBlocks, err = evalBuildline(lines[j])
|
||||
if err != nil {
|
||||
return mdStates, fmt.Errorf("error parsing %s: %s", statusfile, err)
|
||||
}
|
||||
} else {
|
||||
syncedBlocks = size
|
||||
}
|
||||
|
||||
mdStates = append(mdStates, mdStatus{currentMD, isActive, active, total, size, syncedBlocks})
|
||||
|
||||
}
|
||||
|
||||
return mdStates, nil
|
||||
}
|
||||
|
||||
// Just returns the pointer to an empty struct as we only use throwaway-metrics.
|
||||
func NewMdadmCollector() (Collector, error) {
|
||||
return &mdadmCollector{}, nil
|
||||
}
|
||||
|
||||
var (
|
||||
isActiveDesc = prometheus.NewDesc(
|
||||
prometheus.BuildFQName(Namespace, "md", "is_active"),
|
||||
"Indicator whether the md-device is active or not.",
|
||||
[]string{"device"},
|
||||
nil,
|
||||
)
|
||||
|
||||
disksActiveDesc = prometheus.NewDesc(
|
||||
prometheus.BuildFQName(Namespace, "md", "disks_active"),
|
||||
"Number of active disks of device.",
|
||||
[]string{"device"},
|
||||
nil,
|
||||
)
|
||||
|
||||
disksTotalDesc = prometheus.NewDesc(
|
||||
prometheus.BuildFQName(Namespace, "md", "disks"),
|
||||
"Total number of disks of device.",
|
||||
[]string{"device"},
|
||||
nil,
|
||||
)
|
||||
|
||||
blocksTotalDesc = prometheus.NewDesc(
|
||||
prometheus.BuildFQName(Namespace, "md", "blocks"),
|
||||
"Total number of blocks on device.",
|
||||
[]string{"device"},
|
||||
nil,
|
||||
)
|
||||
|
||||
blocksSyncedDesc = prometheus.NewDesc(
|
||||
prometheus.BuildFQName(Namespace, "md", "blocks_synced"),
|
||||
"Number of blocks synced on device.",
|
||||
[]string{"device"},
|
||||
nil,
|
||||
)
|
||||
)
|
||||
|
||||
func (c *mdadmCollector) Update(ch chan<- prometheus.Metric) (err error) {
|
||||
// take care we don't crash on non-existent statusfiles
|
||||
_, err = os.Stat(statusfile)
|
||||
if os.IsNotExist(err) {
|
||||
// no such file or directory, nothing to do, just return
|
||||
return nil
|
||||
}
|
||||
|
||||
if err != nil { // now things get weird, better to return
|
||||
return err
|
||||
}
|
||||
|
||||
// First parse mdstat-file...
|
||||
mdstate, err := parseMdstat(statusfile)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error parsing %s: %s", statusfile, err)
|
||||
}
|
||||
|
||||
// ... and then plug the result into the metrics to be exported.
|
||||
var isActiveFloat float64
|
||||
for _, mds := range mdstate {
|
||||
|
||||
log.Debugf("collecting metrics for device %s", mds.mdName)
|
||||
|
||||
if mds.isActive {
|
||||
isActiveFloat = 1
|
||||
} else {
|
||||
isActiveFloat = 0
|
||||
}
|
||||
|
||||
ch <- prometheus.MustNewConstMetric(
|
||||
isActiveDesc,
|
||||
prometheus.GaugeValue,
|
||||
isActiveFloat,
|
||||
mds.mdName,
|
||||
)
|
||||
|
||||
ch <- prometheus.MustNewConstMetric(
|
||||
disksActiveDesc,
|
||||
prometheus.GaugeValue,
|
||||
float64(mds.disksActive),
|
||||
mds.mdName,
|
||||
)
|
||||
|
||||
ch <- prometheus.MustNewConstMetric(
|
||||
disksTotalDesc,
|
||||
prometheus.GaugeValue,
|
||||
float64(mds.disksTotal),
|
||||
mds.mdName,
|
||||
)
|
||||
|
||||
ch <- prometheus.MustNewConstMetric(
|
||||
blocksTotalDesc,
|
||||
prometheus.GaugeValue,
|
||||
float64(mds.blocksTotal),
|
||||
mds.mdName,
|
||||
)
|
||||
|
||||
ch <- prometheus.MustNewConstMetric(
|
||||
blocksSyncedDesc,
|
||||
prometheus.GaugeValue,
|
||||
float64(mds.blocksSynced),
|
||||
mds.mdName,
|
||||
)
|
||||
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
|
@ -0,0 +1,33 @@
|
|||
package collector
|
||||
|
||||
import (
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestMdadm(t *testing.T) {
|
||||
mdStates, err := parseMdstat("fixtures/mdstat")
|
||||
|
||||
if err != nil {
|
||||
t.Fatalf("parsing of reference-file failed entirely: %s", err)
|
||||
}
|
||||
|
||||
refs := map[string]mdStatus{
|
||||
"md3": mdStatus{"md3", true, 8, 8, 5853468288, 5853468288},
|
||||
"md127": mdStatus{"md127", true, 2, 2, 312319552, 312319552},
|
||||
"md0": mdStatus{"md0", true, 2, 2, 248896, 248896},
|
||||
"md4": mdStatus{"md4", false, 2, 2, 4883648, 4883648},
|
||||
"md6": mdStatus{"md6", true, 1, 2, 195310144, 16775552},
|
||||
"md8": mdStatus{"md8", true, 2, 2, 195310144, 16775552},
|
||||
"md7": mdStatus{"md7", true, 3, 4, 7813735424, 7813735424},
|
||||
}
|
||||
|
||||
for _, md := range mdStates {
|
||||
if md != refs[md.mdName] {
|
||||
t.Errorf("failed parsing md-device %s correctly: want %v, got %v", md.mdName, refs[md.mdName], md)
|
||||
}
|
||||
}
|
||||
|
||||
if len(mdStates) != len(refs) {
|
||||
t.Errorf("expected number of parsed md-device to be %s, but was %s", len(refs), len(mdStates))
|
||||
}
|
||||
}
|
|
@ -28,7 +28,7 @@ var (
|
|||
memProfile = flag.String("debug.memprofile-file", "", "Write memory profile to this file upon receipt of SIGUSR1.")
|
||||
listenAddress = flag.String("web.listen-address", ":9100", "Address on which to expose metrics and web interface.")
|
||||
metricsPath = flag.String("web.telemetry-path", "/metrics", "Path under which to expose metrics.")
|
||||
enabledCollectors = flag.String("collectors.enabled", "diskstats,filefd,filesystem,loadavg,meminfo,netdev,netstat,sockstat,stat,textfile,time,uname", "Comma-separated list of collectors to use.")
|
||||
enabledCollectors = flag.String("collectors.enabled", "diskstats,filefd,filesystem,loadavg,mdadm,meminfo,netdev,netstat,sockstat,stat,textfile,time,uname", "Comma-separated list of collectors to use.")
|
||||
printCollectors = flag.Bool("collectors.print", false, "If true, print available collectors and exit.")
|
||||
authUser = flag.String("auth.user", "", "Username for basic auth.")
|
||||
authPass = flag.String("auth.pass", "", "Password for basic auth.")
|
||||
|
|
Loading…
Reference in New Issue