Merge pull request #12 from brian-brazil/seperate-modules

Split native collector into it's component parts and make them enablable...
This commit is contained in:
juliusv 2014-06-05 15:12:53 +02:00
commit 89a6a10bc8
14 changed files with 756 additions and 516 deletions

View File

@ -1,25 +1,26 @@
# node_exporter
Prometheus exporter with plugable metric collectors.
Prometheus exporter with pluggable metric collectors.
## Available collectors
By default it will only include the NativeCollector.
By default the build will only include the native collectors
that expose information from /proc.
To include other collectors, specify the build tags lile this:
go build -tags 'ganglia runit' node_exporter.go
Which collectors are used is controlled by the --enabledCollectors flag.
### NativeCollector
Provides metrics for load, seconds since last login and a list of tags
read from `node_exporter.conf`.
To disable the native collector, use build tag `nonative`.
### GmondCollector (tag: ganglia)

43
collector/attributes.go Normal file
View File

@ -0,0 +1,43 @@
// +build !noattributes
package collector
import (
"github.com/golang/glog"
"github.com/prometheus/client_golang/prometheus"
)
var (
attributes = prometheus.NewGauge()
)
type attributesCollector struct {
registry prometheus.Registry
config Config
}
func init() {
Factories["attributes"] = NewAttributesCollector
}
// Takes a config struct and prometheus registry and returns a new Collector exposing
// labels from the config.
func NewAttributesCollector(config Config, registry prometheus.Registry) (Collector, error) {
c := attributesCollector{
config: config,
registry: registry,
}
registry.Register(
"node_attributes",
"node_exporter attributes",
prometheus.NilLabels,
attributes,
)
return &c, nil
}
func (c *attributesCollector) Update() (updates int, err error) {
glog.V(1).Info("Set node_attributes{%v}: 1", c.config.Attributes)
attributes.Set(c.config.Attributes, 1)
return updates, err
}

View File

@ -5,15 +5,12 @@ import (
"github.com/prometheus/client_golang/prometheus"
)
var Factories []func(Config, prometheus.Registry) (Collector, error)
var Factories = make(map[string]func(Config, prometheus.Registry) (Collector, error))
// Interface a collector has to implement.
type Collector interface {
// Get new metrics and expose them via prometheus registry.
Update() (n int, err error)
// Returns the name of the collector.
Name() string
}
type Config struct {

131
collector/diskstats.go Normal file
View File

@ -0,0 +1,131 @@
// +build !nonative
package collector
import (
"bufio"
"flag"
"fmt"
"io"
"os"
"regexp"
"strconv"
"strings"
"github.com/golang/glog"
"github.com/prometheus/client_golang/prometheus"
)
const (
procDiskStats = "/proc/diskstats"
)
type diskStat struct {
name string
metric prometheus.Metric
documentation string
}
var (
ignoredDevices = flag.String("diskstatsIgnoredDevices", "^(ram|loop|[hs]d[a-z])\\d+$", "Regexp of devices to ignore for diskstats.")
// Docs from https://www.kernel.org/doc/Documentation/iostats.txt
diskStatsMetrics = []diskStat{
{"reads_completed", prometheus.NewCounter(), "The total number of reads completed successfully."},
{"reads_merged", prometheus.NewCounter(), "The number of reads merged. See https://www.kernel.org/doc/Documentation/iostats.txt"},
{"sectors_read", prometheus.NewCounter(), "The total number of sectors read successfully."},
{"read_time_ms", prometheus.NewCounter(), "the total number of milliseconds spent by all reads."},
{"writes_completed", prometheus.NewCounter(), "The total number of writes completed successfully."},
{"writes_merged", prometheus.NewCounter(), "The number of writes merged. See https://www.kernel.org/doc/Documentation/iostats.txt"},
{"sectors_written", prometheus.NewCounter(), "The total number of sectors written successfully."},
{"write_time_ms", prometheus.NewCounter(), "This is the total number of milliseconds spent by all writes."},
{"io_now", prometheus.NewGauge(), "The number of I/Os currently in progress."},
{"io_time_ms", prometheus.NewCounter(), "Milliseconds spent doing I/Os."},
{"io_time_weighted", prometheus.NewCounter(), "The weighted # of milliseconds spent doing I/Os. See https://www.kernel.org/doc/Documentation/iostats.txt"},
}
)
type diskstatsCollector struct {
registry prometheus.Registry
config Config
ignoredDevicesPattern *regexp.Regexp
}
func init() {
Factories["diskstats"] = NewDiskstatsCollector
}
// Takes a config struct and prometheus registry and returns a new Collector exposing
// disk device stats.
func NewDiskstatsCollector(config Config, registry prometheus.Registry) (Collector, error) {
c := diskstatsCollector{
config: config,
registry: registry,
ignoredDevicesPattern: regexp.MustCompile(*ignoredDevices),
}
for _, v := range diskStatsMetrics {
registry.Register(
"node_disk_"+v.name,
v.documentation,
prometheus.NilLabels,
v.metric,
)
}
return &c, nil
}
func (c *diskstatsCollector) Update() (updates int, err error) {
diskStats, err := getDiskStats()
if err != nil {
return updates, fmt.Errorf("Couldn't get diskstats: %s", err)
}
for dev, stats := range diskStats {
if c.ignoredDevicesPattern.MatchString(dev) {
glog.V(1).Infof("Ignoring device: %s", dev)
continue
}
for k, value := range stats {
updates++
v, err := strconv.ParseFloat(value, 64)
if err != nil {
return updates, fmt.Errorf("Invalid value %s in diskstats: %s", value, err)
}
labels := map[string]string{"device": dev}
counter, ok := diskStatsMetrics[k].metric.(prometheus.Counter)
if ok {
counter.Set(labels, v)
} else {
var gauge = diskStatsMetrics[k].metric.(prometheus.Gauge)
gauge.Set(labels, v)
}
}
}
return updates, err
}
func getDiskStats() (map[string]map[int]string, error) {
file, err := os.Open(procDiskStats)
if err != nil {
return nil, err
}
return parseDiskStats(file)
}
func parseDiskStats(r io.ReadCloser) (map[string]map[int]string, error) {
defer r.Close()
diskStats := map[string]map[int]string{}
scanner := bufio.NewScanner(r)
for scanner.Scan() {
parts := strings.Fields(string(scanner.Text()))
if len(parts) != len(diskStatsMetrics)+3 { // we strip major, minor and dev
return nil, fmt.Errorf("Invalid line in %s: %s", procDiskStats, scanner.Text())
}
dev := parts[2]
diskStats[dev] = map[int]string{}
for i, v := range parts[3:] {
diskStats[dev][i] = v
}
}
return diskStats, nil
}

View File

@ -11,6 +11,7 @@ import (
"regexp"
"time"
"github.com/golang/glog"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/node_exporter/collector/ganglia"
)
@ -23,14 +24,13 @@ const (
)
type gmondCollector struct {
name string
Metrics map[string]prometheus.Gauge
config Config
registry prometheus.Registry
}
func init() {
Factories = append(Factories, NewGmondCollector)
Factories["gmond"] = NewGmondCollector
}
var illegalCharsRE = regexp.MustCompile(`[^a-zA-Z0-9_]`)
@ -38,7 +38,6 @@ var illegalCharsRE = regexp.MustCompile(`[^a-zA-Z0-9_]`)
// Takes a config struct and prometheus registry and returns a new Collector scraping ganglia.
func NewGmondCollector(config Config, registry prometheus.Registry) (Collector, error) {
c := gmondCollector{
name: "gmond_collector",
config: config,
Metrics: make(map[string]prometheus.Gauge),
registry: registry,
@ -47,8 +46,6 @@ func NewGmondCollector(config Config, registry prometheus.Registry) (Collector,
return &c, nil
}
func (c *gmondCollector) Name() string { return c.name }
func (c *gmondCollector) setMetric(name string, labels map[string]string, metric ganglia.Metric) {
if _, ok := c.Metrics[name]; !ok {
var desc string
@ -64,18 +61,18 @@ func (c *gmondCollector) setMetric(name string, labels map[string]string, metric
break
}
}
debug(c.Name(), "Register %s: %s", name, desc)
glog.V(1).Infof("Register %s: %s", name, desc)
gauge := prometheus.NewGauge()
c.Metrics[name] = gauge
c.registry.Register(name, desc, prometheus.NilLabels, gauge) // one gauge per metric!
}
debug(c.Name(), "Set %s{%s}: %f", name, labels, metric.Value)
glog.V(1).Infof("Set %s{%s}: %f", name, labels, metric.Value)
c.Metrics[name].Set(labels, metric.Value)
}
func (c *gmondCollector) Update() (updates int, err error) {
conn, err := net.Dial(gangliaProto, gangliaAddress)
debug(c.Name(), "gmondCollector Update")
glog.V(1).Infof("gmondCollector Update")
if err != nil {
return updates, fmt.Errorf("Can't connect to gmond: %s", err)
}

View File

@ -1,22 +1,11 @@
package collector
import (
"flag"
"fmt"
"log"
"strconv"
"strings"
)
var verbose = flag.Bool("verbose", false, "Verbose output.")
func debug(name string, format string, a ...interface{}) {
if *verbose {
f := fmt.Sprintf("%s: %s", name, format)
log.Printf(f, a...)
}
}
func splitToInts(str string, sep string) (ints []int, err error) {
for _, part := range strings.Split(str, sep) {
i, err := strconv.Atoi(part)

116
collector/interrupts.go Normal file
View File

@ -0,0 +1,116 @@
// +build !nointerrupts
package collector
import (
"bufio"
"fmt"
"io"
"os"
"strconv"
"strings"
"github.com/prometheus/client_golang/prometheus"
)
const (
procInterrupts = "/proc/interrupts"
)
var (
interruptsMetric = prometheus.NewCounter()
)
type interruptsCollector struct {
registry prometheus.Registry
config Config
}
func init() {
Factories["interrupts"] = NewInterruptsCollector
}
// Takes a config struct and prometheus registry and returns a new Collector exposing
// interrupts stats
func NewInterruptsCollector(config Config, registry prometheus.Registry) (Collector, error) {
c := interruptsCollector{
config: config,
registry: registry,
}
registry.Register(
"node_interrupts",
"Interrupt details from /proc/interrupts",
prometheus.NilLabels,
interruptsMetric,
)
return &c, nil
}
func (c *interruptsCollector) Update() (updates int, err error) {
interrupts, err := getInterrupts()
if err != nil {
return updates, fmt.Errorf("Couldn't get interrupts: %s", err)
}
for name, interrupt := range interrupts {
for cpuNo, value := range interrupt.values {
updates++
fv, err := strconv.ParseFloat(value, 64)
if err != nil {
return updates, fmt.Errorf("Invalid value %s in interrupts: %s", value, err)
}
labels := map[string]string{
"CPU": strconv.Itoa(cpuNo),
"type": name,
"info": interrupt.info,
"devices": interrupt.devices,
}
interruptsMetric.Set(labels, fv)
}
}
return updates, err
}
type interrupt struct {
info string
devices string
values []string
}
func getInterrupts() (map[string]interrupt, error) {
file, err := os.Open(procInterrupts)
if err != nil {
return nil, err
}
return parseInterrupts(file)
}
func parseInterrupts(r io.ReadCloser) (map[string]interrupt, error) {
defer r.Close()
interrupts := map[string]interrupt{}
scanner := bufio.NewScanner(r)
if !scanner.Scan() {
return nil, fmt.Errorf("%s empty", procInterrupts)
}
cpuNum := len(strings.Fields(string(scanner.Text()))) // one header per cpu
for scanner.Scan() {
line := scanner.Text()
parts := strings.Fields(string(line))
if len(parts) < cpuNum+2 { // irq + one column per cpu + details,
continue // we ignore ERR and MIS for now
}
intName := parts[0][:len(parts[0])-1] // remove trailing :
intr := interrupt{
values: parts[1:cpuNum],
}
if _, err := strconv.Atoi(intName); err == nil { // numeral interrupt
intr.info = parts[cpuNum+1]
intr.devices = strings.Join(parts[cpuNum+2:], " ")
} else {
intr.info = strings.Join(parts[cpuNum+1:], " ")
}
interrupts[intName] = intr
}
return interrupts, nil
}

105
collector/lastlogin.go Normal file
View File

@ -0,0 +1,105 @@
// +build !nolastLogin
package collector
import (
"bufio"
"fmt"
"io"
"os/exec"
"strings"
"time"
"github.com/golang/glog"
"github.com/prometheus/client_golang/prometheus"
)
var (
lastSeen = prometheus.NewGauge()
)
type lastLoginCollector struct {
registry prometheus.Registry
config Config
}
func init() {
Factories["lastlogin"] = NewLastLoginCollector
}
// Takes a config struct and prometheus registry and returns a new Collector exposing
// load, seconds since last login and a list of tags as specified by config.
func NewLastLoginCollector(config Config, registry prometheus.Registry) (Collector, error) {
c := lastLoginCollector{
config: config,
registry: registry,
}
registry.Register(
"node_last_login_time",
"The time of the last login.",
prometheus.NilLabels,
lastSeen,
)
return &c, nil
}
func (c *lastLoginCollector) Update() (updates int, err error) {
last, err := getLastLoginTime()
if err != nil {
return updates, fmt.Errorf("Couldn't get last seen: %s", err)
}
updates++
glog.V(1).Infof("Set node_last_login_time: %f", last)
lastSeen.Set(nil, last)
return updates, err
}
func getLastLoginTime() (float64, error) {
who := exec.Command("who", "/var/log/wtmp", "-l", "-u", "-s")
output, err := who.StdoutPipe()
if err != nil {
return 0, err
}
err = who.Start()
if err != nil {
return 0, err
}
reader := bufio.NewReader(output)
var last time.Time
for {
line, isPrefix, err := reader.ReadLine()
if err == io.EOF {
break
}
if isPrefix {
return 0, fmt.Errorf("line to long: %s(...)", line)
}
fields := strings.Fields(string(line))
lastDate := fields[2]
lastTime := fields[3]
dateParts, err := splitToInts(lastDate, "-") // 2013-04-16
if err != nil {
return 0, fmt.Errorf("Couldn't parse date in line '%s': %s", fields, err)
}
timeParts, err := splitToInts(lastTime, ":") // 11:33
if err != nil {
return 0, fmt.Errorf("Couldn't parse time in line '%s': %s", fields, err)
}
last_t := time.Date(dateParts[0], time.Month(dateParts[1]), dateParts[2], timeParts[0], timeParts[1], 0, 0, time.UTC)
last = last_t
}
err = who.Wait()
if err != nil {
return 0, err
}
return float64(last.Unix()), nil
}

76
collector/loadavg.go Normal file
View File

@ -0,0 +1,76 @@
// +build !noloadavg
package collector
import (
"fmt"
"io/ioutil"
"strconv"
"strings"
"github.com/golang/glog"
"github.com/prometheus/client_golang/prometheus"
)
const (
procLoad = "/proc/loadavg"
)
var (
load1 = prometheus.NewGauge()
)
type loadavgCollector struct {
registry prometheus.Registry
config Config
}
func init() {
Factories["loadavg"] = NewLoadavgCollector
}
// Takes a config struct and prometheus registry and returns a new Collector exposing
// load, seconds since last login and a list of tags as specified by config.
func NewLoadavgCollector(config Config, registry prometheus.Registry) (Collector, error) {
c := loadavgCollector{
config: config,
registry: registry,
}
registry.Register(
"node_load1",
"1m load average",
prometheus.NilLabels,
load1,
)
return &c, nil
}
func (c *loadavgCollector) Update() (updates int, err error) {
load, err := getLoad1()
if err != nil {
return updates, fmt.Errorf("Couldn't get load: %s", err)
}
updates++
glog.V(1).Infof("Set node_load: %f", load)
load1.Set(nil, load)
return updates, err
}
func getLoad1() (float64, error) {
data, err := ioutil.ReadFile(procLoad)
if err != nil {
return 0, err
}
return parseLoad(string(data))
}
func parseLoad(data string) (float64, error) {
parts := strings.Fields(data)
load, err := strconv.ParseFloat(parts[0], 64)
if err != nil {
return 0, fmt.Errorf("Could not parse load '%s': %s", parts[0], err)
}
return load, nil
}

101
collector/meminfo.go Normal file
View File

@ -0,0 +1,101 @@
// +build !nomeminfo
package collector
import (
"bufio"
"fmt"
"io"
"os"
"regexp"
"strconv"
"strings"
"github.com/golang/glog"
"github.com/prometheus/client_golang/prometheus"
)
const (
procMemInfo = "/proc/meminfo"
)
var (
memInfoMetrics = map[string]prometheus.Gauge{}
)
type meminfoCollector struct {
registry prometheus.Registry
config Config
}
func init() {
Factories["meminfo"] = NewMeminfoCollector
}
// Takes a config struct and prometheus registry and returns a new Collector exposing
// memory stats.
func NewMeminfoCollector(config Config, registry prometheus.Registry) (Collector, error) {
c := meminfoCollector{
config: config,
registry: registry,
}
return &c, nil
}
func (c *meminfoCollector) Update() (updates int, err error) {
memInfo, err := getMemInfo()
if err != nil {
return updates, fmt.Errorf("Couldn't get meminfo: %s", err)
}
glog.V(1).Infof("Set node_mem: %#v", memInfo)
for k, v := range memInfo {
if _, ok := memInfoMetrics[k]; !ok {
memInfoMetrics[k] = prometheus.NewGauge()
c.registry.Register(
"node_memory_"+k,
k+" from /proc/meminfo",
prometheus.NilLabels,
memInfoMetrics[k],
)
}
updates++
memInfoMetrics[k].Set(nil, v)
}
return updates, err
}
func getMemInfo() (map[string]float64, error) {
file, err := os.Open(procMemInfo)
if err != nil {
return nil, err
}
return parseMemInfo(file)
}
func parseMemInfo(r io.ReadCloser) (map[string]float64, error) {
defer r.Close()
memInfo := map[string]float64{}
scanner := bufio.NewScanner(r)
re := regexp.MustCompile("\\((.*)\\)")
for scanner.Scan() {
line := scanner.Text()
parts := strings.Fields(string(line))
fv, err := strconv.ParseFloat(parts[1], 64)
if err != nil {
return nil, fmt.Errorf("Invalid value in meminfo: %s", err)
}
switch len(parts) {
case 2: // no unit
case 3: // has unit, we presume kB
fv *= 1024
default:
return nil, fmt.Errorf("Invalid line in %s: %s", procMemInfo, line)
}
key := parts[0][:len(parts[0])-1] // remove trailing : from key
// Active(anon) -> Active_anon
key = re.ReplaceAllString(key, "_${1}")
memInfo[key] = fv
}
return memInfo, nil
}

View File

@ -1,454 +0,0 @@
// +build !nonative
package collector
import (
"bufio"
"fmt"
"io"
"io/ioutil"
"os"
"os/exec"
"regexp"
"strconv"
"strings"
"time"
"github.com/prometheus/client_golang/prometheus"
)
const (
procLoad = "/proc/loadavg"
procMemInfo = "/proc/meminfo"
procInterrupts = "/proc/interrupts"
procNetDev = "/proc/net/dev"
procDiskStats = "/proc/diskstats"
)
type diskStat struct {
name string
metric prometheus.Metric
documentation string
}
var (
// Docs from https://www.kernel.org/doc/Documentation/iostats.txt
diskStatsMetrics = []diskStat{
{"reads_completed", prometheus.NewCounter(), "The total number of reads completed successfully."},
{"reads_merged", prometheus.NewCounter(), "The number of reads merged. See https://www.kernel.org/doc/Documentation/iostats.txt"},
{"sectors_read", prometheus.NewCounter(), "The total number of sectors read successfully."},
{"read_time_ms", prometheus.NewCounter(), "the total number of milliseconds spent by all reads."},
{"writes_completed", prometheus.NewCounter(), "The total number of writes completed successfully."},
{"writes_merged", prometheus.NewCounter(), "The number of writes merged. See https://www.kernel.org/doc/Documentation/iostats.txt"},
{"sectors_written", prometheus.NewCounter(), "The total number of sectors written successfully."},
{"write_time_ms", prometheus.NewCounter(), "This is the total number of milliseconds spent by all writes."},
{"io_now", prometheus.NewGauge(), "The number of I/Os currently in progress."},
{"io_time_ms", prometheus.NewCounter(), "Milliseconds spent doing I/Os."},
{"io_time_weighted", prometheus.NewCounter(), "The weighted # of milliseconds spent doing I/Os. See https://www.kernel.org/doc/Documentation/iostats.txt"},
}
lastSeen = prometheus.NewGauge()
load1 = prometheus.NewGauge()
attributes = prometheus.NewGauge()
memInfoMetrics = map[string]prometheus.Gauge{}
netStatsMetrics = map[string]prometheus.Gauge{}
interruptsMetric = prometheus.NewCounter()
)
type nativeCollector struct {
registry prometheus.Registry
name string
config Config
}
func init() {
Factories = append(Factories, NewNativeCollector)
}
// Takes a config struct and prometheus registry and returns a new Collector exposing
// load, seconds since last login and a list of tags as specified by config.
func NewNativeCollector(config Config, registry prometheus.Registry) (Collector, error) {
c := nativeCollector{
name: "native_collector",
config: config,
registry: registry,
}
registry.Register(
"node_load1",
"1m load average",
prometheus.NilLabels,
load1,
)
registry.Register(
"node_last_login_time",
"The time of the last login.",
prometheus.NilLabels,
lastSeen,
)
registry.Register(
"node_attributes",
"node_exporter attributes",
prometheus.NilLabels,
attributes,
)
registry.Register(
"node_interrupts",
"Interrupt details from /proc/interrupts",
prometheus.NilLabels,
interruptsMetric,
)
for _, v := range diskStatsMetrics {
registry.Register(
"node_disk_"+v.name,
v.documentation,
prometheus.NilLabels,
v.metric,
)
}
return &c, nil
}
func (c *nativeCollector) Name() string { return c.name }
func (c *nativeCollector) Update() (updates int, err error) {
last, err := getLastLoginTime()
if err != nil {
return updates, fmt.Errorf("Couldn't get last seen: %s", err)
}
updates++
debug(c.Name(), "Set node_last_login_time: %f", last)
lastSeen.Set(nil, last)
load, err := getLoad1()
if err != nil {
return updates, fmt.Errorf("Couldn't get load: %s", err)
}
updates++
debug(c.Name(), "Set node_load: %f", load)
load1.Set(nil, load)
debug(c.Name(), "Set node_attributes{%v}: 1", c.config.Attributes)
attributes.Set(c.config.Attributes, 1)
memInfo, err := getMemInfo()
if err != nil {
return updates, fmt.Errorf("Couldn't get meminfo: %s", err)
}
debug(c.Name(), "Set node_mem: %#v", memInfo)
for k, v := range memInfo {
if _, ok := memInfoMetrics[k]; !ok {
memInfoMetrics[k] = prometheus.NewGauge()
c.registry.Register(
"node_memory_"+k,
k+" from /proc/meminfo",
prometheus.NilLabels,
memInfoMetrics[k],
)
}
updates++
memInfoMetrics[k].Set(nil, v)
}
interrupts, err := getInterrupts()
if err != nil {
return updates, fmt.Errorf("Couldn't get interrupts: %s", err)
}
for name, interrupt := range interrupts {
for cpuNo, value := range interrupt.values {
updates++
fv, err := strconv.ParseFloat(value, 64)
if err != nil {
return updates, fmt.Errorf("Invalid value %s in interrupts: %s", value, err)
}
labels := map[string]string{
"CPU": strconv.Itoa(cpuNo),
"type": name,
"info": interrupt.info,
"devices": interrupt.devices,
}
interruptsMetric.Set(labels, fv)
}
}
netStats, err := getNetStats()
if err != nil {
return updates, fmt.Errorf("Couldn't get netstats: %s", err)
}
for direction, devStats := range netStats {
for dev, stats := range devStats {
for t, value := range stats {
key := direction + "_" + t
if _, ok := netStatsMetrics[key]; !ok {
netStatsMetrics[key] = prometheus.NewGauge()
c.registry.Register(
"node_network_"+key,
t+" "+direction+" from /proc/net/dev",
prometheus.NilLabels,
netStatsMetrics[key],
)
}
updates++
v, err := strconv.ParseFloat(value, 64)
if err != nil {
return updates, fmt.Errorf("Invalid value %s in netstats: %s", value, err)
}
netStatsMetrics[key].Set(map[string]string{"device": dev}, v)
}
}
}
diskStats, err := getDiskStats()
if err != nil {
return updates, fmt.Errorf("Couldn't get diskstats: %s", err)
}
for dev, stats := range diskStats {
for k, value := range stats {
updates++
v, err := strconv.ParseFloat(value, 64)
if err != nil {
return updates, fmt.Errorf("Invalid value %s in diskstats: %s", value, err)
}
labels := map[string]string{"device": dev}
counter, ok := diskStatsMetrics[k].metric.(prometheus.Counter)
if ok {
counter.Set(labels, v)
} else {
var gauge = diskStatsMetrics[k].metric.(prometheus.Gauge)
gauge.Set(labels, v)
}
}
}
return updates, err
}
func getLoad1() (float64, error) {
data, err := ioutil.ReadFile(procLoad)
if err != nil {
return 0, err
}
return parseLoad(string(data))
}
func parseLoad(data string) (float64, error) {
parts := strings.Fields(data)
load, err := strconv.ParseFloat(parts[0], 64)
if err != nil {
return 0, fmt.Errorf("Could not parse load '%s': %s", parts[0], err)
}
return load, nil
}
func getLastLoginTime() (float64, error) {
who := exec.Command("who", "/var/log/wtmp", "-l", "-u", "-s")
output, err := who.StdoutPipe()
if err != nil {
return 0, err
}
err = who.Start()
if err != nil {
return 0, err
}
reader := bufio.NewReader(output)
var last time.Time
for {
line, isPrefix, err := reader.ReadLine()
if err == io.EOF {
break
}
if isPrefix {
return 0, fmt.Errorf("line to long: %s(...)", line)
}
fields := strings.Fields(string(line))
lastDate := fields[2]
lastTime := fields[3]
dateParts, err := splitToInts(lastDate, "-") // 2013-04-16
if err != nil {
return 0, fmt.Errorf("Couldn't parse date in line '%s': %s", fields, err)
}
timeParts, err := splitToInts(lastTime, ":") // 11:33
if err != nil {
return 0, fmt.Errorf("Couldn't parse time in line '%s': %s", fields, err)
}
last_t := time.Date(dateParts[0], time.Month(dateParts[1]), dateParts[2], timeParts[0], timeParts[1], 0, 0, time.UTC)
last = last_t
}
err = who.Wait()
if err != nil {
return 0, err
}
return float64(last.Unix()), nil
}
func getMemInfo() (map[string]float64, error) {
file, err := os.Open(procMemInfo)
if err != nil {
return nil, err
}
return parseMemInfo(file)
}
func parseMemInfo(r io.ReadCloser) (map[string]float64, error) {
defer r.Close()
memInfo := map[string]float64{}
scanner := bufio.NewScanner(r)
re := regexp.MustCompile("\\((.*)\\)")
for scanner.Scan() {
line := scanner.Text()
parts := strings.Fields(string(line))
fv, err := strconv.ParseFloat(parts[1], 64)
if err != nil {
return nil, fmt.Errorf("Invalid value in meminfo: %s", err)
}
switch len(parts) {
case 2: // no unit
case 3: // has unit, we presume kB
fv *= 1024
default:
return nil, fmt.Errorf("Invalid line in %s: %s", procMemInfo, line)
}
key := parts[0][:len(parts[0])-1] // remove trailing : from key
// Active(anon) -> Active_anon
key = re.ReplaceAllString(key, "_${1}")
memInfo[key] = fv
}
return memInfo, nil
}
type interrupt struct {
info string
devices string
values []string
}
func getInterrupts() (map[string]interrupt, error) {
file, err := os.Open(procInterrupts)
if err != nil {
return nil, err
}
return parseInterrupts(file)
}
func parseInterrupts(r io.ReadCloser) (map[string]interrupt, error) {
defer r.Close()
interrupts := map[string]interrupt{}
scanner := bufio.NewScanner(r)
if !scanner.Scan() {
return nil, fmt.Errorf("%s empty", procInterrupts)
}
cpuNum := len(strings.Fields(string(scanner.Text()))) // one header per cpu
for scanner.Scan() {
line := scanner.Text()
parts := strings.Fields(string(line))
if len(parts) < cpuNum+2 { // irq + one column per cpu + details,
continue // we ignore ERR and MIS for now
}
intName := parts[0][:len(parts[0])-1] // remove trailing :
intr := interrupt{
values: parts[1:cpuNum],
}
if _, err := strconv.Atoi(intName); err == nil { // numeral interrupt
intr.info = parts[cpuNum+1]
intr.devices = strings.Join(parts[cpuNum+2:], " ")
} else {
intr.info = strings.Join(parts[cpuNum+1:], " ")
}
interrupts[intName] = intr
}
return interrupts, nil
}
func getNetStats() (map[string]map[string]map[string]string, error) {
file, err := os.Open(procNetDev)
if err != nil {
return nil, err
}
return parseNetStats(file)
}
func parseNetStats(r io.ReadCloser) (map[string]map[string]map[string]string, error) {
defer r.Close()
netStats := map[string]map[string]map[string]string{}
netStats["transmit"] = map[string]map[string]string{}
netStats["receive"] = map[string]map[string]string{}
scanner := bufio.NewScanner(r)
scanner.Scan() // skip first header
scanner.Scan()
parts := strings.Split(string(scanner.Text()), "|")
if len(parts) != 3 { // interface + receive + transmit
return nil, fmt.Errorf("Invalid header line in %s: %s",
procNetDev, scanner.Text())
}
header := strings.Fields(parts[1])
for scanner.Scan() {
parts := strings.Fields(string(scanner.Text()))
if len(parts) != 2*len(header)+1 {
return nil, fmt.Errorf("Invalid line in %s: %s",
procNetDev, scanner.Text())
}
dev := parts[0][:len(parts[0])-1]
receive, err := parseNetDevLine(parts[1:len(header)+1], header)
if err != nil {
return nil, err
}
transmit, err := parseNetDevLine(parts[len(header)+1:], header)
if err != nil {
return nil, err
}
netStats["transmit"][dev] = transmit
netStats["receive"][dev] = receive
}
return netStats, nil
}
func parseNetDevLine(parts []string, header []string) (map[string]string, error) {
devStats := map[string]string{}
for i, v := range parts {
devStats[header[i]] = v
}
return devStats, nil
}
func getDiskStats() (map[string]map[int]string, error) {
file, err := os.Open(procDiskStats)
if err != nil {
return nil, err
}
return parseDiskStats(file)
}
func parseDiskStats(r io.ReadCloser) (map[string]map[int]string, error) {
defer r.Close()
diskStats := map[string]map[int]string{}
scanner := bufio.NewScanner(r)
for scanner.Scan() {
parts := strings.Fields(string(scanner.Text()))
if len(parts) != len(diskStatsMetrics)+3 { // we strip major, minor and dev
return nil, fmt.Errorf("Invalid line in %s: %s", procDiskStats, scanner.Text())
}
dev := parts[2]
diskStats[dev] = map[int]string{}
for i, v := range parts[3:] {
diskStats[dev][i] = v
}
}
return diskStats, nil
}

125
collector/netdev.go Normal file
View File

@ -0,0 +1,125 @@
// +build !nonetDev
package collector
import (
"bufio"
"fmt"
"io"
"os"
"strconv"
"strings"
"github.com/prometheus/client_golang/prometheus"
)
const (
procNetDev = "/proc/net/dev"
)
var (
netStatsMetrics = map[string]prometheus.Gauge{}
)
type netDevCollector struct {
registry prometheus.Registry
config Config
}
func init() {
Factories["netdev"] = NewNetDevCollector
}
// Takes a config struct and prometheus registry and returns a new Collector exposing
// network device stats.
func NewNetDevCollector(config Config, registry prometheus.Registry) (Collector, error) {
c := netDevCollector{
config: config,
registry: registry,
}
return &c, nil
}
func (c *netDevCollector) Update() (updates int, err error) {
netStats, err := getNetStats()
if err != nil {
return updates, fmt.Errorf("Couldn't get netstats: %s", err)
}
for direction, devStats := range netStats {
for dev, stats := range devStats {
for t, value := range stats {
key := direction + "_" + t
if _, ok := netStatsMetrics[key]; !ok {
netStatsMetrics[key] = prometheus.NewGauge()
c.registry.Register(
"node_network_"+key,
t+" "+direction+" from /proc/net/dev",
prometheus.NilLabels,
netStatsMetrics[key],
)
}
updates++
v, err := strconv.ParseFloat(value, 64)
if err != nil {
return updates, fmt.Errorf("Invalid value %s in netstats: %s", value, err)
}
netStatsMetrics[key].Set(map[string]string{"device": dev}, v)
}
}
}
return updates, err
}
func getNetStats() (map[string]map[string]map[string]string, error) {
file, err := os.Open(procNetDev)
if err != nil {
return nil, err
}
return parseNetStats(file)
}
func parseNetStats(r io.ReadCloser) (map[string]map[string]map[string]string, error) {
defer r.Close()
netStats := map[string]map[string]map[string]string{}
netStats["transmit"] = map[string]map[string]string{}
netStats["receive"] = map[string]map[string]string{}
scanner := bufio.NewScanner(r)
scanner.Scan() // skip first header
scanner.Scan()
parts := strings.Split(string(scanner.Text()), "|")
if len(parts) != 3 { // interface + receive + transmit
return nil, fmt.Errorf("Invalid header line in %s: %s",
procNetDev, scanner.Text())
}
header := strings.Fields(parts[1])
for scanner.Scan() {
parts := strings.Fields(string(scanner.Text()))
if len(parts) != 2*len(header)+1 {
return nil, fmt.Errorf("Invalid line in %s: %s",
procNetDev, scanner.Text())
}
dev := parts[0][:len(parts[0])-1]
receive, err := parseNetDevLine(parts[1:len(header)+1], header)
if err != nil {
return nil, err
}
transmit, err := parseNetDevLine(parts[len(header)+1:], header)
if err != nil {
return nil, err
}
netStats["transmit"][dev] = transmit
netStats["receive"][dev] = receive
}
return netStats, nil
}
func parseNetDevLine(parts []string, header []string) (map[string]string, error) {
devStats := map[string]string{}
for i, v := range parts {
devStats[header[i]] = v
}
return devStats, nil
}

View File

@ -3,12 +3,12 @@
package collector
import (
"github.com/golang/glog"
"github.com/prometheus/client_golang/prometheus"
"github.com/soundcloud/go-runit/runit"
)
type runitCollector struct {
name string
config Config
state prometheus.Gauge
stateDesired prometheus.Gauge
@ -16,12 +16,11 @@ type runitCollector struct {
}
func init() {
Factories = append(Factories, NewRunitCollector)
Factories["runit"] = NewRunitCollector
}
func NewRunitCollector(config Config, registry prometheus.Registry) (Collector, error) {
c := runitCollector{
name: "runit_collector",
config: config,
state: prometheus.NewGauge(),
stateDesired: prometheus.NewGauge(),
@ -52,8 +51,6 @@ func NewRunitCollector(config Config, registry prometheus.Registry) (Collector,
return &c, nil
}
func (c *runitCollector) Name() string { return c.name }
func (c *runitCollector) Update() (updates int, err error) {
services, err := runit.GetServices("/etc/service")
if err != nil {
@ -63,11 +60,11 @@ func (c *runitCollector) Update() (updates int, err error) {
for _, service := range services {
status, err := service.Status()
if err != nil {
debug(c.Name(), "Couldn't get status for %s: %s, skipping...", service.Name, err)
glog.V(1).Infof("Couldn't get status for %s: %s, skipping...", service.Name, err)
continue
}
debug(c.Name(), "%s is %d on pid %d for %d seconds", service.Name, status.State, status.Pid, status.Duration)
glog.V(1).Infof("%s is %d on pid %d for %d seconds", service.Name, status.State, status.Pid, status.Duration)
labels := map[string]string{
"service": service.Name,
}

View File

@ -3,32 +3,44 @@ package main
import (
"encoding/json"
"flag"
"fmt"
"io/ioutil"
"log"
"net/http"
"os"
"os/signal"
"runtime/pprof"
"strings"
"sync"
"syscall"
"time"
"github.com/golang/glog"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/exp"
"github.com/prometheus/node_exporter/collector"
)
var (
configFile = flag.String("config", "node_exporter.conf", "config file.")
memProfile = flag.String("memprofile", "", "write memory profile to this file")
listeningAddress = flag.String("listen", ":8080", "address to listen on")
interval = flag.Duration("interval", 60*time.Second, "refresh interval")
scrapeDurations = prometheus.NewDefaultHistogram()
metricsUpdated = prometheus.NewGauge()
configFile = flag.String("config", "node_exporter.conf", "config file.")
memProfile = flag.String("memprofile", "", "write memory profile to this file")
listeningAddress = flag.String("listen", ":8080", "address to listen on")
enabledCollectors = flag.String("enabledCollectors", "attributes,diskstats,loadavg,meminfo,netdev", "comma-seperated list of collectors to use")
printCollectors = flag.Bool("printCollectors", false, "If true, print available collectors and exit")
interval = flag.Duration("interval", 60*time.Second, "refresh interval")
scrapeDurations = prometheus.NewDefaultHistogram()
metricsUpdated = prometheus.NewGauge()
)
func main() {
flag.Parse()
if *printCollectors {
fmt.Printf("Available collectors:\n")
for n, _ := range collector.Factories {
fmt.Printf(" - %s\n", n)
}
return
}
registry := prometheus.NewRegistry()
collectors, err := loadCollectors(*configFile, registry)
if err != nil {
@ -38,9 +50,9 @@ func main() {
registry.Register("node_exporter_scrape_duration_seconds", "node_exporter: Duration of a scrape job.", prometheus.NilLabels, scrapeDurations)
registry.Register("node_exporter_metrics_updated", "node_exporter: Number of metrics updated.", prometheus.NilLabels, metricsUpdated)
log.Printf("Registered collectors:")
for _, c := range collectors {
log.Print(" - ", c.Name())
glog.Infof("Enabled collectors:")
for n, _ := range collectors {
glog.Infof(" - %s", n)
}
sigHup := make(chan os.Signal)
@ -50,7 +62,7 @@ func main() {
go serveStatus(registry)
log.Printf("Starting initial collection")
glog.Infof("Starting initial collection")
collect(collectors)
tick := time.Tick(*interval)
@ -61,17 +73,17 @@ func main() {
if err != nil {
log.Fatalf("Couldn't load config and collectors: %s", err)
}
log.Printf("Reloaded collectors and config")
glog.Infof("Reloaded collectors and config")
tick = time.Tick(*interval)
case <-tick:
log.Printf("Starting new interval")
glog.Infof("Starting new interval")
collect(collectors)
case <-sigUsr1:
log.Printf("got signal")
glog.Infof("got signal")
if *memProfile != "" {
log.Printf("Writing memory profile to %s", *memProfile)
glog.Infof("Writing memory profile to %s", *memProfile)
f, err := os.Create(*memProfile)
if err != nil {
log.Fatal(err)
@ -84,25 +96,29 @@ func main() {
}
func loadCollectors(file string, registry prometheus.Registry) ([]collector.Collector, error) {
collectors := []collector.Collector{}
func loadCollectors(file string, registry prometheus.Registry) (map[string]collector.Collector, error) {
collectors := map[string]collector.Collector{}
config, err := getConfig(file)
if err != nil {
log.Fatalf("Couldn't read config %s: %s", file, err)
}
for _, fn := range collector.Factories {
for _, name := range strings.Split(*enabledCollectors, ",") {
fn, ok := collector.Factories[name]
if !ok {
log.Fatalf("Collector '%s' not available", name)
}
c, err := fn(*config, registry)
if err != nil {
return nil, err
}
collectors = append(collectors, c)
collectors[name] = c
}
return collectors, nil
}
func getConfig(file string) (*collector.Config, error) {
config := &collector.Config{}
log.Printf("Reading config %s", *configFile)
glog.Infof("Reading config %s", *configFile)
bytes, err := ioutil.ReadFile(*configFile)
if err != nil {
return nil, err
@ -115,31 +131,31 @@ func serveStatus(registry prometheus.Registry) {
http.ListenAndServe(*listeningAddress, exp.DefaultCoarseMux)
}
func collect(collectors []collector.Collector) {
func collect(collectors map[string]collector.Collector) {
wg := sync.WaitGroup{}
wg.Add(len(collectors))
for _, c := range collectors {
go func(c collector.Collector) {
Execute(c)
for n, c := range collectors {
go func(n string, c collector.Collector) {
Execute(n, c)
wg.Done()
}(c)
}(n, c)
}
wg.Wait()
}
func Execute(c collector.Collector) {
func Execute(name string, c collector.Collector) {
begin := time.Now()
updates, err := c.Update()
duration := time.Since(begin)
label := map[string]string{
"collector": c.Name(),
"collector": name,
}
if err != nil {
log.Printf("ERROR: %s failed after %fs: %s", c.Name(), duration.Seconds(), err)
glog.Infof("ERROR: %s failed after %fs: %s", name, duration.Seconds(), err)
label["result"] = "error"
} else {
log.Printf("OK: %s success after %fs.", c.Name(), duration.Seconds())
glog.Infof("OK: %s success after %fs.", name, duration.Seconds())
label["result"] = "success"
}
scrapeDurations.Add(label, duration.Seconds())