2013-05-07 14:40:10 +00:00
package main
import (
2014-02-18 11:35:11 +00:00
"encoding/json"
2013-05-07 14:40:10 +00:00
"flag"
2014-06-04 13:09:33 +00:00
"fmt"
2014-02-18 11:35:11 +00:00
"io/ioutil"
2013-05-07 14:40:10 +00:00
"log"
2014-02-18 11:35:11 +00:00
"net/http"
"os"
"os/signal"
"runtime/pprof"
2014-06-04 11:12:34 +00:00
"strings"
2014-02-18 11:35:11 +00:00
"sync"
"syscall"
"time"
2014-02-07 16:09:39 +00:00
2014-06-04 11:12:34 +00:00
"github.com/golang/glog"
2014-02-18 11:35:11 +00:00
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/exp"
"github.com/prometheus/node_exporter/collector"
2013-05-07 14:40:10 +00:00
)
var (
2014-06-04 11:12:34 +00:00
configFile = flag . String ( "config" , "node_exporter.conf" , "config file." )
memProfile = flag . String ( "memprofile" , "" , "write memory profile to this file" )
listeningAddress = flag . String ( "listen" , ":8080" , "address to listen on" )
2014-06-05 19:44:44 +00:00
enabledCollectors = flag . String ( "enabledCollectors" , "attributes,diskstats,filesystem,loadavg,meminfo,netdev" , "comma-seperated list of collectors to use" )
2014-06-04 13:09:33 +00:00
printCollectors = flag . Bool ( "printCollectors" , false , "If true, print available collectors and exit" )
2014-06-04 11:12:34 +00:00
interval = flag . Duration ( "interval" , 60 * time . Second , "refresh interval" )
scrapeDurations = prometheus . NewDefaultHistogram ( )
metricsUpdated = prometheus . NewGauge ( )
2013-05-07 14:40:10 +00:00
)
func main ( ) {
flag . Parse ( )
2014-06-04 13:09:33 +00:00
if * printCollectors {
fmt . Printf ( "Available collectors:\n" )
for n , _ := range collector . Factories {
fmt . Printf ( " - %s\n" , n )
}
return
}
2014-02-18 11:35:11 +00:00
registry := prometheus . NewRegistry ( )
collectors , err := loadCollectors ( * configFile , registry )
2013-05-07 14:40:10 +00:00
if err != nil {
2014-02-18 11:35:11 +00:00
log . Fatalf ( "Couldn't load config and collectors: %s" , err )
2013-05-07 14:40:10 +00:00
}
2014-02-18 11:35:11 +00:00
registry . Register ( "node_exporter_scrape_duration_seconds" , "node_exporter: Duration of a scrape job." , prometheus . NilLabels , scrapeDurations )
registry . Register ( "node_exporter_metrics_updated" , "node_exporter: Number of metrics updated." , prometheus . NilLabels , metricsUpdated )
2014-06-04 11:12:34 +00:00
glog . Infof ( "Enabled collectors:" )
for n , _ := range collectors {
glog . Infof ( " - %s" , n )
2014-02-07 16:09:39 +00:00
}
2014-02-18 11:35:11 +00:00
sigHup := make ( chan os . Signal )
sigUsr1 := make ( chan os . Signal )
signal . Notify ( sigHup , syscall . SIGHUP )
signal . Notify ( sigUsr1 , syscall . SIGUSR1 )
go serveStatus ( registry )
2014-06-04 11:12:34 +00:00
glog . Infof ( "Starting initial collection" )
2014-05-23 12:07:34 +00:00
collect ( collectors )
2014-02-18 11:35:11 +00:00
tick := time . Tick ( * interval )
for {
select {
case <- sigHup :
collectors , err = loadCollectors ( * configFile , registry )
if err != nil {
log . Fatalf ( "Couldn't load config and collectors: %s" , err )
}
2014-06-04 11:12:34 +00:00
glog . Infof ( "Reloaded collectors and config" )
2014-02-18 11:35:11 +00:00
tick = time . Tick ( * interval )
case <- tick :
2014-06-04 11:12:34 +00:00
glog . Infof ( "Starting new interval" )
2014-05-23 12:07:34 +00:00
collect ( collectors )
2014-02-18 11:35:11 +00:00
case <- sigUsr1 :
2014-06-04 11:12:34 +00:00
glog . Infof ( "got signal" )
2014-02-18 11:35:11 +00:00
if * memProfile != "" {
2014-06-04 11:12:34 +00:00
glog . Infof ( "Writing memory profile to %s" , * memProfile )
2014-02-18 11:35:11 +00:00
f , err := os . Create ( * memProfile )
if err != nil {
log . Fatal ( err )
}
pprof . WriteHeapProfile ( f )
f . Close ( )
}
}
}
}
2014-06-04 11:12:34 +00:00
func loadCollectors ( file string , registry prometheus . Registry ) ( map [ string ] collector . Collector , error ) {
collectors := map [ string ] collector . Collector { }
2014-02-18 11:35:11 +00:00
config , err := getConfig ( file )
if err != nil {
log . Fatalf ( "Couldn't read config %s: %s" , file , err )
}
2014-06-04 11:12:34 +00:00
for _ , name := range strings . Split ( * enabledCollectors , "," ) {
fn , ok := collector . Factories [ name ]
if ! ok {
log . Fatalf ( "Collector '%s' not available" , name )
}
2014-02-18 11:35:11 +00:00
c , err := fn ( * config , registry )
if err != nil {
return nil , err
}
2014-06-04 11:12:34 +00:00
collectors [ name ] = c
2014-02-18 11:35:11 +00:00
}
return collectors , nil
}
func getConfig ( file string ) ( * collector . Config , error ) {
config := & collector . Config { }
2014-06-04 11:12:34 +00:00
glog . Infof ( "Reading config %s" , * configFile )
2014-02-18 11:35:11 +00:00
bytes , err := ioutil . ReadFile ( * configFile )
if err != nil {
return nil , err
}
return config , json . Unmarshal ( bytes , & config )
}
// serveStatus mounts the registry's handler at prometheus.ExpositionResource
// and serves it on the -listen address. It blocks while the server runs and
// terminates the process if the server cannot start or stops.
func serveStatus(registry prometheus.Registry) {
	exp.Handle(prometheus.ExpositionResource, registry.Handler())
	// ListenAndServe only returns on failure (for example when the address
	// is already in use); without this check the exporter would keep
	// collecting with nothing able to scrape it.
	if err := http.ListenAndServe(*listeningAddress, exp.DefaultCoarseMux); err != nil {
		log.Fatalf("Couldn't serve metrics on %s: %s", *listeningAddress, err)
	}
}
2014-06-04 11:12:34 +00:00
func collect ( collectors map [ string ] collector . Collector ) {
2014-05-23 12:07:34 +00:00
wg := sync . WaitGroup { }
wg . Add ( len ( collectors ) )
2014-06-04 11:12:34 +00:00
for n , c := range collectors {
go func ( n string , c collector . Collector ) {
Execute ( n , c )
2014-05-23 12:07:34 +00:00
wg . Done ( )
2014-06-04 11:12:34 +00:00
} ( n , c )
2014-05-23 12:07:34 +00:00
}
wg . Wait ( )
}
2014-06-04 11:12:34 +00:00
func Execute ( name string , c collector . Collector ) {
2014-02-18 11:35:11 +00:00
begin := time . Now ( )
updates , err := c . Update ( )
duration := time . Since ( begin )
label := map [ string ] string {
2014-06-04 11:12:34 +00:00
"collector" : name ,
2014-02-18 11:35:11 +00:00
}
if err != nil {
2014-06-04 11:12:34 +00:00
glog . Infof ( "ERROR: %s failed after %fs: %s" , name , duration . Seconds ( ) , err )
2014-02-18 11:35:11 +00:00
label [ "result" ] = "error"
} else {
2014-06-04 11:12:34 +00:00
glog . Infof ( "OK: %s success after %fs." , name , duration . Seconds ( ) )
2014-02-18 11:35:11 +00:00
label [ "result" ] = "success"
}
scrapeDurations . Add ( label , duration . Seconds ( ) )
metricsUpdated . Set ( label , float64 ( updates ) )
2013-05-07 14:40:10 +00:00
}